fix errorsHEAD master

There are multiple subtle errors in the previous version, both in the codes and in the description of the BNF format. This version should fix some problems now. This version can successfully parse the grammar of its own grammar format, which is quite nice. See test/check_reader.c for parsing this format.
author: JSDurand <mmemmew@gmail.com> 2022-02-08 00:29:10 +0800
committer: JSDurand <mmemmew@gmail.com> 2022-02-08 12:33:05 +0800
commit: 5426d9e2a6b820e34809d639838b26643df9ab17 (patch)
tree: 111f2b478b671092e3f2e64a6171970b8a5cdf99 /src/grammar.c
parent: aaa12504c6095b2cdfa213a3d4b269bbd5e7038a (diff)
1 files changed, 131 insertions, 19 deletions
diff --git a/src/grammar.c b/src/grammar.c
index 1ea52b0..55a1e07 100644
--- a/src/grammar.c
+++ b/src/grammar.c
@@ -63,6 +63,13 @@ new_ptd(str *user_name, str *raw_name, dfa *dfap)
   return result;
 }
 
+P_ATTR
+str *
+ptd_user_name(PTD *p)
+{
+  return p->user_name;
+}
+
 void
 destroy_ptd(PTD *p, int flag)
 {
@@ -91,14 +98,8 @@ destroy_ptd_no_free(void *e)
   destroy_ptd((PTD *)e, 0);
 }
 
-/* REVIEW: Add some statistic counts to assist the hash tables.  For
-   example, store the total number of terminals as an integer; then
-   the hash table can be hinted at initialization to contain that many
-   elements, which can reduce the number of time a hash table needs to
-   expand and re-insert all its keys, which then helps the
-   performance.  But this is not the top priority thing to do, and
-   might be postponed until a proto-type of the parser generator is
-   usable already. */
+/* FIXME: Add a list of terminals, so that we don't need to use hash
+   tables. */
 
 /* rule_grps and names should have the same length */
 struct Grammar_s {
@@ -160,20 +161,85 @@ destroy_rule_group_free_first(void *rule_grp)
 static void
 print_sep()
 {
-  printf(", ");
+  printf(",\n");
 }
 
 static void
-print_rule_group(void *rule_grp)
+print_rule_group(Grammar *g, void *rule_grp)
 {
   Rule_group *rg = (Rule_group *) rule_grp;
 
+  char s[5];
+  str *strp = (str *) new_utf8(s, 5);
+
   for (int i = 0; i < list_length(rg->rights); i++) {
-    List *str = (List *) list_nth(rg->rights, i);
-    printf("Rule %lu => ", rg->left);
-    map_list_between(str, print_tnt, print_sep);
+    /* fleprintf("i = %d\n", i); */
+    List *strls = (List *) list_nth(rg->rights, i);
+    /* printf("Rule "); */
+    for (int j = 0;
+         (UNUM) j < ((cpa *)list_nth(g->names, rg->left))->size;
+         j++) {
+
+      if (encode
+          (*(((cpa *)list_nth(g->names, rg->left))->array+j),
+           strp)) {
+        destroy_str(strp, 0);
+        fleprintf0("Fail to encode!\n");
+        return;
+      }
+
+      *(s+str_length(strp)) = 0;
+      printf("%s", s);
+      str_set_length(strp, 5);
+    }
+    printf(" := ");
+
+    for (int j = 0; j < list_length(strls); j++) {
+      /* fleprintf("j = %d, len = %ld\n", j, list_length(strls)); */
+      TNT *tntp = (TNT *) list_nth(strls, j);
+      PTD *ptdp = NULL;
+      switch (tntp->type) {
+      case TERMINAL:
+        printf("'");
+        if (encode
+            (tntp->data.t,
+             strp)) {
+          destroy_str(strp, 0);
+          fleprintf0("Fail to encode!\n");
+          return;
+        }
+        printf("%s'", s);
+        str_set_length(strp, 5);
+        break;
+      case NONTERMINAL:
+        for (int k = 0;
+             (UNUM) k < ((cpa *)list_nth(g->names,
+                                         tntp->data.nt))->size;
+             k++) {
+          if (encode
+              (*(((cpa *)list_nth(g->names, tntp->data.nt))->array+k),
+               strp)) {
+            destroy_str(strp, 0);
+            fleprintf0("Fail to encode!\n");
+            return;
+          }
+          printf("%s", s);
+          str_set_length(strp, 5);
+        }
+        break;
+      default:
+        /* predicate */
+        ptdp = (PTD *) list_nth(g->predicates, tntp->data.pt);
+        /* fleprintf("pt = %lu\n", tntp->data.pt); */
+        printf("[%s]", get_data(ptdp->user_name));
+        break;
+      }
+      if (j+1<list_length(strls)) printf(", ");
+    }
     printf("\n");
   }
+
+  destroy_str((str*) strp, 0);
 }
 
 TNT *
@@ -386,7 +452,7 @@ find_in_cpa_list(CCR_MOD(NUM *) string, NUM size,
 }
 
 void
-print_name(void *element)
+eprint_name(void *element)
 {
   cpa *array = (cpa *) element;
   char *carray = MYALLOC(char, 5);
@@ -404,6 +470,32 @@ print_name(void *element)
     carray = get_data(string);
     *(carray+str_length(string)) = 0;
 
+    eprintf("%s", carray);
+    str_set_length(string, 5);
+  }
+
+  destroy_str(string, 1);
+}
+
+void
+print_name(void *element)
+{
+  cpa *array = (cpa *) element;
+  char *carray = MYALLOC(char, 5);
+  str *string = new_str(carray, 5);
+
+  for (UNUM i = 0; i < array->size; i++) {
+    int result = encode(*(array->array+i), string);
+
+    if (result) {
+      fleprintf("%llu, fail to encode %ld\n", i, *(array->array+i));
+      str_set_length(string, 5);
+      continue;
+    }
+
+    carray = get_data(string);
+    *(carray+str_length(string)) = 0;
+
     printf("%s", carray);
     str_set_length(string, 5);
   }
@@ -411,8 +503,6 @@ print_name(void *element)
   destroy_str(string, 1);
 }
 
-/* REVIEW: Print the names of non-terminals out, instead of printing
-   the numbers? */
 void
 print_grammar(CC_MOD(Grammar *) g)
 {
@@ -420,7 +510,7 @@ print_grammar(CC_MOD(Grammar *) g)
   printf("Non-terminals...\n");
   map_list_between(g->names, print_name, print_sep);
 
-  printf("\n");
+  printf("\n\n");
 
   printf("Predicates...\n");
 
@@ -431,9 +521,11 @@ print_grammar(CC_MOD(Grammar *) g)
            get_data(ptdp->user_name), get_data(ptdp->raw_name));
   }
 
+  printf("\n");
+
   printf("Rules...\n");
   for (int i = 0; i < list_length(g->rule_grps); i++) {
-    print_rule_group(list_nth(g->rule_grps, i));
+    print_rule_group((Grammar *) g, list_nth(g->rule_grps, i));
     printf("\n");
   }
 
@@ -568,6 +660,7 @@ grammar_ptd(CCR_MOD(Grammar *) g, PT pt)
   return (PTD *) list_nth(g->predicates, pt);
 }
 
+P_ATTR
 NUM
 grammar_left_len(CCR_MOD(Grammar *)g)
 {
@@ -581,6 +674,13 @@ grammar_names(CCR_MOD(Grammar *)g)
   return g->names;
 }
 
+P_ATTR
+List *
+grammar_preds(CCR_MOD(Grammar *)g)
+{
+  return g->predicates;
+}
+
 /* A transitive closure algorithm */
 void
 epsilon_nts(CC_MOD(Grammar *) g, BOOL * const restrict nts)
@@ -682,11 +782,13 @@ nt_first(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
               BREAKOUT;
             } else {
               free(tempT);
+              BREAKOUT;
             }
             break;
           case PREDICATE:
             SAFE_MALLOC(NUM, tempPT, 1, return 1;);
             *tempPT = top->data.pt;
+
             if (first &&
                 ht_find(predicate_hts+i, tempPT) == NULL) {
               changed = 1;
@@ -694,6 +796,7 @@ nt_first(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
               BREAKOUT;
             } else {
               free(tempPT);
+              BREAKOUT;
             }
             break;
           default:
@@ -777,7 +880,6 @@ tnt_first(CC_MOD(ht *) terminal_hts, CC_MOD(ht *) predicate_hts,
 
   for (NUM i = 0; i < tnt_len; i++) {
     top = (TNT *) list_nth(tnts, i);
-
     switch (top->type) {
     case TERMINAL:
       if (ht_find(result_terminals, &(top->data.t)) == NULL) {
@@ -797,6 +899,9 @@ tnt_first(CC_MOD(ht *) terminal_hts, CC_MOD(ht *) predicate_hts,
       break;
     default:
       current = top->data.nt;
+      /* if (tnt_len == 2 && current == 1) {
+       *   fleprintf0("generating for empty\n");
+       * } */
 
       if (current >= (NT) len || current < 0) {
         fleprintf("Wrong non-terminal: %ld>%ld\n", current, len);
@@ -949,6 +1054,13 @@ nt_follow(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
             for (NUM ell = 0; ell < ht_len;) {
               SAFE_MALLOC(NUM, tempN, 1, return 1;);
               *tempN = **(keys+ell++);
+              /* if (top->data.nt == 17 ||
+               *     top->data.nt == 14 ||
+               *     top->data.nt == 13) {
+               *   fleprintf("add pred %ld to NT %ld\n", *tempN,
+               *             top->data.nt);
+               *   eprintf("i = %ld, k = %ld, ell = %ld\n", i, k, ell);
+               * } */
               if (ht_find(result_predicates+top->data.nt,
                           tempN) == NULL) {
                 changed = 1;
author	JSDurand <mmemmew@gmail.com>	2022-02-08 00:29:10 +0800
committer	JSDurand <mmemmew@gmail.com>	2022-02-08 12:33:05 +0800
commit	5426d9e2a6b820e34809d639838b26643df9ab17 (patch)
tree	111f2b478b671092e3f2e64a6171970b8a5cdf99 /src/grammar.c
parent	aaa12504c6095b2cdfa213a3d4b269bbd5e7038a (diff)