fix errors (HEAD, master)

The previous version contained multiple subtle errors, both in the code and in the description of the BNF format. This version should fix some of those problems, and it can now successfully parse the grammar of its own grammar format, which is quite nice. See test/check_reader.c for how this format is parsed.
author: JSDurand <mmemmew@gmail.com> 2022-02-08 00:29:10 +0800
committer: JSDurand <mmemmew@gmail.com> 2022-02-08 12:33:05 +0800
commit: 5426d9e2a6b820e34809d639838b26643df9ab17 (patch)
tree: 111f2b478b671092e3f2e64a6171970b8a5cdf99
parent: aaa12504c6095b2cdfa213a3d4b269bbd5e7038a (diff)
11 files changed, 951 insertions, 103 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index 77c2dfe..5375d8e 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -30,8 +30,8 @@ check_list_SOURCES = test/check_list.c list.c
 check_grammar_SOURCES = test/check_grammar.c list.c ht.c grammar.c \
 utf8.c str.c dfa.c
 
-check_reader_SOURCES = test/check_reader.c list.c grammar.c reader.c \
-str.c utf8.c util.c ht.c dfa.c
+check_reader_SOURCES = test/check_reader.c crf.c cnp.c grammar.c list.c \
+util.c ht.c utf8.c str.c dfa.c bsr.c tuple.c
 
 check_dfa_SOURCES = test/check_dfa.c dfa.c list.c str.c utf8.c
 
diff --git a/src/bnf.bnf b/src/bnf.bnf
index df8ef3c..0a339fc 100644
--- a/src/bnf.bnf
+++ b/src/bnf.bnf
@@ -1,27 +1,41 @@
 # A grammar file for reading BNF notation
+# FIXME: Too many errors!
 
-[id]: a-zA-Z-_+*@
+[space]: %x20\t
 
-[notbracket]: ^[]
+[id]: a-zA-Z\-_+*@
+
+[rule_name_char]: ^[]:%x20\n
 
 [notnewline]: ^\n\r
 
+[class_range_char]: ^\n\r\^\\
+
+[class_single_char]: ^\n\r\^\\\-
+
+[any]:
+
+[double_string_char]: ^"\n
+
+[single_string_char]: ^'\n
+
 --
 
-BNF: predicate_section "--\n" rules_section
+BNF: "#" notnewlines empty BNF
+BNF: predicate_section "--\n" empty rules_section
 BNF: rules_section
 BNF:
 
-spaces: space spaces
-spaces: space
+spaces: [space] spaces
+spaces: [space]
 
-space: " "
-space: "\t"
+optional_spaces: [space] optional_spaces
+optional_spaces:
 
-empty: spaces empty
-empty: "\n" empty "\n"
-empty: "\r" empty "\n"
-empty: "#" notnewlines "\n"
+empty: [space] empty
+empty: "\n" empty
+empty: "\r" empty
+empty: "\n#" notnewlines
 empty:
 
 notnewlines: [notnewline] notnewlines
@@ -30,13 +44,13 @@ notnewlines:
 predicate_section: predicate empty "\n" predicate_section
 predicate_section:
 
-predicate: "[" ids "]:" spaces class
+predicate: "[" ids "]:" optional_spaces class
 
 ids: [id] ids
-ids:
+ids: [id]
 
 # Yes, a class specification can be empty, in which case the predicate
-# is equivalent with the default "any" predicate.
+# matches everything.
 
 class: positive_class
 class: "^" positive_class
@@ -44,23 +58,38 @@ class: "^" positive_class
 positive_class: positive_specification positive_class
 positive_class:
 
-positive_specification: enotnewline
-positive_specification: enotnewline "-" enotnewline
+positive_specification: class_single_chars
+positive_specification: class_range_chars "-" class_range_chars
+
+class_single_chars: [class_single_char]
+class_single_chars: "\\" [any]
 
-# Extended not-newline, or escaped not-newline
-enotnewline: [notnewline]
-enotnewline: "\\" [any]
+class_range_chars: [class_range_char]
+class_range_chars: "\\" [any]
 
 rules_section: rule empty "\n" rules_section
+rules_section:
 
-rule: rule_name ":" spaces rule_rhs
-rule: rule_name ":" rule_rhs
+rule: rule_name optional_spaces ":" optional_spaces rule_rhs
 
-rule_name: [notbracket] rule_name
+rule_name: [rule_name_char] rule_name
 rule_name:
 
-spaces-or-escaped-newline: spaces
-spaces-or-escaped-newline: "\\\n"
+spaces_or_escaped_newline: optional_spaces
+spaces_or_escaped_newline: "\\\n"
+
+rule_rhs: ids spaces_or_escaped_newline rule_rhs
+rule_rhs: '[' ids ']' spaces_or_escaped_newline rule_rhs
+rule_rhs: '"' double_string_chars '"' spaces_or_escaped_newline rule_rhs
+rule_rhs: "'" single_string_chars "'" spaces_or_escaped_newline rule_rhs
+rule_rhs: 
+
+double_string_chars: [double_string_char]
+double_string_chars: "\\" [any]
+double_string_chars: [double_string_char] double_string_chars
+double_string_chars: "\\" [any] double_string_chars
 
-rule_rhs: ids spaces-or-escaped-newline rule_rhs
-rule_rhs:
+single_string_chars: [single_string_char]
+single_string_chars: "\\" [any]
+single_string_chars: [single_string_char] single_string_chars
+single_string_chars: "\\" [any] single_string_chars
diff --git a/src/bsr.c b/src/bsr.c
index 4b7953a..2736a1b 100644
--- a/src/bsr.c
+++ b/src/bsr.c
@@ -242,8 +242,60 @@ print_bsr_f(pair5 label)
   print_name(list_nth(grammar_names(bsr_print_grammar),
                       label.x));
   printf(" := ");
+  /* List *string = rg_nth(grammar_rule(bsr_print_grammar, label.x),
+   *                       label.u);
+   * 
+   * char s[5];
+   * str *strp = (str *) new_utf8(s, 5);
+   * 
+   * for (NUM i = 0; i < list_length(string); i++) {
+   *   TNT *tntp = (TNT *)list_nth(string, i);
+   *   PTD *ptdp = NULL;
+   *   switch (tntp->type) {
+   *   case TERMINAL:
+   *     printf("'");
+   *     if (encode
+   *         (tntp->data.t, strp)) {
+   *       destroy_str(strp, 0);
+   *       fleprintf0("Fail to encode\n");
+   *       return;
+   *     }
+   *     printf("%s'", s);
+   *     str_set_length(strp, 5);
+   *     break;
+   *   case NONTERMINAL:
+   *     for (int k = 0;
+   *          (UNUM) k <
+   *            ((cpa *) list_nth(grammar_names(bsr_print_grammar),
+   *                              tntp->data.nt))->size;
+   *          k++) {
+   *       if (encode
+   *           (*(((cpa *)list_nth
+   *               (grammar_names(bsr_print_grammar),
+   *                tntp->data.nt))->array+k),
+   *            strp)) {
+   *         destroy_str(strp, 0);
+   *         fleprintf0("Fail to encode!\n");
+   *         return;
+   *       }
+   *       printf("%s", s);
+   *       str_set_length(strp, 5);
+   *     }
+   *     break;
+   *   default:
+   *     ptdp = (PTD *) list_nth
+   *       (grammar_preds(bsr_print_grammar), tntp->data.pt);
+   *     printf("[%s]", get_data(ptd_user_name(ptdp)));
+   *     break;
+   *   }
+   *   if (i+1<list_length(string)) printf(", ");
+   *   else printf(",\n");
+   * }
+   * 
+   * destroy_str(strp, 0); */
+
   map_list_between
-    (rg_nth (grammar_rule(bsr_print_grammar, label.x), label.u),
+    (rg_nth(grammar_rule(bsr_print_grammar, label.x), label.u),
      print_tnt, print_sep);
   printf(", %ld, %ld, %ld)", label.y, label.v, label.z);
 
diff --git a/src/bsr.h b/src/bsr.h
index 1b91c3e..39abbe9 100644
--- a/src/bsr.h
+++ b/src/bsr.h
@@ -25,8 +25,6 @@
    hash-table's almost constant-time look-up feature to implement
    this. */
 
-/* FIXME: Don't use hash-tables, as that uses too much space! */
-
 /* A BSR set has two types.
 
    The first type is of the form
diff --git a/src/cnp.c b/src/cnp.c
index 3c76f52..b28f967 100644
--- a/src/cnp.c
+++ b/src/cnp.c
@@ -1,6 +1,21 @@
+#include <time.h>
 #include <string.h>
 #include "cnp.h"
 
+#define TITO struct timespec tic, toc /* declares the two timestamps that TIC and TOC use */
+
+#define TIC do {                                \
+    clock_gettime(CLOCK_MONOTONIC_RAW, &tic);   \
+  } while (0) /* stores the current clock reading in tic */
+
+#define TOC do {                                \
+    clock_gettime(CLOCK_MONOTONIC_RAW, &toc);   \
+    printf("\nTotal time = %f seconds\n",       \
+           (toc.tv_nsec - tic.tv_nsec) /        \
+           1000000000.0 +                       \
+           toc.tv_sec - tic.tv_sec);            \
+  } while (0) /* stores the clock reading in toc, then prints toc - tic in seconds */
+
 struct Environment_s {
   grammar_info *gi;
   /* RESULT_TS and RESULT_PS are temporary arrays used in the
@@ -32,7 +47,6 @@ nt_add(Environment *env, NUM X, NUM j)
   for (NUM i = 0; i < rg_len(RG); i++) {
     BOOL result =
       test_select(env, *(env->string+j), X, rg_nth(RG, i));
-    /* fleprintf("i = %ld, after test select\n", i); */
 
     for (NUM k = 0; k < grammar_left_len(GI->g); k++) {
       ht_reset(env->result_ts+k, DELETE_KEY);
@@ -42,12 +56,10 @@ nt_add(Environment *env, NUM X, NUM j)
     if (env->error) break;
 
     if (result) {
-      /* fleprintf("i = %ld, j = %ld\n", i, j); */
       env->error =
         desc_add(env->pr, env->pu, j,
                  (pair4) { .x = X, .y = i, .z = 0, .u = j });
 
-      /* fleprintf("env->error = %d\n", env->error); */
       /* print_procr(env->pr); */
       /* fleprintf("pr len = %ld\n", env->pr->len);
        * fleprintf("pr ini = %d\n", (env->pr->array)->initialized); */
@@ -73,22 +85,40 @@ test_select(Environment *env, NUM b, NUM X, CCR_MOD(List *) tnts)
   ht *result_ts = env->result_ts;
   ht *result_ps = env->result_ps;
 
+  ht_reset(result_ts, DELETE_KEY);
+  ht_reset(result_ps, DELETE_KEY);
+  
   if (tnt_first(gi->pts, gi->pps, gi->nts, grammar_left_len(gi->g),
                 tnts, result_ts, result_ps)) {
     fleprintf0("Fail to find the first set of the TNT string.\n");
     goto cleanup;
   }
+  /* if (X == 3) {
+   *   fleprintf0("for empty the first has ");
+   *   for (NUM i = 0; i < ht_size(result_ts); i++) {
+   *     eprintf("%ld", **((NUM **)ht_keys(result_ts)+i));
+   *     if (i+1<ht_size(result_ts)) eprintf(", ");
+   *     else eprintf("\n");
+   *   }
+   *   if (list_length(tnts) && ((TNT *)list_nth(tnts, 0))->data.nt == 1)
+   *     fleprintf("first of tnts is %ld\n",
+   *               ((TNT *)list_nth(tnts, 0))->data.nt);
+   * } */
 
   if (ht_find(result_ts, &b) != NULL) {
     result = 1;
+
+#ifdef DEBUG
+    fleprintf("test succeeds against %ld\n", b);
+#endif
+
     goto success;
   }
-
+  /* fleprintf("hi, size = %ld\n", ht_size(result_ps)); */
   for (NUM i = 0; i < ht_size(result_ps); i++) {
-    PTD *ptdp = grammar_ptd(gi->g, **((PT **) ht_keys(result_ps+i)));
+    PTD *ptdp = grammar_ptd(gi->g, **((PT **) ht_keys(result_ps)+i));
 
     if (ptd_run(ptdp, b)) {
-      /* fleprintf("i = %ld\n", i); */
       result = 1;
       goto success;
     }
@@ -104,24 +134,27 @@ test_select(Environment *env, NUM b, NUM X, CCR_MOD(List *) tnts)
       goto success;
       break;
     default:
-      if (!(gi->nts+TOP->data.nt)) goto success;
+      if (!(*(gi->nts+(TOP->data.nt)))) goto success;
       break;
     }
 
 #undef TOP
 
   }
-
+  /* fleprintf("hi %ld\n", X); */
   if (ht_find(gi->sts+X, &b) != NULL) {
     result = 1;
+
+#ifdef DEBUG
+    fleprintf("followed by %ld\n", b);
+#endif
+
     goto success;
   }
 
   for (NUM i = 0; i < ht_size(gi->sps+X); i++) {
-    PTD *ptdp = grammar_ptd(gi->g, **((PT **) ht_keys(gi->sps+X)));
-    /* fleprintf("i = %ld\n", i); */
+    PTD *ptdp = grammar_ptd(gi->g, **((PT **) ht_keys(gi->sps+X)+i));
     if (ptd_run(ptdp, b)) {
-      /* fleprintf("i = %ld\n", i); */
       result = 1;
       goto success;
     }
@@ -154,9 +187,11 @@ rtn_internal(pair6 label)
                   label.v, label.w
                 }))) return;
 
-  /* fleprintf("added descriptor (%ld, %ld, %ld, %ld, %ld)\n",
-   *           label.z, label.u, label.v, label.w,
-   *           rtn_internal_num); */
+#ifdef DEBUG
+    fleprintf("added descriptor (%ld, %ld, %ld, %ld, %ld)\n",
+              label.z, label.u, label.v, label.w,
+              rtn_internal_num);
+#endif
 
   if ((rtn_internal_env->error =
        bsr_add(rtn_internal_env->gi->g,
@@ -175,9 +210,11 @@ rtn(Environment *env, NUM X, NUM k, NUM j)
   pair3 label = (pair3) { .x = X, .y = k, .z = j };
   pair2 label2 = (pair2) { .x = X, .y = k };
 
-  if (spa_belong(env->spap, label)) return;
+#ifdef DEBUG
+  fleprintf("encounter %ld, %ld, %ld\n", X, k, j);
+#endif
 
-  /* fleprintf0("new spa\n"); */
+  if (spa_belong(env->spap, label)) return;
 
   if ((env->error = spa_insert(env->spap, label))) return;
 
@@ -267,7 +304,12 @@ cnp_call(Environment *env, pair3 label, NUM i, NUM j)
   }
 
   NUM X = (NUM) xtnt->data.nt;
-  /* fleprintf("X = %ld, j = %ld\n", X, j); */
+
+#ifdef DEBUG
+  fleprintf0("X = ");
+  eprint_name(list_nth(grammar_names(env->gi->g), X));
+  eprintf(", j = %ld\n", j);
+#endif
 
   pair2 node = (pair2) { .x = X, .y = j };
   pair4 u = (pair4)
@@ -279,11 +321,21 @@ cnp_call(Environment *env, pair3 label, NUM i, NUM j)
     };
 
   if (!(crf_find_node(env->crfp, node))) {
+
+#ifdef DEBUG
+    fleprintf("adding node (%ld, %ld)\n", X, j);
+#endif
+
     if (crf_add_node(env->crfp, node)) {
       fleprintf0("fail to add node to crf\n");
       goto cleanup;
     }
 
+#ifdef DEBUG
+    fleprintf("adding edge to (%ld, %ld, %ld, %ld)\n",
+              u.x, u.y, u.z, u.u);
+#endif
+
     if (crf_add_edge(env->crfp, node, u)) {
       fleprintf0("fail to add edge to crf\n");
       goto cleanup;
@@ -292,11 +344,14 @@ cnp_call(Environment *env, pair3 label, NUM i, NUM j)
     /* errors will be stored in ENV, so we simply return */
     nt_add(env, X, j);
 
-    /* fleprintf("X = %ld, j = %ld\n", X, j); */
-
     return;
   }
 
+#ifdef DEBUG
+    fleprintf("adding edge to (%ld, %ld, %ld, %ld)\n",
+              u.x, u.y, u.z, u.u);
+#endif
+
   if (!(crf_find_edge(env->crfp, node, u))) {
     if (crf_add_edge(env->crfp, node, u)) {
       fleprintf0("fail to add edge to crf\n");
@@ -378,11 +433,13 @@ env_follow_p(CCR_MOD(Environment *) env, NUM X, NUM t)
 
   for (NUM i = 0; i < ht_size(env->gi->sps+X); i++) {
     PTD *ptdp = grammar_ptd(env->gi->g,
-                            **((PT **) ht_keys(env->gi->sps+X)));
+                            **((PT **) ht_keys(env->gi->sps+X)+i));
     if (ptd_run(ptdp, t)) return 1;
   }
 
-  /* fleprintf("X = %ld, t = %ld\n", X, t); */
+#ifdef DEBUG
+  fleprintf("X = %ld, t = %ld\n", X, t);
+#endif
 
   return 0;
 }
@@ -679,6 +736,9 @@ cnp_parse(Grammar *g, str *string)
 
       if (env_follow_p(env, current_prodecor->x,
                        *(num_string+current_grade))) {
+#ifdef DEBUG
+        fleprintf0("returning from an empty rule\n");
+#endif
         rtn(env, current_prodecor->x,
             current_prodecor->u, current_grade);
       }
@@ -693,6 +753,9 @@ cnp_parse(Grammar *g, str *string)
     if (current_prodecor->z == list_length(right)) {
       if (env_follow_p(env, current_prodecor->x,
                        *(num_string+current_grade))) {
+#ifdef DEBUG
+        fleprintf0("returning from the end\n");
+#endif
         rtn(env, current_prodecor->x,
             current_prodecor->u, current_grade);
       }
@@ -744,7 +807,17 @@ cnp_parse(Grammar *g, str *string)
            ((TNT *) list_nth(right, current_prodecor->z-1))->type ==
            PREDICATE) {
 #ifdef DEBUG
-      fleprintf0("found terminal\n");
+      if (((TNT *) list_nth(right, current_prodecor->z-1))->type ==
+          TERMINAL)
+        fleprintf("found terminal %ld\n",
+                  ((TNT *)
+                   list_nth
+                   (right, current_prodecor->z-1))->data.t);
+      else
+        fleprintf("found predicate terminal %ld\n",
+                  ((TNT *)
+                   list_nth
+                   (right, current_prodecor->z-1))->data.pt);
 #endif
       /* add to BSR set */
       errorp =
@@ -771,8 +844,12 @@ cnp_parse(Grammar *g, str *string)
 
         goto continue_label;
       }
+
 #ifdef DEBUG
       fleprintf0("terminal not at the end\n");
+      fleprintf("testing X = %ld, z = %ld, input = %ld\n",
+                current_prodecor->x, current_prodecor->z,
+                *(num_string+current_grade));
 #endif
       /* else test select */
       tnts =
@@ -818,10 +895,12 @@ cnp_parse(Grammar *g, str *string)
         .z = current_prodecor->z
       }, current_prodecor->u, current_grade);
 
-    /* fleprintf("after call { %ld, %ld, %ld, %ld, %ld }\n",
-     *           current_prodecor->x, current_prodecor->y,
-     *           current_prodecor->z, current_prodecor->u,
-     *           current_grade); */
+#ifdef DEBUG
+    fleprintf("after call { %ld, %ld, %ld, %ld, %ld }\n",
+              current_prodecor->x, current_prodecor->y,
+              current_prodecor->z, current_prodecor->u,
+              current_grade);
+#endif
 
   continue_label:
 
diff --git a/src/crf.c b/src/crf.c
index 1c9fea9..5dba8bf 100644
--- a/src/crf.c
+++ b/src/crf.c
@@ -95,8 +95,10 @@ crf_add_edge(crf *crfp, pair2 source, pair4 label)
       label.u
     };
 
-  /* fleprintf("adding (%ld, %ld, %ld, %ld, %ld, %ld)\n",
-   *           p6.x, p6.y, p6.z, p6.u, p6.v, p6.w); */
+#ifdef DEBUG
+  fleprintf("adding (%ld, %ld, %ld, %ld, %ld, %ld)\n",
+            p6.x, p6.y, p6.z, p6.u, p6.v, p6.w);
+#endif
 
   NUM *find_result = luple6_find(crfp->lup, p6);
 
@@ -118,7 +120,9 @@ crf_add_edge(crf *crfp, pair2 source, pair4 label)
 void
 destroy_crf(crf *crfp)
 {
+  /* destroy the luple6 stored in CRFP->lup first, */
   destroy_luple6(crfp->lup);
+  /* then free the crf structure itself */
   free(crfp);
 }
 
diff --git a/src/dfa.c b/src/dfa.c
index 2edda77..f3f62fb 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -717,8 +717,8 @@ run_dfa(const dfa * const restrict table, const NUM code)
       if (code >= (table->data.sp.ranges+i)->beg &&
           code <= (table->data.sp.ranges+i)->end) {
 #ifdef DEBUG
-        fleprintf("code = %ld, beg = %ld, end = %ld\n",
-                  code,
+        fleprintf("i = %d, code = %ld, beg = %ld, end = %ld\n",
+                  i, code,
                   (table->data.sp.ranges+i)->beg,
                   (table->data.sp.ranges+i)->end);
 #endif
@@ -727,17 +727,18 @@ run_dfa(const dfa * const restrict table, const NUM code)
     return 0;
     break;
   case DFA_TYPE_SPECIAL_NEG:
-    for (int i = 0; i < table->data.sp.len; i++)
+    for (int i = 0; i < table->data.sp.len; i++) {
       if (code >= (table->data.sp.ranges+i)->beg &&
           code <= (table->data.sp.ranges+i)->end) {
 #ifdef DEBUG
-        fleprintf("code = %ld, beg = %ld, end = %ld\n",
-                  code,
+        fleprintf("code = %ld, i = %d, beg = %ld, end = %ld\n",
+                  code, i,
                   (table->data.sp.ranges+i)->beg,
                   (table->data.sp.ranges+i)->end);
 #endif
         return 0;
       }
+    }
     return 1;
     break;
   case DFA_TYPE_SPECIAL_BOTH:
diff --git a/src/grammar.c b/src/grammar.c
index 1ea52b0..55a1e07 100644
--- a/src/grammar.c
+++ b/src/grammar.c
@@ -63,6 +63,13 @@ new_ptd(str *user_name, str *raw_name, dfa *dfap)
   return result;
 }
 
+P_ATTR
+str *
+ptd_user_name(PTD *p) /* accessor: returns the user_name field of P */
+{
+  return p->user_name;
+}
+
 void
 destroy_ptd(PTD *p, int flag)
 {
@@ -91,14 +98,8 @@ destroy_ptd_no_free(void *e)
   destroy_ptd((PTD *)e, 0);
 }
 
-/* REVIEW: Add some statistic counts to assist the hash tables.  For
-   example, store the total number of terminals as an integer; then
-   the hash table can be hinted at initialization to contain that many
-   elements, which can reduce the number of time a hash table needs to
-   expand and re-insert all its keys, which then helps the
-   performance.  But this is not the top priority thing to do, and
-   might be postponed until a proto-type of the parser generator is
-   usable already. */
+/* FIXME: Add a list of terminals, so that we don't need to use hash
+   tables. */
 
 /* rule_grps and names should have the same length */
 struct Grammar_s {
@@ -160,20 +161,85 @@ destroy_rule_group_free_first(void *rule_grp)
 static void
 print_sep() /* separator callback handed to map_list_between */
 {
-  printf(", ");
+  printf(",\n"); /* a comma followed by a newline */
 }
 
 static void
-print_rule_group(void *rule_grp)
+print_rule_group(Grammar *g, void *rule_grp)
 {
   Rule_group *rg = (Rule_group *) rule_grp;
 
+  char s[5];
+  str *strp = (str *) new_utf8(s, 5);
+
   for (int i = 0; i < list_length(rg->rights); i++) {
-    List *str = (List *) list_nth(rg->rights, i);
-    printf("Rule %lu => ", rg->left);
-    map_list_between(str, print_tnt, print_sep);
+    /* each alternative is printed on its own line as NAME := items */
+    List *strls = (List *) list_nth(rg->rights, i);
+    /* print the left-hand side name, one encoded element at a time */
+    for (int j = 0;
+         (UNUM) j < ((cpa *)list_nth(g->names, rg->left))->size;
+         j++) {
+
+      if (encode
+          (*(((cpa *)list_nth(g->names, rg->left))->array+j),
+           strp)) {
+        destroy_str(strp, 0);
+        fleprintf0("Fail to encode!\n");
+        return;
+      }
+
+      *(s+str_length(strp)) = 0;
+      printf("%s", s);
+      str_set_length(strp, 5);
+    }
+    printf(" := ");
+
+    for (int j = 0; j < list_length(strls); j++) {
+      /* below, S is printed by length: it is not NUL-terminated here */
+      TNT *tntp = (TNT *) list_nth(strls, j);
+      PTD *ptdp = NULL;
+      switch (tntp->type) {
+      case TERMINAL:
+        printf("'");
+        if (encode
+            (tntp->data.t,
+             strp)) {
+          destroy_str(strp, 0);
+          fleprintf0("Fail to encode!\n");
+          return;
+        }
+        printf("%.*s'", (int) str_length(strp), s);
+        str_set_length(strp, 5);
+        break;
+      case NONTERMINAL:
+        for (int k = 0;
+             (UNUM) k < ((cpa *)list_nth(g->names,
+                                         tntp->data.nt))->size;
+             k++) {
+          if (encode
+              (*(((cpa *)list_nth(g->names, tntp->data.nt))->array+k),
+               strp)) {
+            destroy_str(strp, 0);
+            fleprintf0("Fail to encode!\n");
+            return;
+          }
+          printf("%.*s", (int) str_length(strp), s);
+          str_set_length(strp, 5);
+        }
+        break;
+      default:
+        /* predicate */
+        ptdp = (PTD *) list_nth(g->predicates, tntp->data.pt);
+        /* a predicate is shown by its user_name inside brackets */
+        printf("[%s]", get_data(ptdp->user_name));
+        break;
+      }
+      if (j+1<list_length(strls)) printf(", ");
+    }
     printf("\n");
   }
+
+  destroy_str((str*) strp, 0);
 }
 
 TNT *
@@ -386,7 +452,7 @@ find_in_cpa_list(CCR_MOD(NUM *) string, NUM size,
 }
 
 void
-print_name(void *element)
+eprint_name(void *element)
 {
   cpa *array = (cpa *) element;
   char *carray = MYALLOC(char, 5);
@@ -404,6 +470,32 @@ print_name(void *element)
     carray = get_data(string);
     *(carray+str_length(string)) = 0;
 
+    eprintf("%s", carray);
+    str_set_length(string, 5);
+  }
+
+  destroy_str(string, 1);
+}
+
+void
+print_name(void *element)
+{
+  cpa *array = (cpa *) element;
+  char *carray = MYALLOC(char, 5);
+  str *string = new_str(carray, 5);
+
+  for (UNUM i = 0; i < array->size; i++) {
+    int result = encode(*(array->array+i), string);
+
+    if (result) {
+      fleprintf("%llu, fail to encode %ld\n", i, *(array->array+i));
+      str_set_length(string, 5);
+      continue;
+    }
+
+    carray = get_data(string);
+    *(carray+str_length(string)) = 0;
+
     printf("%s", carray);
     str_set_length(string, 5);
   }
@@ -411,8 +503,6 @@ print_name(void *element)
   destroy_str(string, 1);
 }
 
-/* REVIEW: Print the names of non-terminals out, instead of printing
-   the numbers? */
 void
 print_grammar(CC_MOD(Grammar *) g)
 {
@@ -420,7 +510,7 @@ print_grammar(CC_MOD(Grammar *) g)
   printf("Non-terminals...\n");
   map_list_between(g->names, print_name, print_sep);
 
-  printf("\n");
+  printf("\n\n");
 
   printf("Predicates...\n");
 
@@ -431,9 +521,11 @@ print_grammar(CC_MOD(Grammar *) g)
            get_data(ptdp->user_name), get_data(ptdp->raw_name));
   }
 
+  printf("\n");
+
   printf("Rules...\n");
   for (int i = 0; i < list_length(g->rule_grps); i++) {
-    print_rule_group(list_nth(g->rule_grps, i));
+    print_rule_group((Grammar *) g, list_nth(g->rule_grps, i));
     printf("\n");
   }
 
@@ -568,6 +660,7 @@ grammar_ptd(CCR_MOD(Grammar *) g, PT pt)
   return (PTD *) list_nth(g->predicates, pt);
 }
 
+P_ATTR
 NUM
 grammar_left_len(CCR_MOD(Grammar *)g)
 {
@@ -581,6 +674,13 @@ grammar_names(CCR_MOD(Grammar *)g)
   return g->names;
 }
 
+P_ATTR
+List *
+grammar_preds(CCR_MOD(Grammar *)g) /* accessor: returns the predicates list of G */
+{
+  return g->predicates;
+}
+
 /* A transitive closure algorithm */
 void
 epsilon_nts(CC_MOD(Grammar *) g, BOOL * const restrict nts)
@@ -682,11 +782,13 @@ nt_first(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
               BREAKOUT;
             } else {
               free(tempT);
+              BREAKOUT;
             }
             break;
           case PREDICATE:
             SAFE_MALLOC(NUM, tempPT, 1, return 1;);
             *tempPT = top->data.pt;
+
             if (first &&
                 ht_find(predicate_hts+i, tempPT) == NULL) {
               changed = 1;
@@ -694,6 +796,7 @@ nt_first(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
               BREAKOUT;
             } else {
               free(tempPT);
+              BREAKOUT;
             }
             break;
           default:
@@ -777,7 +880,6 @@ tnt_first(CC_MOD(ht *) terminal_hts, CC_MOD(ht *) predicate_hts,
 
   for (NUM i = 0; i < tnt_len; i++) {
     top = (TNT *) list_nth(tnts, i);
-
     switch (top->type) {
     case TERMINAL:
       if (ht_find(result_terminals, &(top->data.t)) == NULL) {
@@ -797,6 +899,9 @@ tnt_first(CC_MOD(ht *) terminal_hts, CC_MOD(ht *) predicate_hts,
       break;
     default:
       current = top->data.nt;
+      /* if (tnt_len == 2 && current == 1) {
+       *   fleprintf0("generating for empty\n");
+       * } */
 
       if (current >= (NT) len || current < 0) {
         fleprintf("Wrong non-terminal: %ld>%ld\n", current, len);
@@ -949,6 +1054,13 @@ nt_follow(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
             for (NUM ell = 0; ell < ht_len;) {
               SAFE_MALLOC(NUM, tempN, 1, return 1;);
               *tempN = **(keys+ell++);
+              /* if (top->data.nt == 17 ||
+               *     top->data.nt == 14 ||
+               *     top->data.nt == 13) {
+               *   fleprintf("add pred %ld to NT %ld\n", *tempN,
+               *             top->data.nt);
+               *   eprintf("i = %ld, k = %ld, ell = %ld\n", i, k, ell);
+               * } */
               if (ht_find(result_predicates+top->data.nt,
                           tempN) == NULL) {
                 changed = 1;
diff --git a/src/grammar.h b/src/grammar.h
index 89a66ba..bf418e0 100644
--- a/src/grammar.h
+++ b/src/grammar.h
@@ -75,6 +75,8 @@ typedef struct PT_DATA_s PTD;
 /* On error return NULL */
 PTD *new_ptd(str *user_name, str *raw_name, dfa *dfap);
 
+P_ATTR str *ptd_user_name(PTD *p);
+
 /* FLAG is used to destroy the strings contained within */
 void destroy_ptd(PTD *p, int flag);
 
@@ -134,6 +136,8 @@ find_in_cpa_list(CCR_MOD(NUM *) string, NUM size,
 /* assume element is a cpa pointer */
 void print_name(void *element);
 
+void eprint_name(void *element);
+
 void print_tnt(void *element);
 
 void print_rule(void *r);
@@ -167,9 +171,10 @@ void destroy_rule_no_free(void *rule);
 void destroy_cpa_and_free_all(void *element);
 void destroy_grammar(void *grammar, int flag);
 
-NUM grammar_left_len(CCR_MOD(Grammar *)g);
+P_ATTR NUM grammar_left_len(CCR_MOD(Grammar *)g);
 
-List *grammar_names(CCR_MOD(Grammar *)g);
+P_ATTR List *grammar_names(CCR_MOD(Grammar *)g);
+P_ATTR List *grammar_preds(CCR_MOD(Grammar *)g);
 
 /* look up a rule */
 Rule_group *grammar_rule(CCR_MOD(Grammar *) g, NT nt);
diff --git a/src/test/check_reader.c b/src/test/check_reader.c
index a4e184f..45025ff 100644
--- a/src/test/check_reader.c
+++ b/src/test/check_reader.c
@@ -1,18 +1,517 @@
+#include <string.h>
 #include <stdlib.h>
 #include <stdio.h>
+#include <time.h>
+#include <limits.h>
 
-#include "../util.h"
-#include "../list.h"
-#include "../grammar.h"
-#include "../reader.h"
+#include "../cnp.h"
+
+#define TITO struct timespec tic, toc /* declare the two timestamps that TIC and TOC fill in */
+
+#define TIC do {  /* Start the stopwatch: needs the tic variable declared by TITO. */ \
+    clock_gettime(CLOCK_MONOTONIC_RAW, &tic);   /* store the current CLOCK_MONOTONIC_RAW time in tic */ \
+  } while (0)
+
+#define TOC do {  /* Stop the stopwatch: stamp toc and print the seconds elapsed since TIC. */ \
+    clock_gettime(CLOCK_MONOTONIC_RAW, &toc);   \
+    printf("\nTotal time = %f seconds\n",       /* nanosecond delta scaled to seconds (may be negative) ... */ \
+           (toc.tv_nsec - tic.tv_nsec) /        \
+           1000000000.0 +                       \
+           toc.tv_sec - tic.tv_sec);            /* ... plus the whole-second delta */ \
+  } while (0)
+
+#define READ_INTO_CPA(N, U, L, I, VA, VI, CP) do {      /* Convert the L bytes at N into a cpa and append it to the list `names`. */ \
+    U = new_utf8(N, L);                                 /* U, I, VA, VI and CP are caller-supplied scratch variables */ \
+    I = get_info((str *)U, 0);                          \
+    VI = 0;                                             \
+    for (NUM index = 0;                                 /* pass 1: walk U in steps of I.step, counting the steps in VI */ \
+         I.value >= 0 && index < str_length((str*) U);  /* stops early on a negative value (presumably a decoding error -- confirm) */ \
+         index += I.step, VI++) {                       \
+      I = get_info((str*)U, index);                     \
+    }                                                   \
+    SAFE_MALLOC(NUM, VA, VI, return 1;);                /* array of VI values; `return 1;` is presumably the on-failure action -- confirm */ \
+    I = get_info((str *)U, 0);                          \
+    VI = 0;                                             \
+    for (NUM index = 0;                                 /* pass 2: same walk, now storing each I.value */ \
+         I.value >= 0 && index < str_length((str *) U); \
+         index += I.step, VI++) {                       \
+      I = get_info((str *)U, index);                    \
+      *(VA+VI) = I.value;                               \
+    }                                                   \
+    SAFE_MALLOC(cpa, CP, 1, return 1;);                 \
+    CP->array = VA;                                     /* the cpa holds the value array ... */ \
+    CP->size = VI;                                      /* ... and the number of values stored in it */ \
+    if (add_to_list(names, CP)) {                       /* relies on a list called `names` in the enclosing scope */ \
+      fleprintf0("Fail to add to names\n");             \
+      return 1;                                         /* leaves the enclosing function, which must return int */ \
+    }                                                   \
+    destroy_str((str *)U, 1);                           /* NOTE(review): flag 1 presumably also frees the buffer N -- confirm */ \
+  } while (0)
+
+#define READ_TNT_STRING(LEFT, FORMAT, LEN, ...) do {            /* Append a rule with left-hand side LEFT and the LEN symbols in __VA_ARGS__ to `rules`. */ \
+    tnt_string = new_tnt_string(FORMAT, LEN, __VA_ARGS__);      /* FORMAT has one letter per symbol; main pairs 't', 'n', 'p' with T, NT, PT arguments */ \
+    if (!tnt_string) {                                          \
+      fleprintf("left = %d, f = %s, l = %d, "                   \
+                "cannot create tnt string\n",                   \
+                LEFT, FORMAT, LEN);                             \
+      map_list(rules, destroy_rule_and_free_all);               /* on failure: tear down the rules collected so far ... */ \
+      destroy_list(rules, 0);                                   \
+      map_list(names, destroy_cpa_and_free_all);                /* ... and the names ... */ \
+      destroy_list(names, 0);                                   \
+      return 1;                                                 /* ... then leave the enclosing function */ \
+    }                                                           \
+    rule = new_rule(LEFT, tnt_string);                          /* NOTE(review): the results of new_rule and add_to_list are not checked */ \
+    add_to_list(rules, rule);                                   /* uses `tnt_string`, `rule`, `rules` and `names` from the enclosing scope */ \
+  } while (0)
+
+#define ADD_EMPTY_RULE(N) do {                  /* Append to `rules` a rule for N whose right-hand side is a fresh list with nothing added. */ \
+    rule = new_rule(N, new_list());             /* NOTE(review): neither new_list nor new_rule is checked for failure */ \
+    add_to_list(rules, rule);                   /* uses `rule` and `rules` from the enclosing scope */ \
+  } while (0)
 
 int
-main(U_ATTR int argc, U_ATTR char **argv)
+main(int UNUSED argc, char UNUSED **argv)
 {
-  /* return 77; */
+  List *tnt_string = NULL;
+  Rule *rule = NULL;
+  List *rules = new_list();
+  List *names = new_list();
+  List *preds = new_list();
+
+  char *user_name = NULL;
+  char *raw_name = NULL;
+
+  str *user_name_s = NULL, *raw_name_s = NULL;
+
+  char *name = NULL;
+  utf8* uname = NULL;
+
+  int name_len = 0;
+
+  str_info info = EMPTY_STR_INFO;
+
+  NUM *varray = NULL;
+  NUM vindex = 0;
+  cpa *cpap = NULL;
+
+  SAFE_MALLOC(char, name, 3, return 1;);
+
+  *name = 'B';
+  *(name+1) = 'N';
+  *(name+2) = 'F';
+
+  READ_INTO_CPA(name, uname, 3, info, varray, vindex, cpap);
+
+  SAFE_MALLOC(char, name, 6, return 1;);
+
+  *name = 's';
+  *(name+1) = 'p';
+  *(name+2) = 'a';
+  *(name+3) = 'c';
+  *(name+4) = 'e';
+  *(name+5) = 's';
+
+  READ_INTO_CPA(name, uname, 6, info, varray, vindex, cpap);
+
+  SAFE_MALLOC(char, name, 15, return 1;);
+
+  *name = 'o';
+  *(name+1) = 'p';
+  *(name+2) = 't';
+  *(name+3) = 'i';
+  *(name+4) = 'o';
+  *(name+5) = 'n';
+  *(name+6) = 'a';
+  *(name+7) = 'l';
+  *(name+8) = '_';
+  *(name+9) = 's';
+  *(name+10) = 'p';
+  *(name+11) = 'a';
+  *(name+12) = 'c';
+  *(name+13) = 'e';
+  *(name+14) = 's';
+
+  READ_INTO_CPA(name, uname, 15, info, varray, vindex, cpap);
+
+  SAFE_MALLOC(char, name, 5, return 1;);
+
+  *name = 'e';
+  *(name+1) = 'm';
+  *(name+2) = 'p';
+  *(name+3) = 't';
+  *(name+4) = 'y';
+
+  READ_INTO_CPA(name, uname, 5, info, varray, vindex, cpap);
+
+  SAFE_MALLOC(char, name, 11, return 1;);
+
+  memcpy(name, "notnewlines", 11);
+
+  READ_INTO_CPA(name, uname, 11, info, varray, vindex, cpap);
+
+  SAFE_MALLOC(char, name, 17, return 1;);
+
+  memcpy(name, "predicate_section", 17);
+
+  READ_INTO_CPA(name, uname, 17, info, varray, vindex, cpap);
+
+  SAFE_MALLOC(char, name, 9, return 1;);
+
+  memcpy(name, "predicate", 9);
+
+  READ_INTO_CPA(name, uname, 9, info, varray, vindex, cpap);
+
+  name_len = 3;
+  SAFE_MALLOC(char, name, name_len, return 1;);
+
+  memcpy(name, "ids", name_len);
+
+  READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap);
+
+  name_len = 5;
+  SAFE_MALLOC(char, name, name_len, return 1;);
+
+  memcpy(name, "class", name_len);
+
+  READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap);
+
+  name_len = 14;
+  SAFE_MALLOC(char, name, name_len, return 1;);
+
+  memcpy(name, "positive_class", name_len);
+
+  READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap);
+
+  name_len = 22;
+  SAFE_MALLOC(char, name, name_len, return 1;);
+
+  memcpy(name, "positive_specification", name_len);
+
+  READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap);
+
+  name_len = 18;
+  SAFE_MALLOC(char, name, name_len, return 1;);
+
+  memcpy(name, "class_single_chars", name_len);
+
+  READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap);
+
+  name_len = 17;
+  SAFE_MALLOC(char, name, name_len, return 1;);
+
+  memcpy(name, "class_range_chars", name_len);
+
+  READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap);
+
+  name_len = 13;
+  SAFE_MALLOC(char, name, name_len, return 1;);
+
+  memcpy(name, "rules_section", name_len);
+
+  READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap);
+
+  name_len = 4;
+  SAFE_MALLOC(char, name, name_len, return 1;);
+
+  memcpy(name, "rule", name_len);
+
+  READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap);
+
+  name_len = 9;
+  SAFE_MALLOC(char, name, name_len, return 1;);
+
+  memcpy(name, "rule_name", name_len);
+
+  READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap);
+
+  name_len = 25;
+  SAFE_MALLOC(char, name, name_len, return 1;);
+
+  memcpy(name, "spaces-or-escaped-newline", name_len);
+
+  READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap);
+
+  name_len = 8;
+  SAFE_MALLOC(char, name, name_len, return 1;);
+
+  memcpy(name, "rule_rhs", name_len);
+
+  READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap);
+
+  name_len = 19;
+  SAFE_MALLOC(char, name, name_len, return 1;);
+
+  memcpy(name, "double_string_chars", name_len);
+
+  READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap);
+
+  name_len = 19;
+  SAFE_MALLOC(char, name, name_len, return 1;);
+
+  memcpy(name, "single_string_chars", name_len);
+
+  READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap);
+
+  READ_TNT_STRING(0, "tnnn", 4, (T) '#', (NT) 4, (NT) 3, (NT) 0);
+  READ_TNT_STRING(0, "ntttnn", 6, (NT) 5,
+                  (T) '-', (T) '-', (T) 0xa, (NT) 3, (NT) 13);
+  READ_TNT_STRING(0, "n", 1, (NT) 13);
+  ADD_EMPTY_RULE(0);
+
+  READ_TNT_STRING(1, "pn", 2, (PT) 0, (NT) 1);
+  READ_TNT_STRING(1, "p", 1, (PT) 0);
+
+  READ_TNT_STRING(2, "pn", 2, (PT) 0, (NT) 2);
+  ADD_EMPTY_RULE(2);
+
+  READ_TNT_STRING(3, "pn", 2, (PT) 0, (NT) 3);
+  READ_TNT_STRING(3, "tn", 2, (T) 0xa, (NT) 3);
+  READ_TNT_STRING(3, "tn", 2, (T) 0xd, (NT) 3);
+  READ_TNT_STRING(3, "ttnn", 4, (T) 0xa, (T) 0x23,
+                  (NT) 4, (NT) 3);
+  ADD_EMPTY_RULE(3);
+
+  READ_TNT_STRING(4, "pn", 2, (PT) 3, (NT) 4);
+  ADD_EMPTY_RULE(4);
+
+  READ_TNT_STRING(5, "nntn", 4, (NT) 6, (NT) 3, (T) 0xa,  (NT) 5);
+  ADD_EMPTY_RULE(5);
+
+  READ_TNT_STRING(6, "tnttnn", 6, (T) '[', (NT) 7,
+                  (T) ']', (T) ':', (NT) 2, (NT) 8);
+
+  READ_TNT_STRING(7, "pn", 2, (PT) 1, (NT) 7);
+  READ_TNT_STRING(7, "p", 1, (PT) 1);
+
+  READ_TNT_STRING(8, "n", 1, (NT) 9);
+  READ_TNT_STRING(8, "tn", 2, (T) '^', (NT) 9);
+
+  READ_TNT_STRING(9, "nn", 2, (NT) 10, (NT) 9);
+  ADD_EMPTY_RULE(9);
+
+  READ_TNT_STRING(10, "n", 1, (NT) 11);
+  READ_TNT_STRING(10, "ntn", 3, (NT) 12, (T) '-', (NT) 12);
+
+  READ_TNT_STRING(11, "p", 1, (PT) 5);
+  READ_TNT_STRING(11, "tp", 2, (T) 0x5c, (PT) 6);
+
+  READ_TNT_STRING(12, "p", 1, (PT) 4);
+  READ_TNT_STRING(12, "tp", 2, (T) 0x5c, (PT) 6);
+
+  READ_TNT_STRING(13, "nntn", 4, (NT) 14, (NT) 3, (T) 0xa, (NT) 13);
+  ADD_EMPTY_RULE(13);
+
+  READ_TNT_STRING(14, "nntnn", 5,
+                  (NT) 15, (NT) 2, (T) ':',
+                  (NT) 2, (NT) 17);
+
+  READ_TNT_STRING(15, "pn", 2, (PT) 2, (NT) 15);
+  ADD_EMPTY_RULE(15);
+
+  READ_TNT_STRING(16, "n", 1, (NT) 2);
+  READ_TNT_STRING(16, "tt", 2, (T) 0x5c, (T) 0xa);
+
+  READ_TNT_STRING(17, "nnn", 3, (NT) 7, (NT) 16, (NT) 17);
+  READ_TNT_STRING(17, "tntnn", 5,
+                  (T) '[', (NT) 7, (T) ']', (NT) 16, (NT) 17);
+  READ_TNT_STRING(17, "tntnn", 5,
+                  (T) 0x22, (NT) 18, (T) 0x22, (NT) 16, (NT) 17);
+  READ_TNT_STRING(17, "tntnn", 5,
+                  (T) 0x27, (NT) 19, (T) 0x27, (NT) 16, (NT) 17);
+  ADD_EMPTY_RULE(17);
+
+  READ_TNT_STRING(18, "p", 1, (PT) 7);
+  READ_TNT_STRING(18, "tp", 2, (T) '\\', (PT) 6);
+  READ_TNT_STRING(18, "pn", 2, (PT) 7, (NT) 18);
+  READ_TNT_STRING(18, "tpn", 3, (T) '\\', (PT) 6, (NT) 18);
+
+  READ_TNT_STRING(19, "p", 1, (PT) 8);
+  READ_TNT_STRING(19, "tp", 2, (T) '\\', (PT) 6);
+  READ_TNT_STRING(19, "pn", 2, (PT) 8, (NT) 19);
+  READ_TNT_STRING(19, "tpn", 3, (T) '\\', (PT) 6, (NT) 19);
+
+  SAFE_MALLOC(char, user_name, 6, return 1;);
+  memcpy(user_name, "space", 6);
+  user_name_s = (str *) new_utf8(user_name, 6);
+
+  SAFE_MALLOC(char, raw_name, 7, return 1;);
+  memcpy(raw_name, "%x20\\t", 7);
+  raw_name_s = (str *) new_utf8(raw_name, 7);
+
+  if (add_to_list(preds,
+                  new_ptd(user_name_s, raw_name_s,
+                          dfa_from_ranges
+                          (2, (NUM[]) { ' ', ' ', 9, 9 })))) {
+    fleprintf0("Fail to add a predicate\n");
+    return 1;
+  }
+
+  SAFE_MALLOC(char, user_name, 3, return 1;);
+  memcpy(user_name, "id", 3);
+  user_name_s = (str *) new_utf8(user_name, 3);
+
+  SAFE_MALLOC(char, raw_name, 12, return 1;);
+  memcpy(raw_name, "a-zA-Z-_+*@", 12);
+  raw_name_s = (str *) new_utf8(raw_name, 12);
+
+  if (add_to_list(preds,
+                  new_ptd(user_name_s, raw_name_s,
+                          dfa_from_ranges
+                          (6, (NUM[]) {
+                            'a', 'z', 'A', 'Z',
+                            '-', '-', '_', '_',
+                            '*', '+', '@', '@'
+                          })))) {
+    fleprintf0("Fail to add a predicate\n");
+    return 1;
+  }
+
+  SAFE_MALLOC(char, user_name, 15, return 1;);
+  memcpy(user_name, "rule_name_char", 15);
+  user_name_s = (str *) new_utf8(user_name, 15);
+
+  SAFE_MALLOC(char, raw_name, 12, return 1;);
+  memcpy(raw_name, "^[]:%x20\\n#", 12);
+  raw_name_s = (str *) new_utf8(raw_name, 12);
+
+  if (add_to_list(preds,
+                  new_ptd(user_name_s, raw_name_s,
+                          dfa_from_ranges_neg
+                          (6, (NUM[]) {
+                            '[', '[', ']', ']',
+                            ':', ':', ' ', ' ',
+                            0xa, 0xa, 0x23, 0x23
+                          })))) {
+    fleprintf0("Fail to add a predicate\n");
+    return 1;
+  }
+
+  SAFE_MALLOC(char, user_name, 11, return 1;);
+  memcpy(user_name, "notnewline", 11);
+  user_name_s = (str *) new_utf8(user_name, 11);
+
+  SAFE_MALLOC(char, raw_name, 6, return 1;);
+  memcpy(raw_name, "^\\n\\r", 6);
+  raw_name_s = (str *) new_utf8(raw_name, 6);
+
+  if (add_to_list(preds,
+                  new_ptd(user_name_s, raw_name_s,
+                          dfa_from_ranges_neg
+                          (2, (NUM[]) {
+                            '\n', '\n', '\r', '\r'
+                          })))) {
+    fleprintf0("Fail to add a predicate\n");
+    return 1;
+  }
+
+  SAFE_MALLOC(char, user_name, 17, return 1;);
+  memcpy(user_name, "class_range_char", 17);
+  user_name_s = (str *) new_utf8(user_name, 17);
+
+  SAFE_MALLOC(char, raw_name, 10, return 1;);
+  memcpy(raw_name, "^\\n\\r\\^\\\\", 10);
+  raw_name_s = (str *) new_utf8(raw_name, 10);
+
+  if (add_to_list(preds,
+                  new_ptd(user_name_s, raw_name_s,
+                          dfa_from_ranges_neg
+                          (4, (NUM[]) {
+                            '\n', '\n', '\r', '\r',
+                            '^', '^', '\\', '\\'
+                          })))) {
+    fleprintf0("Fail to add a predicate\n");
+    return 1;
+  }
+
+  SAFE_MALLOC(char, user_name, 18, return 1;);
+  memcpy(user_name, "class_single_char", 18);
+  user_name_s = (str *) new_utf8(user_name, 18);
+
+  SAFE_MALLOC(char, raw_name, 11, return 1;);
+  memcpy(raw_name, "^\\n\\r\\^\\\\-", 11);
+  raw_name_s = (str *) new_utf8(raw_name, 11);
+
+  if (add_to_list(preds,
+                  new_ptd(user_name_s, raw_name_s,
+                          dfa_from_ranges_neg
+                          (5, (NUM[]) {
+                            '\n', '\n', '\r', '\r',
+                            '^', '^', '\\', '\\',
+                            '-', '-'
+                          })))) {
+    fleprintf0("Fail to add a predicate\n");
+    return 1;
+  }
+
+  SAFE_MALLOC(char, user_name, 4, return 1;);
+  memcpy(user_name, "any", 4);
+  user_name_s = (str *) new_utf8(user_name, 4);
+
+  SAFE_MALLOC(char, raw_name, 2, return 2;);
+  memcpy(raw_name, " ", 2);
+  raw_name_s = (str *) new_utf8(raw_name, 2);
+
+  if (add_to_list(preds,
+                  new_ptd(user_name_s, raw_name_s,
+                          dfa_from_ranges
+                          (1, (NUM[]) {
+                            0, 0x10ffff
+                          })))) {
+    fleprintf0("Fail to add a predicate\n");
+    return 1;
+  }
+
+  SAFE_MALLOC(char, user_name, 19, return 1;);
+  memcpy(user_name, "double_string_char", 19);
+  user_name_s = (str *) new_utf8(user_name, 19);
+
+  SAFE_MALLOC(char, raw_name, 5, return 1;);
+  memcpy(raw_name, "^\"\\n", 5);
+  raw_name_s = (str *) new_utf8(raw_name, 5);
+
+  if (add_to_list(preds,
+                  new_ptd(user_name_s, raw_name_s,
+                          dfa_from_ranges_neg
+                          (2, (NUM[]) {
+                            0x22, 0x22, 0xa, 0xa
+                          })))) {
+    fleprintf0("Fail to add a predicate\n");
+    return 1;
+  }
+
+  SAFE_MALLOC(char, user_name, 19, return 1;);
+  memcpy(user_name, "single_string_char", 19);
+  user_name_s = (str *) new_utf8(user_name, 19);
+
+  SAFE_MALLOC(char, raw_name, 5, return 1;);
+  memcpy(raw_name, "^'\\n", 5);
+  raw_name_s = (str *) new_utf8(raw_name, 5);
+
+  if (add_to_list(preds,
+                  new_ptd(user_name_s, raw_name_s,
+                          dfa_from_ranges_neg
+                          (2, (NUM[]) {
+                            0x27, 0x27, 0xa, 0xa
+                          })))) {
+    fleprintf0("Fail to add a predicate\n");
+    return 1;
+  }
+
+  Grammar *g = new_grammar();
+
+  build_grammar(g, rules, names, preds);
+
+  print_grammar(g);
+
+  /* utf8 *string = new_utf8("# comment\n\n[P]: a-z\n\n--\n\nS : AB\n"
+   *                         "# another comment\nA: \"S\"\nB : [P]\n",
+   *                         65); */
+
+  char *file_name = "bnf.bnf";
+  char *buffer = NULL;
+
+  SAFE_MALLOC(char, buffer, 1<<9, return 1;);
 
-  char *file_name = "brainfuck.bnf";
-  char *buffer = MYALLOC(char, 512);
   NUM buffer_size = 0;
 
   if (read_entire_file(file_name, &buffer, &buffer_size)) {
@@ -21,17 +520,80 @@ main(U_ATTR int argc, U_ATTR char **argv)
     return 1;
   }
 
-  utf8 *s = new_utf8(buffer, buffer_size);
+  /* The size includes the trailing null byte, so we shall exclude
+     it. */
+  buffer_size--;
+
+  str *string = (str *) new_utf8(buffer, buffer_size);
+
+  printf("\nPrinting the input...\n%s\n", get_data(string));
+
+  printf("Input size = %ld", buffer_size);
+
+  TITO;
+
+  TIC;
 
-  Grammar *g = read_grammar_from_bnf((str *) s);
+  Environment *env = cnp_parse(g, string);
 
-  if (g) {
-    print_grammar(g);
-    destroy_grammar(g, 2);
-    destroy_str((str *)s, 1);
+  TOC;
 
-    return 0;
+  if (env) {
+    if (!(env_error_p(env))) {
+      BOOL result = bsr_lookup
+        (env_bsrp(env), 0, 0, str_length((str *) string));
+
+      if (result) {
+        printf("\nSuccessfully parsed the input!\n");
+      } else {
+        printf("\nThe input does not parse!\n");
+      }
+
+      printf("\nAll BSRs follow:\n\n");
+      if (argc == 1)
+        bsr_print(env_bsrp(env), env_grammar(env), 1);
+    } else {
+      printf("There are errors!\n");
+    }
+
+    destroy_env(env);
   }
 
-  return 1;
+  destroy_grammar(g, 1);
+
+  destroy_list(rules, 1);
+
+  destroy_str(string, 1);
+
+  return 0;
 }
+
+
+/* archives */
+
+/* char *file_name = "brainfuck.bnf";
+ * char *buffer = NULL;
+ *
+ * SAFE_MALLOC(char, buffer, 1<<9, return 1;);
+ *
+ * NUM buffer_size = 0;
+ *
+ * if (read_entire_file(file_name, &buffer, &buffer_size)) {
+ *   fleprintf("Cannot read file %s", file_name);
+ *   free(buffer);
+ *   return 1;
+ * }
+ *
+ * utf8 *s = new_utf8(buffer, buffer_size);
+ *
+ * Grammar *g = read_grammar_from_bnf((str *) s);
+ *
+ * if (g) {
+ *   print_grammar(g);
+ *   destroy_grammar(g, 2);
+ *   destroy_str((str *)s, 1);
+ *
+ *   return 0;
+ * }
+ *
+ * return 1; */
diff --git a/src/tuple.c b/src/tuple.c
index 92f2e6a..190aa19 100644
--- a/src/tuple.c
+++ b/src/tuple.c
@@ -185,7 +185,9 @@ LONSTRUCT(6);
 
 static void
 destroy_tuple1(tuple1 tup, NUM * UNUSED len) {
+  /* Tuples that were never initialized are skipped without freeing. */
   if (!(tup.initialized)) return;
+  /* Otherwise release the tuple's array; LEN is not consulted here. */
   free(tup.array);
 }
 
@@ -196,8 +198,11 @@ void
 destroy_tuple4(tuple4 tup, NUM *len)
 {
   if (!(tup.initialized)) return;
-  for (NUM i4 = 0; i4 < *len; i4++)
+  /* fleprintf("len = %ld\n", *len); */
+  for (NUM i4 = 0; i4 < *len; i4++) {
+    /* fleprintf("i4 = %ld\n", i4); */
     destroy_tuple3(*(tup.array+i4), len+1);
+  }
   free(tup.array);
 }
 
@@ -296,7 +301,7 @@ add_to_tuple_6_pt_2(tuple6 *tup, NUM *len, pair2 label)
   if (!(((tup->array+label.x)->array+label.y)->initialized)) {
     SAFE_CALLOC(tuple3,
                 ((tup->array+label.x)->array+label.y)->array,
-                *(len+1), goto cleanup;);
+                *(len+2), goto cleanup;);
     ((tup->array+label.x)->array+label.y)->initialized = 1;
   }
 
@@ -610,6 +615,7 @@ luple5_free_1(luple5 *lup, NUM label)
   if (!((tuple->array+label)->initialized)) return;
 
   destroy_tuple4(*(tuple->array+label), lup->lengths+1);
+  /* fleprintf("label = %ld\n", label); */
 
   (tuple->array+label)->initialized = 0;
 }
author	JSDurand <mmemmew@gmail.com>	2022-02-08 00:29:10 +0800
committer	JSDurand <mmemmew@gmail.com>	2022-02-08 12:33:05 +0800
commit	5426d9e2a6b820e34809d639838b26643df9ab17 (patch)
tree	111f2b478b671092e3f2e64a6171970b8a5cdf99
parent	aaa12504c6095b2cdfa213a3d4b269bbd5e7038a (diff)