predicates start working now

Now we have a working implementation of predicates. It now only remains to write the parser of grammars. Of course we shall generate this parser by this parser generator itself, because why not. ;-P
author: JSDurand <mmemmew@gmail.com> 2022-02-05 23:23:03 +0800
committer: JSDurand <mmemmew@gmail.com> 2022-02-05 23:23:03 +0800
commit: 1c8cbfd09ff9dd02f6d8d938c45e3f29a35f6f32 (patch)
tree: e3d6bb3b5246bcfa38662615ad59663d5b0377d1 /src
parent: 510b10b96b546fcc6c6b6be85050305ddd192a41 (diff)
7 files changed, 275 insertions, 32 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index 2b2d7d8..d097f1f 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -48,8 +48,8 @@ ht.c utf8.c str.c dfa.c tuple.c
 check_cnp_SOURCES = test/check_cnp.c crf.c cnp.c grammar.c list.c \
 util.c ht.c utf8.c str.c dfa.c bsr.c tuple.c
 
-check_pred_SOURCES = test/check_pred.c dfa.c list.c str.c utf8.c \
-grammar.c util.c ht.c
+check_pred_SOURCES = test/check_pred.c crf.c cnp.c grammar.c list.c \
+util.c ht.c utf8.c str.c dfa.c bsr.c tuple.c
 
 check_tuple_SOURCES = test/check_tuple.c tuple.c util.c
 
diff --git a/src/cnp.c b/src/cnp.c
index fe5a4dc..ce5a527 100644
--- a/src/cnp.c
+++ b/src/cnp.c
@@ -32,6 +32,7 @@ nt_add(Environment *env, NUM X, NUM j)
   for (NUM i = 0; i < rg_len(RG); i++) {
     BOOL result =
       test_select(env, *(env->string+j), X, rg_nth(RG, i));
+    /* fleprintf("i = %ld, after test select\n", i); */
 
     for (NUM k = 0; k < grammar_left_len(GI->g); k++) {
       ht_reset(env->result_ts+k, DELETE_KEY);
@@ -83,10 +84,15 @@ test_select(Environment *env, NUM b, NUM X, CCR_MOD(List *) tnts)
     goto success;
   }
 
-  /* TODO: predicates haven't been implemented yet */
-  /* for (NUM i = 0; i < ht_size(result_ps); i++) {
-   *
-   * } */
+  for (NUM i = 0; i < ht_size(result_ps); i++) {
+    PTD *ptdp = grammar_ptd(gi->g, **((PT **) ht_keys(result_ps+i)));
+
+    if (ptd_run(ptdp, b)) {
+      /* fleprintf("i = %ld\n", i); */
+      result = 1;
+      goto success;
+    }
+  }
 
   for (NUM i = 0; i < list_length(tnts); i++) {
 
@@ -106,9 +112,20 @@ test_select(Environment *env, NUM b, NUM X, CCR_MOD(List *) tnts)
 
   }
 
-  if (ht_find(gi->sts+X, &b) != NULL) result = 1;
+  if (ht_find(gi->sts+X, &b) != NULL) {
+    result = 1;
+    goto success;
+  }
 
-  /* TODO: predicates haven't been implemented yet */
+  for (NUM i = 0; i < ht_size(gi->sps+X); i++) {
+    PTD *ptdp = grammar_ptd(gi->g, **((PT **) ht_keys(gi->sps+X)));
+    /* fleprintf("i = %ld\n", i); */
+    if (ptd_run(ptdp, b)) {
+      /* fleprintf("i = %ld\n", i); */
+      result = 1;
+      goto success;
+    }
+  }
 
   goto success;
 
@@ -244,9 +261,13 @@ cnp_call(Environment *env, pair3 label, NUM i, NUM j)
 
   TNT *xtnt = (TNT *) list_nth(tnts, label.z - 1);
 
-  if (xtnt->type != NONTERMINAL) goto cleanup;
+  if (xtnt->type != NONTERMINAL) {
+    fleprintf("Wrong type: %d\n", xtnt->type);
+    goto cleanup;
+  }
 
   NUM X = (NUM) xtnt->data.nt;
+  /* fleprintf("X = %ld, j = %ld\n", X, j); */
 
   pair2 node = (pair2) { .x = X, .y = j };
   pair4 u = (pair4)
@@ -258,20 +279,29 @@ cnp_call(Environment *env, pair3 label, NUM i, NUM j)
     };
 
   if (!(crf_find_node(env->crfp, node))) {
-    if (crf_add_node(env->crfp, node)) goto cleanup;
+    if (crf_add_node(env->crfp, node)) {
+      fleprintf0("fail to add node to crf\n");
+      goto cleanup;
+    }
 
-    if (crf_add_edge(env->crfp, node, u)) goto cleanup;
+    if (crf_add_edge(env->crfp, node, u)) {
+      fleprintf0("fail to add edge to crf\n");
+      goto cleanup;
+    }
 
     /* errors will be stored in ENV, so we simply return */
     nt_add(env, X, j);
 
-    /* printf("X = %ld, j = %ld\n", X, j); */
+    /* fleprintf("X = %ld, j = %ld\n", X, j); */
 
     return;
   }
 
   if (!(crf_find_edge(env->crfp, node, u))) {
-    if (crf_add_edge(env->crfp, node, u)) goto cleanup;
+    if (crf_add_edge(env->crfp, node, u)) {
+      fleprintf0("fail to add edge to crf\n");
+      goto cleanup;
+    }
 
     call_internal_env = env;
     call_internal_label5 =
@@ -344,7 +374,17 @@ H_ATTR
 BOOL
 env_follow_p(CCR_MOD(Environment *) env, NUM X, NUM t)
 {
-  return ht_find(((env->gi->sts)+X), &t) != NULL;
+  if (ht_find(((env->gi->sts)+X), &t) != NULL) return 1;
+
+  for (NUM i = 0; i < ht_size(env->gi->sps+X); i++) {
+    PTD *ptdp = grammar_ptd(env->gi->g,
+                            **((PT **) ht_keys(env->gi->sps+X)));
+    if (ptd_run(ptdp, t)) return 1;
+  }
+
+  /* fleprintf("X = %ld, t = %ld\n", X, t); */
+
+  return 0;
 }
 
 void
@@ -688,7 +728,9 @@ cnp_parse(Grammar *g, str *string)
 
     /* while there are terminals */
     while (((TNT *) list_nth(right, current_prodecor->z-1))->type ==
-           TERMINAL) {
+           TERMINAL ||
+           ((TNT *) list_nth(right, current_prodecor->z-1))->type ==
+           PREDICATE) {
       /* fleprintf0("found terminal\n"); */
       /* add to BSR set */
       errorp =
diff --git a/src/dfa.c b/src/dfa.c
index fb67047..9da201d 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -623,11 +623,14 @@ run_dfa(const dfa * const restrict table, const NUM code)
     break;
   }
 
-  char *s = MYALLOC(char, 5);
+  /* negative codes have special purposes */
+  if (code < 0) return 0;
+
+  char s[5];
   str *strp = new_str(s, 5);
 
   if (encode(code, strp)) {
-    destroy_str(strp, 1);
+    destroy_str(strp, 0);
     fleprintf0("Fail to encode\n");
     return 0;
   }
@@ -636,33 +639,33 @@ run_dfa(const dfa * const restrict table, const NUM code)
 
   for (NUM i = 0; i < str_length(strp); i++) {
     unsigned char c = (unsigned char) *(s+i);
+
     int lookup_value = 0;
 
     for (int j = 0, len = 0; len <= c;) {
+
       len += *(*(table->data.table.table+current_row)+(j<<1)+1);
       lookup_value = *(*(table->data.table.table+current_row)
                        +(j++<<1));
     }
 
-    /* fleprintf("lookup_value = %d\n", lookup_value); */
-
     if (lookup_value > 0) {
       current_row = lookup_value;
     } else {
       switch (lookup_value) {
       case DFA_STATE_REJECT:
-        destroy_str(strp, 1);
+        destroy_str(strp, 0);
         return 0;
         break;
       case DFA_STATE_UNKNOWN:
         switch (table->type) {
         case DFA_TYPE_BOTH:
         case DFA_TYPE_POSITIVE:
-          destroy_str(strp, 1);
+          destroy_str(strp, 0);
           return 0;
           break;
         case DFA_TYPE_NEGATIVE:
-          destroy_str(strp, 1);
+          destroy_str(strp, 0);
           return 1;
           break;
         default:
@@ -670,26 +673,26 @@ run_dfa(const dfa * const restrict table, const NUM code)
              the table is of the special type.  But I am apparently
              paranoia. */
           fleprintf0("Special table in the wrong place\n");
-          destroy_str(strp, 1);
+          destroy_str(strp, 0);
           return 0;
           break;
         }
         break;
       case DFA_STATE_ACCEPT:
-        destroy_str(strp, 1);
+        destroy_str(strp, 0);
         return 1;
         break;
       default:
         fleprintf("Unknown state number for char %d: %d\n",
                   c, lookup_value);
-        destroy_str(strp, 1);
+        destroy_str(strp, 0);
         return 0;
         break;
       }
     }
   }
 
-  destroy_str(strp, 1);
+  destroy_str(strp, 0);
 
   return 0;
 }
diff --git a/src/grammar.c b/src/grammar.c
index 0b2be8d..1ea52b0 100644
--- a/src/grammar.c
+++ b/src/grammar.c
@@ -73,6 +73,12 @@ destroy_ptd(PTD *p, int flag)
   free(p);
 }
 
+BOOL
+ptd_run(PTD *p, NUM code)
+{
+  return run_dfa(p->dfap, code);
+}
+
 static void
 destroy_ptd_free_all(void *e)
 {
@@ -85,7 +91,7 @@ destroy_ptd_no_free(void *e)
   destroy_ptd((PTD *)e, 0);
 }
 
-/* TODO: Add some statistic counts to assist the hash tables.  For
+/* REVIEW: Add some statistic counts to assist the hash tables.  For
    example, store the total number of terminals as an integer; then
    the hash table can be hinted at initialization to contain that many
    elements, which can reduce the number of time a hash table needs to
@@ -411,9 +417,21 @@ void
 print_grammar(CC_MOD(Grammar *) g)
 {
   printf("Printing a grammar:\n");
+  printf("Non-terminals...\n");
   map_list_between(g->names, print_name, print_sep);
+
   printf("\n");
 
+  printf("Predicates...\n");
+
+  for (int i = 0; i < list_length(g->predicates); i++) {
+    PTD *ptdp = (PTD *) list_nth(g->predicates, i);
+
+    printf("%s: %s\n",
+           get_data(ptdp->user_name), get_data(ptdp->raw_name));
+  }
+
+  printf("Rules...\n");
   for (int i = 0; i < list_length(g->rule_grps); i++) {
     print_rule_group(list_nth(g->rule_grps, i));
     printf("\n");
@@ -539,6 +557,17 @@ grammar_rule(CCR_MOD(Grammar *) g, NT nt)
   return (Rule_group *) list_nth(g->rule_grps, nt);
 }
 
+PTD *
+grammar_ptd(CCR_MOD(Grammar *) g, PT pt)
+{
+  if ((NUM) pt >= list_length(g->predicates)) {
+    fleprintf("Invalid predicate terminal: %lu\n", pt);
+    return NULL;
+  }
+
+  return (PTD *) list_nth(g->predicates, pt);
+}
+
 NUM
 grammar_left_len(CCR_MOD(Grammar *)g)
 {
diff --git a/src/grammar.h b/src/grammar.h
index e18ad0e..89a66ba 100644
--- a/src/grammar.h
+++ b/src/grammar.h
@@ -78,6 +78,8 @@ PTD *new_ptd(str *user_name, str *raw_name, dfa *dfap);
 /* FLAG is used to destroy the strings contained within */
 void destroy_ptd(PTD *p, int flag);
 
+BOOL ptd_run(PTD *p, NUM code);
+
 /* If a TNT of type TERMINAL has value END_OF_INPUT, then it means,
    surprisingly, the end of input. */
 enum { END_OF_INPUT = -1 };
@@ -172,6 +174,9 @@ List *grammar_names(CCR_MOD(Grammar *)g);
 /* look up a rule */
 Rule_group *grammar_rule(CCR_MOD(Grammar *) g, NT nt);
 
+/* lookup a predicate */
+PTD *grammar_ptd(CCR_MOD(Grammar *) g, PT pt);
+
 /* NTS should be already allocated to be an array of BOOLs.  The size
    of this array should be the same as the length of the array of
    non-terminals of G.  If any of the above two conditions is not
diff --git a/src/test/check_cnp.c b/src/test/check_cnp.c
index a21d74e..f01a68c 100644
--- a/src/test/check_cnp.c
+++ b/src/test/check_cnp.c
@@ -9,8 +9,9 @@ print_label6(pair6 label)
          label.x, label.y, label.z, label.u, label.v, label.w);
 }
 
-#define TIC   struct timespec tic, toc;                         \
-  do { clock_gettime(CLOCK_MONOTONIC_RAW, &tic); } while (0)
+#define TITO struct timespec tic, toc
+
+#define TIC do { clock_gettime(CLOCK_MONOTONIC_RAW, &tic); } while (0)
 
 #define TOC do {                                        \
     clock_gettime(CLOCK_MONOTONIC_RAW, &toc);           \
@@ -53,7 +54,7 @@ print_label6(pair6 label)
     add_to_list(rules, rule);                                   \
   } while (0)
 
-#define GRAMMAR 2
+#define GRAMMAR 4
 
 int
 main(int UNUSED argc, char ** UNUSED argv)
@@ -197,6 +198,8 @@ main(int UNUSED argc, char ** UNUSED argv)
 
   printf("\nPrinting the input...\n%s\n", get_data((str *) string));
 
+  TITO;
+
   TIC;
 
   Environment *env = cnp_parse(g, (str *) string);
diff --git a/src/test/check_pred.c b/src/test/check_pred.c
index b0b95b3..2767c45 100644
--- a/src/test/check_pred.c
+++ b/src/test/check_pred.c
@@ -1,7 +1,168 @@
-#include "../grammar.h"
+#include "time.h"
+#include "../cnp.h"
+
+#define TITO struct timespec tic, toc
+
+#define TIC do { clock_gettime(CLOCK_MONOTONIC_RAW, &tic); } while (0)
+
+#define TOC do {                                        \
+    clock_gettime(CLOCK_MONOTONIC_RAW, &toc);           \
+    printf("\nTotal time = %f seconds\n",               \
+           (toc.tv_nsec - tic.tv_nsec) / 1000000000.0 + \
+           toc.tv_sec - tic.tv_sec);                    \
+  } while (0)
+
+#define READ_INTO_CPA(N, U, L, I, VA, VI, CP) do {      \
+    U = new_utf8(N, L);                                 \
+    I = get_info((str *)U, 0);                          \
+    VA = MYALLOC(NUM, 2);                               \
+    VI = 0;                                             \
+    for (NUM index = 0;                                 \
+         I.value >= 0 && index < str_length((str *) U); \
+         index += I.step, VI++) {                       \
+      I = get_info((str *)U, index);                    \
+      *(VA+VI) = I.value;                               \
+    }                                                   \
+    CP = MYALLOC(cpa, 1);                               \
+    CP->array = VA;                                     \
+    CP->size = VI;                                      \
+    add_to_list(names, CP);                             \
+    destroy_str((str *)U, 1);                           \
+  } while (0)
+
+#define READ_TNT_STRING(LEFT, FORMAT, LEN, ...) do {            \
+    tnt_string = new_tnt_string(FORMAT, LEN, __VA_ARGS__);      \
+    if (!tnt_string) {                                          \
+      fleprintf("left = %d, f = %s, l = %d, "                   \
+                "cannot create tnt string\n",                   \
+                LEFT, FORMAT, LEN);                             \
+      map_list(rules, destroy_rule_and_free_all);               \
+      destroy_list(rules, 0);                                   \
+      map_list(names, destroy_cpa_and_free_all);                \
+      destroy_list(names, 0);                                   \
+      return 1;                                                 \
+    }                                                           \
+    rule = new_rule(LEFT, tnt_string);                          \
+    add_to_list(rules, rule);                                   \
+  } while (0)
 
 int
 main(int UNUSED argc, char ** UNUSED argv)
 {
-  return 77;
+  /* have a grammar for testing */
+  List *tnt_string = NULL;
+  Rule *rule = NULL;
+  List *rules = new_list();
+  List *names = new_list();
+  List *preds = new_list();
+
+  char *user_name = NULL;
+  char *raw_name = NULL;
+
+  str *user_name_s = NULL, *raw_name_s = NULL;
+  NUM *pred_bytes = NULL, pred_bytes_len = 0;
+
+  char *name = MYALLOC(char, 2);
+  utf8* uname = NULL;
+
+  str_info info = EMPTY_STR_INFO;
+
+  NUM *varray = NULL;
+  NUM vindex = 0;
+  cpa *cpap = NULL;
+
+  *(name+1) = 0;
+  *name = 'S';
+
+  READ_INTO_CPA(name, uname, 1, info, varray, vindex, cpap);
+
+  READ_TNT_STRING(0, "pn", 2, (PT) 0, (NT) 0);
+  rule = new_rule(0, new_list());
+  add_to_list(rules, rule);
+
+  SAFE_MALLOC(char, user_name, 6, return 1;);
+  *(user_name) = 'a';
+  *(user_name+1) = 's';
+  *(user_name+2) = 'c';
+  *(user_name+3) = 'i';
+  *(user_name+4) = 'i';
+  *(user_name+5) = 0;
+
+  user_name_s = new_utf8(user_name, 5);
+
+  SAFE_MALLOC(char, raw_name, 7, return 1;);
+
+  *(raw_name+0) = 'a';
+  *(raw_name+1) = '-';
+  *(raw_name+2) = 'z';
+  *(raw_name+3) = 'A';
+  *(raw_name+4) = '-';
+  *(raw_name+5) = 'Z';
+  *(raw_name+6) = 0;
+
+  raw_name_s = new_utf8(raw_name, 6);
+
+  SAFE_MALLOC(NUM, pred_bytes, 52, return 1;);
+
+  for (int i = 0; i < 26; i++)
+    *(pred_bytes+pred_bytes_len++) = 'a' + i;
+
+  for (int i = 0; i < 26; i++)
+    *(pred_bytes+pred_bytes_len++) = 'A' + i;
+
+  if (add_to_list(preds, new_ptd(user_name_s, raw_name_s,
+                                 dfa_from_bytes
+                                 (pred_bytes_len, pred_bytes)))) {
+    fleprintf0("Fail to add a predicate\n");
+    return 1;
+  }
+
+  free(pred_bytes);
+
+  Grammar *g = new_grammar();
+
+  build_grammar(g, rules, names, preds);
+
+  print_grammar(g);
+
+  utf8 *string = new_utf8("awdfsdjbfsjdhxy", 15);
+
+  printf("\nPrinting the input...\n%s\n", get_data((str *) string));
+
+  TITO;
+
+  TIC;
+
+  Environment *env = cnp_parse(g, (str *) string);
+
+  TOC;
+
+  if (env) {
+    if (!(env_error_p(env))) {
+      BOOL result = bsr_lookup
+        (env_bsrp(env), 0, 0, str_length((str *) string));
+
+      if (result) {
+        printf("\nSuccessfully parsed the input!\n");
+      } else {
+        printf("\nThe input does not parse!\n");
+      }
+
+      printf("\nAll BSRs follow:\n\n");
+      if (argc == 1)
+        bsr_print(env_bsrp(env), env_grammar(env), 1);
+    } else {
+      printf("There are errors!\n");
+    }
+
+    destroy_env(env);
+  }
+
+  destroy_grammar(g, 1);
+
+  destroy_list(rules, 1);
+
+  free(string);
+
+  return 0;
 }
author	JSDurand <mmemmew@gmail.com>	2022-02-05 23:23:03 +0800
committer	JSDurand <mmemmew@gmail.com>	2022-02-05 23:23:03 +0800
commit	1c8cbfd09ff9dd02f6d8d938c45e3f29a35f6f32 (patch)
tree	e3d6bb3b5246bcfa38662615ad59663d5b0377d1 /src
parent	510b10b96b546fcc6c6b6be85050305ddd192a41 (diff)