temporary commit

Now we can read grammars from a file. But we need to check if it works for reading strings still.
author: JSDurand <mmemmew@gmail.com> 2022-01-04 11:51:58 +0800
committer: JSDurand <mmemmew@gmail.com> 2022-01-04 11:51:58 +0800
commit: 55dc897da6e81f2a26cfc7e66ac942824773498b (patch)
tree: fce0d7d57832907c991d551833bf5eecde947dd2 /src/grammar.c
parent: 53b8b6ffab5a968db75e9babddf4e2dbb2c688a3 (diff)
1 files changed, 253 insertions, 27 deletions
diff --git a/src/grammar.c b/src/grammar.c
index fd38ac8..118a10e 100644
--- a/src/grammar.c
+++ b/src/grammar.c
@@ -1,21 +1,85 @@
 #include "grammar.h"
 
-struct TNT_s {
-  int type;                     /* 0 => T and NT otherwise */
-  union { T t; NT nt; } data;
-};
-
 struct Rule_s {
   NT left;
-  List *right;
+  List *right;                  /* a list of TNT's */
 };
 
+typedef struct {
+  /* this group holds rules for this non-terminal */
+  NT left;
+  /* a list of lists of TNT's */
+  List *rights;
+} Rule_group;
+
+/* rule_grps and names should have the same length */
 struct Grammar_s {
-  List *rules;
-  /* a list of names of non-terminals */
+  /* a list of Rule_group's */
+  List *rule_grps;
+  /* a list of names of non-terminals
+
+     Each element is an array of code-points.
+
+     To print them, first encode them into normal strings. */
   List *names;
 };
 
+static void
+destroy_rule_group(void *rule_grp, int flag)
+{
+  Rule_group *rg = (Rule_group *) rule_grp;
+
+  switch (flag) {
+  case 1:
+    map_list(rg->rights, destroy_list_free_all);
+    break;
+  case 2:
+    map_list(rg->rights, destroy_list_free_first);
+    break;
+  default:
+    map_list(rg->rights, destroy_list_no_free);
+    break;
+  }
+
+  destroy_list(rg->rights, 0);
+
+  free(rule_grp);
+}
+
+UNUSED
+static void
+destroy_rule_group_no_free(void *rule_grp)
+{
+  destroy_rule_group(rule_grp, 0);
+}
+
+UNUSED
+static void
+destroy_rule_group_free_all(void *rule_grp)
+{
+  destroy_rule_group(rule_grp, 1);
+}
+
+UNUSED
+static void
+destroy_rule_group_free_first(void *rule_grp)
+{
+  destroy_rule_group(rule_grp, 2);
+}
+
+static void
+print_rule_group(void *rule_grp)
+{
+  Rule_group *rg = (Rule_group *) rule_grp;
+
+  for (int i = 0; i < list_length(rg->rights); i++) {
+    List *str = (List *) list_nth(rg->rights, i);
+    printf("Rule %u => ", rg->left);
+    map_list(str, print_tnt);
+    printf("\n");
+  }
+}
+
 TNT *
 new_tnt(int type, ...)
 {
@@ -53,11 +117,104 @@ new_grammar()
   return g;
 }
 
-void
+/* We classify the rules into different rule groups. */
+unsigned char
 build_grammar(Grammar *g, List *rules, List *names)
 {
-  g->rules = rules;
   g->names = names;
+
+  NUM len = list_length(names);
+  NUM rule_len = list_length(rules);
+
+  /* If a rule corresponds to a non-terminal not on the list, signal
+     an error. */
+  for (int i = 0; i < rule_len; i++) {
+    if (((Rule *) list_nth(rules, i))->left >= len) {
+      fleprintf("%s:%d:%d, Rule contains weird non-terminal\n", i);
+      return 1;
+    }
+  }
+
+  List *rule_grps = new_list();
+
+  if (rule_grps == NULL) {
+    fleprintf0("%s:%d, Cannot create list\n");
+    return 1;
+  }
+
+  if (list_assure_size(rule_grps, len)) {
+    fleprintf0("%s:%d, Cannot assure size of rule groups\n");
+    return 1;
+  }
+
+  List *temp_ls = NULL;
+  void *temp_pointer = NULL;
+
+  /* Initialize the list of rule groups */
+  for (int i = 0; i < len; i++) {
+    if ((temp_ls = new_list()) == NULL) {
+      fleprintf("%s:%d:%d, Cannot create list\n", i);
+
+      map_list(rule_grps, destroy_rule_group_no_free);
+      destroy_list(rule_grps, 0);
+      return 1;
+    }
+
+    temp_pointer = MYALLOC(Rule_group, 1);
+
+    if (temp_pointer == NULL) {
+      fleprintf0("%s:%d, Cannot malloc\n");
+      map_list(rule_grps, destroy_rule_group_no_free);
+      destroy_list(rule_grps, 0);
+      destroy_list(temp_ls, 0);
+      return 1;
+    }
+
+    ((Rule_group *) temp_pointer)->left = i;
+    ((Rule_group *) temp_pointer)->rights = temp_ls;
+
+    int result = add_to_list(rule_grps, temp_pointer);
+
+    if (result) {
+      fleprintf("%s:%d:%d, Cannot add to list\n", i);
+
+      map_list(rule_grps, destroy_rule_group_no_free);
+      destroy_list(rule_grps, 0);
+      destroy_list(temp_ls, 0);
+      free(temp_pointer);
+      return 1;
+    }
+  }
+
+  /* Now fill in rule groups */
+
+  for (int i = 0; i < rule_len; i++) {
+    Rule *r = (Rule *) list_nth(rules, i);
+
+    int result = add_to_list
+      (((Rule_group *) list_nth(rule_grps, r->left))->rights,
+       r->right);
+
+    if (result) {
+      fleprintf("%s:%d:%d, Cannot add to list\n", i);
+
+      for (int j = 0; j < list_length(rule_grps); j++) {
+        Rule_group *rg = (Rule_group *) list_nth(rule_grps, j);
+
+        map_list(rg->rights, destroy_list_free_all);
+        destroy_list(rg->rights, 0);
+        free(rg);
+      }
+
+      destroy_list(rule_grps, 0);
+
+      return 1;
+    }
+  }
+
+  g->rule_grps = rule_grps;
+
+  return 0;
 }
 
 void
@@ -82,12 +239,61 @@ print_rule(void *r)
   printf("\n");
 }
 
+NUM
+find_in_cpa_list(NUM *string, NUM size, List *list)
+{
+  NUM len = list_length(list), index = 0;
+
+  unsigned char foundp = 0;
+
+  for (; !foundp && index < len;) {
+    cpa *cpap = list_nth(list, index);
+
+    if (cpap->size != (UNUM) size) {
+      index++;
+      continue;
+    }
+
+    foundp = 1;
+
+    for (NUM j = 0; j < size; j++) {
+      if (*(string+j) != *(cpap->array+j)) {
+        foundp = 0;
+        break;
+      }
+    }
+
+    if (!foundp) index++;
+  }
+
+  return (foundp) ? index : -1;
+}
+
 /* a local function pointer */
 void
 print_name(void *element)
 {
-  char *str = (char *) element;
-  printf("%s", str);
+  cpa *array = (cpa *) element;
+  char *carray = MYALLOC(char, 5);
+  str *string = new_str(carray, 5);
+
+  for (UNUM i = 0; i < array->size; i++) {
+    int result = encode(*(array->array+i), string);
+
+    if (result) {
+      fleprintf("%s:%d:%llu, fail to encode\n", i);
+      str_set_length(string, 5);
+      continue;
+    }
+
+    carray = get_data(string);
+    *(carray+str_length(string)) = 0;
+
+    printf("%s", carray);
+    str_set_length(string, 5);
+  }
+
+  destroy_str(string, 1);
 }
 
 /* a local function pointer */
@@ -103,7 +309,12 @@ print_grammar(Grammar *g)
   printf("Printing a grammar:\n");
   map_list_between(g->names, print_name, print_sep);
   printf("\n");
-  map_list(g->rules, print_rule);
+
+  for (int i = 0; i < list_length(g->rule_grps); i++) {
+    print_rule_group(list_nth(g->rule_grps, i));
+    printf("\n");
+  }
+
   printf("\n");
 }
 
@@ -142,6 +353,12 @@ new_tnt_string(char *format, int format_len, ...)
   return result;
 }
 
+TNT *
+new_tnt_pointer(size_t size)
+{
+  return MYALLOC(TNT, size);
+}
+
 void
 destroy_rule(void *rule, int flag)
 {
@@ -169,22 +386,31 @@ destroy_rule_no_free(void *rule)
 }
 
 void
+destroy_cpa_and_free_all(void *element)
+{
+  cpa *c = (cpa *) element;
+  free(c->array);
+  free(c);
+}
+
+void
 destroy_grammar(void *grammar, int flag)
 {
   Grammar *g = (Grammar *) grammar;
 
-  switch (flag) {
-  case 0:
-    map_list(g->rules, destroy_rule_no_free);
-    break;
-  case 1:
-    map_list(g->rules, destroy_rule_and_free_all);
-    break;
-  default:
-    map_list(g->rules, destroy_rule_and_free_first);
-    break;    
-  }
-
-  destroy_list(g->rules, 0);
-  destroy_list(g->names, flag);
+  if (flag == 1)
+    map_list(g->rule_grps, destroy_rule_group_free_all);
+  else if (flag == 2)
+    map_list(g->rule_grps, destroy_rule_group_free_first);
+  
+  destroy_list(g->rule_grps, 0);
+  
+  /* CPA structure has no concept of freeing first.  So if flag is
+     non-zero, we just free everything, and hope no disaster
+     ensues. */
+  if (flag) map_list(g->names, destroy_cpa_and_free_all);
+
+  destroy_list(g->names, 0);
+
+  free(grammar);
 }
author	JSDurand <mmemmew@gmail.com>	2022-01-04 11:51:58 +0800
committer	JSDurand <mmemmew@gmail.com>	2022-01-04 11:51:58 +0800
commit	55dc897da6e81f2a26cfc7e66ac942824773498b (patch)
tree	fce0d7d57832907c991d551833bf5eecde947dd2 /src/grammar.c
parent	53b8b6ffab5a968db75e9babddf4e2dbb2c688a3 (diff)