summaryrefslogtreecommitdiff
path: root/src/grammar.c
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2022-01-04 11:51:58 +0800
committerJSDurand <mmemmew@gmail.com>2022-01-04 11:51:58 +0800
commit55dc897da6e81f2a26cfc7e66ac942824773498b (patch)
treefce0d7d57832907c991d551833bf5eecde947dd2 /src/grammar.c
parent53b8b6ffab5a968db75e9babddf4e2dbb2c688a3 (diff)
temporary commit
Now we can read grammars from a file. But we need to check if it works for reading strings still.
Diffstat (limited to 'src/grammar.c')
-rw-r--r--src/grammar.c280
1 files changed, 253 insertions, 27 deletions
diff --git a/src/grammar.c b/src/grammar.c
index fd38ac8..118a10e 100644
--- a/src/grammar.c
+++ b/src/grammar.c
@@ -1,21 +1,85 @@
#include "grammar.h"
-struct TNT_s {
- int type; /* 0 => T and NT otherwise */
- union { T t; NT nt; } data;
-};
-
struct Rule_s {
NT left;
- List *right;
+ List *right; /* a list of TNT's */
};
+typedef struct {
+ /* this group holds rules for this non-terminal */
+ NT left;
+ /* a list of lists of TNT's */
+ List *rights;
+} Rule_group;
+
+/* rule_grps and names should have the same length */
struct Grammar_s {
- List *rules;
- /* a list of names of non-terminals */
+ /* a list of Rule_group's */
+ List *rule_grps;
+ /* a list of names of non-terminals
+
+ Each element is an array of code-points.
+
+ To print them, first encode them into normal strings. */
List *names;
};
+static void
+destroy_rule_group(void *rule_grp, int flag)
+{
+ Rule_group *rg = (Rule_group *) rule_grp;
+
+ switch (flag) {
+ case 1:
+ map_list(rg->rights, destroy_list_free_all);
+ break;
+ case 2:
+ map_list(rg->rights, destroy_list_free_first);
+ break;
+ default:
+ map_list(rg->rights, destroy_list_no_free);
+ break;
+ }
+
+ destroy_list(rg->rights, 0);
+
+ free(rule_grp);
+}
+
+UNUSED
+static void
+destroy_rule_group_no_free(void *rule_grp)
+{
+ destroy_rule_group(rule_grp, 0);
+}
+
+UNUSED
+static void
+destroy_rule_group_free_all(void *rule_grp)
+{
+ destroy_rule_group(rule_grp, 1);
+}
+
+UNUSED
+static void
+destroy_rule_group_free_first(void *rule_grp)
+{
+ destroy_rule_group(rule_grp, 2);
+}
+
+static void
+print_rule_group(void *rule_grp)
+{
+ Rule_group *rg = (Rule_group *) rule_grp;
+
+ for (int i = 0; i < list_length(rg->rights); i++) {
+ List *str = (List *) list_nth(rg->rights, i);
+ printf("Rule %u => ", rg->left);
+ map_list(str, print_tnt);
+ printf("\n");
+ }
+}
+
TNT *
new_tnt(int type, ...)
{
@@ -53,11 +117,104 @@ new_grammar()
return g;
}
-void
+/* We classify the rules into different rule groups. */
+unsigned char
build_grammar(Grammar *g, List *rules, List *names)
{
- g->rules = rules;
g->names = names;
+
+ NUM len = list_length(names);
+ NUM rule_len = list_length(rules);
+
+ /* If a rule corresponds to a non-terminal not on the list, signal
+ an error. */
+ for (int i = 0; i < rule_len; i++) {
+ if (((Rule *) list_nth(rules, i))->left >= len) {
+ fleprintf("%s:%d:%d, Rule contains weird non-terminal\n", i);
+ return 1;
+ }
+ }
+
+ List *rule_grps = new_list();
+
+ if (rule_grps == NULL) {
+ fleprintf0("%s:%d, Cannot create list\n");
+ return 1;
+ }
+
+ if (list_assure_size(rule_grps, len)) {
+ fleprintf0("%s:%d, Cannot assure size of rule groups\n");
+ return 1;
+ }
+
+ List *temp_ls = NULL;
+ void *temp_pointer = NULL;
+
+ /* Initialize the list of rule groups */
+ for (int i = 0; i < len; i++) {
+ if ((temp_ls = new_list()) == NULL) {
+ fleprintf("%s:%d:%d, Cannot create list\n", i);
+
+ map_list(rule_grps, destroy_rule_group_no_free);
+ destroy_list(rule_grps, 0);
+ return 1;
+ }
+
+ temp_pointer = MYALLOC(Rule_group, 1);
+
+ if (temp_pointer == NULL) {
+ fleprintf0("%s:%d, Cannot malloc\n");
+ map_list(rule_grps, destroy_rule_group_no_free);
+ destroy_list(rule_grps, 0);
+ destroy_list(temp_ls, 0);
+ return 1;
+ }
+
+ ((Rule_group *) temp_pointer)->left = i;
+ ((Rule_group *) temp_pointer)->rights = temp_ls;
+
+ int result = add_to_list(rule_grps, temp_pointer);
+
+ if (result) {
+ fleprintf("%s:%d:%d, Cannot add to list\n", i);
+
+ map_list(rule_grps, destroy_rule_group_no_free);
+ destroy_list(rule_grps, 0);
+ destroy_list(temp_ls, 0);
+ free(temp_pointer);
+ return 1;
+ }
+ }
+
+ /* Now fill in rule groups */
+
+ for (int i = 0; i < rule_len; i++) {
+ Rule *r = (Rule *) list_nth(rules, i);
+
+ int result = add_to_list
+ (((Rule_group *) list_nth(rule_grps, r->left))->rights,
+ r->right);
+
+ if (result) {
+ fleprintf("%s:%d:%d, Cannot add to list\n", i);
+
+ for (int j = 0; j < list_length(rule_grps); j++) {
+ Rule_group *rg = (Rule_group *) list_nth(rule_grps, j);
+
+ map_list(rg->rights, destroy_list_free_all);
+ destroy_list(rg->rights, 0);
+ free(rg);
+ }
+
+ destroy_list(rule_grps, 0);
+
+ return 1;
+ }
+ }
+
+ g->rule_grps = rule_grps;
+
+ return 0;
}
void
@@ -82,12 +239,61 @@ print_rule(void *r)
printf("\n");
}
+NUM
+find_in_cpa_list(NUM *string, NUM size, List *list)
+{
+ NUM len = list_length(list), index = 0;
+
+ unsigned char foundp = 0;
+
+ for (; !foundp && index < len;) {
+ cpa *cpap = list_nth(list, index);
+
+ if (cpap->size != (UNUM) size) {
+ index++;
+ continue;
+ }
+
+ foundp = 1;
+
+ for (NUM j = 0; j < size; j++) {
+ if (*(string+j) != *(cpap->array+j)) {
+ foundp = 0;
+ break;
+ }
+ }
+
+ if (!foundp) index++;
+ }
+
+ return (foundp) ? index : -1;
+}
+
/* a local function pointer */
void
print_name(void *element)
{
- char *str = (char *) element;
- printf("%s", str);
+ cpa *array = (cpa *) element;
+ char *carray = MYALLOC(char, 5);
+ str *string = new_str(carray, 5);
+
+ for (UNUM i = 0; i < array->size; i++) {
+ int result = encode(*(array->array+i), string);
+
+ if (result) {
+ fleprintf("%s:%d:%llu, fail to encode\n", i);
+ str_set_length(string, 5);
+ continue;
+ }
+
+ carray = get_data(string);
+ *(carray+str_length(string)) = 0;
+
+ printf("%s", carray);
+ str_set_length(string, 5);
+ }
+
+ destroy_str(string, 1);
}
/* a local function pointer */
@@ -103,7 +309,12 @@ print_grammar(Grammar *g)
printf("Printing a grammar:\n");
map_list_between(g->names, print_name, print_sep);
printf("\n");
- map_list(g->rules, print_rule);
+
+ for (int i = 0; i < list_length(g->rule_grps); i++) {
+ print_rule_group(list_nth(g->rule_grps, i));
+ printf("\n");
+ }
+
printf("\n");
}
@@ -142,6 +353,12 @@ new_tnt_string(char *format, int format_len, ...)
return result;
}
+TNT *
+new_tnt_pointer(size_t size)
+{
+ return MYALLOC(TNT, size);
+}
+
void
destroy_rule(void *rule, int flag)
{
@@ -169,22 +386,31 @@ destroy_rule_no_free(void *rule)
}
void
+destroy_cpa_and_free_all(void *element)
+{
+ cpa *c = (cpa *) element;
+ free(c->array);
+ free(c);
+}
+
+void
destroy_grammar(void *grammar, int flag)
{
Grammar *g = (Grammar *) grammar;
- switch (flag) {
- case 0:
- map_list(g->rules, destroy_rule_no_free);
- break;
- case 1:
- map_list(g->rules, destroy_rule_and_free_all);
- break;
- default:
- map_list(g->rules, destroy_rule_and_free_first);
- break;
- }
-
- destroy_list(g->rules, 0);
- destroy_list(g->names, flag);
+ if (flag == 1)
+ map_list(g->rule_grps, destroy_rule_group_free_all);
+ else if (flag == 2)
+ map_list(g->rule_grps, destroy_rule_group_free_first);
+
+ destroy_list(g->rule_grps, 0);
+
+ /* CPA structure has no concept of freeing first. So if flag is
+ non-zero, we just free everything, and hope no disaster
+ ensues. */
+ if (flag) map_list(g->names, destroy_cpa_and_free_all);
+
+ destroy_list(g->names, 0);
+
+ free(grammar);
}