summaryrefslogtreecommitdiff
path: root/src/grammar.c
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2022-02-08 00:29:10 +0800
committerJSDurand <mmemmew@gmail.com>2022-02-08 12:33:05 +0800
commit5426d9e2a6b820e34809d639838b26643df9ab17 (patch)
tree111f2b478b671092e3f2e64a6171970b8a5cdf99 /src/grammar.c
parentaaa12504c6095b2cdfa213a3d4b269bbd5e7038a (diff)
fix errorsHEADmaster
There are multiple subtle errors in the previous version, both in the codes and in the description of the BNF format. This version should fix some problems now. This version can successfully parse the grammar of its own grammar format, which is quite nice. See test/check_reader.c for parsing this format.
Diffstat (limited to 'src/grammar.c')
-rw-r--r--src/grammar.c150
1 files changed, 131 insertions, 19 deletions
diff --git a/src/grammar.c b/src/grammar.c
index 1ea52b0..55a1e07 100644
--- a/src/grammar.c
+++ b/src/grammar.c
@@ -63,6 +63,13 @@ new_ptd(str *user_name, str *raw_name, dfa *dfap)
return result;
}
+P_ATTR
+str *
+ptd_user_name(PTD *p)
+{
+ return p->user_name;
+}
+
void
destroy_ptd(PTD *p, int flag)
{
@@ -91,14 +98,8 @@ destroy_ptd_no_free(void *e)
destroy_ptd((PTD *)e, 0);
}
-/* REVIEW: Add some statistic counts to assist the hash tables. For
- example, store the total number of terminals as an integer; then
- the hash table can be hinted at initialization to contain that many
- elements, which can reduce the number of time a hash table needs to
- expand and re-insert all its keys, which then helps the
- performance. But this is not the top priority thing to do, and
- might be postponed until a proto-type of the parser generator is
- usable already. */
+/* FIXME: Add a list of terminals, so that we don't need to use hash
+ tables. */
/* rule_grps and names should have the same length */
struct Grammar_s {
@@ -160,20 +161,85 @@ destroy_rule_group_free_first(void *rule_grp)
static void
print_sep()
{
- printf(", ");
+ printf(",\n");
}
static void
-print_rule_group(void *rule_grp)
+print_rule_group(Grammar *g, void *rule_grp)
{
Rule_group *rg = (Rule_group *) rule_grp;
+ char s[5];
+ str *strp = (str *) new_utf8(s, 5);
+
for (int i = 0; i < list_length(rg->rights); i++) {
- List *str = (List *) list_nth(rg->rights, i);
- printf("Rule %lu => ", rg->left);
- map_list_between(str, print_tnt, print_sep);
+ /* fleprintf("i = %d\n", i); */
+ List *strls = (List *) list_nth(rg->rights, i);
+ /* printf("Rule "); */
+ for (int j = 0;
+ (UNUM) j < ((cpa *)list_nth(g->names, rg->left))->size;
+ j++) {
+
+ if (encode
+ (*(((cpa *)list_nth(g->names, rg->left))->array+j),
+ strp)) {
+ destroy_str(strp, 0);
+ fleprintf0("Fail to encode!\n");
+ return;
+ }
+
+ *(s+str_length(strp)) = 0;
+ printf("%s", s);
+ str_set_length(strp, 5);
+ }
+ printf(" := ");
+
+ for (int j = 0; j < list_length(strls); j++) {
+ /* fleprintf("j = %d, len = %ld\n", j, list_length(strls)); */
+ TNT *tntp = (TNT *) list_nth(strls, j);
+ PTD *ptdp = NULL;
+ switch (tntp->type) {
+ case TERMINAL:
+ printf("'");
+ if (encode
+ (tntp->data.t,
+ strp)) {
+ destroy_str(strp, 0);
+ fleprintf0("Fail to encode!\n");
+ return;
+ }
+ printf("%s'", s);
+ str_set_length(strp, 5);
+ break;
+ case NONTERMINAL:
+ for (int k = 0;
+ (UNUM) k < ((cpa *)list_nth(g->names,
+ tntp->data.nt))->size;
+ k++) {
+ if (encode
+ (*(((cpa *)list_nth(g->names, tntp->data.nt))->array+k),
+ strp)) {
+ destroy_str(strp, 0);
+ fleprintf0("Fail to encode!\n");
+ return;
+ }
+ printf("%s", s);
+ str_set_length(strp, 5);
+ }
+ break;
+ default:
+ /* predicate */
+ ptdp = (PTD *) list_nth(g->predicates, tntp->data.pt);
+ /* fleprintf("pt = %lu\n", tntp->data.pt); */
+ printf("[%s]", get_data(ptdp->user_name));
+ break;
+ }
+ if (j+1<list_length(strls)) printf(", ");
+ }
printf("\n");
}
+
+ destroy_str((str*) strp, 0);
}
TNT *
@@ -386,7 +452,7 @@ find_in_cpa_list(CCR_MOD(NUM *) string, NUM size,
}
void
-print_name(void *element)
+eprint_name(void *element)
{
cpa *array = (cpa *) element;
char *carray = MYALLOC(char, 5);
@@ -404,6 +470,32 @@ print_name(void *element)
carray = get_data(string);
*(carray+str_length(string)) = 0;
+ eprintf("%s", carray);
+ str_set_length(string, 5);
+ }
+
+ destroy_str(string, 1);
+}
+
+void
+print_name(void *element)
+{
+ cpa *array = (cpa *) element;
+ char *carray = MYALLOC(char, 5);
+ str *string = new_str(carray, 5);
+
+ for (UNUM i = 0; i < array->size; i++) {
+ int result = encode(*(array->array+i), string);
+
+ if (result) {
+ fleprintf("%llu, fail to encode %ld\n", i, *(array->array+i));
+ str_set_length(string, 5);
+ continue;
+ }
+
+ carray = get_data(string);
+ *(carray+str_length(string)) = 0;
+
printf("%s", carray);
str_set_length(string, 5);
}
@@ -411,8 +503,6 @@ print_name(void *element)
destroy_str(string, 1);
}
-/* REVIEW: Print the names of non-terminals out, instead of printing
- the numbers? */
void
print_grammar(CC_MOD(Grammar *) g)
{
@@ -420,7 +510,7 @@ print_grammar(CC_MOD(Grammar *) g)
printf("Non-terminals...\n");
map_list_between(g->names, print_name, print_sep);
- printf("\n");
+ printf("\n\n");
printf("Predicates...\n");
@@ -431,9 +521,11 @@ print_grammar(CC_MOD(Grammar *) g)
get_data(ptdp->user_name), get_data(ptdp->raw_name));
}
+ printf("\n");
+
printf("Rules...\n");
for (int i = 0; i < list_length(g->rule_grps); i++) {
- print_rule_group(list_nth(g->rule_grps, i));
+ print_rule_group((Grammar *) g, list_nth(g->rule_grps, i));
printf("\n");
}
@@ -568,6 +660,7 @@ grammar_ptd(CCR_MOD(Grammar *) g, PT pt)
return (PTD *) list_nth(g->predicates, pt);
}
+P_ATTR
NUM
grammar_left_len(CCR_MOD(Grammar *)g)
{
@@ -581,6 +674,13 @@ grammar_names(CCR_MOD(Grammar *)g)
return g->names;
}
+P_ATTR
+List *
+grammar_preds(CCR_MOD(Grammar *)g)
+{
+ return g->predicates;
+}
+
/* A transitive closure algorithm */
void
epsilon_nts(CC_MOD(Grammar *) g, BOOL * const restrict nts)
@@ -682,11 +782,13 @@ nt_first(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
BREAKOUT;
} else {
free(tempT);
+ BREAKOUT;
}
break;
case PREDICATE:
SAFE_MALLOC(NUM, tempPT, 1, return 1;);
*tempPT = top->data.pt;
+
if (first &&
ht_find(predicate_hts+i, tempPT) == NULL) {
changed = 1;
@@ -694,6 +796,7 @@ nt_first(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
BREAKOUT;
} else {
free(tempPT);
+ BREAKOUT;
}
break;
default:
@@ -777,7 +880,6 @@ tnt_first(CC_MOD(ht *) terminal_hts, CC_MOD(ht *) predicate_hts,
for (NUM i = 0; i < tnt_len; i++) {
top = (TNT *) list_nth(tnts, i);
-
switch (top->type) {
case TERMINAL:
if (ht_find(result_terminals, &(top->data.t)) == NULL) {
@@ -797,6 +899,9 @@ tnt_first(CC_MOD(ht *) terminal_hts, CC_MOD(ht *) predicate_hts,
break;
default:
current = top->data.nt;
+ /* if (tnt_len == 2 && current == 1) {
+ * fleprintf0("generating for empty\n");
+ * } */
if (current >= (NT) len || current < 0) {
fleprintf("Wrong non-terminal: %ld>%ld\n", current, len);
@@ -949,6 +1054,13 @@ nt_follow(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
for (NUM ell = 0; ell < ht_len;) {
SAFE_MALLOC(NUM, tempN, 1, return 1;);
*tempN = **(keys+ell++);
+ /* if (top->data.nt == 17 ||
+ * top->data.nt == 14 ||
+ * top->data.nt == 13) {
+ * fleprintf("add pred %ld to NT %ld\n", *tempN,
+ * top->data.nt);
+ * eprintf("i = %ld, k = %ld, ell = %ld\n", i, k, ell);
+ * } */
if (ht_find(result_predicates+top->data.nt,
tempN) == NULL) {
changed = 1;