diff options
author | JSDurand <mmemmew@gmail.com> | 2022-02-08 00:29:10 +0800 |
---|---|---|
committer | JSDurand <mmemmew@gmail.com> | 2022-02-08 12:33:05 +0800 |
commit | 5426d9e2a6b820e34809d639838b26643df9ab17 (patch) | |
tree | 111f2b478b671092e3f2e64a6171970b8a5cdf99 /src/grammar.c | |
parent | aaa12504c6095b2cdfa213a3d4b269bbd5e7038a (diff) |
There are multiple subtle errors in the previous version, both in the
codes and in the description of the BNF format. This version should
fix some problems now.
This version can successfully parse the grammar of its own grammar
format, which is quite nice. See test/check_reader.c for parsing this
format.
Diffstat (limited to 'src/grammar.c')
-rw-r--r-- | src/grammar.c | 150 |
1 files changed, 131 insertions, 19 deletions
diff --git a/src/grammar.c b/src/grammar.c index 1ea52b0..55a1e07 100644 --- a/src/grammar.c +++ b/src/grammar.c @@ -63,6 +63,13 @@ new_ptd(str *user_name, str *raw_name, dfa *dfap) return result; } +P_ATTR +str * +ptd_user_name(PTD *p) +{ + return p->user_name; +} + void destroy_ptd(PTD *p, int flag) { @@ -91,14 +98,8 @@ destroy_ptd_no_free(void *e) destroy_ptd((PTD *)e, 0); } -/* REVIEW: Add some statistic counts to assist the hash tables. For - example, store the total number of terminals as an integer; then - the hash table can be hinted at initialization to contain that many - elements, which can reduce the number of time a hash table needs to - expand and re-insert all its keys, which then helps the - performance. But this is not the top priority thing to do, and - might be postponed until a proto-type of the parser generator is - usable already. */ +/* FIXME: Add a list of terminals, so that we don't need to use hash + tables. */ /* rule_grps and names should have the same length */ struct Grammar_s { @@ -160,20 +161,85 @@ destroy_rule_group_free_first(void *rule_grp) static void print_sep() { - printf(", "); + printf(",\n"); } static void -print_rule_group(void *rule_grp) +print_rule_group(Grammar *g, void *rule_grp) { Rule_group *rg = (Rule_group *) rule_grp; + char s[5]; + str *strp = (str *) new_utf8(s, 5); + for (int i = 0; i < list_length(rg->rights); i++) { - List *str = (List *) list_nth(rg->rights, i); - printf("Rule %lu => ", rg->left); - map_list_between(str, print_tnt, print_sep); + /* fleprintf("i = %d\n", i); */ + List *strls = (List *) list_nth(rg->rights, i); + /* printf("Rule "); */ + for (int j = 0; + (UNUM) j < ((cpa *)list_nth(g->names, rg->left))->size; + j++) { + + if (encode + (*(((cpa *)list_nth(g->names, rg->left))->array+j), + strp)) { + destroy_str(strp, 0); + fleprintf0("Fail to encode!\n"); + return; + } + + *(s+str_length(strp)) = 0; + printf("%s", s); + str_set_length(strp, 5); + } + printf(" := "); + + for (int j = 0; j < list_length(strls); j++) { + /* fleprintf("j = %d, len = %ld\n", j, list_length(strls)); */ + TNT *tntp = (TNT *) list_nth(strls, j); + PTD *ptdp = NULL; + switch (tntp->type) { + case TERMINAL: + printf("'"); + if (encode + (tntp->data.t, + strp)) { + destroy_str(strp, 0); + fleprintf0("Fail to encode!\n"); + return; + } + printf("%s'", s); + str_set_length(strp, 5); + break; + case NONTERMINAL: + for (int k = 0; + (UNUM) k < ((cpa *)list_nth(g->names, + tntp->data.nt))->size; + k++) { + if (encode + (*(((cpa *)list_nth(g->names, tntp->data.nt))->array+k), + strp)) { + destroy_str(strp, 0); + fleprintf0("Fail to encode!\n"); + return; + } + printf("%s", s); + str_set_length(strp, 5); + } + break; + default: + /* predicate */ + ptdp = (PTD *) list_nth(g->predicates, tntp->data.pt); + /* fleprintf("pt = %lu\n", tntp->data.pt); */ + printf("[%s]", get_data(ptdp->user_name)); + break; + } + if (j+1<list_length(strls)) printf(", "); + } printf("\n"); } + + destroy_str((str*) strp, 0); } TNT * @@ -386,7 +452,7 @@ find_in_cpa_list(CCR_MOD(NUM *) string, NUM size, } void -print_name(void *element) +eprint_name(void *element) { cpa *array = (cpa *) element; char *carray = MYALLOC(char, 5); @@ -404,6 +470,32 @@ print_name(void *element) carray = get_data(string); *(carray+str_length(string)) = 0; + eprintf("%s", carray); + str_set_length(string, 5); + } + + destroy_str(string, 1); +} + +void +print_name(void *element) +{ + cpa *array = (cpa *) element; + char *carray = MYALLOC(char, 5); + str *string = new_str(carray, 5); + + for (UNUM i = 0; i < array->size; i++) { + int result = encode(*(array->array+i), string); + + if (result) { + fleprintf("%llu, fail to encode %ld\n", i, *(array->array+i)); + str_set_length(string, 5); + continue; + } + + carray = get_data(string); + *(carray+str_length(string)) = 0; + printf("%s", carray); str_set_length(string, 5); } @@ -411,8 +503,6 @@ print_name(void *element) destroy_str(string, 1); } -/* REVIEW: Print the names of non-terminals out, instead of printing - the numbers? */ void print_grammar(CC_MOD(Grammar *) g) { @@ -420,7 +510,7 @@ print_grammar(CC_MOD(Grammar *) g) printf("Non-terminals...\n"); map_list_between(g->names, print_name, print_sep); - printf("\n"); + printf("\n\n"); printf("Predicates...\n"); @@ -431,9 +521,11 @@ print_grammar(CC_MOD(Grammar *) g) get_data(ptdp->user_name), get_data(ptdp->raw_name)); } + printf("\n"); + printf("Rules...\n"); for (int i = 0; i < list_length(g->rule_grps); i++) { - print_rule_group(list_nth(g->rule_grps, i)); + print_rule_group((Grammar *) g, list_nth(g->rule_grps, i)); printf("\n"); } @@ -568,6 +660,7 @@ grammar_ptd(CCR_MOD(Grammar *) g, PT pt) return (PTD *) list_nth(g->predicates, pt); } +P_ATTR NUM grammar_left_len(CCR_MOD(Grammar *)g) { @@ -581,6 +674,13 @@ grammar_names(CCR_MOD(Grammar *)g) return g->names; } +P_ATTR +List * +grammar_preds(CCR_MOD(Grammar *)g) +{ + return g->predicates; +} + /* A transitive closure algorithm */ void epsilon_nts(CC_MOD(Grammar *) g, BOOL * const restrict nts) @@ -682,11 +782,13 @@ nt_first(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts, BREAKOUT; } else { free(tempT); + BREAKOUT; } break; case PREDICATE: SAFE_MALLOC(NUM, tempPT, 1, return 1;); *tempPT = top->data.pt; + if (first && ht_find(predicate_hts+i, tempPT) == NULL) { changed = 1; @@ -694,6 +796,7 @@ nt_first(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts, BREAKOUT; } else { free(tempPT); + BREAKOUT; } break; default: @@ -777,7 +880,6 @@ tnt_first(CC_MOD(ht *) terminal_hts, CC_MOD(ht *) predicate_hts, for (NUM i = 0; i < tnt_len; i++) { top = (TNT *) list_nth(tnts, i); - switch (top->type) { case TERMINAL: if (ht_find(result_terminals, &(top->data.t)) == NULL) { @@ -797,6 +899,9 @@ tnt_first(CC_MOD(ht *) terminal_hts, CC_MOD(ht *) predicate_hts, break; default: current = top->data.nt; + /* if (tnt_len == 2 && current == 1) { + * fleprintf0("generating for empty\n"); + * } */ if (current >= (NT) len || current < 0) { fleprintf("Wrong non-terminal: %ld>%ld\n", current, len); @@ -949,6 +1054,13 @@ nt_follow(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts, for (NUM ell = 0; ell < ht_len;) { SAFE_MALLOC(NUM, tempN, 1, return 1;); *tempN = **(keys+ell++); + /* if (top->data.nt == 17 || + * top->data.nt == 14 || + * top->data.nt == 13) { + * fleprintf("add pred %ld to NT %ld\n", *tempN, + * top->data.nt); + * eprintf("i = %ld, k = %ld, ell = %ld\n", i, k, ell); + * } */ if (ht_find(result_predicates+top->data.nt, tempN) == NULL) { changed = 1; |