From 5426d9e2a6b820e34809d639838b26643df9ab17 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Tue, 8 Feb 2022 00:29:10 +0800 Subject: fix errors There are multiple subtle errors in the previous version, both in the codes and in the description of the BNF format. This version should fix some problems now. This version can successfully parse the grammar of its own grammar format, which is quite nice. See test/check_reader.c for parsing this format. --- src/grammar.c | 150 ++++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 131 insertions(+), 19 deletions(-) (limited to 'src/grammar.c') diff --git a/src/grammar.c b/src/grammar.c index 1ea52b0..55a1e07 100644 --- a/src/grammar.c +++ b/src/grammar.c @@ -63,6 +63,13 @@ new_ptd(str *user_name, str *raw_name, dfa *dfap) return result; } +P_ATTR +str * +ptd_user_name(PTD *p) +{ + return p->user_name; +} + void destroy_ptd(PTD *p, int flag) { @@ -91,14 +98,8 @@ destroy_ptd_no_free(void *e) destroy_ptd((PTD *)e, 0); } -/* REVIEW: Add some statistic counts to assist the hash tables. For - example, store the total number of terminals as an integer; then - the hash table can be hinted at initialization to contain that many - elements, which can reduce the number of time a hash table needs to - expand and re-insert all its keys, which then helps the - performance. But this is not the top priority thing to do, and - might be postponed until a proto-type of the parser generator is - usable already. */ +/* FIXME: Add a list of terminals, so that we don't need to use hash + tables. */ /* rule_grps and names should have the same length */ struct Grammar_s { @@ -160,20 +161,85 @@ destroy_rule_group_free_first(void *rule_grp) static void print_sep() { - printf(", "); + printf(",\n"); } static void -print_rule_group(void *rule_grp) +print_rule_group(Grammar *g, void *rule_grp) { Rule_group *rg = (Rule_group *) rule_grp; + char s[5]; + str *strp = (str *) new_utf8(s, 5); + for (int i = 0; i < list_length(rg->rights); i++) { - List *str = (List *) list_nth(rg->rights, i); - printf("Rule %lu => ", rg->left); - map_list_between(str, print_tnt, print_sep); + /* fleprintf("i = %d\n", i); */ + List *strls = (List *) list_nth(rg->rights, i); + /* printf("Rule "); */ + for (int j = 0; + (UNUM) j < ((cpa *)list_nth(g->names, rg->left))->size; + j++) { + + if (encode + (*(((cpa *)list_nth(g->names, rg->left))->array+j), + strp)) { + destroy_str(strp, 0); + fleprintf0("Fail to encode!\n"); + return; + } + + *(s+str_length(strp)) = 0; + printf("%s", s); + str_set_length(strp, 5); + } + printf(" := "); + + for (int j = 0; j < list_length(strls); j++) { + /* fleprintf("j = %d, len = %ld\n", j, list_length(strls)); */ + TNT *tntp = (TNT *) list_nth(strls, j); + PTD *ptdp = NULL; + switch (tntp->type) { + case TERMINAL: + printf("'"); + if (encode + (tntp->data.t, + strp)) { + destroy_str(strp, 0); + fleprintf0("Fail to encode!\n"); + return; + } + printf("%s'", s); + str_set_length(strp, 5); + break; + case NONTERMINAL: + for (int k = 0; + (UNUM) k < ((cpa *)list_nth(g->names, + tntp->data.nt))->size; + k++) { + if (encode + (*(((cpa *)list_nth(g->names, tntp->data.nt))->array+k), + strp)) { + destroy_str(strp, 0); + fleprintf0("Fail to encode!\n"); + return; + } + printf("%s", s); + str_set_length(strp, 5); + } + break; + default: + /* predicate */ + ptdp = (PTD *) list_nth(g->predicates, tntp->data.pt); + /* fleprintf("pt = %lu\n", tntp->data.pt); */ + printf("[%s]", get_data(ptdp->user_name)); + break; + } + if (j+1size; i++) { + int result = encode(*(array->array+i), string); + + if (result) { + fleprintf("%llu, fail to encode %ld\n", i, *(array->array+i)); + str_set_length(string, 5); + continue; + } + + carray = get_data(string); + *(carray+str_length(string)) = 0; + printf("%s", carray); str_set_length(string, 5); } @@ -411,8 +503,6 @@ print_name(void *element) destroy_str(string, 1); } -/* REVIEW: Print the names of non-terminals out, instead of printing - the numbers? */ void print_grammar(CC_MOD(Grammar *) g) { @@ -420,7 +510,7 @@ print_grammar(CC_MOD(Grammar *) g) printf("Non-terminals...\n"); map_list_between(g->names, print_name, print_sep); - printf("\n"); + printf("\n\n"); printf("Predicates...\n"); @@ -431,9 +521,11 @@ print_grammar(CC_MOD(Grammar *) g) get_data(ptdp->user_name), get_data(ptdp->raw_name)); } + printf("\n"); + printf("Rules...\n"); for (int i = 0; i < list_length(g->rule_grps); i++) { - print_rule_group(list_nth(g->rule_grps, i)); + print_rule_group((Grammar *) g, list_nth(g->rule_grps, i)); printf("\n"); } @@ -568,6 +660,7 @@ grammar_ptd(CCR_MOD(Grammar *) g, PT pt) return (PTD *) list_nth(g->predicates, pt); } +P_ATTR NUM grammar_left_len(CCR_MOD(Grammar *)g) { @@ -581,6 +674,13 @@ grammar_names(CCR_MOD(Grammar *)g) return g->names; } +P_ATTR +List * +grammar_preds(CCR_MOD(Grammar *)g) +{ + return g->predicates; +} + /* A transitive closure algorithm */ void epsilon_nts(CC_MOD(Grammar *) g, BOOL * const restrict nts) @@ -682,11 +782,13 @@ nt_first(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts, BREAKOUT; } else { free(tempT); + BREAKOUT; } break; case PREDICATE: SAFE_MALLOC(NUM, tempPT, 1, return 1;); *tempPT = top->data.pt; + if (first && ht_find(predicate_hts+i, tempPT) == NULL) { changed = 1; @@ -694,6 +796,7 @@ nt_first(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts, BREAKOUT; } else { free(tempPT); + BREAKOUT; } break; default: @@ -777,7 +880,6 @@ tnt_first(CC_MOD(ht *) terminal_hts, CC_MOD(ht *) predicate_hts, for (NUM i = 0; i < tnt_len; i++) { top = (TNT *) list_nth(tnts, i); - switch (top->type) { case TERMINAL: if (ht_find(result_terminals, &(top->data.t)) == NULL) { @@ -797,6 +899,9 @@ tnt_first(CC_MOD(ht *) terminal_hts, CC_MOD(ht *) predicate_hts, break; default: current = top->data.nt; + /* if (tnt_len == 2 && current == 1) { + * fleprintf0("generating for empty\n"); + * } */ if (current >= (NT) len || current < 0) { fleprintf("Wrong non-terminal: %ld>%ld\n", current, len); @@ -949,6 +1054,13 @@ nt_follow(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts, for (NUM ell = 0; ell < ht_len;) { SAFE_MALLOC(NUM, tempN, 1, return 1;); *tempN = **(keys+ell++); + /* if (top->data.nt == 17 || + * top->data.nt == 14 || + * top->data.nt == 13) { + * fleprintf("add pred %ld to NT %ld\n", *tempN, + * top->data.nt); + * eprintf("i = %ld, k = %ld, ell = %ld\n", i, k, ell); + * } */ if (ht_find(result_predicates+top->data.nt, tempN) == NULL) { changed = 1; -- cgit v1.2.3-18-g5258