diff options
author | JSDurand <mmemmew@gmail.com> | 2022-01-21 21:53:50 +0800 |
---|---|---|
committer | JSDurand <mmemmew@gmail.com> | 2022-01-21 21:53:50 +0800 |
commit | 53865aad225ffbe5cf3c42736e5a2095092f9fff (patch) | |
tree | 697ecbee7fa69ae8c58a0ec2f69cdf84d7cd313f /src/grammar.c | |
parent | 5730d6c04258e192195bfbbbed76d68fd78ed458 (diff) |
temporary save point
Just to save some work.
Diffstat (limited to 'src/grammar.c')
-rw-r--r-- | src/grammar.c | 448 |
1 files changed, 426 insertions, 22 deletions
diff --git a/src/grammar.c b/src/grammar.c index 692e858..a24f9cb 100644 --- a/src/grammar.c +++ b/src/grammar.c @@ -1,3 +1,5 @@ +#include <string.h> +#include "ht.h" #include "grammar.h" struct Rule_s { @@ -5,12 +7,51 @@ struct Rule_s { List *right; /* a list of TNT's */ }; -typedef struct { +struct Rule_group_s { /* this group holds rules for this non-terminal */ NT left; /* a list of lists of TNT's */ List *rights; -} Rule_group; +}; + +NT +rg_left(CCR_MOD(Rule_group *) rg) +{ + return rg->left; +} + +NUM +rg_len(CCR_MOD(Rule_group *) rg) +{ + return list_length(rg->rights); +} + +List * +rg_right(CCR_MOD(Rule_group *) rg) +{ + return rg->rights; +} + +List * +rg_nth(CCR_MOD(Rule_group *) rg, NUM n) +{ + if (n < 0 || n >= list_length(rg->rights)) { + fleprintf("Out of bounds: %ld with len = %ld\n", + n, list_length(rg->rights)); + return NULL; + } + + return (List *) list_nth(rg->rights, n); +} + +/* TODO: Add some statistic counts to assist the hash tables. For + example, store the total number of terminals as an integer; then + the hash table can be hinted at initialization to contain that many + elements, which can reduce the number of time a hash table needs to + expand and re-insert all its keys, which then helps the + performance. But this is not the top priority thing to do, and + might be postponed until a proto-type of the parser generator is + usable already. */ /* rule_grps and names should have the same length */ struct Grammar_s { @@ -22,6 +63,7 @@ struct Grammar_s { To print them, first encode them into normal strings. */ List *names; + /* TODO: Add an array of predicates. */ }; static void @@ -80,14 +122,14 @@ print_rule_group(void *rule_grp) for (int i = 0; i < list_length(rg->rights); i++) { List *str = (List *) list_nth(rg->rights, i); - printf("Rule %u => ", rg->left); + printf("Rule %lu => ", rg->left); map_list_between(str, print_tnt, print_sep); printf("\n"); } } TNT * -new_tnt(int type, ...) +new_tnt(TNT_TYPE type, ...) { va_list args; va_start(args, type); @@ -95,8 +137,17 @@ new_tnt(int type, ...) TNT *result = MYALLOC(TNT, 1); result->type = type; - if (type) result->data.nt = va_arg(args, NT); - else result->data.t = va_arg(args, T); + switch (type) { + case TERMINAL: + result->data.t = va_arg(args, T); + break; + case NONTERMINAL: + result->data.nt = va_arg(args, NT); + break; + default: + result->data.pt = va_arg(args, PT); + break; + } va_end(args); @@ -104,7 +155,7 @@ new_tnt(int type, ...) } Rule * -new_rule(NT left, const List * const right) +new_rule(NT left, CC_MOD(List *) right) { if (!right) return NULL; @@ -125,7 +176,7 @@ new_grammar() /* We classify the rules into different rule groups. */ BOOL -build_grammar(Grammar *g, const List *rules, const List * const names) +build_grammar(Grammar *g, const List *rules, CC_MOD(List *) names) { g->names = (List *) names; @@ -135,7 +186,7 @@ build_grammar(Grammar *g, const List *rules, const List * const names) /* If a rule corresponds to a non-terminal not on the list, signal an error. */ for (int i = 0; i < rule_len; i++) { - if (((Rule *) list_nth(rules, i))->left >= len) { + if ((NUM) (((Rule *) list_nth(rules, i))->left) >= len) { fleprintf("%d, Rule contains weird non-terminal\n", i); return 1; } @@ -228,17 +279,25 @@ print_tnt(void *element) { TNT *tnt = (TNT*) element; - if (tnt->type) - printf("NT %u", tnt->data.nt); - else - printf("T %lu", tnt->data.t); + switch (tnt->type) { + case TERMINAL: + if (tnt->data.t == END_OF_INPUT) printf("T $"); + else printf("T %lu", tnt->data.t); + break; + case NONTERMINAL: + printf("NT %lu", tnt->data.nt); + break; + default: + printf("PT %lu", tnt->data.pt); + break; + } } void print_rule(void *r) { Rule *rule = (Rule *) r; - printf("Rule: %u => ", rule->left); + printf("Rule: %lu => ", rule->left); map_list(rule->right, print_tnt); @@ -246,8 +305,8 @@ print_rule(void *r) } NUM -find_in_cpa_list(const NUM * const restrict string, NUM size, - const List * const restrict list) +find_in_cpa_list(CCR_MOD(NUM *) string, NUM size, + CCR_MOD(List *) list) { NUM len = list_length(list), index = 0; @@ -305,7 +364,7 @@ print_name(void *element) /* REVIEW: Print the names of non-terminals out, instead of printing the numbers? */ void -print_grammar(const Grammar * const g) +print_grammar(CC_MOD(Grammar *) g) { printf("Printing a grammar:\n"); map_list_between(g->names, print_name, print_sep); @@ -335,13 +394,16 @@ new_tnt_string(char *format, int format_len, ...) for (int point = 0; point < format_len; point++) { switch (*(format+point)) { case 'n': - add_to_list(result, new_tnt(1, va_arg(args, NT))); + add_to_list(result, new_tnt(NONTERMINAL, va_arg(args, NT))); break; case 't': - add_to_list(result, new_tnt(0, va_arg(args, T))); + add_to_list(result, new_tnt(TERMINAL, va_arg(args, T))); + break; + case 'p': + add_to_list(result, new_tnt(PREDICATE, va_arg(args, PT))); break; default: - eprintf("Wront character: %c\n", *(format+point)); + eprintf("Wrong character: %c\n", *(format+point)); destroy_list(result, 1); va_end(args); return NULL; @@ -403,9 +465,9 @@ destroy_grammar(void *grammar, int flag) map_list(g->rule_grps, destroy_rule_group_free_all); else if (flag == 2) map_list(g->rule_grps, destroy_rule_group_free_first); - + destroy_list(g->rule_grps, 0); - + /* CPA structure has no concept of freeing first. So if flag is non-zero, we just free everything, and hope no disaster ensues. */ @@ -415,3 +477,345 @@ destroy_grammar(void *grammar, int flag) free(grammar); } + +Rule_group * +grammar_rule(CCR_MOD(Grammar *) g, NT nt) +{ + if ((NUM) nt >= list_length(g->rule_grps)) { + fleprintf("Invalid nonterminal: %lu\n", nt); + return NULL; + } + + return (Rule_group *) list_nth(g->rule_grps, nt); +} + +NUM +grammar_left_len(CCR_MOD(Grammar *)g) +{ + return list_length(g->names); +} + +/* A transitive closure algorithm */ +void +epsilon_nts(CC_MOD(Grammar *) g, BOOL * const restrict nts) +{ + NUM left_len = grammar_left_len(g); + + memset(nts, 0, sizeof(BOOL) * left_len); + + BOOL changed = 0, first_time = 1; + + do { + changed = 0; + + for (NUM i = 0; i < left_len; i++) { + /* If this non-terminal is already known to produce the empty + string, then we don't need to check it again. */ + if (*(nts+i)) continue; + + Rule_group *rg = grammar_rule(g, (NT) i); + NUM rg_length = rg_len(rg); + + for (NUM j = 0; j < rg_length; j++) { + List *string = rg_nth(rg, j); + if (first_time && list_length(string) == 0) { + changed = 1; + *(nts+i) = 1; + break; + } + + NUM string_len = list_length(string); + + BOOL non_epsilon = 0; + + for (NUM k = 0; k < string_len;) { + TNT *tnte = (TNT *) list_nth(string, k++); + if (tnte->type == NONTERMINAL && *(nts+tnte->data.nt)) + continue; + + non_epsilon = 1; + break; + } + + if (!non_epsilon && !(*(nts+i))) { + changed = 1; + *(nts+i) = 1; + } + } + } + + first_time = 0; + } while (changed); +} + +BOOL +nt_first(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts, + ht *terminal_hts, ht *predicate_hts) +{ + NUM left_len = grammar_left_len(g); + + BOOL changed = 0, first = 1; + + ht *terminal_ht = NULL, *pred_ht = NULL; + + /* lazy macro */ + +#define BREAKOUT k = string_len + + do { + changed = 0; + + for (NUM i = 0; i < left_len; i++) { + Rule_group *rg = grammar_rule(g, (NT) i); + NUM rg_length = rg_len(rg); + + for (NUM j = 0; j < rg_length;) { + List *string = rg_nth(rg, j++); + NUM string_len = list_length(string); + TNT *top = NULL; + + for (NUM k = 0; k < string_len;) { + top = (TNT *) list_nth(string, k++); + + switch (top->type) { + case TERMINAL: + if (first && + ht_find(terminal_hts+i, top->data.t) == NULL) { + changed = 1; + ht_insert(terminal_hts+i, top->data.t, (void*)1); + BREAKOUT; + } + break; + case PREDICATE: + if (first && + ht_find(predicate_hts+i, top->data.pt) == NULL) { + changed = 1; + ht_insert(predicate_hts+i, top->data.pt, (void*)1); + BREAKOUT; + } + break; + default: + terminal_ht = terminal_hts+top->data.nt; + pred_ht = predicate_hts+top->data.nt; + /* Add all entries to the corresponding hash table for the + nonterminal I. Also record if new entries have been + found. */ + + NUM len = ht_size(terminal_ht); + NUM *keys = ht_keys(terminal_ht); + + for (NUM ell = 0; ell < len;) { + NUM key = *(keys+ell++); + if (ht_find(terminal_hts+i, key) == NULL) { + changed = 1; + ht_insert(terminal_hts+i, key, (void*)1); + } + } + + len = ht_size(pred_ht); + keys = ht_keys(pred_ht); + + for (NUM ell = 0; ell < len;) { + NUM key = *(keys+ell++); + if (ht_find(predicate_hts+i, key) == NULL) { + changed = 1; + ht_insert(predicate_hts+i, key, (void*)1); + } + } + + if (!(*(nts+(top->data.nt)))) BREAKOUT; + break; + } + } + } + } + first = 0; + } while (changed); + +#undef BREAKOUT + + return 0; +} + +BOOL +tnt_first(CC_MOD(ht *) terminal_hts, CC_MOD(ht *) predicate_hts, + CCR_MOD(BOOL *) nts, NUM len, List *tnts, + ht * const restrict result_terminals, + ht * const restrict result_predicates) +{ + if (tnts == NULL) return 0; + + NUM tnt_len = list_length(tnts); + + if (!tnt_len) return 0; + + NUM temp_len = 0, *keys = NULL; + + TNT *top = NULL; + + NT current = 0; + + for (NUM i = 0; i < tnt_len; i++) { + top = (TNT *) list_nth(tnts, i); + + switch (top->type) { + case TERMINAL: + ht_insert(result_terminals, top->data.t, (void *)1); + return 0; + break; + case PREDICATE: + ht_insert(result_predicates, top->data.pt, (void *)1); + return 0; + break; + default: + current = top->data.nt; + + if (current >= (NT) len || current < 0) { + fleprintf("Wrong non-terminal: %ld>%ld\n", current, len); + return 1; + } + + temp_len = ht_size(terminal_hts+current); + keys = ht_keys(terminal_hts+current); + + for (NUM j = 0; j < temp_len;) + ht_insert(result_terminals, *(keys+j++), (void *)1); + + temp_len = ht_size(predicate_hts+current); + keys = ht_keys(predicate_hts+current); + + for (NUM j = 0; j < temp_len;) + ht_insert(result_predicates, *(keys+j++), (void *)1); + + if (!(*(nts+current))) i = tnt_len; + + break; + } + } + + return 0; +} + +BOOL +nt_follow(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts, + CC_MOD(ht *)terminal_hts, CC_MOD(ht *)predicate_hts, + ht * const restrict result_terminals, + ht * const restrict result_predicates) +{ + ht_insert(result_terminals, END_OF_INPUT, (void*)1); + + NUM left_len = grammar_left_len(g); + + BOOL changed = 0, first = 1; + + NUM ht_len = 0, *keys = NULL; + + List *temp = NULL; + + do { + changed = 0; + + for (NUM i = 0; i < left_len; i++) { + Rule_group *rg = grammar_rule(g, (NT) i); + NUM rg_length = rg_len(rg); + + for (NUM j = 0; j < rg_length;) { + List *string = rg_nth(rg, j++); + NUM string_len = list_length(string); + TNT *top = NULL; + + NUM rest_produce_epsilon = -1; + + for (NUM ell = 0; ell < string_len; ell++) { + top = (TNT *) list_nth(string, ell); + + switch (top->type) { + case NONTERMINAL: + if (!(*(nts+top->data.nt))) + rest_produce_epsilon = -1; + else if (rest_produce_epsilon < 0) + rest_produce_epsilon = ell; + break; + default: + rest_produce_epsilon = -1; + break; + } + } + + for (NUM k = 0; k < string_len; k++) { + top = (TNT *) list_nth(string, k); + + switch (top->type) { + case NONTERMINAL: + if (first && k+1<string_len) { + changed = 1; + temp = array_to_list + (list_array(string)+k+1, string_len-k-1); + + if (temp == NULL) return 1; + + if (tnt_first + (terminal_hts, predicate_hts, nts, left_len, + temp, + result_terminals+top->data.nt, + result_predicates+top->data.nt)) { + fleprintf("Error in generating the first set for %ld:" + " i = %ld, j = %ld, k = %ld, len = %ld\n", + top->data.nt, i, j, k, string_len); + free(temp); + return 1; + } + + free(temp); + } + + /* Test if the remaining thing produces the emtpy + string. */ + + if (k+1 == string_len || + (rest_produce_epsilon >= 0 && + rest_produce_epsilon <= k)) + goto add_to_follow; + + break; + + add_to_follow: + + ht_len = ht_size(result_terminals+i); + keys = ht_keys(result_terminals+i); + + for (NUM ell = 0; ell < ht_len;) { + NUM key = *(keys+ell++); + if (ht_find(result_terminals+top->data.nt, + key) == NULL) { + changed = 1; + ht_insert(result_terminals+top->data.nt, + key, (void*)1); + } + } + + ht_len = ht_size(result_predicates+i); + keys = ht_keys(result_predicates+i); + + for (NUM ell = 0; ell < ht_len;) { + NUM key = *(keys+ell++); + if (ht_find(result_predicates+top->data.nt, + key) == NULL) { + changed = 1; + ht_insert(result_predicates+top->data.nt, + key, (void*)1); + } + } + + break; + default: + break; + } + } + } + } + first = 0; + } while (changed); + + return 0; +} |