diff options
author | JSDurand <mmemmew@gmail.com> | 2022-01-31 09:23:20 +0800 |
---|---|---|
committer | JSDurand <mmemmew@gmail.com> | 2022-01-31 09:23:20 +0800 |
commit | a8bd5e9d85ac9928bd29add82e887f82642af893 (patch) | |
tree | 74e377f9fccffc2779ff97fa0bd8ad180b9c865c /src/cnp.c | |
parent | e555c88b8107caf886da229444c2bed1aaef6c2c (diff) |
test/check_cnp: working algorithm
I now have a working algorithm in test/check_cnp. It can correctly
parse the grammar for an esoteric language called "Brainfuck". This
language does not matter. What matters is that it contains
parentheses. So this shows that at least for grammars as complex as
parentheses, this parser works well. Haha.
Diffstat (limited to 'src/cnp.c')
-rw-r--r-- | src/cnp.c | 523 |
1 files changed, 523 insertions, 0 deletions
@@ -1 +1,524 @@ +#include <string.h> #include "cnp.h" + +struct Environment_s { + grammar_info *gi; + /* RESULT_TS and RESULT_PS are temporary arrays used in the + computations. */ + ht *result_ts; + ht *result_ps; + NUM *string; + UNUM string_len; + crf *crfp; + procu *pu; + procr *pr; + spa *spap; + bsr *bsrp; + /* If there are errors this will be non-zero. When this field is + non-zero, every function that involves this struct will do + nothing. */ + BOOL error; +}; + +void +nt_add(Environment *env, NUM X, NUM j) +{ + + if (env->error) return; + +#define GI env->gi +#define RG (grammar_rule(GI->g, X)) + + for (NUM i = 0; i < rg_len(RG); i++) { + BOOL result = + test_select(env, *(env->string+j), X, rg_nth(RG, i)); + + for (NUM k = 0; k < grammar_left_len(GI->g); k++) { + ht_reset(env->result_ts+k, DELETE_KEY); + ht_reset(env->result_ps+k, DELETE_KEY); + } + + if (env->error) break; + + if (result) { + /* fleprintf("i = %ld, j = %ld\n", i, j); */ + env->error = + desc_add(env->pr, env->pu, j, + (pair4) { .x = X, .y = i, .z = 0, .w = j }); + + /* fleprintf("env->error = %d\n", env->error); */ + /* fleprintf("pr len = %ld\n", env->pr->len); + * fleprintf("pr ini = %d\n", (env->pr->array)->initialized); */ + } + + if (env->error) break; + } + +#undef RG +#undef GI + +} + +BOOL +test_select(Environment *env, NUM b, NUM X, CCR_MOD(List *) tnts) +{ + if (env->error) return 0; + + BOOL result = 0; + + grammar_info *gi = env->gi; + + ht *result_ts = env->result_ts; + ht *result_ps = env->result_ps; + + if (tnt_first(gi->pts, gi->pps, gi->nts, grammar_left_len(gi->g), + tnts, result_ts, result_ps)) { + fleprintf0("Fail to find the first set of the TNT string.\n"); + goto cleanup; + } + + if (ht_find(result_ts, &b) != NULL) { + result = 1; + goto success; + } + + /* predicates haven't been implemented yet */ + /* for (NUM i = 0; i < ht_size(result_ps); i++) { + * + * } */ + + for (NUM i = 0; i < list_length(tnts); i++) { + +#define TOP ((TNT*)list_nth(tnts, i)) + + switch (TOP->type) { + case TERMINAL: + case PREDICATE: + goto success; + break; + default: + if (!(gi->nts+TOP->data.nt)) goto success; + break; + } + +#undef TOP + + } + + if (ht_find(gi->sts+X, &b) != NULL) result = 1; + + /* predicates haven't been implemented yet */ + + goto success; + + cleanup: + env->error = 1; + return result; + + success: + return result; +} + +void +rtn(Environment *env, NUM X, NUM k, NUM j) +{ + if (env->error) return; + + pair3 label = (pair3) { .x = X, .y = k, .z = j }; + pair2 label2 = (pair2) { .x = X, .y = k }; + + if (spa_belong(env->spap, label)) return; + + /* fleprintf0("new spa\n"); */ + + if ((env->error = spa_insert(env->spap, label))) return; + + ht *edge_ht = NULL; + + if ((edge_ht = crf_find_node(env->crfp, label2)) == NULL) { + fleprintf0("this should not happen\n"); + env->error = 1; + return; + } + +#define EDGE_LABELS ((pair4 **) ht_keys(edge_ht)) + + for (NUM i = 0; i < ht_size(edge_ht); i++) { + +#define EDGE (*(EDGE_LABELS+i)) + + if ((env->error = desc_add(env->pr, env->pu, j, *EDGE))) return; + /* fleprintf("added descriptor (%ld, %ld, %ld, %ld, %ld)\n", + * EDGE->x, EDGE->y, EDGE->z, EDGE->w, j); */ + if ((env->error = + bsr_add(env->gi->g, env->bsrp, + EDGE->x, EDGE->y, EDGE->z, EDGE->w, k, j))) + return; + } + +#undef EDGE_LABELS +#undef EDGE + +} + +void +cnp_call(Environment *env, pair3 label, NUM i, NUM j) +{ + if (env->error) return; + + if (label.z < 1) goto cleanup; + + NUM Y = label.x; + List *tnts = rg_nth(grammar_rule(env->gi->g, Y), label.y); + + TNT *xtnt = (TNT *) list_nth(tnts, label.z - 1); + + if (xtnt->type != NONTERMINAL) goto cleanup; + + NUM X = (NUM) xtnt->data.nt, h = 0; + + pair2 node = (pair2) { .x = X, .y = j }; + pair4 u = (pair4) + { + .x = label.x, + .y = label.y, + .z = label.z, + .w = i + }; + + ht *edge_ht = NULL, *spa_ht = NULL; + + if ((edge_ht = crf_find_node(env->crfp, node)) == NULL) { + if (crf_add_node(env->crfp, node)) goto cleanup; + + if (crf_add_edge(env->crfp, node, u)) goto cleanup; + + /* errors will be stored in ENV, so we simply return */ + nt_add(env, X, j); + + return; + } + + if (ht_find(edge_ht, &u) == NULL) { + if (crf_add_edge(env->crfp, node, u)) goto cleanup; + + spa_ht = spa_find(env->spap, node); + + if (spa_ht == NULL) return; + + for (NUM k = 0; k < ht_size(spa_ht); k++) { + h = **((NUM **) ht_keys(spa_ht) + k); + + if (desc_add(env->pr, env->pu, h, u)) goto cleanup; + + if (bsr_add(env->gi->g, env->bsrp, + label.x, label.y, label.z, + i, j, h)) + goto cleanup; + } + } + + return; + + cleanup: + fleprintf0("error\n"); + env->error = 1; + return; +} + +HP_ATTR +BOOL +env_error_p(CCR_MOD(Environment *) env) +{ + return env->error; +} + +P_ATTR bsr * +env_bsrp(CCR_MOD(Environment *) env) +{ + return env->bsrp; +} + +P_ATTR +Grammar * +env_grammar(CCR_MOD(Environment *) env) +{ + return env->gi->g; +} + +P_ATTR +crf * +env_crfp(CCR_MOD(Environment *) env) +{ + return env->crfp; +} + +P_ATTR +procr * +env_r(CCR_MOD(Environment *) env) +{ + return env->pr; +} + +P_ATTR +procu * +env_u(CCR_MOD(Environment *) env) +{ + return env->pu; +} + +H_ATTR +void +env_str(CCR_MOD(Environment *) env, NUM **str, NUM *str_len) +{ + *str = env->string; + *str_len = env->string_len; +} + +H_ATTR +BOOL +env_follow_p(CCR_MOD(Environment *) env, NUM X, NUM t) +{ + return ht_find(((env->gi->sts)+X), &t) != NULL; +} + +void +env_print_follow(CCR_MOD(Environment *) env, NUM X) +{ + for (NUM i = 0; i < ht_size((env->gi->sts+X)); i++) { + if (**(((NUM **) ht_keys(env->gi->sts+X)) + i) != + END_OF_INPUT) + printf("key %ld = %ld\n", + i, **(((NUM **) ht_keys(env->gi->sts+X)) + i)); + else + printf("key %ld = END OF INPUT\n", i); + } +} + +void +env_print_first(CCR_MOD(Environment *) env, NUM X) +{ + for (NUM i = 0; i < ht_size((env->gi->pts+X)); i++) { + printf("key %ld = %ld\n", + i, **(((NUM **) ht_keys(env->gi->pts+X)) + i)); + } +} + +Environment * +new_env(Grammar *g, str *string) +{ + Environment *env = NULL; + ht *htp = NULL; + NUM current_index = 0, follow_current_index = 0; + NUM result_current_index = 0; + + SAFE_MALLOC(Environment, env, 1, goto cleanup;); + + env->gi = NULL; + + SAFE_MALLOC(grammar_info, env->gi, 1, goto cleanup;); + + memset(env->gi, 0, sizeof(grammar_info)); + + env->gi->g = g; + + NUM left_len = grammar_left_len(g); + + SAFE_MALLOC(BOOL, env->gi->nts, sizeof(BOOL) * left_len, + goto cleanup;); + + epsilon_nts(g, env->gi->nts); + + SAFE_MALLOC(ht, env->gi->pts, sizeof(ht) * left_len, + goto cleanup;); + SAFE_MALLOC(ht, env->gi->pps, sizeof(ht) * left_len, + goto cleanup;); + + for (current_index = 0; current_index < left_len; current_index++) { + if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) goto cleanup; + *(env->gi->pts+current_index) = *htp; + free(htp); + + if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) { + destroy_ht(env->gi->pts+current_index, DESTROY_NONE_NO_SELF); + goto cleanup; + } + *(env->gi->pps+current_index) = *htp; + free(htp); + } + + if (nt_first(g, env->gi->nts, env->gi->pts, env->gi->pps)) + goto cleanup; + + SAFE_MALLOC(ht, env->gi->sts, sizeof(ht) * left_len, + goto cleanup;); + SAFE_MALLOC(ht, env->gi->sps, sizeof(ht) * left_len, + goto cleanup;); + + for (follow_current_index = 0; + follow_current_index < left_len; + follow_current_index++) { + if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) goto cleanup; + + *(env->gi->sts+follow_current_index) = *htp; + + free(htp); + + if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) { + destroy_ht(env->gi->sts+follow_current_index, + DESTROY_NONE_NO_SELF); + goto cleanup; + } + + *(env->gi->sps+follow_current_index) = *htp; + + free(htp); + } + + if (nt_follow(g, env->gi->nts, + env->gi->pts, env->gi->pps, + env->gi->sts, env->gi->sps)) + goto cleanup; + + SAFE_MALLOC(ht, env->result_ts, sizeof(ht) * left_len, + goto cleanup;); + SAFE_MALLOC(ht, env->result_ps, sizeof(ht) * left_len, + goto cleanup;); + + for (result_current_index = 0; + result_current_index < left_len; + result_current_index++) { + if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) goto cleanup; + + *(env->result_ts+result_current_index) = *htp; + + free(htp); + + if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) { + destroy_ht(env->result_ts+result_current_index, + DESTROY_NONE_NO_SELF); + goto cleanup; + } + + *(env->result_ps+result_current_index) = *htp; + + free(htp); + + } + + SAFE_MALLOC(NUM, env->string, str_length(string)+1, goto cleanup;); + + str_info info = EMPTY_STR_INFO; + + env->string_len = 0; + + for (NUM i = 0; i < str_length(string); i += info.step) { + info = get_info(string, i); + *(env->string+(env->string_len)++) = info.value; + } + + *(env->string+(env->string_len)++) = END_OF_INPUT; + + if ((env->crfp = new_crf()) == NULL) goto cleanup; + + BOOL error = 0; + + env->pu = new_procu(env->string_len, &error); + + if (error) goto cleanup; + + env->pr = new_procr(env->string_len, &error); + + if (error) goto cleanup; + + if ((env->spap = new_spa()) == NULL) goto cleanup; + + env->bsrp = new_bsr(&error, g); + + if (error) goto cleanup; + + return env; + + cleanup: + + if (env) { + if (env->gi) { + if (env->gi->nts) free(env->gi->nts); + + for (NUM i = 0; i < current_index; i++) { + destroy_ht(env->gi->pts+i, DESTROY_NONE_NO_SELF); + destroy_ht(env->gi->pps+i, DESTROY_NONE_NO_SELF); + } + + if (env->gi->pts) free(env->gi->pts); + if (env->gi->pps) free(env->gi->pps); + + for (NUM i = 0; i < follow_current_index; i++) { + destroy_ht(env->gi->sts+i, DESTROY_NONE_NO_SELF); + destroy_ht(env->gi->sps+i, DESTROY_NONE_NO_SELF); + } + + if (env->gi->sts) free(env->gi->sts); + if (env->gi->sps) free(env->gi->sps); + + free(env->gi); + } + + for (NUM i = 0; i < result_current_index; i++) { + destroy_ht(env->result_ts+i, DESTROY_NONE_NO_SELF); + destroy_ht(env->result_ps+i, DESTROY_NONE_NO_SELF); + } + + if (env->result_ts) free(env->result_ts); + if (env->result_ps) free(env->result_ps); + + if (env->string) free(env->string); + + if (env->crfp) destroy_crf(env->crfp); + + if (env->pu) destroy_procu(env->pu); + if (env->pr) destroy_procr(env->pr); + if (env->spap) destroy_spa(env->spap); + if (env->bsrp) destroy_bsr(env->bsrp); + + free(env); + } + + return NULL; +} + +void +destroy_env(Environment *env) +{ + if (env == NULL) return; + + free(env->gi->nts); + + NUM left_len = grammar_left_len(env->gi->g); + + for (NUM i = 0; i < left_len; i++) { + destroy_ht(env->gi->pts+i, DESTROY_KEY_NO_SELF); + destroy_ht(env->gi->pps+i, DESTROY_KEY_NO_SELF); + destroy_ht(env->gi->sts+i, DESTROY_KEY_NO_SELF); + destroy_ht(env->gi->sps+i, DESTROY_KEY_NO_SELF); + destroy_ht(env->result_ts+i, DESTROY_KEY_NO_SELF); + destroy_ht(env->result_ps+i, DESTROY_KEY_NO_SELF); + } + + free(env->gi->pts); + free(env->gi->pps); + free(env->gi->sts); + free(env->gi->sps); + free(env->result_ts); + free(env->result_ps); + free(env->string); + + destroy_crf(env->crfp); + destroy_procu(env->pu); + destroy_procr(env->pr); + destroy_spa(env->spap); + destroy_bsr(env->bsrp); + + free(env->gi); + free(env); +} |