From a8bd5e9d85ac9928bd29add82e887f82642af893 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Mon, 31 Jan 2022 09:23:20 +0800 Subject: test/check_cnp: working algorithm I now have a working algorithm in test/check_cnp. It can correctly parse the grammar for an esoteric language called "Brainfuck". This language does not matter. What matters is that it contains parentheses. So this shows that at least for grammars as complex as parentheses, this parser works well. Haha. --- src/cnp.c | 523 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 523 insertions(+) (limited to 'src/cnp.c') diff --git a/src/cnp.c b/src/cnp.c index d9a4978..8c7c519 100644 --- a/src/cnp.c +++ b/src/cnp.c @@ -1 +1,524 @@ +#include #include "cnp.h" + +struct Environment_s { + grammar_info *gi; + /* RESULT_TS and RESULT_PS are temporary arrays used in the + computations. */ + ht *result_ts; + ht *result_ps; + NUM *string; + UNUM string_len; + crf *crfp; + procu *pu; + procr *pr; + spa *spap; + bsr *bsrp; + /* If there are errors this will be non-zero. When this field is + non-zero, every function that involves this struct will do + nothing. */ + BOOL error; +}; + +void +nt_add(Environment *env, NUM X, NUM j) +{ + + if (env->error) return; + +#define GI env->gi +#define RG (grammar_rule(GI->g, X)) + + for (NUM i = 0; i < rg_len(RG); i++) { + BOOL result = + test_select(env, *(env->string+j), X, rg_nth(RG, i)); + + for (NUM k = 0; k < grammar_left_len(GI->g); k++) { + ht_reset(env->result_ts+k, DELETE_KEY); + ht_reset(env->result_ps+k, DELETE_KEY); + } + + if (env->error) break; + + if (result) { + /* fleprintf("i = %ld, j = %ld\n", i, j); */ + env->error = + desc_add(env->pr, env->pu, j, + (pair4) { .x = X, .y = i, .z = 0, .w = j }); + + /* fleprintf("env->error = %d\n", env->error); */ + /* fleprintf("pr len = %ld\n", env->pr->len); + * fleprintf("pr ini = %d\n", (env->pr->array)->initialized); */ + } + + if (env->error) break; + } + +#undef RG +#undef GI + +} + +BOOL +test_select(Environment *env, NUM b, NUM X, CCR_MOD(List *) tnts) +{ + if (env->error) return 0; + + BOOL result = 0; + + grammar_info *gi = env->gi; + + ht *result_ts = env->result_ts; + ht *result_ps = env->result_ps; + + if (tnt_first(gi->pts, gi->pps, gi->nts, grammar_left_len(gi->g), + tnts, result_ts, result_ps)) { + fleprintf0("Fail to find the first set of the TNT string.\n"); + goto cleanup; + } + + if (ht_find(result_ts, &b) != NULL) { + result = 1; + goto success; + } + + /* predicates haven't been implemented yet */ + /* for (NUM i = 0; i < ht_size(result_ps); i++) { + * + * } */ + + for (NUM i = 0; i < list_length(tnts); i++) { + +#define TOP ((TNT*)list_nth(tnts, i)) + + switch (TOP->type) { + case TERMINAL: + case PREDICATE: + goto success; + break; + default: + if (!(gi->nts+TOP->data.nt)) goto success; + break; + } + +#undef TOP + + } + + if (ht_find(gi->sts+X, &b) != NULL) result = 1; + + /* predicates haven't been implemented yet */ + + goto success; + + cleanup: + env->error = 1; + return result; + + success: + return result; +} + +void +rtn(Environment *env, NUM X, NUM k, NUM j) +{ + if (env->error) return; + + pair3 label = (pair3) { .x = X, .y = k, .z = j }; + pair2 label2 = (pair2) { .x = X, .y = k }; + + if (spa_belong(env->spap, label)) return; + + /* fleprintf0("new spa\n"); */ + + if ((env->error = spa_insert(env->spap, label))) return; + + ht *edge_ht = NULL; + + if ((edge_ht = crf_find_node(env->crfp, label2)) == NULL) { + fleprintf0("this should not happen\n"); + env->error = 1; + return; + } + +#define EDGE_LABELS ((pair4 **) ht_keys(edge_ht)) + + for (NUM i = 0; i < ht_size(edge_ht); i++) { + +#define EDGE (*(EDGE_LABELS+i)) + + if ((env->error = desc_add(env->pr, env->pu, j, *EDGE))) return; + /* fleprintf("added descriptor (%ld, %ld, %ld, %ld, %ld)\n", + * EDGE->x, EDGE->y, EDGE->z, EDGE->w, j); */ + if ((env->error = + bsr_add(env->gi->g, env->bsrp, + EDGE->x, EDGE->y, EDGE->z, EDGE->w, k, j))) + return; + } + +#undef EDGE_LABELS +#undef EDGE + +} + +void +cnp_call(Environment *env, pair3 label, NUM i, NUM j) +{ + if (env->error) return; + + if (label.z < 1) goto cleanup; + + NUM Y = label.x; + List *tnts = rg_nth(grammar_rule(env->gi->g, Y), label.y); + + TNT *xtnt = (TNT *) list_nth(tnts, label.z - 1); + + if (xtnt->type != NONTERMINAL) goto cleanup; + + NUM X = (NUM) xtnt->data.nt, h = 0; + + pair2 node = (pair2) { .x = X, .y = j }; + pair4 u = (pair4) + { + .x = label.x, + .y = label.y, + .z = label.z, + .w = i + }; + + ht *edge_ht = NULL, *spa_ht = NULL; + + if ((edge_ht = crf_find_node(env->crfp, node)) == NULL) { + if (crf_add_node(env->crfp, node)) goto cleanup; + + if (crf_add_edge(env->crfp, node, u)) goto cleanup; + + /* errors will be stored in ENV, so we simply return */ + nt_add(env, X, j); + + return; + } + + if (ht_find(edge_ht, &u) == NULL) { + if (crf_add_edge(env->crfp, node, u)) goto cleanup; + + spa_ht = spa_find(env->spap, node); + + if (spa_ht == NULL) return; + + for (NUM k = 0; k < ht_size(spa_ht); k++) { + h = **((NUM **) ht_keys(spa_ht) + k); + + if (desc_add(env->pr, env->pu, h, u)) goto cleanup; + + if (bsr_add(env->gi->g, env->bsrp, + label.x, label.y, label.z, + i, j, h)) + goto cleanup; + } + } + + return; + + cleanup: + fleprintf0("error\n"); + env->error = 1; + return; +} + +HP_ATTR +BOOL +env_error_p(CCR_MOD(Environment *) env) +{ + return env->error; +} + +P_ATTR bsr * +env_bsrp(CCR_MOD(Environment *) env) +{ + return env->bsrp; +} + +P_ATTR +Grammar * +env_grammar(CCR_MOD(Environment *) env) +{ + return env->gi->g; +} + +P_ATTR +crf * +env_crfp(CCR_MOD(Environment *) env) +{ + return env->crfp; +} + +P_ATTR +procr * +env_r(CCR_MOD(Environment *) env) +{ + return env->pr; +} + +P_ATTR +procu * +env_u(CCR_MOD(Environment *) env) +{ + return env->pu; +} + +H_ATTR +void +env_str(CCR_MOD(Environment *) env, NUM **str, NUM *str_len) +{ + *str = env->string; + *str_len = env->string_len; +} + +H_ATTR +BOOL +env_follow_p(CCR_MOD(Environment *) env, NUM X, NUM t) +{ + return ht_find(((env->gi->sts)+X), &t) != NULL; +} + +void +env_print_follow(CCR_MOD(Environment *) env, NUM X) +{ + for (NUM i = 0; i < ht_size((env->gi->sts+X)); i++) { + if (**(((NUM **) ht_keys(env->gi->sts+X)) + i) != + END_OF_INPUT) + printf("key %ld = %ld\n", + i, **(((NUM **) ht_keys(env->gi->sts+X)) + i)); + else + printf("key %ld = END OF INPUT\n", i); + } +} + +void +env_print_first(CCR_MOD(Environment *) env, NUM X) +{ + for (NUM i = 0; i < ht_size((env->gi->pts+X)); i++) { + printf("key %ld = %ld\n", + i, **(((NUM **) ht_keys(env->gi->pts+X)) + i)); + } +} + +Environment * +new_env(Grammar *g, str *string) +{ + Environment *env = NULL; + ht *htp = NULL; + NUM current_index = 0, follow_current_index = 0; + NUM result_current_index = 0; + + SAFE_MALLOC(Environment, env, 1, goto cleanup;); + + env->gi = NULL; + + SAFE_MALLOC(grammar_info, env->gi, 1, goto cleanup;); + + memset(env->gi, 0, sizeof(grammar_info)); + + env->gi->g = g; + + NUM left_len = grammar_left_len(g); + + SAFE_MALLOC(BOOL, env->gi->nts, sizeof(BOOL) * left_len, + goto cleanup;); + + epsilon_nts(g, env->gi->nts); + + SAFE_MALLOC(ht, env->gi->pts, sizeof(ht) * left_len, + goto cleanup;); + SAFE_MALLOC(ht, env->gi->pps, sizeof(ht) * left_len, + goto cleanup;); + + for (current_index = 0; current_index < left_len; current_index++) { + if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) goto cleanup; + *(env->gi->pts+current_index) = *htp; + free(htp); + + if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) { + destroy_ht(env->gi->pts+current_index, DESTROY_NONE_NO_SELF); + goto cleanup; + } + *(env->gi->pps+current_index) = *htp; + free(htp); + } + + if (nt_first(g, env->gi->nts, env->gi->pts, env->gi->pps)) + goto cleanup; + + SAFE_MALLOC(ht, env->gi->sts, sizeof(ht) * left_len, + goto cleanup;); + SAFE_MALLOC(ht, env->gi->sps, sizeof(ht) * left_len, + goto cleanup;); + + for (follow_current_index = 0; + follow_current_index < left_len; + follow_current_index++) { + if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) goto cleanup; + + *(env->gi->sts+follow_current_index) = *htp; + + free(htp); + + if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) { + destroy_ht(env->gi->sts+follow_current_index, + DESTROY_NONE_NO_SELF); + goto cleanup; + } + + *(env->gi->sps+follow_current_index) = *htp; + + free(htp); + } + + if (nt_follow(g, env->gi->nts, + env->gi->pts, env->gi->pps, + env->gi->sts, env->gi->sps)) + goto cleanup; + + SAFE_MALLOC(ht, env->result_ts, sizeof(ht) * left_len, + goto cleanup;); + SAFE_MALLOC(ht, env->result_ps, sizeof(ht) * left_len, + goto cleanup;); + + for (result_current_index = 0; + result_current_index < left_len; + result_current_index++) { + if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) goto cleanup; + + *(env->result_ts+result_current_index) = *htp; + + free(htp); + + if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) { + destroy_ht(env->result_ts+result_current_index, + DESTROY_NONE_NO_SELF); + goto cleanup; + } + + *(env->result_ps+result_current_index) = *htp; + + free(htp); + + } + + SAFE_MALLOC(NUM, env->string, str_length(string)+1, goto cleanup;); + + str_info info = EMPTY_STR_INFO; + + env->string_len = 0; + + for (NUM i = 0; i < str_length(string); i += info.step) { + info = get_info(string, i); + *(env->string+(env->string_len)++) = info.value; + } + + *(env->string+(env->string_len)++) = END_OF_INPUT; + + if ((env->crfp = new_crf()) == NULL) goto cleanup; + + BOOL error = 0; + + env->pu = new_procu(env->string_len, &error); + + if (error) goto cleanup; + + env->pr = new_procr(env->string_len, &error); + + if (error) goto cleanup; + + if ((env->spap = new_spa()) == NULL) goto cleanup; + + env->bsrp = new_bsr(&error, g); + + if (error) goto cleanup; + + return env; + + cleanup: + + if (env) { + if (env->gi) { + if (env->gi->nts) free(env->gi->nts); + + for (NUM i = 0; i < current_index; i++) { + destroy_ht(env->gi->pts+i, DESTROY_NONE_NO_SELF); + destroy_ht(env->gi->pps+i, DESTROY_NONE_NO_SELF); + } + + if (env->gi->pts) free(env->gi->pts); + if (env->gi->pps) free(env->gi->pps); + + for (NUM i = 0; i < follow_current_index; i++) { + destroy_ht(env->gi->sts+i, DESTROY_NONE_NO_SELF); + destroy_ht(env->gi->sps+i, DESTROY_NONE_NO_SELF); + } + + if (env->gi->sts) free(env->gi->sts); + if (env->gi->sps) free(env->gi->sps); + + free(env->gi); + } + + for (NUM i = 0; i < result_current_index; i++) { + destroy_ht(env->result_ts+i, DESTROY_NONE_NO_SELF); + destroy_ht(env->result_ps+i, DESTROY_NONE_NO_SELF); + } + + if (env->result_ts) free(env->result_ts); + if (env->result_ps) free(env->result_ps); + + if (env->string) free(env->string); + + if (env->crfp) destroy_crf(env->crfp); + + if (env->pu) destroy_procu(env->pu); + if (env->pr) destroy_procr(env->pr); + if (env->spap) destroy_spa(env->spap); + if (env->bsrp) destroy_bsr(env->bsrp); + + free(env); + } + + return NULL; +} + +void +destroy_env(Environment *env) +{ + if (env == NULL) return; + + free(env->gi->nts); + + NUM left_len = grammar_left_len(env->gi->g); + + for (NUM i = 0; i < left_len; i++) { + destroy_ht(env->gi->pts+i, DESTROY_KEY_NO_SELF); + destroy_ht(env->gi->pps+i, DESTROY_KEY_NO_SELF); + destroy_ht(env->gi->sts+i, DESTROY_KEY_NO_SELF); + destroy_ht(env->gi->sps+i, DESTROY_KEY_NO_SELF); + destroy_ht(env->result_ts+i, DESTROY_KEY_NO_SELF); + destroy_ht(env->result_ps+i, DESTROY_KEY_NO_SELF); + } + + free(env->gi->pts); + free(env->gi->pps); + free(env->gi->sts); + free(env->gi->sps); + free(env->result_ts); + free(env->result_ps); + free(env->string); + + destroy_crf(env->crfp); + destroy_procu(env->pu); + destroy_procr(env->pr); + destroy_spa(env->spap); + destroy_bsr(env->bsrp); + + free(env->gi); + free(env); +} -- cgit v1.2.3-18-g5258