#include #include #include "cnp.h" #define TITO struct timespec tic, toc #define TIC do { \ clock_gettime(CLOCK_MONOTONIC_RAW, &tic); \ } while (0) #define TOC do { \ clock_gettime(CLOCK_MONOTONIC_RAW, &toc); \ printf("\nTotal time = %f seconds\n", \ (toc.tv_nsec - tic.tv_nsec) / \ 1000000000.0 + \ toc.tv_sec - tic.tv_sec); \ } while (0) struct Environment_s { grammar_info *gi; /* RESULT_TS and RESULT_PS are temporary arrays used in the computations. */ ht *result_ts; ht *result_ps; NUM *string; UNUM string_len; crf *crfp; procu *pu; procr *pr; spa *spap; bsr *bsrp; /* If there are errors this will be non-zero. When this field is non-zero, every function that involves this struct will do nothing. */ BOOL error; }; void nt_add(Environment *env, NUM X, NUM j) { if (env->error) return; #define GI env->gi #define RG (grammar_rule(GI->g, X)) for (NUM i = 0; i < rg_len(RG); i++) { BOOL result = test_select(env, *(env->string+j), X, rg_nth(RG, i)); for (NUM k = 0; k < grammar_left_len(GI->g); k++) { ht_reset(env->result_ts+k, DELETE_KEY); ht_reset(env->result_ps+k, DELETE_KEY); } if (env->error) break; if (result) { env->error = desc_add(env->pr, env->pu, j, (pair4) { .x = X, .y = i, .z = 0, .u = j }); /* print_procr(env->pr); */ /* fleprintf("pr len = %ld\n", env->pr->len); * fleprintf("pr ini = %d\n", (env->pr->array)->initialized); */ } if (env->error) break; } #undef RG #undef GI } BOOL test_select(Environment *env, NUM b, NUM X, CCR_MOD(List *) tnts) { if (env->error) return 0; BOOL result = 0; grammar_info *gi = env->gi; ht *result_ts = env->result_ts; ht *result_ps = env->result_ps; ht_reset(result_ts, DELETE_KEY); ht_reset(result_ps, DELETE_KEY); if (tnt_first(gi->pts, gi->pps, gi->nts, grammar_left_len(gi->g), tnts, result_ts, result_ps)) { fleprintf0("Fail to find the first set of the TNT string.\n"); goto cleanup; } /* if (X == 3) { * fleprintf0("for empty the first has "); * for (NUM i = 0; i < ht_size(result_ts); i++) { * eprintf("%ld", **((NUM **)ht_keys(result_ts)+i)); * if (i+1data.nt == 1) * fleprintf("first of tnts is %ld\n", * ((TNT *)list_nth(tnts, 0))->data.nt); * } */ if (ht_find(result_ts, &b) != NULL) { result = 1; #ifdef DEBUG fleprintf("test succeeds against %ld\n", b); #endif goto success; } /* fleprintf("hi, size = %ld\n", ht_size(result_ps)); */ for (NUM i = 0; i < ht_size(result_ps); i++) { PTD *ptdp = grammar_ptd(gi->g, **((PT **) ht_keys(result_ps)+i)); if (ptd_run(ptdp, b)) { result = 1; goto success; } } for (NUM i = 0; i < list_length(tnts); i++) { #define TOP ((TNT*)list_nth(tnts, i)) switch (TOP->type) { case TERMINAL: case PREDICATE: goto success; break; default: if (!(*(gi->nts+(TOP->data.nt)))) goto success; break; } #undef TOP } /* fleprintf("hi %ld\n", X); */ if (ht_find(gi->sts+X, &b) != NULL) { result = 1; #ifdef DEBUG fleprintf("followed by %ld\n", b); #endif goto success; } for (NUM i = 0; i < ht_size(gi->sps+X); i++) { PTD *ptdp = grammar_ptd(gi->g, **((PT **) ht_keys(gi->sps+X)+i)); if (ptd_run(ptdp, b)) { result = 1; goto success; } } goto success; cleanup: env->error = 1; return result; success: return result; } static Environment *rtn_internal_env = NULL; static NUM rtn_internal_num = 0; static void rtn_internal(pair6 label) { if (rtn_internal_env->error) return; if ((rtn_internal_env->error = desc_add(rtn_internal_env->pr, rtn_internal_env->pu, rtn_internal_num, (pair4) { label.z, label.u, label.v, label.w }))) return; #ifdef DEBUG fleprintf("added descriptor (%ld, %ld, %ld, %ld, %ld)\n", label.z, label.u, label.v, label.w, rtn_internal_num); #endif if ((rtn_internal_env->error = bsr_add(rtn_internal_env->gi->g, rtn_internal_env->bsrp, (pair6) { label.z, label.u, label.v, label.w, label.y, rtn_internal_num }))) return; } void rtn(Environment *env, NUM X, NUM k, NUM j) { if (env->error) return; pair3 label = (pair3) { .x = X, .y = k, .z = j }; pair2 label2 = (pair2) { .x = X, .y = k }; #ifdef DEBUG fleprintf("encounter %ld, %ld, %ld\n", X, k, j); #endif if (spa_belong(env->spap, label)) return; if ((env->error = spa_insert(env->spap, label))) return; if (!(crf_find_node(env->crfp, label2))) { fleprintf0("this should not happen\n"); env->error = 1; return; } rtn_internal_env = env; rtn_internal_num = j; crf_map(env->crfp, label2, rtn_internal); rtn_internal_env = NULL; rtn_internal_num = 0; } static Environment *call_internal_env = NULL; static pair5 call_internal_label5 = (pair5) { 0 }; void cnp_call_internal(pair3 label) { if (call_internal_env->error) return; if ((call_internal_env->error = desc_add(call_internal_env->pr, call_internal_env->pu, label.z, (prodecor) { call_internal_label5.x, call_internal_label5.y, call_internal_label5.z, call_internal_label5.u }))) { fleprintf0("error\n"); return; } /* printf("Added desc (%ld, %ld, %ld, %ld, %ld)\n", * call_internal_label5.x, * call_internal_label5.y, * call_internal_label5.z, * call_internal_label5.u, * label.z); */ if ((call_internal_env->error = bsr_add(call_internal_env->gi->g, call_internal_env->bsrp, (pair6) { call_internal_label5.x, call_internal_label5.y, call_internal_label5.z, call_internal_label5.u, call_internal_label5.v, label.z }))) { fleprintf0("error\n"); return; } /* printf("Added bsr (%ld, %ld, %ld, %ld, %ld, %ld)\n", * call_internal_label5.x, * call_internal_label5.y, * call_internal_label5.z, * call_internal_label5.u, * call_internal_label5.v, * label.z); */ } void cnp_call(Environment *env, pair3 label, NUM i, NUM j) { if (env->error) return; if (label.z < 1) goto cleanup; NUM Y = label.x; List *tnts = rg_nth(grammar_rule(env->gi->g, Y), label.y); TNT *xtnt = (TNT *) list_nth(tnts, label.z - 1); if (xtnt->type != NONTERMINAL) { fleprintf("Wrong type: %d\n", xtnt->type); goto cleanup; } NUM X = (NUM) xtnt->data.nt; #ifdef DEBUG fleprintf0("X = "); eprint_name(list_nth(grammar_names(env->gi->g), X)); eprintf(", j = %ld\n", j); #endif pair2 node = (pair2) { .x = X, .y = j }; pair4 u = (pair4) { .x = label.x, .y = label.y, .z = label.z, .u = i }; if (!(crf_find_node(env->crfp, node))) { #ifdef DEBUG fleprintf("adding node (%ld, %ld)\n", X, j); #endif if (crf_add_node(env->crfp, node)) { fleprintf0("fail to add node to crf\n"); goto cleanup; } #ifdef DEBUG fleprintf("adding edge to (%ld, %ld, %ld, %ld)\n", u.x, u.y, u.z, u.u); #endif if (crf_add_edge(env->crfp, node, u)) { fleprintf0("fail to add edge to crf\n"); goto cleanup; } /* errors will be stored in ENV, so we simply return */ nt_add(env, X, j); return; } #ifdef DEBUG fleprintf("adding edge to (%ld, %ld, %ld, %ld)\n", u.x, u.y, u.z, u.u); #endif if (!(crf_find_edge(env->crfp, node, u))) { if (crf_add_edge(env->crfp, node, u)) { fleprintf0("fail to add edge to crf\n"); goto cleanup; } call_internal_env = env; call_internal_label5 = (pair5) { label.x, label.y, label.z, i, j }; spa_map(env->spap, node, cnp_call_internal); call_internal_env = NULL; call_internal_label5 = (pair5) { 0 }; } return; cleanup: fleprintf0("error\n"); env->error = 1; return; } HP_ATTR BOOL env_error_p(CCR_MOD(Environment *) env) { return env->error; } P_ATTR bsr * env_bsrp(CCR_MOD(Environment *) env) { return env->bsrp; } P_ATTR Grammar * env_grammar(CCR_MOD(Environment *) env) { return env->gi->g; } P_ATTR crf * env_crfp(CCR_MOD(Environment *) env) { return env->crfp; } P_ATTR procr * env_r(CCR_MOD(Environment *) env) { return env->pr; } P_ATTR procu * env_u(CCR_MOD(Environment *) env) { return env->pu; } H_ATTR void env_str(CCR_MOD(Environment *) env, NUM **str, NUM *str_len) { *str = env->string; *str_len = env->string_len; } H_ATTR BOOL env_follow_p(CCR_MOD(Environment *) env, NUM X, NUM t) { if (ht_find(((env->gi->sts)+X), &t) != NULL) return 1; for (NUM i = 0; i < ht_size(env->gi->sps+X); i++) { PTD *ptdp = grammar_ptd(env->gi->g, **((PT **) ht_keys(env->gi->sps+X)+i)); if (ptd_run(ptdp, t)) return 1; } #ifdef DEBUG fleprintf("X = %ld, t = %ld\n", X, t); #endif return 0; } void env_print_follow(CCR_MOD(Environment *) env, NUM X) { for (NUM i = 0; i < ht_size((env->gi->sts+X)); i++) { if (**(((NUM **) ht_keys(env->gi->sts+X)) + i) != END_OF_INPUT) printf("key %ld = %ld\n", i, **(((NUM **) ht_keys(env->gi->sts+X)) + i)); else printf("key %ld = END OF INPUT\n", i); } } void env_print_first(CCR_MOD(Environment *) env, NUM X) { for (NUM i = 0; i < ht_size((env->gi->pts+X)); i++) { printf("key %ld = %ld\n", i, **(((NUM **) ht_keys(env->gi->pts+X)) + i)); } } Environment * new_env(Grammar *g, str *string) { Environment *env = NULL; ht *htp = NULL; NUM current_index = 0, follow_current_index = 0; NUM result_current_index = 0; SAFE_MALLOC(Environment, env, 1, goto cleanup;); env->gi = NULL; SAFE_CALLOC(grammar_info, env->gi, 1, goto cleanup;); env->gi->g = g; NUM left_len = grammar_left_len(g); SAFE_MALLOC(BOOL, env->gi->nts, sizeof(BOOL) * left_len, goto cleanup;); epsilon_nts(g, env->gi->nts); SAFE_MALLOC(ht, env->gi->pts, sizeof(ht) * left_len, goto cleanup;); SAFE_MALLOC(ht, env->gi->pps, sizeof(ht) * left_len, goto cleanup;); for (current_index = 0; current_index < left_len; current_index++) { if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) goto cleanup; *(env->gi->pts+current_index) = *htp; free(htp); if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) { destroy_ht(env->gi->pts+current_index, DESTROY_NONE_NO_SELF); goto cleanup; } *(env->gi->pps+current_index) = *htp; free(htp); } if (nt_first(g, env->gi->nts, env->gi->pts, env->gi->pps)) goto cleanup; SAFE_MALLOC(ht, env->gi->sts, sizeof(ht) * left_len, goto cleanup;); SAFE_MALLOC(ht, env->gi->sps, sizeof(ht) * left_len, goto cleanup;); for (follow_current_index = 0; follow_current_index < left_len; follow_current_index++) { if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) goto cleanup; *(env->gi->sts+follow_current_index) = *htp; free(htp); if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) { destroy_ht(env->gi->sts+follow_current_index, DESTROY_NONE_NO_SELF); goto cleanup; } *(env->gi->sps+follow_current_index) = *htp; free(htp); } if (nt_follow(g, env->gi->nts, env->gi->pts, env->gi->pps, env->gi->sts, env->gi->sps)) goto cleanup; SAFE_MALLOC(ht, env->result_ts, sizeof(ht) * left_len, goto cleanup;); SAFE_MALLOC(ht, env->result_ps, sizeof(ht) * left_len, goto cleanup;); for (result_current_index = 0; result_current_index < left_len; result_current_index++) { if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) goto cleanup; *(env->result_ts+result_current_index) = *htp; free(htp); if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) { destroy_ht(env->result_ts+result_current_index, DESTROY_NONE_NO_SELF); goto cleanup; } *(env->result_ps+result_current_index) = *htp; free(htp); } SAFE_MALLOC(NUM, env->string, str_length(string)+1, goto cleanup;); str_info info = EMPTY_STR_INFO; env->string_len = 0; for (NUM i = 0; i < str_length(string); i += info.step) { info = get_info(string, i); *(env->string+(env->string_len)++) = info.value; } *(env->string+(env->string_len)++) = END_OF_INPUT; if ((env->crfp = new_crf(g, env->string_len)) == NULL) goto cleanup; BOOL error = 0; env->pu = new_procu(g, env->string_len, &error); if (error) goto cleanup; env->pr = new_procr(g, env->string_len, &error); if (error) goto cleanup; if ((env->spap = new_spa(g, env->string_len)) == NULL) goto cleanup; env->bsrp = new_bsr(g, env->string_len, &error); if (error) goto cleanup; return env; cleanup: if (env) { if (env->gi) { if (env->gi->nts) free(env->gi->nts); for (NUM i = 0; i < current_index; i++) { destroy_ht(env->gi->pts+i, DESTROY_NONE_NO_SELF); destroy_ht(env->gi->pps+i, DESTROY_NONE_NO_SELF); } if (env->gi->pts) free(env->gi->pts); if (env->gi->pps) free(env->gi->pps); for (NUM i = 0; i < follow_current_index; i++) { destroy_ht(env->gi->sts+i, DESTROY_NONE_NO_SELF); destroy_ht(env->gi->sps+i, DESTROY_NONE_NO_SELF); } if (env->gi->sts) free(env->gi->sts); if (env->gi->sps) free(env->gi->sps); free(env->gi); } for (NUM i = 0; i < result_current_index; i++) { destroy_ht(env->result_ts+i, DESTROY_NONE_NO_SELF); destroy_ht(env->result_ps+i, DESTROY_NONE_NO_SELF); } if (env->result_ts) free(env->result_ts); if (env->result_ps) free(env->result_ps); if (env->string) free(env->string); if (env->crfp) destroy_crf(env->crfp); if (env->pu) destroy_procu(env->pu); if (env->pr) destroy_procr(env->pr); if (env->spap) destroy_spa(env->spap); if (env->bsrp) destroy_bsr(env->bsrp); free(env); } return NULL; } void destroy_env(Environment *env) { if (env == NULL) return; free(env->gi->nts); NUM left_len = grammar_left_len(env->gi->g); for (NUM i = 0; i < left_len; i++) { destroy_ht(env->gi->pts+i, DESTROY_KEY_NO_SELF); destroy_ht(env->gi->pps+i, DESTROY_KEY_NO_SELF); destroy_ht(env->gi->sts+i, DESTROY_KEY_NO_SELF); destroy_ht(env->gi->sps+i, DESTROY_KEY_NO_SELF); destroy_ht(env->result_ts+i, DESTROY_KEY_NO_SELF); destroy_ht(env->result_ps+i, DESTROY_KEY_NO_SELF); } free(env->gi->pts); free(env->gi->pps); free(env->gi->sts); free(env->gi->sps); free(env->result_ts); free(env->result_ps); free(env->string); destroy_crf(env->crfp); destroy_procu(env->pu); destroy_procr(env->pr); destroy_spa(env->spap); destroy_bsr(env->bsrp); free(env->gi); free(env); } H_ATTR Environment * cnp_parse(Grammar *g, str *string) { Environment *env = new_env(g, (str *) string); prodecor *current_prodecor = NULL; if (crf_add_node(env_crfp(env), (pair2) { 0 })) goto cleanup; nt_add(env, 0, 0); CHECK_ENV_ERROR(env, goto cleanup;); NUM current_grade = 0; Rule_group *rg = NULL; List *right = NULL, *tnts = NULL; NUM *num_string = NULL, num_string_len = 0; BOOL errorp = 0; env_str(env, &num_string, &num_string_len); while ((current_prodecor = pop_procr(env_r(env), env_u(env), ¤t_grade))) { #ifdef DEBUG fleprintf("{ %ld, %ld, %ld, %ld, %ld }\n", current_prodecor->x, current_prodecor->y, current_prodecor->z, current_prodecor->u, current_grade); print_procr(env_r(env)); #endif /* different cases */ rg = grammar_rule(env_grammar(env), current_prodecor->x); right = rg_nth(rg, current_prodecor->y); /* separate the empty rules out */ if (list_length(right) == 0) { errorp = bsr_add(env_grammar(env), env_bsrp(env), (pair6) { current_prodecor->x, current_prodecor->y, 0, current_grade, current_grade, current_grade }); if (errorp) goto cleanup; if (env_follow_p(env, current_prodecor->x, *(num_string+current_grade))) { #ifdef DEBUG fleprintf0("returning from an empty rule\n"); #endif rtn(env, current_prodecor->x, current_prodecor->u, current_grade); } goto continue_label; } #ifdef DEBUG fleprintf0("non empty rule\n"); #endif /* if at the end of a rule, return if possible */ if (current_prodecor->z == list_length(right)) { if (env_follow_p(env, current_prodecor->x, *(num_string+current_grade))) { #ifdef DEBUG fleprintf0("returning from the end\n"); #endif rtn(env, current_prodecor->x, current_prodecor->u, current_grade); } goto continue_label; } #ifdef DEBUG fleprintf0("not at the end of the rule\n"); #endif /* if at the start of a rule, no need for test_select */ if (current_prodecor->z == 0) { #ifdef DEBUG fleprintf0("at the start of the rule\n"); #endif current_prodecor->z = 1; /* otherwise test select */ } else { #ifdef DEBUG fleprintf0("start by test select\n"); #endif tnts = array_to_list(list_array(right)+current_prodecor->z, list_length(right)-current_prodecor->z); if (test_select(env, *(num_string+current_grade), current_prodecor->x, tnts)) { #ifdef DEBUG fleprintf0("successful test\n"); #endif free(tnts); tnts = NULL; ++(current_prodecor->z); } else { #ifdef DEBUG fleprintf0("failed test\n"); #endif free(tnts); tnts = NULL; goto continue_label; } } CHECK_ENV_ERROR(env, goto cleanup;); /* while there are terminals */ while (((TNT *) list_nth(right, current_prodecor->z-1))->type == TERMINAL || ((TNT *) list_nth(right, current_prodecor->z-1))->type == PREDICATE) { #ifdef DEBUG if (((TNT *) list_nth(right, current_prodecor->z-1))->type == TERMINAL) fleprintf("found terminal %ld\n", ((TNT *) list_nth (right, current_prodecor->z-1))->data.t); else fleprintf("found predicate terminal %ld\n", ((TNT *) list_nth (right, current_prodecor->z-1))->data.pt); #endif /* add to BSR set */ errorp = bsr_add(env_grammar(env), env_bsrp(env), (pair6) { current_prodecor->x, current_prodecor->y, current_prodecor->z, current_prodecor->u, current_grade, current_grade+1 }); if (errorp) goto cleanup; current_grade++; /* if at the end of the rule, return if possible */ if (current_prodecor->z == list_length(right)) { if (env_follow_p(env, current_prodecor->x, *(num_string+current_grade))) { #ifdef DEBUG fleprintf0("we choose to return\n"); #endif rtn(env, current_prodecor->x, current_prodecor->u, current_grade); /* fleprintf0("hi\n"); */ } goto continue_label; } #ifdef DEBUG fleprintf0("terminal not at the end\n"); fleprintf("testing X = %ld, z = %ld, input = %ld\n", current_prodecor->x, current_prodecor->z, *(num_string+current_grade)); #endif /* else test select */ tnts = array_to_list(list_array(right)+current_prodecor->z, list_length(right)-current_prodecor->z); if (test_select(env, *(num_string+current_grade), current_prodecor->x, tnts)) { #ifdef DEBUG fleprintf0("Successful test\n"); #endif free(tnts); tnts = NULL; (current_prodecor->z)++; } else { #ifdef DEBUG fleprintf0("Failed test\n"); #endif /* printf("next thing is "); */ /* print_tnt(list_nth(right, current_prodecor->z)); */ /* printf("\n"); */ free(tnts); tnts = NULL; goto continue_label; } } #ifdef DEBUG fleprintf0("encountered non-terminal\n"); #endif /* FIXME: Add the optimisation that when we know only one rule for the encountered non-terminal is possible according to the current input, either because the non-terminal is an "LL(1)" non-terminal, or because the current input symbol says so, we don't want to go through the process of adding a process descriptor only to pop it up immediately. This can remove the burden brought by the generalized techniques when they are not needed. */ /* when a nonterminal is encountered, we call it */ cnp_call(env, (pair3) { .x = current_prodecor->x, .y = current_prodecor->y, .z = current_prodecor->z }, current_prodecor->u, current_grade); #ifdef DEBUG fleprintf("after call { %ld, %ld, %ld, %ld, %ld }\n", current_prodecor->x, current_prodecor->y, current_prodecor->z, current_prodecor->u, current_grade); #endif continue_label: CHECK_ENV_ERROR(env, goto cleanup;); free(current_prodecor); current_prodecor = NULL; } return env; cleanup: if (current_prodecor) free(current_prodecor); destroy_env(env); return NULL; }