From 510b10b96b546fcc6c6b6be85050305ddd192a41 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Sat, 5 Feb 2022 17:30:11 +0800 Subject: replace some hash table usage by tuples Previously I used hash tables, which consume too much memory. Now the critical parts are replaced by a new hand-written library called "tuple.h". Now we can easily parse larger inputs. I haven't tested its limits, though. --- src/cnp.c | 182 ++++++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 125 insertions(+), 57 deletions(-) (limited to 'src/cnp.c') diff --git a/src/cnp.c b/src/cnp.c index 91a8df7..fe5a4dc 100644 --- a/src/cnp.c +++ b/src/cnp.c @@ -44,9 +44,10 @@ nt_add(Environment *env, NUM X, NUM j) /* fleprintf("i = %ld, j = %ld\n", i, j); */ env->error = desc_add(env->pr, env->pu, j, - (pair4) { .x = X, .y = i, .z = 0, .w = j }); + (pair4) { .x = X, .y = i, .z = 0, .u = j }); /* fleprintf("env->error = %d\n", env->error); */ + /* print_procr(env->pr); */ /* fleprintf("pr len = %ld\n", env->pr->len); * fleprintf("pr ini = %d\n", (env->pr->array)->initialized); */ } @@ -82,7 +83,7 @@ test_select(Environment *env, NUM b, NUM X, CCR_MOD(List *) tnts) goto success; } - /* predicates haven't been implemented yet */ + /* TODO: predicates haven't been implemented yet */ /* for (NUM i = 0; i < ht_size(result_ps); i++) { * * } */ @@ -107,7 +108,7 @@ test_select(Environment *env, NUM b, NUM X, CCR_MOD(List *) tnts) if (ht_find(gi->sts+X, &b) != NULL) result = 1; - /* predicates haven't been implemented yet */ + /* TODO: predicates haven't been implemented yet */ goto success; @@ -119,6 +120,36 @@ test_select(Environment *env, NUM b, NUM X, CCR_MOD(List *) tnts) return result; } +static Environment *rtn_internal_env = NULL; +static NUM rtn_internal_num = 0; + +static void +rtn_internal(pair6 label) +{ + if (rtn_internal_env->error) return; + + if ((rtn_internal_env->error = + desc_add(rtn_internal_env->pr, + rtn_internal_env->pu, + rtn_internal_num, + (pair4) { + label.z, label.u, + label.v, label.w + }))) return; + + /* fleprintf("added descriptor (%ld, %ld, %ld, %ld, %ld)\n", + * label.z, label.u, label.v, label.w, + * rtn_internal_num); */ + + if ((rtn_internal_env->error = + bsr_add(rtn_internal_env->gi->g, + rtn_internal_env->bsrp, + (pair6) { + label.z, label.u, label.v, label.w, + label.y, rtn_internal_num + }))) return; +} + void rtn(Environment *env, NUM X, NUM k, NUM j) { @@ -133,31 +164,71 @@ rtn(Environment *env, NUM X, NUM k, NUM j) if ((env->error = spa_insert(env->spap, label))) return; - ht *edge_ht = NULL; - - if ((edge_ht = crf_find_node(env->crfp, label2)) == NULL) { + if (!(crf_find_node(env->crfp, label2))) { fleprintf0("this should not happen\n"); env->error = 1; return; } -#define EDGE_LABELS ((pair4 **) ht_keys(edge_ht)) + rtn_internal_env = env; + rtn_internal_num = j; + + crf_map(env->crfp, label2, rtn_internal); - for (NUM i = 0; i < ht_size(edge_ht); i++) { + rtn_internal_env = NULL; + rtn_internal_num = 0; +} -#define EDGE (*(EDGE_LABELS+i)) +static Environment *call_internal_env = NULL; +static pair5 call_internal_label5 = (pair5) { 0 }; - if ((env->error = desc_add(env->pr, env->pu, j, *EDGE))) return; - /* fleprintf("added descriptor (%ld, %ld, %ld, %ld, %ld)\n", - * EDGE->x, EDGE->y, EDGE->z, EDGE->w, j); */ - if ((env->error = - bsr_add(env->gi->g, env->bsrp, - EDGE->x, EDGE->y, EDGE->z, EDGE->w, k, j))) - return; +void +cnp_call_internal(pair3 label) +{ + if (call_internal_env->error) return; + + if ((call_internal_env->error = + desc_add(call_internal_env->pr, + call_internal_env->pu, + label.z, + (prodecor) { + call_internal_label5.x, + call_internal_label5.y, + call_internal_label5.z, + call_internal_label5.u + }))) { + fleprintf0("error\n"); + return; + } + + /* printf("Added desc (%ld, %ld, %ld, %ld, %ld)\n", + * call_internal_label5.x, + * call_internal_label5.y, + * call_internal_label5.z, + * call_internal_label5.u, + * label.z); */ + + if ((call_internal_env->error = + bsr_add(call_internal_env->gi->g, call_internal_env->bsrp, + (pair6) { + call_internal_label5.x, + call_internal_label5.y, + call_internal_label5.z, + call_internal_label5.u, + call_internal_label5.v, + label.z + }))) { + fleprintf0("error\n"); + return; } -#undef EDGE_LABELS -#undef EDGE + /* printf("Added bsr (%ld, %ld, %ld, %ld, %ld, %ld)\n", + * call_internal_label5.x, + * call_internal_label5.y, + * call_internal_label5.z, + * call_internal_label5.u, + * call_internal_label5.v, + * label.z); */ } @@ -175,7 +246,7 @@ cnp_call(Environment *env, pair3 label, NUM i, NUM j) if (xtnt->type != NONTERMINAL) goto cleanup; - NUM X = (NUM) xtnt->data.nt, h = 0; + NUM X = (NUM) xtnt->data.nt; pair2 node = (pair2) { .x = X, .y = j }; pair4 u = (pair4) @@ -183,12 +254,10 @@ cnp_call(Environment *env, pair3 label, NUM i, NUM j) .x = label.x, .y = label.y, .z = label.z, - .w = i + .u = i }; - ht *edge_ht = NULL, *spa_ht = NULL; - - if ((edge_ht = crf_find_node(env->crfp, node)) == NULL) { + if (!(crf_find_node(env->crfp, node))) { if (crf_add_node(env->crfp, node)) goto cleanup; if (crf_add_edge(env->crfp, node, u)) goto cleanup; @@ -196,26 +265,22 @@ cnp_call(Environment *env, pair3 label, NUM i, NUM j) /* errors will be stored in ENV, so we simply return */ nt_add(env, X, j); + /* printf("X = %ld, j = %ld\n", X, j); */ + return; } - if (ht_find(edge_ht, &u) == NULL) { + if (!(crf_find_edge(env->crfp, node, u))) { if (crf_add_edge(env->crfp, node, u)) goto cleanup; - spa_ht = spa_find(env->spap, node); - - if (spa_ht == NULL) return; + call_internal_env = env; + call_internal_label5 = + (pair5) { label.x, label.y, label.z, i, j }; - for (NUM k = 0; k < ht_size(spa_ht); k++) { - h = **((NUM **) ht_keys(spa_ht) + k); + spa_map(env->spap, node, cnp_call_internal); - if (desc_add(env->pr, env->pu, h, u)) goto cleanup; - - if (bsr_add(env->gi->g, env->bsrp, - label.x, label.y, label.z, - i, j, h)) - goto cleanup; - } + call_internal_env = NULL; + call_internal_label5 = (pair5) { 0 }; } return; @@ -316,9 +381,7 @@ new_env(Grammar *g, str *string) env->gi = NULL; - SAFE_MALLOC(grammar_info, env->gi, 1, goto cleanup;); - - memset(env->gi, 0, sizeof(grammar_info)); + SAFE_CALLOC(grammar_info, env->gi, 1, goto cleanup;); env->gi->g = g; @@ -419,21 +482,23 @@ new_env(Grammar *g, str *string) *(env->string+(env->string_len)++) = END_OF_INPUT; - if ((env->crfp = new_crf()) == NULL) goto cleanup; + if ((env->crfp = new_crf(g, env->string_len)) == NULL) + goto cleanup; BOOL error = 0; - env->pu = new_procu(env->string_len, &error); + env->pu = new_procu(g, env->string_len, &error); if (error) goto cleanup; - env->pr = new_procr(env->string_len, &error); + env->pr = new_procr(g, env->string_len, &error); if (error) goto cleanup; - if ((env->spap = new_spa()) == NULL) goto cleanup; + if ((env->spap = new_spa(g, env->string_len)) == NULL) + goto cleanup; - env->bsrp = new_bsr(&error, g); + env->bsrp = new_bsr(g, env->string_len, &error); if (error) goto cleanup; @@ -529,6 +594,8 @@ cnp_parse(Grammar *g, str *string) { Environment *env = new_env(g, (str *) string); + prodecor *current_prodecor = NULL; + if (crf_add_node(env_crfp(env), (pair2) { 0 })) goto cleanup; nt_add(env, 0, 0); @@ -537,8 +604,6 @@ cnp_parse(Grammar *g, str *string) NUM current_grade = 0; - prodecor *current_prodecor = NULL; - Rule_group *rg = NULL; List *right = NULL, *tnts = NULL; @@ -551,7 +616,7 @@ cnp_parse(Grammar *g, str *string) pop_procr(env_r(env), env_u(env), ¤t_grade))) { /* fleprintf("{ %ld, %ld, %ld, %ld, %ld }\n", * current_prodecor->x, current_prodecor->y, - * current_prodecor->z, current_prodecor->w, + * current_prodecor->z, current_prodecor->u, * current_grade); * print_procr(env_r(env)); */ @@ -564,15 +629,16 @@ cnp_parse(Grammar *g, str *string) if (list_length(right) == 0) { errorp = bsr_add(env_grammar(env), env_bsrp(env), - current_prodecor->x, current_prodecor->y, 0, - current_grade, current_grade, current_grade); + (pair6) { + current_prodecor->x, current_prodecor->y, 0, + current_grade, current_grade, current_grade }); if (errorp) goto cleanup; if (env_follow_p(env, current_prodecor->x, *(num_string+current_grade))) { rtn(env, current_prodecor->x, - current_prodecor->w, current_grade); + current_prodecor->u, current_grade); } goto continue_label; @@ -585,7 +651,7 @@ cnp_parse(Grammar *g, str *string) if (env_follow_p(env, current_prodecor->x, *(num_string+current_grade))) { rtn(env, current_prodecor->x, - current_prodecor->w, current_grade); + current_prodecor->u, current_grade); } goto continue_label; @@ -627,9 +693,9 @@ cnp_parse(Grammar *g, str *string) /* add to BSR set */ errorp = bsr_add(env_grammar(env), env_bsrp(env), - current_prodecor->x, current_prodecor->y, - current_prodecor->z, current_prodecor->w, - current_grade, current_grade+1); + (pair6) { current_prodecor->x, current_prodecor->y, + current_prodecor->z, current_prodecor->u, + current_grade, current_grade+1 }); if (errorp) goto cleanup; @@ -641,7 +707,7 @@ cnp_parse(Grammar *g, str *string) *(num_string+current_grade))) { /* fleprintf0("we choose to return\n"); */ rtn(env, current_prodecor->x, - current_prodecor->w, current_grade); + current_prodecor->u, current_grade); /* fleprintf0("hi\n"); */ } @@ -676,11 +742,11 @@ cnp_parse(Grammar *g, str *string) .x = current_prodecor->x, .y = current_prodecor->y, .z = current_prodecor->z - }, current_prodecor->w, current_grade); + }, current_prodecor->u, current_grade); /* fleprintf("after call { %ld, %ld, %ld, %ld, %ld }\n", * current_prodecor->x, current_prodecor->y, - * current_prodecor->z, current_prodecor->w, + * current_prodecor->z, current_prodecor->u, * current_grade); */ continue_label: @@ -688,12 +754,14 @@ cnp_parse(Grammar *g, str *string) CHECK_ENV_ERROR(env, goto cleanup;); free(current_prodecor); + current_prodecor = NULL; } return env; cleanup: + if (current_prodecor) free(current_prodecor); destroy_env(env); return NULL; -- cgit v1.2.3-18-g5258