From 510b10b96b546fcc6c6b6be85050305ddd192a41 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Sat, 5 Feb 2022 17:30:11 +0800 Subject: replace some hash table usage by tuples Previously I used hash tables, which consume too much memory. Now the critical parts are replaced by a new hand-written library called "tuple.h". Now we can easily parse larger inputs. I haven't tested its limits, though. --- src/crf.c | 560 +++++++++++++++++++++++++++++--------------------------------- 1 file changed, 260 insertions(+), 300 deletions(-) (limited to 'src/crf.c') diff --git a/src/crf.c b/src/crf.c index 1cff96f..1c9fea9 100644 --- a/src/crf.c +++ b/src/crf.c @@ -5,77 +5,110 @@ #include "crf.h" struct crf_s { - ht *table; + luple6 *lup; }; crf * -new_crf() +new_crf(Grammar *g, NUM input_len) { + NUM left_len = grammar_left_len(g); + + NUM max_alt_len = 0, max_rule_len = 0; + + for (NUM i = 0; i < left_len; i++) { + NUM alt_len = rg_len(grammar_rule(g, i)); + if (alt_len > max_rule_len) max_rule_len = alt_len; + for (NUM j = 0; j < alt_len; j++) { + NUM jth_len = list_length(rg_nth(grammar_rule(g, i), j)); + if (jth_len > max_alt_len) max_alt_len = jth_len; + } + } + + NUM *len = NULL; + crf *crfp = NULL; + SAFE_MALLOC(crf, crfp, 1, goto cleanup;); - crfp->table = new_ht2(HT_INIT_CAP); + SAFE_MALLOC(NUM, len, 6, goto cleanup;); + + *len = left_len; + *(len+1) = input_len; + *(len+2) = left_len; + *(len+3) = max_rule_len; + *(len+4) = max_alt_len+1; + *(len+5) = input_len; - if (crfp->table == NULL) goto cleanup; + crfp->lup = new_luple6(len); + + if (crfp->lup == NULL) goto cleanup; return crfp; cleanup: if (crfp) free(crfp); + if (len) free(len); return NULL; } BOOL -crf_add_node(crf * const restrict crfp, pair2 node) +crf_add_node(crf *crfp, pair2 node) { - if (ht_find(crfp->table, &node)) return 0; - - pair2 *p2 = NULL; - - NEW_P2(p2, node.x, node.y, return 1;); - - ht *value_table = new_ht4(HT_INIT_CAP); - - if (value_table == NULL) { - free(p2); - return 1; - } - - if (ht_insert(crfp->table, p2, value_table)) { - fleprintf0("Fail to insert\n"); - destroy_ht(value_table, DESTROY_NONE_SELF); - free(p2); - return 1; - } + return add_to_luple6_pt_2(crfp->lup, node); +} - return 0; +HP_ATTR +BOOL +crf_find_node(crf *crfp, pair2 node) +{ + return luple6_pf_2(crfp->lup, node); } -ht * -crf_find_node(crf * const restrict crfp, pair2 node) +HP_ATTR +BOOL +crf_find_edge(crf *crfp, pair2 source, pair4 label) { - return ht_find(crfp->table, &node); + NUM *result = luple6_find(crfp->lup, (pair6) { + source.x, source.y, label.x, label.y, label.z, label.u + }); + + return (result && *result) ? 1 : 0; } BOOL -crf_add_edge(crf * const restrict crfp, pair2 source, pair4 label) +crf_add_edge(crf *crfp, pair2 source, pair4 label) { - if (ht_find(crfp->table, &source) == NULL) { + if (!(luple6_pf_2(crfp->lup, source))) { fleprintf0("add the node before adding its edges\n"); return 1; } - if (ht_find(ht_find(crfp->table, &source), &label)) return 0; + pair6 p6 = (pair6) + { + source.x, + source.y, + label.x, + label.y, + label.z, + label.u + }; - pair4 *p = NULL; + /* fleprintf("adding (%ld, %ld, %ld, %ld, %ld, %ld)\n", + * p6.x, p6.y, p6.z, p6.u, p6.v, p6.w); */ - NEW_P4(p, label.x, label.y, label.z, label.w, return 1;); + NUM *find_result = luple6_find(crfp->lup, p6); - if (ht_insert(ht_find(crfp->table, &source), p, (void*)1)) { + if (find_result && *find_result) { + /* fleprintf0("Already added!\n"); */ + return 0; + } + + if (add_to_luple6(crfp->lup, p6, 1)) { fleprintf0("Fail to insert\n"); - free(p); + fleprintf("adding (%ld, %ld, %ld, %ld, %ld, %ld)\n", + p6.x, p6.y, p6.z, p6.u, p6.v, p6.w); return 1; } @@ -83,207 +116,167 @@ crf_add_edge(crf * const restrict crfp, pair2 source, pair4 label) } void -destroy_crf(crf * const restrict crfp) +destroy_crf(crf *crfp) { + destroy_luple6(crfp->lup); + free(crfp); +} -#define SIZE (ht_size(crfp->table)) -#define VALUES ((ht**) ht_values(crfp->table)) - - for (NUM i = 0; i < SIZE;) - destroy_ht(*(VALUES+i++), DESTROY_KEY_SELF); +UNUSED +static void +crf_print_internal(pair6 label) +{ + printf("(%ld, %ld, %ld, %ld)\n", + label.z, label.u, label.v, label.w); +} - destroy_ht(crfp->table, DESTROY_KEY_SELF); +void +crf_print(crf *crfp) +{ + printf("Printing a call return forest...\n"); - free(crfp); + NUM *len = luple6_len(crfp->lup); -#undef SIZE -#undef VALUES + for (NUM i = 0; i < *len; i++) + for (NUM j = 0; j < *(len+1); j++) + if (luple6_pf_2(crfp->lup, (pair2) { i, j })) { + printf("Printing edges for (%ld, %ld)...\n", i, j); + luple6_map_2(crfp->lup, (pair2) { i, j }, -1, + crf_print_internal); + printf("\n"); + } +} +H_ATTR +void +crf_map(crf *crfp, pair2 label, map_pair6_t fn) +{ + luple6_map_2(crfp->lup, label, label.y, fn); } struct spa_s { - ht *table; + luple3 *lup; }; spa * -new_spa() +new_spa(Grammar *g, NUM input_len) { spa *spap = NULL; - SAFE_MALLOC(spa, spap, 1, return NULL;); + NUM *len = NULL; - spap->table = new_ht2(HT_INIT_CAP); + SAFE_MALLOC(spa, spap, 1, goto cleanup;); - if (spap->table == NULL) { - free(spap); - return NULL; - } + SAFE_MALLOC(NUM, len, 3, goto cleanup;); - return spap; -} + *len = grammar_left_len(g); + *(len+1) = input_len; + *(len+2) = input_len; -void -destroy_spa(spa * const restrict spap) -{ - for (NUM i = 0; i < ht_size(spap->table); i++) - destroy_ht(*((ht **) ht_values(spap->table)+i), DESTROY_KEY_SELF); + spap->lup = new_luple3(len); - /* fleprintf0("ho\n"); */ + if (spap->lup == NULL) { + fleprintf0("Fail to create luple3\n"); + goto cleanup; + } - destroy_ht(spap->table, DESTROY_KEY_SELF); + return spap; - free(spap); + cleanup: + if (spap) free(spap); + if (len) free(len); + return NULL; } -__attribute__((__pure__, __cold__)) -ht * -spa_ht(CCR_MOD(spa *) spap) +void +destroy_spa(spa *spap) { - return spap->table; + destroy_luple3(spap->lup); + free(spap); } BOOL -spa_insert(spa * const restrict spap, pair3 label) +spa_insert(spa *spap, pair3 label) { - pair2 first_two = (pair2) { 0 }, *p2 = NULL; - - first_two.x = label.x; - first_two.y = label.y; - - NUM j = label.z, *np = NULL; - - ht *value_table = NULL; - - if (ht_find(spap->table, &first_two) == NULL) { - value_table = new_ht(HT_INIT_CAP, 0); - - if (value_table == NULL) { - fleprintf0("Fail to have new hash table\n"); - goto cleanup; - } - - SAFE_MALLOC(NUM, np, 1, goto cleanup;); - *np = label.z; - - if (ht_insert(value_table, np, (void*)1)) { - fleprintf0("Fail to insert\n"); - goto cleanup; - } - - np = NULL; - - NEW_P2(p2, label.x, label.y, goto cleanup;); - - if (ht_insert(spap->table, p2, value_table)) { - fleprintf0("Fail to insert\n"); - goto cleanup; - } - - p2 = NULL; - - goto success; - } - - if (ht_find(ht_find(spap->table, &first_two), &j) == NULL) { - SAFE_MALLOC(NUM, np, 1, goto cleanup;); - *np = label.z; - - if (ht_insert(ht_find(spap->table, &first_two), np, (void*)1)) { - fleprintf0("Fail to insert\n"); - goto cleanup; - } - - np = NULL; + if (add_to_luple3(spap->lup, label, 1)) { + fleprintf0("Fail to insert\n"); + return 1; } - goto success; - - cleanup: - if (value_table) destroy_ht(value_table, DESTROY_KEY_SELF); - if (p2) free(p2); - if (np) free(np); - - return 1; - - success: return 0; } P_ATTR BOOL -spa_belong(CCR_MOD(spa *) spap, pair3 label) +spa_belong(spa *spap, pair3 label) { - pair2 first_two = (pair2) { 0 }; - - first_two.x = label.x; - first_two.y = label.y; - - if (ht_find(spap->table, &first_two) == NULL) return 0; - - NUM j = label.z; + NUM *result = luple3_find(spap->lup, label); - if (ht_find(ht_find(spap->table, &first_two), &j) == NULL) return 0; - - return 1; + return (result && *result) ? 1 : 0; } -P_ATTR -ht * -spa_find(CCR_MOD(spa *) spap, pair2 label) +H_ATTR +void +spa_map(spa *spap, pair2 label, map_pair3_t fn) { - return ht_find(spap->table, &label); + luple3_map_2(spap->lup, label, fn); } typedef struct procr_array_element_s procr_array_element; -/* The list at index k is the set R_k. */ +/* The list at index k is the set of "prodecor"s representing R_k. */ struct procr_array_element_s { BOOL initialized; List *ls; }; struct procr_s { - /* input length */ - NUM len; + /* an array of length 5, holding the lengths */ + NUM *len; /* an array of lists */ procr_array_element *array; /* minimal grade that is non-empty */ NUM mg; }; -typedef struct procu_array_element_s procu_array_element; - -/* The table at index k represents the set U_k. Its keys are those - (X, a, i, j) such that (X, a, i, j, k) belongs to U_k. */ -struct procu_array_element_s { - BOOL initialized; - ht *table; -}; - struct procu_s { - /* input length */ - NUM len; - /* an array of hash tables */ - procu_array_element *array; + luple5 *lup; }; procr * -new_procr(NUM size, BOOL * const restrict errorp) +new_procr(Grammar *g, NUM input_len, BOOL *errorp) { *errorp = 0; + NUM left_len = grammar_left_len(g); + + NUM max_alt_len = 0, max_rule_len = 0; + + for (NUM i = 0; i < left_len; i++) { + NUM alt_len = rg_len(grammar_rule(g, i)); + if (alt_len > max_rule_len) max_rule_len = alt_len; + for (NUM j = 0; j < alt_len; j++) { + NUM jth_len = list_length(rg_nth(grammar_rule(g, i), j)); + if (jth_len > max_alt_len) max_alt_len = jth_len; + } + } + procr *pr = NULL; - SAFE_MALLOC(procr, pr, 1, goto cleanup;); + NUM *len = NULL; + + SAFE_CALLOC(procr, pr, 1, goto cleanup;); - pr->len = size; - pr->array = NULL; - pr->mg = 0; + SAFE_MALLOC(NUM, len, 5, goto cleanup;); - SAFE_MALLOC(procr_array_element, pr->array, - sizeof(procr_array_element) * size, - goto cleanup;); + *len = input_len; + *(len+1) = left_len; + *(len+2) = max_rule_len; + *(len+3) = max_alt_len+1; + *(len+4) = input_len; - memset(pr->array, 0, sizeof(procr_array_element) * size); + pr->len = len; + + SAFE_CALLOC(procr_array_element, pr->array, *len, goto cleanup;); goto success; @@ -291,9 +284,8 @@ new_procr(NUM size, BOOL * const restrict errorp) *errorp = 1; - if (pr && pr->array) free(pr->array); - if (pr) free(pr); + if (len) free(len); pr = NULL; @@ -303,22 +295,41 @@ new_procr(NUM size, BOOL * const restrict errorp) } procu * -new_procu(NUM size, BOOL * const restrict errorp) +new_procu(Grammar *g, NUM input_len, BOOL *errorp) { *errorp = 0; + NUM left_len = grammar_left_len(g); + + NUM max_alt_len = 0, max_rule_len = 0; + + for (NUM i = 0; i < left_len; i++) { + NUM alt_len = rg_len(grammar_rule(g, i)); + if (alt_len > max_rule_len) max_rule_len = alt_len; + for (NUM j = 0; j < alt_len; j++) { + NUM jth_len = list_length(rg_nth(grammar_rule(g, i), j)); + if (jth_len > max_alt_len) max_alt_len = jth_len; + } + } + procu *pu = NULL; + NUM *len = NULL; SAFE_MALLOC(procu, pu, 1, goto cleanup;); + SAFE_MALLOC(NUM, len, 5, goto cleanup;); - pu->len = size; - pu->array = NULL; + *len = input_len; + *(len+1) = left_len; + *(len+2) = max_rule_len; + *(len+3) = max_alt_len+1; + *(len+4) = input_len; - SAFE_MALLOC(procu_array_element, pu->array, - sizeof(procu_array_element) * size, - goto cleanup;); + pu->lup = new_luple5(len); - memset(pu->array, 0, sizeof(procu_array_element) * size); + if (pu->lup == NULL) { + fleprintf0("Fail to create luple5\n"); + goto cleanup; + } goto success; @@ -326,8 +337,7 @@ new_procu(NUM size, BOOL * const restrict errorp) *errorp = 1; - if (pu && pu->array) free(pu->array); - + if (len) free(len); if (pu) free(pu); pu = NULL; @@ -337,160 +347,113 @@ new_procu(NUM size, BOOL * const restrict errorp) return pu; } +UNUSED +static void +print_label4(pair4 label) +{ + printf("(%ld, %ld, %ld, %ld)\n", + label.x, label.y, label.z, label.u); +} + void -print_procr(CCR_MOD(procr *) pr) +print_procr(procr *pr) { - eprintf("\n"); - fleprintf0("Printing procr...\n"); - fleprintf("LEN = %ld, MG = %ld\n", pr->len, pr->mg); - for (NUM i = 0; i < pr->len; i++) { - if ((pr->array+i)->initialized) { - if (list_length((pr->array+i)->ls)) eprintf("%ld: ", i); - for (NUM j = 0; j < list_length((pr->array+i)->ls); j++) { - fleprintf("(%ld, %ld, %ld, %ld)", - ((pair4 *) list_nth((pr->array+i)->ls, j))->x, - ((pair4 *) list_nth((pr->array+i)->ls, j))->y, - ((pair4 *) list_nth((pr->array+i)->ls, j))->z, - ((pair4 *) list_nth((pr->array+i)->ls, j))->w); - if (j+1==list_length((pr->array+i)->ls)) eprintf("\n"); - else eprintf(", "); - } - } + printf("\nPrinting procr...\n"); + printf("MG = %ld\n", pr->mg); + + NUM *len = pr->len; + + for (NUM i = 0; i < *len; i++) { + if (!((pr->array+i)->initialized)) continue; + + printf("Grade = %ld\n", i); + + for (NUM j = 0; j < list_length((pr->array+i)->ls); j++) + printf("(%ld, %ld, %ld, %ld)\n", + ((prodecor *) list_nth((pr->array+i)->ls, j))->x, + ((prodecor *) list_nth((pr->array+i)->ls, j))->y, + ((prodecor *) list_nth((pr->array+i)->ls, j))->z, + ((prodecor *) list_nth((pr->array+i)->ls, j))->u); } - eprintf("\n"); + printf("\n"); } void -destroy_procr(procr * const restrict pr) +destroy_procr(procr *pr) { - for (NUM i = 0; i < pr->len; i++) - if ((pr->array+i)->initialized) - destroy_list((pr->array+i)->ls, 1); + if (pr == NULL) return; - free(pr->array); + if (pr->array == NULL) { + if (pr->len) free(pr->len); + free(pr); + return; + } + for (NUM i = 0; i < *(pr->len); i++) { + if (!((pr->array+i)->initialized)) continue; + destroy_list((pr->array+i)->ls, 1); + } + + if (pr->len) free(pr->len); + + free(pr->array); free(pr); } void -destroy_procu(procu * const restrict pu) +destroy_procu(procu *pu) { - for (NUM i = 0; i < pu->len; i++) - if ((pu->array+i)->initialized) - destroy_ht((pu->array+i)->table, DESTROY_KEY_SELF); - - free(pu->array); - + destroy_luple5(pu->lup); free(pu); } BOOL -desc_add(procr * const restrict pr, procu * const restrict pu, - NUM grade, prodecor dec) +desc_add(procr *pr, procu *pu, NUM grade, prodecor dec) { - pair4 *p4 = NULL; - - if (grade < 0 || grade >= pu->len) { - fleprintf("Invalid grade: %ld\n", grade); - goto cleanup; - } - -#define HELE (pu->array+grade) -#define LELE (pr->array+grade) + pair5 label = (pair5) { grade, dec.x, dec.y, dec.z, dec.u }; - if (HELE->initialized) { - /* If HELE is initialized then LELE should also be initialized. */ - if (ht_find(HELE->table, &dec) != NULL) goto success; + NUM *result = luple5_find(pu->lup, label); - NEW_P4(p4, dec.x, dec.y, dec.z, dec.w, goto cleanup;); + if (result && *result) return 0; - if (ht_insert(HELE->table, p4, (void*)1)) { - fleprintf0("Fail to insert\n"); - goto cleanup; - } - - NEW_P4(p4, dec.x, dec.y, dec.z, dec.w, - ht_delete(HELE->table, &dec, DELETE_KEY); goto cleanup;); - - if (add_to_list(LELE->ls, p4)) { - fleprintf0("Fail to add to list\n"); - ht_delete(HELE->table, &dec, DELETE_KEY); - goto cleanup; - } - - goto success; + if (add_to_luple5(pu->lup, label, 1)) { + fleprintf0("Fail to add to procu\n"); + return 1; } - HELE->table = new_ht4(HT_INIT_CAP); + if (!((pr->array+grade)->initialized)) { + (pr->array+grade)->ls = new_list(); + if ((pr->array+grade)->ls == NULL) { + fleprintf0("Fail to create new list for procr\n"); + return 1; + } - if (HELE->table == NULL) { - fleprintf0("Fail to have new hash table\n"); - goto cleanup; + (pr->array+grade)->initialized = 1; } - HELE->initialized = 1; - - NEW_P4(p4, dec.x, dec.y, dec.z, dec.w, - destroy_ht(HELE->table, DESTROY_KEY_SELF); - HELE->initialized = 0; - goto cleanup;); + prodecor *element = NULL; - if (ht_insert(HELE->table, p4, (void*)1)) { - fleprintf0("Fail to insert\n"); - destroy_ht(HELE->table, DESTROY_KEY_SELF); - HELE->initialized = 0; - goto cleanup; - } + SAFE_MALLOC(prodecor, element, 1, return 1;); - LELE->ls = new_list(); - - if (LELE->ls == NULL) { - fleprintf0("Fail to have new list\n"); - destroy_ht(HELE->table, DESTROY_KEY_SELF); - HELE->initialized = 0; - goto cleanup; - } + *element = dec; - LELE->initialized = 1; - - NEW_P4(p4, dec.x, dec.y, dec.z, dec.w, - destroy_ht(HELE->table, DESTROY_KEY_SELF); - HELE->initialized = 0; - destroy_list(LELE->ls, 1); - LELE->initialized = 0; - goto cleanup;); - - if (add_to_list(LELE->ls, p4)) { - fleprintf0("Fail to add to list\n"); - destroy_ht(HELE->table, DESTROY_KEY_SELF); - HELE->initialized = 0; - destroy_list(LELE->ls, 1); - LELE->initialized = 0; - goto cleanup; + if (add_to_list((pr->array+grade)->ls, element)) { + fleprintf0("Fail to add to procr\n"); + free(element); + return 1; } -#undef HELE -#undef LELE - - goto success; - - cleanup: - - if (p4) free(p4); - - return 1; - - success: - return 0; } prodecor * -pop_procr(procr * pr, procu * pu, NUM *gradep) +pop_procr(procr *pr, procu *pu, NUM *gradep) { prodecor *result = NULL; - if (pr->mg >= pr->len) goto success; + NUM *len = pr->len; + + if (pr->mg >= *len) goto success; #define ELEMENT (pr->array+pr->mg) @@ -498,20 +461,18 @@ pop_procr(procr * pr, procu * pu, NUM *gradep) goto no_reset_mg; if (ELEMENT->initialized && list_length(ELEMENT->ls) == 0) { - destroy_ht((pu->array+pr->mg)->table, DESTROY_KEY_SELF); - (pu->array+pr->mg)->initialized = 0; - (pu->array+pr->mg)->table = NULL; + luple5_free_1(pu->lup, pr->mg); destroy_list(ELEMENT->ls, 1); ELEMENT->initialized = 0; ELEMENT->ls = NULL; } - while (++(pr->mg) < pr->len) { + while (++(pr->mg) < *(pr->len)) { if (ELEMENT->initialized) break; } - if (pr->mg >= pr->len) goto success; + if (pr->mg >= *(pr->len)) goto success; no_reset_mg: @@ -523,5 +484,4 @@ pop_procr(procr * pr, procu * pu, NUM *gradep) success: return result; - } -- cgit v1.2.3-18-g5258