From 510b10b96b546fcc6c6b6be85050305ddd192a41 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Sat, 5 Feb 2022 17:30:11 +0800 Subject: replace some hash table usage by tuples Previously I used hash tables, which consume too much memory. Now the critical parts are replaced by a new hand-written library called "tuple.h". Now we can easily parse larger inputs. I haven't tested its limits, though. --- src/bsr.c | 664 +++++++++++++++++--------------------------------------------- 1 file changed, 182 insertions(+), 482 deletions(-) (limited to 'src/bsr.c') diff --git a/src/bsr.c b/src/bsr.c index c034072..4b7953a 100644 --- a/src/bsr.c +++ b/src/bsr.c @@ -1,106 +1,84 @@ #include "list.h" +#include "splist.h" #include "util.h" #include "ht.h" +#include "tuple.h" #include "bsr.h" #include #include #include -typedef struct first_type_bsr_s first_type_bsr; -typedef struct second_type_bsr_s second_type_bsr; +typedef luple5 first_type_bsr; +typedef luple6 second_type_bsr; -typedef struct first_type_array_element_s first_type_array_element; - -/* The keys of the table are (i, k) and values are hash tables whose - keys are (a, j) such that (X, a, i, j, k) belongs to the set. */ -struct first_type_array_element_s { - ht *table; - BOOL initialized; -}; - -struct first_type_bsr_s { - NUM len; - first_type_array_element *array; +struct bsr_s { + first_type_bsr *f; + second_type_bsr *s; }; -/* Arrrrgh! */ -typedef struct second_arrarrarr_element_s second_arrarrarr_element; - -/* The keys are (i, k) and the values are hash tables whose keys are - those j such that - - (X, a, c, i, j, k) +bsr * +new_bsr(Grammar *g, NUM input_len, BOOL * const restrict errorp) +{ + NUM left_len = grammar_left_len(g); - belongs to the set. */ -struct second_arrarrarr_element_s { - ht *table; - BOOL initialized; -}; + NUM max_alt_len = 0, max_rule_len = 0; -typedef struct second_array_array_element_s \ -second_array_array_element; + for (NUM i = 0; i < left_len; i++) { + NUM alt_len = rg_len(grammar_rule(g, i)); + if (alt_len > max_rule_len) max_rule_len = alt_len; + for (NUM j = 0; j < alt_len; j++) { + NUM jth_len = list_length(rg_nth(grammar_rule(g, i), j)); + if (jth_len > max_alt_len) max_alt_len = jth_len; + } + } -struct second_array_array_element_s { - NUM len; - second_arrarrarr_element *array; - BOOL initialized; -}; + bsr *bsrp = NULL; + SAFE_MALLOC(bsr, bsrp, 1, *errorp = 1; return NULL;); -typedef struct second_type_array_element_s second_type_array_element; + NUM *first_lengths = NULL, *second_lengths = NULL; -struct second_type_array_element_s { - NUM len; - second_array_array_element *array; - BOOL initialized; -}; + SAFE_MALLOC(NUM, first_lengths, 5, goto cleanup;); + SAFE_MALLOC(NUM, second_lengths, 6, goto cleanup;); -struct second_type_bsr_s { - NUM len; - second_type_array_element *array; -}; + *(first_lengths) = left_len; + *(first_lengths+1) = input_len; + *(first_lengths+2) = input_len; + *(first_lengths+3) = max_rule_len; + *(first_lengths+4) = input_len; -struct bsr_s { - /* bsr_type type; */ - /* union { */ - first_type_bsr f; - second_type_bsr s; - /* } data; */ -}; + *(second_lengths) = left_len; + *(second_lengths+1) = max_rule_len; + *(second_lengths+2) = max_alt_len; + *(second_lengths+3) = input_len; + *(second_lengths+4) = input_len; + *(second_lengths+5) = input_len; -bsr * -new_bsr(BOOL * const restrict errorp, Grammar *g) -{ - NUM left_len = grammar_left_len(g); + bsrp->f = new_luple5(first_lengths); - bsr *bsrp = NULL; - SAFE_MALLOC(bsr, bsrp, 1, *errorp = 1; return NULL;); - - /* first type */ - bsrp->f.len = left_len; - SAFE_MALLOC(first_type_array_element, - bsrp->f.array, bsrp->f.len, goto cleanup;); + if (bsrp->f == NULL) { + fleprintf0("Fail to create luple5\n"); + goto cleanup; + } - /* ensure every bit is zero, including the initialized field */ - memset(bsrp->f.array, 0, - sizeof(first_type_array_element) * bsrp->f.len); + first_lengths = NULL; - /* second type */ - bsrp->s.len = left_len; - SAFE_MALLOC(second_type_array_element, - bsrp->s.array, bsrp->s.len, goto cleanup;); - memset(bsrp->s.array, 0, - sizeof(second_type_array_element) * bsrp->s.len); + bsrp->s = new_luple6(second_lengths); - for (NUM i = 0; i < left_len; i++) - (bsrp->s.array+i)->len = rg_len(grammar_rule(g, i)); + if (bsrp->s == NULL) { + fleprintf0("Fail to create luple6\n"); + goto cleanup; + } return bsrp; cleanup: *errorp = 1; - if (bsrp->f.array) free(bsrp->f.array); - if (bsrp->s.array) free(bsrp->s.array); + if (first_lengths) free(first_lengths); + if (second_lengths) free(second_lengths); + + if (bsrp->f) destroy_luple5(bsrp->f); + if (bsrp->s) destroy_luple6(bsrp->s); if (bsrp) free(bsrp); return NULL; @@ -109,67 +87,13 @@ new_bsr(BOOL * const restrict errorp, Grammar *g) void destroy_bsr(bsr * const restrict bsrp) { - /* destroy first type */ - for (NUM i = 0; i < bsrp->f.len; i++) { -#define ELEMENT (bsrp->f.array+i) - if (ELEMENT->initialized) { - /* destroy every hash table values as well */ - -#define HTS ((ht **) ht_values(ELEMENT->table)) - - for (NUM j = 0; j < ht_size(ELEMENT->table); j++) - destroy_ht(*(HTS+j), DESTROY_KEY_SELF); + if (bsrp == NULL) return; - destroy_ht(ELEMENT->table, DESTROY_KEY_SELF); - } - } - -#undef ELEMENT -#undef HTS - - free(bsrp->f.array); + /* destroy first type */ + destroy_luple5(bsrp->f); /* destroy second type */ - - for (NUM i = 0; i < bsrp->s.len; i++) { - -#define ELEMENT (bsrp->s.array+i) - - if (ELEMENT->initialized) { - for (NUM j = 0; j < ELEMENT->len; j++) { - -#define SELE (ELEMENT->array+j) - - if (SELE->initialized) { - for (NUM k = 0; k < SELE->len; k++) { - -#define HELE (SELE->array+k) - - if (HELE->initialized) { - -#define HTS ((ht **) ht_values(HELE->table)) - - for (NUM ell = 0; ell < ht_size(HELE->table); ell++) - destroy_ht(*(HTS+ell), DESTROY_KEY_SELF); - - destroy_ht(HELE->table, DESTROY_KEY_SELF); - } - } - - free(SELE->array); - } - } - - free(ELEMENT->array); - } - } - -#undef ELEMENT -#undef SELE -#undef HELE -#undef HTS - - free(bsrp->s.array); + destroy_luple6(bsrp->s); free(bsrp); } @@ -181,15 +105,27 @@ destroy_bsr(bsr * const restrict bsrp) the right-most terminal or non-terminal in the alternate corresponding to a. */ +H_ATTR BOOL -bsr_add(CCR_MOD(Grammar *) g, bsr * const restrict b, - NUM X, NUM a, NUM c, NUM i, NUM k, NUM j) +bsr_add(CCR_MOD(Grammar *) g, bsr * const restrict b, pair6 p6) { - if (X < 0 || X >= b->f.len) { + NUM X = p6.x; + NUM a = p6.y; + NUM c = p6.z; + NUM i = p6.u; + NUM k = p6.v; + NUM j = p6.w; + + if (X < 0 || X >= grammar_left_len(g)) { fleprintf("Invalid X: %ld\n", X); return 1; } + if (a < 0 || a >= rg_len(grammar_rule(g, X))) { + fleprintf("Invalid a: %ld\n", a); + return 1; + } + if (c > list_length(rg_nth(grammar_rule(g, X), a)) || c < 0) { fleprintf("Invalid c: %ld\n", c); return 1; @@ -197,169 +133,20 @@ bsr_add(CCR_MOD(Grammar *) g, bsr * const restrict b, BOOL errorp = 0; - ht *value_table = NULL; - - pair2 *p2 = NULL, p21 = { 0 }, p22 = { 0 }; - - NUM *p1 = NULL, p11 = 0; - if (c == list_length(rg_nth(grammar_rule(g, X), a))) { /* first type BSR */ /* fleprintf0("First type\n"); */ - -#define ARR (b->f.array+X) - - if (!(ARR->initialized)) { - ARR->table = new_ht2(HT_INIT_CAP); - - if (ARR->table == NULL) { - fleprintf0("Fail to get new ht\n"); - goto cleanup; - } - - ARR->initialized = 1; - - value_table = new_ht2(HT_INIT_CAP); - - if (value_table == NULL) { - fleprintf0("Fail to get new ht\n"); - goto cleanup; - } - - NEW_P2(p2, a, k, goto cleanup;); - if (ht_insert(value_table, p2, (void*)1)) goto cleanup; - - NEW_P2(p2, i, j, goto cleanup;); - - if (ht_insert(ARR->table, p2, value_table)) goto cleanup; - /* don't destroy the pointers once they have been inserted */ - value_table = NULL; - p2 = NULL; - } else { - p21.x = i; - p21.y = j; - - if (ht_find(ARR->table, &p21) == NULL) { - value_table = new_ht2(HT_INIT_CAP); - - if (value_table == NULL) { - fleprintf0("Fail to get new ht\n"); - goto cleanup; - } - - NEW_P2(p2, a, k, goto cleanup;); - if (ht_insert(value_table, p2, (void*)1)) goto cleanup; - - NEW_P2(p2, i, j, goto cleanup;); - if (ht_insert(ARR->table, p2, value_table)) goto cleanup; - - /* don't destroy the pointers once they have been inserted */ - value_table = NULL; - p2 = NULL; - } else { - p22.x = a; - p22.y = k; - - if (ht_find(ht_find(ARR->table, &p21), &p22) == NULL) { - NEW_P2(p2, a, k, goto cleanup;); - if (ht_insert(ht_find(ARR->table, &p21), p2, (void*)1)) - goto cleanup; - - /* don't destroy the pointer once it has been inserted */ - p2 = NULL; - } else { - /* this element is already present */ - } - } + if (add_to_luple5(b->f, (pair5) { X, i, j, a, k }, 1)) { + fleprintf0("Fail to add to first type\n"); + goto cleanup; } - -#undef ARR - } else { /* second type BSR */ /* fleprintf0("Second type\n"); */ - -#define AR (b->s.array+X) - - if (!(AR->initialized)) { - SAFE_MALLOC(second_array_array_element, - AR->array, AR->len, - goto cleanup;); - memset(AR->array, 0, - sizeof(second_array_array_element)*(AR->len)); - AR->initialized = 1; - } - -#define ARR (AR->array+a) - - if (a < 0 || a >= rg_len(grammar_rule(g, X))) { - fleprintf("Invalid a: %ld\n", a); + if (add_to_luple6(b->s, (pair6) { X, a, c, i, j, k }, 1)) { + fleprintf0("Fail to add to second type\n"); goto cleanup; } - - ARR->len = list_length(rg_nth(grammar_rule(g, X), a)); - - if (!(ARR->initialized)) { - SAFE_MALLOC(second_arrarrarr_element, - ARR->array, ARR->len, - goto cleanup;); - memset(ARR->array, 0, - sizeof(second_arrarrarr_element)*(ARR->len)); - ARR->initialized = 1; - } - -#define ARRR (ARR->array+c) - - if (!(ARRR->initialized)) { - ARRR->table = new_ht2(HT_INIT_CAP); - - if (ARRR->table == NULL) { - fleprintf0("Fail to get new ht\n"); - goto cleanup; - } - - ARRR->initialized = 1; - } - - p21.x = i; - p21.y = j; - - if (ht_find(ARRR->table, &p21) == NULL) { - value_table = new_ht(HT_INIT_CAP, 0); - - if (value_table == NULL) { - fleprintf0("Fail to get new ht\n"); - goto cleanup; - } - - NEW_P2(p2, i, j, goto cleanup;); - - if (ht_insert(ARRR->table, p2, value_table)) { - fleprintf0("Fail to insert into table\n"); - goto cleanup; - } - value_table = NULL; - p2 = NULL; - } - - p11 = k; - - if (ht_find(ht_find(ARRR->table, &p21), - &p11) == NULL) { - SAFE_MALLOC(NUM, p1, 1, goto cleanup;); - *p1 = k; - if (ht_insert(ht_find(ARRR->table, &p21), - p1, (void *)1)) { - fleprintf0("Fail to insert into table\n"); - goto cleanup; - } - p1 = NULL; - } - -#undef AR -#undef ARR -#undef ARRR - } goto success; @@ -369,22 +156,25 @@ bsr_add(CCR_MOD(Grammar *) g, bsr * const restrict b, success: - if (value_table) destroy_ht(value_table, DESTROY_KEY_SELF); - if (p2) free(p2); - if (p1) free(p1); - return errorp; } BOOL bsr_find(CCR_MOD(bsr *) b, CCR_MOD(Grammar *) g, - BOOL * const restrict errorp, - NUM X, NUM a, NUM c, NUM i, NUM k, NUM j) + BOOL * const restrict errorp, pair6 p6) { *errorp = 0; BOOL result = 0; + NUM *resultp = NULL; + + NUM X = p6.x; + NUM a = p6.y; + NUM c = p6.z; + NUM i = p6.u; + NUM k = p6.v; + NUM j = p6.w; - if (X < 0 || X >= b->f.len) { + if (X < 0 || X >= grammar_left_len(g)) { fleprintf("Invalid X: %ld\n", X); goto cleanup; } @@ -399,74 +189,18 @@ bsr_find(CCR_MOD(bsr *) b, CCR_MOD(Grammar *) g, goto cleanup; } - pair2 p21 = { 0 }, p22 = { 0 }; - NUM p11 = 0; - if (c == list_length(rg_nth(grammar_rule(g, X), a))) { /* first type */ -#define ARR (b->f.array+X) - - if (!(ARR->initialized)) { - goto success; - } else { - p21.x = i; - p21.y = j; - - if (ht_find(ARR->table, &p21) == NULL) { - goto success; - } else { - p22.x = a; - p22.y = k; - - if (ht_find(ht_find(ARR->table, &p21), &p22) == NULL) { - goto success; - } else { - result = 1; - goto success; - } - } - } - -#undef ARR - + resultp = luple5_find(b->f, (pair5) { X, i, j, a, k }); + result = (resultp && *resultp) ? 1 : 0; } else { /* second type */ - -#define AR (b->s.array+X) - - if (!(AR->initialized)) { - goto success; - } - -#define ARR (AR->array+a) - - ARR->len = list_length(rg_nth(grammar_rule(g, X), a)); - - if (!(ARR->initialized)) goto success; - -#define ARRR (ARR->array+c) - - if (!(ARRR->initialized)) goto success; - - p21.x = i; - p21.y = j; - - if (ht_find(ARRR->table, &p21) == NULL) goto success; - - p11 = k; - - if (ht_find(ht_find(ARRR->table, &p21), &p11) == NULL) - goto success; - - result = 1; - - goto success; - - #undef AR - #undef ARR - #undef ARRR + resultp = luple6_find(b->s, (pair6) { X, a, c, i, j, k}); + result = (resultp && *resultp) ? 1 : 0; } + goto success; + cleanup: *errorp = 1; @@ -474,165 +208,131 @@ bsr_find(CCR_MOD(bsr *) b, CCR_MOD(Grammar *) g, return result; } -ht * -bsr_lookup(CCR_MOD(bsr *) b, BOOL * const restrict errorp, - NUM X, NUM i, NUM j) +P_ATTR +BOOL +bsr_lookup(bsr * b, NUM X, NUM i, NUM j) { - *errorp = 0; - ht *result = NULL; + return luple5_pf_3(b->f, (pair3) { X, i, j }); +} - if (X < 0 || X >= b->f.len) { - fleprintf("Invalid X: %ld\n", X); - goto cleanup; - } +UNUSED +static void +print_sep() +{ + printf(" "); +} - if (!((b->f.array+X)->initialized)) goto success; +static BOOL bsr_print_should_initialize_counter = 0; +static NUM bsr_print_line_len = 0; +static Grammar *bsr_print_grammar = NULL; - pair2 p2 = (pair2) { .x = i, .y = j }; +static void +print_bsr_f(pair5 label) +{ + static int counter = 0; - result = (ht *) ht_find((b->f.array+X)->table, &p2); + if (bsr_print_should_initialize_counter) { + counter = 0; + bsr_print_should_initialize_counter = 0; + } - goto success; + counter++; - cleanup: - *errorp = 1; + printf("("); + print_name(list_nth(grammar_names(bsr_print_grammar), + label.x)); + printf(" := "); + map_list_between + (rg_nth (grammar_rule(bsr_print_grammar, label.x), label.u), + print_tnt, print_sep); + printf(", %ld, %ld, %ld)", label.y, label.v, label.z); - success: - return result; + if (counter == bsr_print_line_len) { + counter = 0; + printf("\n"); + } else { + printf(", "); + } } static void -print_sep() +print_bsr_s(pair6 label) { - printf(" "); -} - -void -bsr_print(CCR_MOD(bsr *) b, CCR_MOD(Grammar *) g, NUM line_len) -{ - printf("Printing a BSR set...\n"); + static int counter = 0; - NUM count = 0; - - /* print first type BSR */ - for (NUM i = 0; i < b->f.len; i++) { - -#define ELEMENT (b->f.array+i) - - if (ELEMENT->initialized) { - -#define KEYS ((pair2 **) ht_keys(ELEMENT->table)) -#define VALUES ((ht **) ht_values(ELEMENT->table)) -#define SIZE (ht_size(ELEMENT->table)) - - for (NUM j = 0; j < SIZE; j++) { - -#define VALUE_TABLE (*(VALUES+j)) -#define VALUE_TABLE_KEYS ((pair2 **) ht_keys(VALUE_TABLE)) - - for (NUM k = 0; k < ht_size(VALUE_TABLE); k++) { - count++; - printf("("); - print_name(list_nth(grammar_names(g), i)); - printf(" := "); - map_list_between - (rg_nth - (grammar_rule(g, i), - (*(VALUE_TABLE_KEYS+k))->x), - print_tnt, print_sep); - printf(", %ld, %ld, %ld)", - (*(KEYS+j))->x, - (*(VALUE_TABLE_KEYS+k))->y, - (*(KEYS+j))->y); - - if (count == line_len) { - count = 0; - printf("\n"); - } else { - printf(", "); - } - } - } - } + if (bsr_print_should_initialize_counter) { + counter = 0; + bsr_print_should_initialize_counter = 0; } -#undef ELEMENT -#undef KEYS -#undef VALUES -#undef SIZE -#undef VALUE_TABLE -#undef VALUE_TABLE_KEYS + counter++; - for (NUM i = 0; i < b->s.len; i++) { + printf("("); + print_name(list_nth(grammar_names(bsr_print_grammar), + label.x)); + printf(" := "); -#define ELEMENT (b->s.array+i) +#define RGLS rg_nth(grammar_rule(bsr_print_grammar, label.x), \ + label.y) - if (!(ELEMENT->initialized)) continue; + for (NUM k = 0; k < label.z; k++) { + print_tnt(*(list_array(RGLS)+k)); + if (k+1len; j++) { + printf(" · "); -#define SELE (ELEMENT->array+j) + for (NUM k = label.z; k < list_length(RGLS); k++) { + print_tnt(*(list_array(RGLS)+k)); + if (k+1initialized)) continue; + printf(", %ld, %ld, %ld)", label.u, label.w, label.v); - for (NUM k = 0; k < SELE->len; k++) { + if (counter == bsr_print_line_len) { + counter = 0; + printf("\n"); + } else { + printf(", "); + } -#define HELE (SELE->array+k) +#undef RGLS - if (!(HELE->initialized)) continue; +} -#define KEYS ((pair2 **) ht_keys(HELE->table)) -#define VALUES ((ht **) ht_values(HELE->table)) +void +bsr_print(bsr *b, Grammar *g, NUM line_len) +{ + printf("Printing a BSR set...\n"); - for (NUM n = 0; n < ht_size(HELE->table); n++) { - for (NUM ell = 0; ell < ht_size(*(VALUES+n)); ell++) { + bsr_print_grammar = g; + bsr_print_should_initialize_counter = 1; + bsr_print_line_len = line_len; -#define VALUE_KEYS ((NUM **) ht_keys(*(VALUES+n))) + printf("Printing the first type nodes...\n"); - count++; - printf("("); - print_name(list_nth(grammar_names(g), i)); - printf(" := "); + NUM *len = NULL; -#define LS rg_nth(grammar_rule(g, i), j) + len = luple5_len(b->f); - for (NUM m = 0; m < k; m++) { - print_tnt(*(list_array(LS)+m)); - if (m+1f, (pair3) { i, j, k }, print_bsr_f); + } - printf(" · "); + printf("\n\nPrinting the second type nodes...\n"); - for (NUM m = k; m < list_length(LS); m++) { - print_tnt(*(list_array(LS)+m)); - if (m+1x, - **(VALUE_KEYS+ell), - (*(KEYS+n))->y); + len = luple6_len(b->s); - if (count == line_len) { - count = 0; - printf("\n"); - } else { - printf(", "); - } - } - } - } - } - } + for (NUM i = 0; i < *len; i++) + for (NUM j = 0; j < *(len+1); j++) + luple6_map_2(b->s, (pair2) { i, j }, -1, print_bsr_s); printf("\n"); -#undef ELEMENT -#undef SELE -#undef HELE -#undef KEYS -#undef VALUES -#undef VALUE_KEYS -#undef LS - + bsr_print_grammar = NULL; + bsr_print_line_len = 0; } -- cgit v1.2.3-18-g5258