From 510b10b96b546fcc6c6b6be85050305ddd192a41 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Sat, 5 Feb 2022 17:30:11 +0800 Subject: replace some hash table usage by tuples Previously I used hash tables, which consume too much memory. Now the critical parts are replaced by a new hand-written library called "tuple.h". Now we can easily parse larger inputs. I haven't tested its limits, though. --- src/Makefile.am | 21 +- src/bsr.c | 664 +++++++++++++----------------------------------- src/bsr.h | 37 ++- src/cnp.c | 182 ++++++++----- src/cnp.h | 1 + src/crf.c | 560 +++++++++++++++++++--------------------- src/crf.h | 80 ++++-- src/grammar.c | 2 +- src/grammar.h | 7 + src/ht.c | 4 +- src/ht.h | 15 +- src/list.c | 6 +- src/pair.h | 57 +++++ src/splist.c | 141 ++++++++++ src/splist.h | 36 +++ src/test/check_bsr.c | 30 ++- src/test/check_cnp.c | 41 ++- src/test/check_crf.c | 195 +++++++++++--- src/test/check_ht.c | 6 +- src/test/check_splist.c | 62 +++++ src/test/check_tuple.c | 244 ++++++++++++++++++ src/tuple.c | 629 +++++++++++++++++++++++++++++++++++++++++++++ src/tuple.h | 116 +++++++++ src/util.h | 18 ++ 24 files changed, 2184 insertions(+), 970 deletions(-) create mode 100644 src/pair.h create mode 100644 src/splist.c create mode 100644 src/splist.h create mode 100644 src/test/check_splist.c create mode 100644 src/test/check_tuple.c create mode 100644 src/tuple.c create mode 100644 src/tuple.h (limited to 'src') diff --git a/src/Makefile.am b/src/Makefile.am index b897ec0..2b2d7d8 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,10 +1,10 @@ -AM_CFLAGS = -Wall -Wextra +AM_CFLAGS = -Wall -Wextra -Wno-missing-field-initializers noinst_LIBRARIES = libeps.a libeps_a_SOURCES = grammar.c list.c util.c reader.c str.c \ -utf8.c dfa.c ht.c bsr.c crf.c cnp.c \ +utf8.c dfa.c ht.c bsr.c tuple.c crf.c cnp.c \ grammar.h list.h util.h reader.h str_partial.h \ -str.h utf8.h dfa.h ht.h bsr.h crf.h cnp.h +str.h utf8.h dfa.h ht.h bsr.h crf.h cnp.h tuple.h pair.h libeps_a_CFLAGS = $(AM_CFLAGS) --pedantic @@ -22,7 +22,8 @@ CLEANFILES = TAGS # tests check_PROGRAMS = check_list check_grammar check_reader check_dfa \ -check_ht check_bsr check_crf check_cnp check_pred +check_ht check_pred check_tuple check_bsr check_crf \ +check_cnp check_list_SOURCES = test/check_list.c list.c @@ -37,19 +38,21 @@ check_dfa_SOURCES = test/check_dfa.c dfa.c list.c str.c utf8.c check_ht_SOURCES = test/check_ht.c ht.c check_bsr_SOURCES = test/check_bsr.c bsr.c grammar.c list.c util.c \ -ht.c utf8.c str.c dfa.c +ht.c utf8.c str.c dfa.c tuple.c check_crf_SOURCES = test/check_crf.c crf.c grammar.c list.c util.c \ -ht.c utf8.c str.c dfa.c +ht.c utf8.c str.c dfa.c tuple.c + +# check_splist_SOURCES = test/check_splist.c splist.c list.c check_cnp_SOURCES = test/check_cnp.c crf.c cnp.c grammar.c list.c \ -util.c ht.c utf8.c str.c dfa.c bsr.c +util.c ht.c utf8.c str.c dfa.c bsr.c tuple.c check_pred_SOURCES = test/check_pred.c dfa.c list.c str.c utf8.c \ grammar.c util.c ht.c +check_tuple_SOURCES = test/check_tuple.c tuple.c util.c + TESTS = $(check_PROGRAMS) AM_COLOR_TESTS = always - - diff --git a/src/bsr.c b/src/bsr.c index c034072..4b7953a 100644 --- a/src/bsr.c +++ b/src/bsr.c @@ -1,106 +1,84 @@ #include "list.h" +#include "splist.h" #include "util.h" #include "ht.h" +#include "tuple.h" #include "bsr.h" #include #include #include -typedef struct first_type_bsr_s first_type_bsr; -typedef struct second_type_bsr_s second_type_bsr; +typedef luple5 first_type_bsr; +typedef luple6 second_type_bsr; -typedef struct first_type_array_element_s first_type_array_element; - -/* The keys of the table are (i, k) and values are hash tables whose - keys are (a, j) such that (X, a, i, j, k) belongs to the set. */ -struct first_type_array_element_s { - ht *table; - BOOL initialized; -}; - -struct first_type_bsr_s { - NUM len; - first_type_array_element *array; +struct bsr_s { + first_type_bsr *f; + second_type_bsr *s; }; -/* Arrrrgh! */ -typedef struct second_arrarrarr_element_s second_arrarrarr_element; - -/* The keys are (i, k) and the values are hash tables whose keys are - those j such that - - (X, a, c, i, j, k) +bsr * +new_bsr(Grammar *g, NUM input_len, BOOL * const restrict errorp) +{ + NUM left_len = grammar_left_len(g); - belongs to the set. */ -struct second_arrarrarr_element_s { - ht *table; - BOOL initialized; -}; + NUM max_alt_len = 0, max_rule_len = 0; -typedef struct second_array_array_element_s \ -second_array_array_element; + for (NUM i = 0; i < left_len; i++) { + NUM alt_len = rg_len(grammar_rule(g, i)); + if (alt_len > max_rule_len) max_rule_len = alt_len; + for (NUM j = 0; j < alt_len; j++) { + NUM jth_len = list_length(rg_nth(grammar_rule(g, i), j)); + if (jth_len > max_alt_len) max_alt_len = jth_len; + } + } -struct second_array_array_element_s { - NUM len; - second_arrarrarr_element *array; - BOOL initialized; -}; + bsr *bsrp = NULL; + SAFE_MALLOC(bsr, bsrp, 1, *errorp = 1; return NULL;); -typedef struct second_type_array_element_s second_type_array_element; + NUM *first_lengths = NULL, *second_lengths = NULL; -struct second_type_array_element_s { - NUM len; - second_array_array_element *array; - BOOL initialized; -}; + SAFE_MALLOC(NUM, first_lengths, 5, goto cleanup;); + SAFE_MALLOC(NUM, second_lengths, 6, goto cleanup;); -struct second_type_bsr_s { - NUM len; - second_type_array_element *array; -}; + *(first_lengths) = left_len; + *(first_lengths+1) = input_len; + *(first_lengths+2) = input_len; + *(first_lengths+3) = max_rule_len; + *(first_lengths+4) = input_len; -struct bsr_s { - /* bsr_type type; */ - /* union { */ - first_type_bsr f; - second_type_bsr s; - /* } data; */ -}; + *(second_lengths) = left_len; + *(second_lengths+1) = max_rule_len; + *(second_lengths+2) = max_alt_len; + *(second_lengths+3) = input_len; + *(second_lengths+4) = input_len; + *(second_lengths+5) = input_len; -bsr * -new_bsr(BOOL * const restrict errorp, Grammar *g) -{ - NUM left_len = grammar_left_len(g); + bsrp->f = new_luple5(first_lengths); - bsr *bsrp = NULL; - SAFE_MALLOC(bsr, bsrp, 1, *errorp = 1; return NULL;); - - /* first type */ - bsrp->f.len = left_len; - SAFE_MALLOC(first_type_array_element, - bsrp->f.array, bsrp->f.len, goto cleanup;); + if (bsrp->f == NULL) { + fleprintf0("Fail to create luple5\n"); + goto cleanup; + } - /* ensure every bit is zero, including the initialized field */ - memset(bsrp->f.array, 0, - sizeof(first_type_array_element) * bsrp->f.len); + first_lengths = NULL; - /* second type */ - bsrp->s.len = left_len; - SAFE_MALLOC(second_type_array_element, - bsrp->s.array, bsrp->s.len, goto cleanup;); - memset(bsrp->s.array, 0, - sizeof(second_type_array_element) * bsrp->s.len); + bsrp->s = new_luple6(second_lengths); - for (NUM i = 0; i < left_len; i++) - (bsrp->s.array+i)->len = rg_len(grammar_rule(g, i)); + if (bsrp->s == NULL) { + fleprintf0("Fail to create luple6\n"); + goto cleanup; + } return bsrp; cleanup: *errorp = 1; - if (bsrp->f.array) free(bsrp->f.array); - if (bsrp->s.array) free(bsrp->s.array); + if (first_lengths) free(first_lengths); + if (second_lengths) free(second_lengths); + + if (bsrp->f) destroy_luple5(bsrp->f); + if (bsrp->s) destroy_luple6(bsrp->s); if (bsrp) free(bsrp); return NULL; @@ -109,67 +87,13 @@ new_bsr(BOOL * const restrict errorp, Grammar *g) void destroy_bsr(bsr * const restrict bsrp) { - /* destroy first type */ - for (NUM i = 0; i < bsrp->f.len; i++) { -#define ELEMENT (bsrp->f.array+i) - if (ELEMENT->initialized) { - /* destroy every hash table values as well */ - -#define HTS ((ht **) ht_values(ELEMENT->table)) - - for (NUM j = 0; j < ht_size(ELEMENT->table); j++) - destroy_ht(*(HTS+j), DESTROY_KEY_SELF); + if (bsrp == NULL) return; - destroy_ht(ELEMENT->table, DESTROY_KEY_SELF); - } - } - -#undef ELEMENT -#undef HTS - - free(bsrp->f.array); + /* destroy first type */ + destroy_luple5(bsrp->f); /* destroy second type */ - - for (NUM i = 0; i < bsrp->s.len; i++) { - -#define ELEMENT (bsrp->s.array+i) - - if (ELEMENT->initialized) { - for (NUM j = 0; j < ELEMENT->len; j++) { - -#define SELE (ELEMENT->array+j) - - if (SELE->initialized) { - for (NUM k = 0; k < SELE->len; k++) { - -#define HELE (SELE->array+k) - - if (HELE->initialized) { - -#define HTS ((ht **) ht_values(HELE->table)) - - for (NUM ell = 0; ell < ht_size(HELE->table); ell++) - destroy_ht(*(HTS+ell), DESTROY_KEY_SELF); - - destroy_ht(HELE->table, DESTROY_KEY_SELF); - } - } - - free(SELE->array); - } - } - - free(ELEMENT->array); - } - } - -#undef ELEMENT -#undef SELE -#undef HELE -#undef HTS - - free(bsrp->s.array); + destroy_luple6(bsrp->s); free(bsrp); } @@ -181,15 +105,27 @@ destroy_bsr(bsr * const restrict bsrp) the right-most terminal or non-terminal in the alternate corresponding to a. */ +H_ATTR BOOL -bsr_add(CCR_MOD(Grammar *) g, bsr * const restrict b, - NUM X, NUM a, NUM c, NUM i, NUM k, NUM j) +bsr_add(CCR_MOD(Grammar *) g, bsr * const restrict b, pair6 p6) { - if (X < 0 || X >= b->f.len) { + NUM X = p6.x; + NUM a = p6.y; + NUM c = p6.z; + NUM i = p6.u; + NUM k = p6.v; + NUM j = p6.w; + + if (X < 0 || X >= grammar_left_len(g)) { fleprintf("Invalid X: %ld\n", X); return 1; } + if (a < 0 || a >= rg_len(grammar_rule(g, X))) { + fleprintf("Invalid a: %ld\n", a); + return 1; + } + if (c > list_length(rg_nth(grammar_rule(g, X), a)) || c < 0) { fleprintf("Invalid c: %ld\n", c); return 1; @@ -197,169 +133,20 @@ bsr_add(CCR_MOD(Grammar *) g, bsr * const restrict b, BOOL errorp = 0; - ht *value_table = NULL; - - pair2 *p2 = NULL, p21 = { 0 }, p22 = { 0 }; - - NUM *p1 = NULL, p11 = 0; - if (c == list_length(rg_nth(grammar_rule(g, X), a))) { /* first type BSR */ /* fleprintf0("First type\n"); */ - -#define ARR (b->f.array+X) - - if (!(ARR->initialized)) { - ARR->table = new_ht2(HT_INIT_CAP); - - if (ARR->table == NULL) { - fleprintf0("Fail to get new ht\n"); - goto cleanup; - } - - ARR->initialized = 1; - - value_table = new_ht2(HT_INIT_CAP); - - if (value_table == NULL) { - fleprintf0("Fail to get new ht\n"); - goto cleanup; - } - - NEW_P2(p2, a, k, goto cleanup;); - if (ht_insert(value_table, p2, (void*)1)) goto cleanup; - - NEW_P2(p2, i, j, goto cleanup;); - - if (ht_insert(ARR->table, p2, value_table)) goto cleanup; - /* don't destroy the pointers once they have been inserted */ - value_table = NULL; - p2 = NULL; - } else { - p21.x = i; - p21.y = j; - - if (ht_find(ARR->table, &p21) == NULL) { - value_table = new_ht2(HT_INIT_CAP); - - if (value_table == NULL) { - fleprintf0("Fail to get new ht\n"); - goto cleanup; - } - - NEW_P2(p2, a, k, goto cleanup;); - if (ht_insert(value_table, p2, (void*)1)) goto cleanup; - - NEW_P2(p2, i, j, goto cleanup;); - if (ht_insert(ARR->table, p2, value_table)) goto cleanup; - - /* don't destroy the pointers once they have been inserted */ - value_table = NULL; - p2 = NULL; - } else { - p22.x = a; - p22.y = k; - - if (ht_find(ht_find(ARR->table, &p21), &p22) == NULL) { - NEW_P2(p2, a, k, goto cleanup;); - if (ht_insert(ht_find(ARR->table, &p21), p2, (void*)1)) - goto cleanup; - - /* don't destroy the pointer once it has been inserted */ - p2 = NULL; - } else { - /* this element is already present */ - } - } + if (add_to_luple5(b->f, (pair5) { X, i, j, a, k }, 1)) { + fleprintf0("Fail to add to first type\n"); + goto cleanup; } - -#undef ARR - } else { /* second type BSR */ /* fleprintf0("Second type\n"); */ - -#define AR (b->s.array+X) - - if (!(AR->initialized)) { - SAFE_MALLOC(second_array_array_element, - AR->array, AR->len, - goto cleanup;); - memset(AR->array, 0, - sizeof(second_array_array_element)*(AR->len)); - AR->initialized = 1; - } - -#define ARR (AR->array+a) - - if (a < 0 || a >= rg_len(grammar_rule(g, X))) { - fleprintf("Invalid a: %ld\n", a); + if (add_to_luple6(b->s, (pair6) { X, a, c, i, j, k }, 1)) { + fleprintf0("Fail to add to second type\n"); goto cleanup; } - - ARR->len = list_length(rg_nth(grammar_rule(g, X), a)); - - if (!(ARR->initialized)) { - SAFE_MALLOC(second_arrarrarr_element, - ARR->array, ARR->len, - goto cleanup;); - memset(ARR->array, 0, - sizeof(second_arrarrarr_element)*(ARR->len)); - ARR->initialized = 1; - } - -#define ARRR (ARR->array+c) - - if (!(ARRR->initialized)) { - ARRR->table = new_ht2(HT_INIT_CAP); - - if (ARRR->table == NULL) { - fleprintf0("Fail to get new ht\n"); - goto cleanup; - } - - ARRR->initialized = 1; - } - - p21.x = i; - p21.y = j; - - if (ht_find(ARRR->table, &p21) == NULL) { - value_table = new_ht(HT_INIT_CAP, 0); - - if (value_table == NULL) { - fleprintf0("Fail to get new ht\n"); - goto cleanup; - } - - NEW_P2(p2, i, j, goto cleanup;); - - if (ht_insert(ARRR->table, p2, value_table)) { - fleprintf0("Fail to insert into table\n"); - goto cleanup; - } - value_table = NULL; - p2 = NULL; - } - - p11 = k; - - if (ht_find(ht_find(ARRR->table, &p21), - &p11) == NULL) { - SAFE_MALLOC(NUM, p1, 1, goto cleanup;); - *p1 = k; - if (ht_insert(ht_find(ARRR->table, &p21), - p1, (void *)1)) { - fleprintf0("Fail to insert into table\n"); - goto cleanup; - } - p1 = NULL; - } - -#undef AR -#undef ARR -#undef ARRR - } goto success; @@ -369,22 +156,25 @@ bsr_add(CCR_MOD(Grammar *) g, bsr * const restrict b, success: - if (value_table) destroy_ht(value_table, DESTROY_KEY_SELF); - if (p2) free(p2); - if (p1) free(p1); - return errorp; } BOOL bsr_find(CCR_MOD(bsr *) b, CCR_MOD(Grammar *) g, - BOOL * const restrict errorp, - NUM X, NUM a, NUM c, NUM i, NUM k, NUM j) + BOOL * const restrict errorp, pair6 p6) { *errorp = 0; BOOL result = 0; + NUM *resultp = NULL; + + NUM X = p6.x; + NUM a = p6.y; + NUM c = p6.z; + NUM i = p6.u; + NUM k = p6.v; + NUM j = p6.w; - if (X < 0 || X >= b->f.len) { + if (X < 0 || X >= grammar_left_len(g)) { fleprintf("Invalid X: %ld\n", X); goto cleanup; } @@ -399,74 +189,18 @@ bsr_find(CCR_MOD(bsr *) b, CCR_MOD(Grammar *) g, goto cleanup; } - pair2 p21 = { 0 }, p22 = { 0 }; - NUM p11 = 0; - if (c == list_length(rg_nth(grammar_rule(g, X), a))) { /* first type */ -#define ARR (b->f.array+X) - - if (!(ARR->initialized)) { - goto success; - } else { - p21.x = i; - p21.y = j; - - if (ht_find(ARR->table, &p21) == NULL) { - goto success; - } else { - p22.x = a; - p22.y = k; - - if (ht_find(ht_find(ARR->table, &p21), &p22) == NULL) { - goto success; - } else { - result = 1; - goto success; - } - } - } - -#undef ARR - + resultp = luple5_find(b->f, (pair5) { X, i, j, a, k }); + result = (resultp && *resultp) ? 1 : 0; } else { /* second type */ - -#define AR (b->s.array+X) - - if (!(AR->initialized)) { - goto success; - } - -#define ARR (AR->array+a) - - ARR->len = list_length(rg_nth(grammar_rule(g, X), a)); - - if (!(ARR->initialized)) goto success; - -#define ARRR (ARR->array+c) - - if (!(ARRR->initialized)) goto success; - - p21.x = i; - p21.y = j; - - if (ht_find(ARRR->table, &p21) == NULL) goto success; - - p11 = k; - - if (ht_find(ht_find(ARRR->table, &p21), &p11) == NULL) - goto success; - - result = 1; - - goto success; - - #undef AR - #undef ARR - #undef ARRR + resultp = luple6_find(b->s, (pair6) { X, a, c, i, j, k}); + result = (resultp && *resultp) ? 1 : 0; } + goto success; + cleanup: *errorp = 1; @@ -474,165 +208,131 @@ bsr_find(CCR_MOD(bsr *) b, CCR_MOD(Grammar *) g, return result; } -ht * -bsr_lookup(CCR_MOD(bsr *) b, BOOL * const restrict errorp, - NUM X, NUM i, NUM j) +P_ATTR +BOOL +bsr_lookup(bsr * b, NUM X, NUM i, NUM j) { - *errorp = 0; - ht *result = NULL; + return luple5_pf_3(b->f, (pair3) { X, i, j }); +} - if (X < 0 || X >= b->f.len) { - fleprintf("Invalid X: %ld\n", X); - goto cleanup; - } +UNUSED +static void +print_sep() +{ + printf(" "); +} - if (!((b->f.array+X)->initialized)) goto success; +static BOOL bsr_print_should_initialize_counter = 0; +static NUM bsr_print_line_len = 0; +static Grammar *bsr_print_grammar = NULL; - pair2 p2 = (pair2) { .x = i, .y = j }; +static void +print_bsr_f(pair5 label) +{ + static int counter = 0; - result = (ht *) ht_find((b->f.array+X)->table, &p2); + if (bsr_print_should_initialize_counter) { + counter = 0; + bsr_print_should_initialize_counter = 0; + } - goto success; + counter++; - cleanup: - *errorp = 1; + printf("("); + print_name(list_nth(grammar_names(bsr_print_grammar), + label.x)); + printf(" := "); + map_list_between + (rg_nth (grammar_rule(bsr_print_grammar, label.x), label.u), + print_tnt, print_sep); + printf(", %ld, %ld, %ld)", label.y, label.v, label.z); - success: - return result; + if (counter == bsr_print_line_len) { + counter = 0; + printf("\n"); + } else { + printf(", "); + } } static void -print_sep() +print_bsr_s(pair6 label) { - printf(" "); -} - -void -bsr_print(CCR_MOD(bsr *) b, CCR_MOD(Grammar *) g, NUM line_len) -{ - printf("Printing a BSR set...\n"); + static int counter = 0; - NUM count = 0; - - /* print first type BSR */ - for (NUM i = 0; i < b->f.len; i++) { - -#define ELEMENT (b->f.array+i) - - if (ELEMENT->initialized) { - -#define KEYS ((pair2 **) ht_keys(ELEMENT->table)) -#define VALUES ((ht **) ht_values(ELEMENT->table)) -#define SIZE (ht_size(ELEMENT->table)) - - for (NUM j = 0; j < SIZE; j++) { - -#define VALUE_TABLE (*(VALUES+j)) -#define VALUE_TABLE_KEYS ((pair2 **) ht_keys(VALUE_TABLE)) - - for (NUM k = 0; k < ht_size(VALUE_TABLE); k++) { - count++; - printf("("); - print_name(list_nth(grammar_names(g), i)); - printf(" := "); - map_list_between - (rg_nth - (grammar_rule(g, i), - (*(VALUE_TABLE_KEYS+k))->x), - print_tnt, print_sep); - printf(", %ld, %ld, %ld)", - (*(KEYS+j))->x, - (*(VALUE_TABLE_KEYS+k))->y, - (*(KEYS+j))->y); - - if (count == line_len) { - count = 0; - printf("\n"); - } else { - printf(", "); - } - } - } - } + if (bsr_print_should_initialize_counter) { + counter = 0; + bsr_print_should_initialize_counter = 0; } -#undef ELEMENT -#undef KEYS -#undef VALUES -#undef SIZE -#undef VALUE_TABLE -#undef VALUE_TABLE_KEYS + counter++; - for (NUM i = 0; i < b->s.len; i++) { + printf("("); + print_name(list_nth(grammar_names(bsr_print_grammar), + label.x)); + printf(" := "); -#define ELEMENT (b->s.array+i) +#define RGLS rg_nth(grammar_rule(bsr_print_grammar, label.x), \ + label.y) - if (!(ELEMENT->initialized)) continue; + for (NUM k = 0; k < label.z; k++) { + print_tnt(*(list_array(RGLS)+k)); + if (k+1len; j++) { + printf(" · "); -#define SELE (ELEMENT->array+j) + for (NUM k = label.z; k < list_length(RGLS); k++) { + print_tnt(*(list_array(RGLS)+k)); + if (k+1initialized)) continue; + printf(", %ld, %ld, %ld)", label.u, label.w, label.v); - for (NUM k = 0; k < SELE->len; k++) { + if (counter == bsr_print_line_len) { + counter = 0; + printf("\n"); + } else { + printf(", "); + } -#define HELE (SELE->array+k) +#undef RGLS - if (!(HELE->initialized)) continue; +} -#define KEYS ((pair2 **) ht_keys(HELE->table)) -#define VALUES ((ht **) ht_values(HELE->table)) +void +bsr_print(bsr *b, Grammar *g, NUM line_len) +{ + printf("Printing a BSR set...\n"); - for (NUM n = 0; n < ht_size(HELE->table); n++) { - for (NUM ell = 0; ell < ht_size(*(VALUES+n)); ell++) { + bsr_print_grammar = g; + bsr_print_should_initialize_counter = 1; + bsr_print_line_len = line_len; -#define VALUE_KEYS ((NUM **) ht_keys(*(VALUES+n))) + printf("Printing the first type nodes...\n"); - count++; - printf("("); - print_name(list_nth(grammar_names(g), i)); - printf(" := "); + NUM *len = NULL; -#define LS rg_nth(grammar_rule(g, i), j) + len = luple5_len(b->f); - for (NUM m = 0; m < k; m++) { - print_tnt(*(list_array(LS)+m)); - if (m+1f, (pair3) { i, j, k }, print_bsr_f); + } - printf(" · "); + printf("\n\nPrinting the second type nodes...\n"); - for (NUM m = k; m < list_length(LS); m++) { - print_tnt(*(list_array(LS)+m)); - if (m+1x, - **(VALUE_KEYS+ell), - (*(KEYS+n))->y); + len = luple6_len(b->s); - if (count == line_len) { - count = 0; - printf("\n"); - } else { - printf(", "); - } - } - } - } - } - } + for (NUM i = 0; i < *len; i++) + for (NUM j = 0; j < *(len+1); j++) + luple6_map_2(b->s, (pair2) { i, j }, -1, print_bsr_s); printf("\n"); -#undef ELEMENT -#undef SELE -#undef HELE -#undef KEYS -#undef VALUES -#undef VALUE_KEYS -#undef LS - + bsr_print_grammar = NULL; + bsr_print_line_len = 0; } diff --git a/src/bsr.h b/src/bsr.h index 64e428a..1b91c3e 100644 --- a/src/bsr.h +++ b/src/bsr.h @@ -77,9 +77,30 @@ I don't know if there are better implementations. If I think of better ways in the future, I will re-implement the BSR sets. */ +/* With the newly written library tuple.h, the implementation changes + as follows. + + The first type is stored as a "luple5", but the coordinates are in + the following order. + + (X, a, i, k, j) => (X, i, j, a, k) + + This is to ensure that we can quickly find BSR's with a given + non-terminal X and given left and right extents i and j. This is + necessary for extracting an SPPF out of it, and for deciding + whether the parser succeeds in parsing the entire input. + + The second type os stored as a "luple6", with the following + coordinates order. + + (X, a, b, i, k, j) => (X, a, b, i, j, k) + + The reason is similar to the above. Though this is not necessary for + deciding if the parser succeeds. */ + typedef struct bsr_s bsr; -bsr *new_bsr(BOOL * const restrict errorp, Grammar *g); +bsr *new_bsr(Grammar *g, NUM input_len, BOOL * const restrict errorp); /* No flag to control the deletion, as the user cannot change how the data is stored. */ @@ -94,18 +115,16 @@ void destroy_bsr(bsr * const restrict bsrp); the right-most terminal or non-terminal in the alternate corresponding to a. */ -BOOL bsr_add(CCR_MOD(Grammar *) g, bsr * const restrict b, - NUM X, NUM a, NUM c, NUM i, NUM k, NUM j); +H_ATTR BOOL bsr_add(CCR_MOD(Grammar *) g, bsr * const restrict b, + pair6 p6); BOOL bsr_find(CCR_MOD(bsr *) b, CCR_MOD(Grammar *) g, BOOL * const restrict errorp, - NUM X, NUM a, NUM c, NUM i, NUM k, NUM j); + pair6 p6); -/* if not found return NULL */ -ht *bsr_lookup(CCR_MOD(bsr *) b, BOOL * const restrict errorp, - NUM X, NUM i, NUM j); +P_ATTR BOOL bsr_lookup(bsr * b, NUM X, NUM i, NUM j); -/* Change to new line after LINE_LEN many elements. */ -void bsr_print(CCR_MOD(bsr *) b, CCR_MOD(Grammar *) g, NUM line_len); +/* Print a new line after LINE_LEN many elements. */ +void bsr_print(bsr *b, Grammar *g, NUM line_len); #endif diff --git a/src/cnp.c b/src/cnp.c index 91a8df7..fe5a4dc 100644 --- a/src/cnp.c +++ b/src/cnp.c @@ -44,9 +44,10 @@ nt_add(Environment *env, NUM X, NUM j) /* fleprintf("i = %ld, j = %ld\n", i, j); */ env->error = desc_add(env->pr, env->pu, j, - (pair4) { .x = X, .y = i, .z = 0, .w = j }); + (pair4) { .x = X, .y = i, .z = 0, .u = j }); /* fleprintf("env->error = %d\n", env->error); */ + /* print_procr(env->pr); */ /* fleprintf("pr len = %ld\n", env->pr->len); * fleprintf("pr ini = %d\n", (env->pr->array)->initialized); */ } @@ -82,7 +83,7 @@ test_select(Environment *env, NUM b, NUM X, CCR_MOD(List *) tnts) goto success; } - /* predicates haven't been implemented yet */ + /* TODO: predicates haven't been implemented yet */ /* for (NUM i = 0; i < ht_size(result_ps); i++) { * * } */ @@ -107,7 +108,7 @@ test_select(Environment *env, NUM b, NUM X, CCR_MOD(List *) tnts) if (ht_find(gi->sts+X, &b) != NULL) result = 1; - /* predicates haven't been implemented yet */ + /* TODO: predicates haven't been implemented yet */ goto success; @@ -119,6 +120,36 @@ test_select(Environment *env, NUM b, NUM X, CCR_MOD(List *) tnts) return result; } +static Environment *rtn_internal_env = NULL; +static NUM rtn_internal_num = 0; + +static void +rtn_internal(pair6 label) +{ + if (rtn_internal_env->error) return; + + if ((rtn_internal_env->error = + desc_add(rtn_internal_env->pr, + rtn_internal_env->pu, + rtn_internal_num, + (pair4) { + label.z, label.u, + label.v, label.w + }))) return; + + /* fleprintf("added descriptor (%ld, %ld, %ld, %ld, %ld)\n", + * label.z, label.u, label.v, label.w, + * rtn_internal_num); */ + + if ((rtn_internal_env->error = + bsr_add(rtn_internal_env->gi->g, + rtn_internal_env->bsrp, + (pair6) { + label.z, label.u, label.v, label.w, + label.y, rtn_internal_num + }))) return; +} + void rtn(Environment *env, NUM X, NUM k, NUM j) { @@ -133,31 +164,71 @@ rtn(Environment *env, NUM X, NUM k, NUM j) if ((env->error = spa_insert(env->spap, label))) return; - ht *edge_ht = NULL; - - if ((edge_ht = crf_find_node(env->crfp, label2)) == NULL) { + if (!(crf_find_node(env->crfp, label2))) { fleprintf0("this should not happen\n"); env->error = 1; return; } -#define EDGE_LABELS ((pair4 **) ht_keys(edge_ht)) + rtn_internal_env = env; + rtn_internal_num = j; + + crf_map(env->crfp, label2, rtn_internal); - for (NUM i = 0; i < ht_size(edge_ht); i++) { + rtn_internal_env = NULL; + rtn_internal_num = 0; +} -#define EDGE (*(EDGE_LABELS+i)) +static Environment *call_internal_env = NULL; +static pair5 call_internal_label5 = (pair5) { 0 }; - if ((env->error = desc_add(env->pr, env->pu, j, *EDGE))) return; - /* fleprintf("added descriptor (%ld, %ld, %ld, %ld, %ld)\n", - * EDGE->x, EDGE->y, EDGE->z, EDGE->w, j); */ - if ((env->error = - bsr_add(env->gi->g, env->bsrp, - EDGE->x, EDGE->y, EDGE->z, EDGE->w, k, j))) - return; +void +cnp_call_internal(pair3 label) +{ + if (call_internal_env->error) return; + + if ((call_internal_env->error = + desc_add(call_internal_env->pr, + call_internal_env->pu, + label.z, + (prodecor) { + call_internal_label5.x, + call_internal_label5.y, + call_internal_label5.z, + call_internal_label5.u + }))) { + fleprintf0("error\n"); + return; + } + + /* printf("Added desc (%ld, %ld, %ld, %ld, %ld)\n", + * call_internal_label5.x, + * call_internal_label5.y, + * call_internal_label5.z, + * call_internal_label5.u, + * label.z); */ + + if ((call_internal_env->error = + bsr_add(call_internal_env->gi->g, call_internal_env->bsrp, + (pair6) { + call_internal_label5.x, + call_internal_label5.y, + call_internal_label5.z, + call_internal_label5.u, + call_internal_label5.v, + label.z + }))) { + fleprintf0("error\n"); + return; } -#undef EDGE_LABELS -#undef EDGE + /* printf("Added bsr (%ld, %ld, %ld, %ld, %ld, %ld)\n", + * call_internal_label5.x, + * call_internal_label5.y, + * call_internal_label5.z, + * call_internal_label5.u, + * call_internal_label5.v, + * label.z); */ } @@ -175,7 +246,7 @@ cnp_call(Environment *env, pair3 label, NUM i, NUM j) if (xtnt->type != NONTERMINAL) goto cleanup; - NUM X = (NUM) xtnt->data.nt, h = 0; + NUM X = (NUM) xtnt->data.nt; pair2 node = (pair2) { .x = X, .y = j }; pair4 u = (pair4) @@ -183,12 +254,10 @@ cnp_call(Environment *env, pair3 label, NUM i, NUM j) .x = label.x, .y = label.y, .z = label.z, - .w = i + .u = i }; - ht *edge_ht = NULL, *spa_ht = NULL; - - if ((edge_ht = crf_find_node(env->crfp, node)) == NULL) { + if (!(crf_find_node(env->crfp, node))) { if (crf_add_node(env->crfp, node)) goto cleanup; if (crf_add_edge(env->crfp, node, u)) goto cleanup; @@ -196,26 +265,22 @@ cnp_call(Environment *env, pair3 label, NUM i, NUM j) /* errors will be stored in ENV, so we simply return */ nt_add(env, X, j); + /* printf("X = %ld, j = %ld\n", X, j); */ + return; } - if (ht_find(edge_ht, &u) == NULL) { + if (!(crf_find_edge(env->crfp, node, u))) { if (crf_add_edge(env->crfp, node, u)) goto cleanup; - spa_ht = spa_find(env->spap, node); - - if (spa_ht == NULL) return; + call_internal_env = env; + call_internal_label5 = + (pair5) { label.x, label.y, label.z, i, j }; - for (NUM k = 0; k < ht_size(spa_ht); k++) { - h = **((NUM **) ht_keys(spa_ht) + k); + spa_map(env->spap, node, cnp_call_internal); - if (desc_add(env->pr, env->pu, h, u)) goto cleanup; - - if (bsr_add(env->gi->g, env->bsrp, - label.x, label.y, label.z, - i, j, h)) - goto cleanup; - } + call_internal_env = NULL; + call_internal_label5 = (pair5) { 0 }; } return; @@ -316,9 +381,7 @@ new_env(Grammar *g, str *string) env->gi = NULL; - SAFE_MALLOC(grammar_info, env->gi, 1, goto cleanup;); - - memset(env->gi, 0, sizeof(grammar_info)); + SAFE_CALLOC(grammar_info, env->gi, 1, goto cleanup;); env->gi->g = g; @@ -419,21 +482,23 @@ new_env(Grammar *g, str *string) *(env->string+(env->string_len)++) = END_OF_INPUT; - if ((env->crfp = new_crf()) == NULL) goto cleanup; + if ((env->crfp = new_crf(g, env->string_len)) == NULL) + goto cleanup; BOOL error = 0; - env->pu = new_procu(env->string_len, &error); + env->pu = new_procu(g, env->string_len, &error); if (error) goto cleanup; - env->pr = new_procr(env->string_len, &error); + env->pr = new_procr(g, env->string_len, &error); if (error) goto cleanup; - if ((env->spap = new_spa()) == NULL) goto cleanup; + if ((env->spap = new_spa(g, env->string_len)) == NULL) + goto cleanup; - env->bsrp = new_bsr(&error, g); + env->bsrp = new_bsr(g, env->string_len, &error); if (error) goto cleanup; @@ -529,6 +594,8 @@ cnp_parse(Grammar *g, str *string) { Environment *env = new_env(g, (str *) string); + prodecor *current_prodecor = NULL; + if (crf_add_node(env_crfp(env), (pair2) { 0 })) goto cleanup; nt_add(env, 0, 0); @@ -537,8 +604,6 @@ cnp_parse(Grammar *g, str *string) NUM current_grade = 0; - prodecor *current_prodecor = NULL; - Rule_group *rg = NULL; List *right = NULL, *tnts = NULL; @@ -551,7 +616,7 @@ cnp_parse(Grammar *g, str *string) pop_procr(env_r(env), env_u(env), ¤t_grade))) { /* fleprintf("{ %ld, %ld, %ld, %ld, %ld }\n", * current_prodecor->x, current_prodecor->y, - * current_prodecor->z, current_prodecor->w, + * current_prodecor->z, current_prodecor->u, * current_grade); * print_procr(env_r(env)); */ @@ -564,15 +629,16 @@ cnp_parse(Grammar *g, str *string) if (list_length(right) == 0) { errorp = bsr_add(env_grammar(env), env_bsrp(env), - current_prodecor->x, current_prodecor->y, 0, - current_grade, current_grade, current_grade); + (pair6) { + current_prodecor->x, current_prodecor->y, 0, + current_grade, current_grade, current_grade }); if (errorp) goto cleanup; if (env_follow_p(env, current_prodecor->x, *(num_string+current_grade))) { rtn(env, current_prodecor->x, - current_prodecor->w, current_grade); + current_prodecor->u, current_grade); } goto continue_label; @@ -585,7 +651,7 @@ cnp_parse(Grammar *g, str *string) if (env_follow_p(env, current_prodecor->x, *(num_string+current_grade))) { rtn(env, current_prodecor->x, - current_prodecor->w, current_grade); + current_prodecor->u, current_grade); } goto continue_label; @@ -627,9 +693,9 @@ cnp_parse(Grammar *g, str *string) /* add to BSR set */ errorp = bsr_add(env_grammar(env), env_bsrp(env), - current_prodecor->x, current_prodecor->y, - current_prodecor->z, current_prodecor->w, - current_grade, current_grade+1); + (pair6) { current_prodecor->x, current_prodecor->y, + current_prodecor->z, current_prodecor->u, + current_grade, current_grade+1 }); if (errorp) goto cleanup; @@ -641,7 +707,7 @@ cnp_parse(Grammar *g, str *string) *(num_string+current_grade))) { /* fleprintf0("we choose to return\n"); */ rtn(env, current_prodecor->x, - current_prodecor->w, current_grade); + current_prodecor->u, current_grade); /* fleprintf0("hi\n"); */ } @@ -676,11 +742,11 @@ cnp_parse(Grammar *g, str *string) .x = current_prodecor->x, .y = current_prodecor->y, .z = current_prodecor->z - }, current_prodecor->w, current_grade); + }, current_prodecor->u, current_grade); /* fleprintf("after call { %ld, %ld, %ld, %ld, %ld }\n", * current_prodecor->x, current_prodecor->y, - * current_prodecor->z, current_prodecor->w, + * current_prodecor->z, current_prodecor->u, * current_grade); */ continue_label: @@ -688,12 +754,14 @@ cnp_parse(Grammar *g, str *string) CHECK_ENV_ERROR(env, goto cleanup;); free(current_prodecor); + current_prodecor = NULL; } return env; cleanup: + if (current_prodecor) free(current_prodecor); destroy_env(env); return NULL; diff --git a/src/cnp.h b/src/cnp.h index 904e383..7d4a862 100644 --- a/src/cnp.h +++ b/src/cnp.h @@ -2,6 +2,7 @@ #define CNP_H #include "str.h" #include "utf8.h" +#include "tuple.h" #include "bsr.h" #include "crf.h" diff --git a/src/crf.c b/src/crf.c index 1cff96f..1c9fea9 100644 --- a/src/crf.c +++ b/src/crf.c @@ -5,77 +5,110 @@ #include "crf.h" struct crf_s { - ht *table; + luple6 *lup; }; crf * -new_crf() +new_crf(Grammar *g, NUM input_len) { + NUM left_len = grammar_left_len(g); + + NUM max_alt_len = 0, max_rule_len = 0; + + for (NUM i = 0; i < left_len; i++) { + NUM alt_len = rg_len(grammar_rule(g, i)); + if (alt_len > max_rule_len) max_rule_len = alt_len; + for (NUM j = 0; j < alt_len; j++) { + NUM jth_len = list_length(rg_nth(grammar_rule(g, i), j)); + if (jth_len > max_alt_len) max_alt_len = jth_len; + } + } + + NUM *len = NULL; + crf *crfp = NULL; + SAFE_MALLOC(crf, crfp, 1, goto cleanup;); - crfp->table = new_ht2(HT_INIT_CAP); + SAFE_MALLOC(NUM, len, 6, goto cleanup;); + + *len = left_len; + *(len+1) = input_len; + *(len+2) = left_len; + *(len+3) = max_rule_len; + *(len+4) = max_alt_len+1; + *(len+5) = input_len; - if (crfp->table == NULL) goto cleanup; + crfp->lup = new_luple6(len); + + if (crfp->lup == NULL) goto cleanup; return crfp; cleanup: if (crfp) free(crfp); + if (len) free(len); return NULL; } BOOL -crf_add_node(crf * const restrict crfp, pair2 node) +crf_add_node(crf *crfp, pair2 node) { - if (ht_find(crfp->table, &node)) return 0; - - pair2 *p2 = NULL; - - NEW_P2(p2, node.x, node.y, return 1;); - - ht *value_table = new_ht4(HT_INIT_CAP); - - if (value_table == NULL) { - free(p2); - return 1; - } - - if (ht_insert(crfp->table, p2, value_table)) { - fleprintf0("Fail to insert\n"); - destroy_ht(value_table, DESTROY_NONE_SELF); - free(p2); - return 1; - } + return add_to_luple6_pt_2(crfp->lup, node); +} - return 0; +HP_ATTR +BOOL +crf_find_node(crf *crfp, pair2 node) +{ + return luple6_pf_2(crfp->lup, node); } -ht * -crf_find_node(crf * const restrict crfp, pair2 node) +HP_ATTR +BOOL +crf_find_edge(crf *crfp, pair2 source, pair4 label) { - return ht_find(crfp->table, &node); + NUM *result = luple6_find(crfp->lup, (pair6) { + source.x, source.y, label.x, label.y, label.z, label.u + }); + + return (result && *result) ? 1 : 0; } BOOL -crf_add_edge(crf * const restrict crfp, pair2 source, pair4 label) +crf_add_edge(crf *crfp, pair2 source, pair4 label) { - if (ht_find(crfp->table, &source) == NULL) { + if (!(luple6_pf_2(crfp->lup, source))) { fleprintf0("add the node before adding its edges\n"); return 1; } - if (ht_find(ht_find(crfp->table, &source), &label)) return 0; + pair6 p6 = (pair6) + { + source.x, + source.y, + label.x, + label.y, + label.z, + label.u + }; - pair4 *p = NULL; + /* fleprintf("adding (%ld, %ld, %ld, %ld, %ld, %ld)\n", + * p6.x, p6.y, p6.z, p6.u, p6.v, p6.w); */ - NEW_P4(p, label.x, label.y, label.z, label.w, return 1;); + NUM *find_result = luple6_find(crfp->lup, p6); - if (ht_insert(ht_find(crfp->table, &source), p, (void*)1)) { + if (find_result && *find_result) { + /* fleprintf0("Already added!\n"); */ + return 0; + } + + if (add_to_luple6(crfp->lup, p6, 1)) { fleprintf0("Fail to insert\n"); - free(p); + fleprintf("adding (%ld, %ld, %ld, %ld, %ld, %ld)\n", + p6.x, p6.y, p6.z, p6.u, p6.v, p6.w); return 1; } @@ -83,207 +116,167 @@ crf_add_edge(crf * const restrict crfp, pair2 source, pair4 label) } void -destroy_crf(crf * const restrict crfp) +destroy_crf(crf *crfp) { + destroy_luple6(crfp->lup); + free(crfp); +} -#define SIZE (ht_size(crfp->table)) -#define VALUES ((ht**) ht_values(crfp->table)) - - for (NUM i = 0; i < SIZE;) - destroy_ht(*(VALUES+i++), DESTROY_KEY_SELF); +UNUSED +static void +crf_print_internal(pair6 label) +{ + printf("(%ld, %ld, %ld, %ld)\n", + label.z, label.u, label.v, label.w); +} - destroy_ht(crfp->table, DESTROY_KEY_SELF); +void +crf_print(crf *crfp) +{ + printf("Printing a call return forest...\n"); - free(crfp); + NUM *len = luple6_len(crfp->lup); -#undef SIZE -#undef VALUES + for (NUM i = 0; i < *len; i++) + for (NUM j = 0; j < *(len+1); j++) + if (luple6_pf_2(crfp->lup, (pair2) { i, j })) { + printf("Printing edges for (%ld, %ld)...\n", i, j); + luple6_map_2(crfp->lup, (pair2) { i, j }, -1, + crf_print_internal); + printf("\n"); + } +} +H_ATTR +void +crf_map(crf *crfp, pair2 label, map_pair6_t fn) +{ + luple6_map_2(crfp->lup, label, label.y, fn); } struct spa_s { - ht *table; + luple3 *lup; }; spa * -new_spa() +new_spa(Grammar *g, NUM input_len) { spa *spap = NULL; - SAFE_MALLOC(spa, spap, 1, return NULL;); + NUM *len = NULL; - spap->table = new_ht2(HT_INIT_CAP); + SAFE_MALLOC(spa, spap, 1, goto cleanup;); - if (spap->table == NULL) { - free(spap); - return NULL; - } + SAFE_MALLOC(NUM, len, 3, goto cleanup;); - return spap; -} + *len = grammar_left_len(g); + *(len+1) = input_len; + *(len+2) = input_len; -void -destroy_spa(spa * const restrict spap) -{ - for (NUM i = 0; i < ht_size(spap->table); i++) - destroy_ht(*((ht **) ht_values(spap->table)+i), DESTROY_KEY_SELF); + spap->lup = new_luple3(len); - /* fleprintf0("ho\n"); */ + if (spap->lup == NULL) { + fleprintf0("Fail to create luple3\n"); + goto cleanup; + } - destroy_ht(spap->table, DESTROY_KEY_SELF); + return spap; - free(spap); + cleanup: + if (spap) free(spap); + if (len) free(len); + return NULL; } -__attribute__((__pure__, __cold__)) -ht * -spa_ht(CCR_MOD(spa *) spap) +void +destroy_spa(spa *spap) { - return spap->table; + destroy_luple3(spap->lup); + free(spap); } BOOL -spa_insert(spa * const restrict spap, pair3 label) +spa_insert(spa *spap, pair3 label) { - pair2 first_two = (pair2) { 0 }, *p2 = NULL; - - first_two.x = label.x; - first_two.y = label.y; - - NUM j = label.z, *np = NULL; - - ht *value_table = NULL; - - if (ht_find(spap->table, &first_two) == NULL) { - value_table = new_ht(HT_INIT_CAP, 0); - - if (value_table == NULL) { - fleprintf0("Fail to have new hash table\n"); - goto cleanup; - } - - SAFE_MALLOC(NUM, np, 1, goto cleanup;); - *np = label.z; - - if (ht_insert(value_table, np, (void*)1)) { - fleprintf0("Fail to insert\n"); - goto cleanup; - } - - np = NULL; - - NEW_P2(p2, label.x, label.y, goto cleanup;); - - if (ht_insert(spap->table, p2, value_table)) { - fleprintf0("Fail to insert\n"); - goto cleanup; - } - - p2 = NULL; - - goto success; - } - - if (ht_find(ht_find(spap->table, &first_two), &j) == NULL) { - SAFE_MALLOC(NUM, np, 1, goto cleanup;); - *np = label.z; - - if (ht_insert(ht_find(spap->table, &first_two), np, (void*)1)) { - fleprintf0("Fail to insert\n"); - goto cleanup; - } - - np = NULL; + if (add_to_luple3(spap->lup, label, 1)) { + fleprintf0("Fail to insert\n"); + return 1; } - goto success; - - cleanup: - if (value_table) destroy_ht(value_table, DESTROY_KEY_SELF); - if (p2) free(p2); - if (np) free(np); - - return 1; - - success: return 0; } P_ATTR BOOL -spa_belong(CCR_MOD(spa *) spap, pair3 label) +spa_belong(spa *spap, pair3 label) { - pair2 first_two = (pair2) { 0 }; - - first_two.x = label.x; - first_two.y = label.y; - - if (ht_find(spap->table, &first_two) == NULL) return 0; - - NUM j = label.z; + NUM *result = luple3_find(spap->lup, label); - if (ht_find(ht_find(spap->table, &first_two), &j) == NULL) return 0; - - return 1; + return (result && *result) ? 1 : 0; } -P_ATTR -ht * -spa_find(CCR_MOD(spa *) spap, pair2 label) +H_ATTR +void +spa_map(spa *spap, pair2 label, map_pair3_t fn) { - return ht_find(spap->table, &label); + luple3_map_2(spap->lup, label, fn); } typedef struct procr_array_element_s procr_array_element; -/* The list at index k is the set R_k. */ +/* The list at index k is the set of "prodecor"s representing R_k. */ struct procr_array_element_s { BOOL initialized; List *ls; }; struct procr_s { - /* input length */ - NUM len; + /* an array of length 5, holding the lengths */ + NUM *len; /* an array of lists */ procr_array_element *array; /* minimal grade that is non-empty */ NUM mg; }; -typedef struct procu_array_element_s procu_array_element; - -/* The table at index k represents the set U_k. Its keys are those - (X, a, i, j) such that (X, a, i, j, k) belongs to U_k. */ -struct procu_array_element_s { - BOOL initialized; - ht *table; -}; - struct procu_s { - /* input length */ - NUM len; - /* an array of hash tables */ - procu_array_element *array; + luple5 *lup; }; procr * -new_procr(NUM size, BOOL * const restrict errorp) +new_procr(Grammar *g, NUM input_len, BOOL *errorp) { *errorp = 0; + NUM left_len = grammar_left_len(g); + + NUM max_alt_len = 0, max_rule_len = 0; + + for (NUM i = 0; i < left_len; i++) { + NUM alt_len = rg_len(grammar_rule(g, i)); + if (alt_len > max_rule_len) max_rule_len = alt_len; + for (NUM j = 0; j < alt_len; j++) { + NUM jth_len = list_length(rg_nth(grammar_rule(g, i), j)); + if (jth_len > max_alt_len) max_alt_len = jth_len; + } + } + procr *pr = NULL; - SAFE_MALLOC(procr, pr, 1, goto cleanup;); + NUM *len = NULL; + + SAFE_CALLOC(procr, pr, 1, goto cleanup;); - pr->len = size; - pr->array = NULL; - pr->mg = 0; + SAFE_MALLOC(NUM, len, 5, goto cleanup;); - SAFE_MALLOC(procr_array_element, pr->array, - sizeof(procr_array_element) * size, - goto cleanup;); + *len = input_len; + *(len+1) = left_len; + *(len+2) = max_rule_len; + *(len+3) = max_alt_len+1; + *(len+4) = input_len; - memset(pr->array, 0, sizeof(procr_array_element) * size); + pr->len = len; + + SAFE_CALLOC(procr_array_element, pr->array, *len, goto cleanup;); goto success; @@ -291,9 +284,8 @@ new_procr(NUM size, BOOL * const restrict errorp) *errorp = 1; - if (pr && pr->array) free(pr->array); - if (pr) free(pr); + if (len) free(len); pr = NULL; @@ -303,22 +295,41 @@ new_procr(NUM size, BOOL * const restrict errorp) } procu * -new_procu(NUM size, BOOL * const restrict errorp) +new_procu(Grammar *g, NUM input_len, BOOL *errorp) { *errorp = 0; + NUM left_len = grammar_left_len(g); + + NUM max_alt_len = 0, max_rule_len = 0; + + for (NUM i = 0; i < left_len; i++) { + NUM alt_len = rg_len(grammar_rule(g, i)); + if (alt_len > max_rule_len) max_rule_len = alt_len; + for (NUM j = 0; j < alt_len; j++) { + NUM jth_len = list_length(rg_nth(grammar_rule(g, i), j)); + if (jth_len > max_alt_len) max_alt_len = jth_len; + } + } + procu *pu = NULL; + NUM *len = NULL; SAFE_MALLOC(procu, pu, 1, goto cleanup;); + SAFE_MALLOC(NUM, len, 5, goto cleanup;); - pu->len = size; - pu->array = NULL; + *len = input_len; + *(len+1) = left_len; + *(len+2) = max_rule_len; + *(len+3) = max_alt_len+1; + *(len+4) = input_len; - SAFE_MALLOC(procu_array_element, pu->array, - sizeof(procu_array_element) * size, - goto cleanup;); + pu->lup = new_luple5(len); - memset(pu->array, 0, sizeof(procu_array_element) * size); + if (pu->lup == NULL) { + fleprintf0("Fail to create luple5\n"); + goto cleanup; + } goto success; @@ -326,8 +337,7 @@ new_procu(NUM size, BOOL * const restrict errorp) *errorp = 1; - if (pu && pu->array) free(pu->array); - + if (len) free(len); if (pu) free(pu); pu = NULL; @@ -337,160 +347,113 @@ new_procu(NUM size, BOOL * const restrict errorp) return pu; } +UNUSED +static void +print_label4(pair4 label) +{ + printf("(%ld, %ld, %ld, %ld)\n", + label.x, label.y, label.z, label.u); +} + void -print_procr(CCR_MOD(procr *) pr) +print_procr(procr *pr) { - eprintf("\n"); - fleprintf0("Printing procr...\n"); - fleprintf("LEN = %ld, MG = %ld\n", pr->len, pr->mg); - for (NUM i = 0; i < pr->len; i++) { - if ((pr->array+i)->initialized) { - if (list_length((pr->array+i)->ls)) eprintf("%ld: ", i); - for (NUM j = 0; j < list_length((pr->array+i)->ls); j++) { - fleprintf("(%ld, %ld, %ld, %ld)", - ((pair4 *) list_nth((pr->array+i)->ls, j))->x, - ((pair4 *) list_nth((pr->array+i)->ls, j))->y, - ((pair4 *) list_nth((pr->array+i)->ls, j))->z, - ((pair4 *) list_nth((pr->array+i)->ls, j))->w); - if (j+1==list_length((pr->array+i)->ls)) eprintf("\n"); - else eprintf(", "); - } - } + printf("\nPrinting procr...\n"); + printf("MG = %ld\n", pr->mg); + + NUM *len = pr->len; + + for (NUM i = 0; i < *len; i++) { + if (!((pr->array+i)->initialized)) continue; + + printf("Grade = %ld\n", i); + + for (NUM j = 0; j < list_length((pr->array+i)->ls); j++) + printf("(%ld, %ld, %ld, %ld)\n", + ((prodecor *) list_nth((pr->array+i)->ls, j))->x, + ((prodecor *) list_nth((pr->array+i)->ls, j))->y, + ((prodecor *) list_nth((pr->array+i)->ls, j))->z, + ((prodecor *) list_nth((pr->array+i)->ls, j))->u); } - eprintf("\n"); + printf("\n"); } void -destroy_procr(procr * const restrict pr) +destroy_procr(procr *pr) { - for (NUM i = 0; i < pr->len; i++) - if ((pr->array+i)->initialized) - destroy_list((pr->array+i)->ls, 1); + if (pr == NULL) return; - free(pr->array); + if (pr->array == NULL) { + if (pr->len) free(pr->len); + free(pr); + return; + } + for (NUM i = 0; i < *(pr->len); i++) { + if (!((pr->array+i)->initialized)) continue; + destroy_list((pr->array+i)->ls, 1); + } + + if (pr->len) free(pr->len); + + free(pr->array); free(pr); } void -destroy_procu(procu * const restrict pu) +destroy_procu(procu *pu) { - for (NUM i = 0; i < pu->len; i++) - if ((pu->array+i)->initialized) - destroy_ht((pu->array+i)->table, DESTROY_KEY_SELF); - - free(pu->array); - + destroy_luple5(pu->lup); free(pu); } BOOL -desc_add(procr * const restrict pr, procu * const restrict pu, - NUM grade, prodecor dec) +desc_add(procr *pr, procu *pu, NUM grade, prodecor dec) { - pair4 *p4 = NULL; - - if (grade < 0 || grade >= pu->len) { - fleprintf("Invalid grade: %ld\n", grade); - goto cleanup; - } - -#define HELE (pu->array+grade) -#define LELE (pr->array+grade) + pair5 label = (pair5) { grade, dec.x, dec.y, dec.z, dec.u }; - if (HELE->initialized) { - /* If HELE is initialized then LELE should also be initialized. */ - if (ht_find(HELE->table, &dec) != NULL) goto success; + NUM *result = luple5_find(pu->lup, label); - NEW_P4(p4, dec.x, dec.y, dec.z, dec.w, goto cleanup;); + if (result && *result) return 0; - if (ht_insert(HELE->table, p4, (void*)1)) { - fleprintf0("Fail to insert\n"); - goto cleanup; - } - - NEW_P4(p4, dec.x, dec.y, dec.z, dec.w, - ht_delete(HELE->table, &dec, DELETE_KEY); goto cleanup;); - - if (add_to_list(LELE->ls, p4)) { - fleprintf0("Fail to add to list\n"); - ht_delete(HELE->table, &dec, DELETE_KEY); - goto cleanup; - } - - goto success; + if (add_to_luple5(pu->lup, label, 1)) { + fleprintf0("Fail to add to procu\n"); + return 1; } - HELE->table = new_ht4(HT_INIT_CAP); + if (!((pr->array+grade)->initialized)) { + (pr->array+grade)->ls = new_list(); + if ((pr->array+grade)->ls == NULL) { + fleprintf0("Fail to create new list for procr\n"); + return 1; + } - if (HELE->table == NULL) { - fleprintf0("Fail to have new hash table\n"); - goto cleanup; + (pr->array+grade)->initialized = 1; } - HELE->initialized = 1; - - NEW_P4(p4, dec.x, dec.y, dec.z, dec.w, - destroy_ht(HELE->table, DESTROY_KEY_SELF); - HELE->initialized = 0; - goto cleanup;); + prodecor *element = NULL; - if (ht_insert(HELE->table, p4, (void*)1)) { - fleprintf0("Fail to insert\n"); - destroy_ht(HELE->table, DESTROY_KEY_SELF); - HELE->initialized = 0; - goto cleanup; - } + SAFE_MALLOC(prodecor, element, 1, return 1;); - LELE->ls = new_list(); - - if (LELE->ls == NULL) { - fleprintf0("Fail to have new list\n"); - destroy_ht(HELE->table, DESTROY_KEY_SELF); - HELE->initialized = 0; - goto cleanup; - } + *element = dec; - LELE->initialized = 1; - - NEW_P4(p4, dec.x, dec.y, dec.z, dec.w, - destroy_ht(HELE->table, DESTROY_KEY_SELF); - HELE->initialized = 0; - destroy_list(LELE->ls, 1); - LELE->initialized = 0; - goto cleanup;); - - if (add_to_list(LELE->ls, p4)) { - fleprintf0("Fail to add to list\n"); - destroy_ht(HELE->table, DESTROY_KEY_SELF); - HELE->initialized = 0; - destroy_list(LELE->ls, 1); - LELE->initialized = 0; - goto cleanup; + if (add_to_list((pr->array+grade)->ls, element)) { + fleprintf0("Fail to add to procr\n"); + free(element); + return 1; } -#undef HELE -#undef LELE - - goto success; - - cleanup: - - if (p4) free(p4); - - return 1; - - success: - return 0; } prodecor * -pop_procr(procr * pr, procu * pu, NUM *gradep) +pop_procr(procr *pr, procu *pu, NUM *gradep) { prodecor *result = NULL; - if (pr->mg >= pr->len) goto success; + NUM *len = pr->len; + + if (pr->mg >= *len) goto success; #define ELEMENT (pr->array+pr->mg) @@ -498,20 +461,18 @@ pop_procr(procr * pr, procu * pu, NUM *gradep) goto no_reset_mg; if (ELEMENT->initialized && list_length(ELEMENT->ls) == 0) { - destroy_ht((pu->array+pr->mg)->table, DESTROY_KEY_SELF); - (pu->array+pr->mg)->initialized = 0; - (pu->array+pr->mg)->table = NULL; + luple5_free_1(pu->lup, pr->mg); destroy_list(ELEMENT->ls, 1); ELEMENT->initialized = 0; ELEMENT->ls = NULL; } - while (++(pr->mg) < pr->len) { + while (++(pr->mg) < *(pr->len)) { if (ELEMENT->initialized) break; } - if (pr->mg >= pr->len) goto success; + if (pr->mg >= *(pr->len)) goto success; no_reset_mg: @@ -523,5 +484,4 @@ pop_procr(procr * pr, procu * pu, NUM *gradep) success: return result; - } diff --git a/src/crf.h b/src/crf.h index 9d1cc78..b9955fe 100644 --- a/src/crf.h +++ b/src/crf.h @@ -1,5 +1,7 @@ #ifndef CRF_H #define CRF_H +#include "tuple.h" +#include "grammar.h" #include "ht.h" /* This file implements some support functions for the Clustered @@ -48,22 +50,42 @@ tables whose keys are (X, a, b, i) that are in the edge set of the first type node, and whose values are meaningless. */ +/* Now, with the newly written library tuple.h, we implement this in a + different way. We first observe that we don't actually need those + nodes. What matters is the edges, the ability to know if an edge + exists between two given nodes, and to loop through the edges + starting from a given node. Then we record the existence of the + edge between (X, i) and (Y, a, b, j) by storing the tuple + + (X, i, Y, a, b, j). + + The only thing that is troublesome is to loop through the edges + from a given node. So we also observe that an edge is possible + only if j <= i. This ensures that we don't search impossible + edges. Hence the worst-case time complexity of the overall + algorithm is preserved (for every possibility, there really might + exist an edge indeed). The rest is premature optimization, I have + to proceed first. :P */ + typedef struct crf_s crf; -crf *new_crf(); +crf *new_crf(Grammar *g, NUM input_len); /* on error return non-zero */ -BOOL crf_add_node(crf * const restrict crfp, pair2 node); +BOOL crf_add_node(crf *crfp, pair2 node); -/* if not found return NULL. +HP_ATTR BOOL crf_find_node(crf *crfp, pair2 node); - if found but the node has no edges return an empty hash table. */ -ht *crf_find_node(crf * const restrict crfp, pair2 node); +HP_ATTR BOOL crf_find_edge(crf *crfp, pair2 source, pair4 label); BOOL crf_add_edge(crf * const restrict crfp, pair2 source, pair4 label); -void destroy_crf(crf * const restrict crfp); +void destroy_crf(crf *crfp); + +void crf_print(crf *crfp); + +H_ATTR void crf_map(crf *crfp, pair2 label, map_pair6_t fn); /* We also need to implement the set of pending actions. These are triples of the form: @@ -76,25 +98,22 @@ void destroy_crf(crf * const restrict crfp); Precisely speaking, the values are hash tables whose keys are those j and whose values are meaningless non-zero values. */ +/* With the new library, this is now implemented as a "luple3". */ + /* Set of Pending Actions */ typedef struct spa_s spa; -spa *new_spa(); - -void destroy_spa(spa * const restrict spap); +spa *new_spa(Grammar *g, NUM input_len); -__attribute__((__pure__, __cold__)) ht *spa_ht(CCR_MOD(spa *) spap); +void destroy_spa(spa *spap); /* On error return non-zero */ -BOOL spa_insert(spa * const restrict spap, pair3 label); +BOOL spa_insert(spa *spap, pair3 label); /* Find if a triple belongs to the set */ -P_ATTR BOOL spa_belong(CCR_MOD(spa *) spap, pair3 label); +P_ATTR BOOL spa_belong(spa *spap, pair3 label); -/* Find if for a pair (X, k) there exist some j such that (X, k, j) - belong to the set. If so, return the hash table whose keys are - those j; otherwise return NULL. */ -P_ATTR ht *spa_find(CCR_MOD(spa *) spap, pair2 label); +H_ATTR void spa_map(spa *spap, pair2 label, map_pair3_t fn); /* We also implement "process descriptors" for the CNP algorithm. Since a process descriptor is of the form @@ -130,6 +149,18 @@ P_ATTR ht *spa_find(CCR_MOD(spa *) spap, pair2 label); descriptors in U_k. I think this is a good way to lower the space requirements of this algorithm. */ +/* With the new library "tuple.h", we represent a process descriptor + as a "luple5". To implement grading, we store + + (X, a, b, j, k) + + as + + (k, X, a, b, j). + + Note that the procr set is still an array of lists, since we need + to pop elements from procr efficiently. */ + typedef pair4 prodecor; /* The set named R in the paper */ @@ -139,14 +170,13 @@ typedef struct procr_s procr; typedef struct procu_s procu; /* constructor */ -procr *new_procr(NUM size, BOOL * const restrict errorp); -procu *new_procu(NUM size, BOOL * const restrict errorp); +procr *new_procr(Grammar *g, NUM input_len, BOOL *errorp); +procu *new_procu(Grammar *g, NUM input_len, BOOL *errorp); /* insert */ /* the function dscAdd in the paper */ -BOOL desc_add(procr * const restrict pr, procu * const restrict pu, - NUM grade, prodecor dec); +BOOL desc_add(procr *pr, procu *pu, NUM grade, prodecor dec); /* pop */ @@ -156,14 +186,12 @@ BOOL desc_add(procr * const restrict pr, procu * const restrict pu, corresponding slot in pu will be freed automatically. */ /* if no more elements are left, return NULL */ -prodecor *pop_procr(procr * pr, procu * pu, NUM *gradep); +prodecor *pop_procr(procr *pr, procu *pu, NUM *gradep); -void print_procr(CCR_MOD(procr *) pr); +void print_procr(procr *pr); /* destructor */ -void destroy_procr(procr * const restrict pr); -void destroy_procu(procu * const restrict pu); - -#define TEST haha +void destroy_procr(procr *pr); +void destroy_procu(procu *pu); #endif diff --git a/src/grammar.c b/src/grammar.c index 5ed0b96..0b2be8d 100644 --- a/src/grammar.c +++ b/src/grammar.c @@ -222,7 +222,7 @@ build_grammar(Grammar *g, const List *rules, CC_MOD(List *) names, CC_MOD(List *) predicates) { g->names = (List *) names; - g->predicates = predicates; + g->predicates = (List *) predicates; NUM len = list_length(names); NUM rule_len = list_length(rules); diff --git a/src/grammar.h b/src/grammar.h index fd8d5c9..e18ad0e 100644 --- a/src/grammar.h +++ b/src/grammar.h @@ -231,6 +231,13 @@ BOOL nt_follow(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts, ht * const result_terminals, ht * const result_predicates); +/* TODO: Compute which non-terminals are "LL(1)" non-terminals. With + this information, we can save the time to insert a process + descriptor and pop that descriptor, if we know the non-terminal in + question is "LL(1)". A non-terminal A is defined as "LL(1)" if its + different rules have disjoint FIRST sets, and if FIRST(A) is + disjoint from FOLLOW(A), when A is nullable. */ + /* struct that holds information about grammars */ typedef struct grammar_info_s grammar_info; diff --git a/src/ht.c b/src/ht.c index 770eff6..de1db2d 100644 --- a/src/ht.c +++ b/src/ht.c @@ -541,7 +541,7 @@ pair4_hf(CCR_MOD(void *) key) { ((pair4 *) key)->x, ((pair4 *) key)->y, ((pair4 *) key)->z, - ((pair4 *) key)->w); + ((pair4 *) key)->u); /* fleprintf("Hash of (%ld, %ld, %ld) is %ld\n", * ((pair3 *) key)->x, @@ -564,7 +564,7 @@ pair4_cf(CCR_MOD(void *) keya, CCR_MOD(void *) keyb) { return ((pair4 *) keya)->x == ((pair4 *) keyb)->x && ((pair4 *) keya)->y == ((pair4 *) keyb)->y && ((pair4 *) keya)->z == ((pair4 *) keyb)->z && - ((pair4 *) keya)->w == ((pair4 *) keyb)->w; + ((pair4 *) keya)->u == ((pair4 *) keyb)->u; } ht * diff --git a/src/ht.h b/src/ht.h index 65ae747..1defb53 100644 --- a/src/ht.h +++ b/src/ht.h @@ -1,5 +1,6 @@ #ifndef HT_H #define HT_H +#include "pair.h" #include "util.h" enum { HT_INIT_CAP = 1 << 8 }; @@ -117,24 +118,12 @@ P_ATTR void *ht_find(CCR_MOD(ht *) htp, void *key); /* Pairing hash tables */ -struct pair2_s { NUM x; NUM y; }; - -typedef struct pair2_s pair2; - /* On error return NULL */ ht *new_ht2(UNUM size); -struct pair3_s { NUM x; NUM y; NUM z; }; - -typedef struct pair3_s pair3; - /* On error return NULL */ ht *new_ht3(UNUM size); -struct pair4_s { NUM x; NUM y; NUM z; NUM w; }; - -typedef struct pair4_s pair4; - ht *new_ht4(UNUM size); #define NEW_P2(P, X, Y, CLEAN) do { \ @@ -155,7 +144,7 @@ ht *new_ht4(UNUM size); P->x = X; \ P->y = Y; \ P->z = Z; \ - P->w = W; \ + P->u = W; \ } while (0) #endif diff --git a/src/list.c b/src/list.c index 1b9f4d4..05d760c 100644 --- a/src/list.c +++ b/src/list.c @@ -157,7 +157,7 @@ H_ATTR NUM list_length(const List * const restrict ls) { - return ls->len; + return (ls == NULL) ? 0 : ls->len; } BOOL @@ -231,7 +231,7 @@ List * array_to_list(void **array, NUM size) { List *ls = NULL; - + SAFE_MALLOC(List, ls, 1, return NULL;); ls->array = array; @@ -243,6 +243,8 @@ array_to_list(void **array, NUM size) void destroy_list(List *ls, BOOL all_free_p) { + if (ls == NULL) return; + if (all_free_p == 1) for (NUM i = 0; i < ls->len; i++) free(*(ls->array+i)); diff --git a/src/pair.h b/src/pair.h new file mode 100644 index 0000000..baae9f5 --- /dev/null +++ b/src/pair.h @@ -0,0 +1,57 @@ +#ifndef PAIR_H +#define PAIR_H +#include "util.h" + +/* This header contains struct definitions of some pairs. */ + +/* I probably should have named the fields by numbers, such as x1, x2, + etc, so that we can recursively declare these structs by macros. + But it is not worth the trouble to rewrite this I guess. */ + +typedef NUM pair1; + +struct pair2_s { NUM x; NUM y; }; + +typedef struct pair2_s pair2; + +struct pair3_s { NUM x; NUM y; NUM z; }; + +typedef struct pair3_s pair3; + +struct pair4_s { NUM x; NUM y; NUM z; NUM u; }; + +typedef struct pair4_s pair4; + +struct pair5_s { NUM x; NUM y; NUM z; NUM u; NUM v; }; + +typedef struct pair5_s pair5; + +struct pair6_s { NUM x; NUM y; NUM z; NUM u; NUM v; NUM w; }; + +typedef struct pair6_s pair6; + +#define COPY_PAIR1(DEST, SOUR) do { \ + DEST.x = SOUR.y; \ + } while (0) + +#define COPY_PAIR2(DEST, SOUR) do { \ + COPY_PAIR1(DEST, SOUR); \ + DEST.y = SOUR.z; \ + } while (0) + +#define COPY_PAIR3(DEST, SOUR) do { \ + COPY_PAIR2(DEST, SOUR); \ + DEST.z = SOUR.u; \ + } while (0) + +#define COPY_PAIR4(DEST, SOUR) do { \ + COPY_PAIR3(DEST, SOUR); \ + DEST.u = SOUR.v; \ + } while (0) + +#define COPY_PAIR5(DEST, SOUR) do { \ + COPY_PAIR4(DEST, SOUR); \ + DEST.v = SOUR.w; \ + } while (0) + +#endif diff --git a/src/splist.c b/src/splist.c new file mode 100644 index 0000000..c990bfa --- /dev/null +++ b/src/splist.c @@ -0,0 +1,141 @@ +#include "splist.h" +#include "list.h" +#include + +struct splist_s { + List *ls; /* a list */ + List *inv; /* and its inverse */ + NUM len; /* how many elements are stored */ +}; + +splist * +new_splist() +{ + splist *result = MYALLOC(splist, 1); + + if (result == NULL) return NULL; + + result->ls = new_list(); + + if (result->ls == NULL) { + free(result); + return NULL; + } + + result->inv = new_list(); + + if (result->inv == NULL) { + destroy_list(result->ls, 0); + free(result); + return NULL; + } + + result->len = 0; + + return result; +} + +unsigned char +init_splist(splist *s, NUM size) +{ + unsigned char result = 0; + + result = list_assure_size(s->ls, size); + + if (result) return result; + + result = list_set_length(s->ls, size); + + if (result) return result; + + result = list_assure_size(s->inv, size); + + if (result) return result; + + result = list_set_length(s->inv, size); + + return result; +} + +P_ATTR +NUM +splist_len(CCR_MOD(splist *)s) +{ + return s->len; +} + +P_ATTR +List * +splist_ls(CCR_MOD(splist *) s) +{ + return s->ls; +} + +H_ATTR +void +reset_splist(splist *s) +{ + s->len = 0; +} + +H_ATTR +unsigned char +add_to_splist(splist *s, NUM element) +{ + if (s->len >= list_length(s->ls)) { + fleprintf0("The length of the sparse list exceeds its size!\n"); + print_splist(s); + return 1; + } + + NUM *index_pointer = list_nth(s->ls, s->len); + + *index_pointer = element; + + NUM *element_pointer = list_nth(s->inv, element); + *element_pointer = s->len; + + s->len += 1; + + return 0; +} + +H_ATTR +unsigned char +splist_is_member(splist *s, NUM element) +{ + NUM *index = list_nth(s->inv, element); + + if (*index < 0 || *index >= s->len) + return 0; + + NUM *element_pointer = list_nth(s->ls, *index); + + return *element_pointer == element; +} + +void +print_splist(splist *s) +{ + printf("Printing a sparse list with size %lu:\n", + list_length(s->ls)); + + for (NUM i = 0; i < list_length(s->ls); i++) { + NUM *pointer = list_nth(s->ls, i); + printf("%ld, \t", *pointer); + + pointer = list_nth(s->inv, i); + printf("%ld\n", *pointer); + } + + printf("Printing terminated\n"); +} + +void +destroy_splist(splist *s) +{ + destroy_list(s->ls, 2); + destroy_list(s->inv, 2); + + free(s); +} diff --git a/src/splist.h b/src/splist.h new file mode 100644 index 0000000..e6be624 --- /dev/null +++ b/src/splist.h @@ -0,0 +1,36 @@ +#ifndef SPLIST_H +#define SPLIST_H +#include "util.h" +#include "list.h" + +/* A sparse list is a list with its "inverse list". This can be used + to determine if a number belongs to a sparse list at constant + time. */ + +typedef struct splist_s splist; + +splist *new_splist(); + +/* A sparse list is of fixed size. */ +unsigned char init_splist(splist *s, NUM size); + +P_ATTR NUM splist_len(CCR_MOD(splist *) s); + +/* NOTE: The returned list is owned by the original splist, so don't + destroy that list or do anything that should not be done. */ +P_ATTR List *splist_ls(CCR_MOD(splist *) s); + +/* We assume that elements of a sparse list are of type NUM. */ +unsigned char add_to_splist(splist *s, NUM element); + +/* resetting a sparse list means to set its length to 0 */ +void reset_splist(splist *s); + +/* Constant time operation */ +unsigned char splist_is_member(splist *s, NUM element); + +void print_splist(splist *s); + +void destroy_splist(splist *s); + +#endif diff --git a/src/test/check_bsr.c b/src/test/check_bsr.c index 81a84a7..7f729ca 100644 --- a/src/test/check_bsr.c +++ b/src/test/check_bsr.c @@ -114,23 +114,39 @@ main(int UNUSED argc, char ** UNUSED argv) BOOL error = 0; - bsr *bsrp = new_bsr(&error, g); + bsr *bsrp = new_bsr(g, 10, &error); if (error) goto cleanup; - bsr_add(g, bsrp, 0, 0, 1, 0, 1, 2); - bsr_add(g, bsrp, 0, 1, 1, 0, 1, 2); - bsr_add(g, bsrp, 0, 1, 2, 0, 1, 2); - + if (bsr_add(g, bsrp, (pair6) { 0, 0, 1, 0, 1, 2 })) { + fleprintf0("Fail to add to BSR\n"); + goto cleanup; + } + + if (bsr_add(g, bsrp, (pair6) { 0, 1, 1, 0, 1, 2 })) { + fleprintf0("Fail to add to BSR\n"); + goto cleanup; + } + + if (bsr_add(g, bsrp, (pair6) { 0, 1, 2, 0, 1, 2 })) { + fleprintf0("Fail to add to BSR\n"); + goto cleanup; + } + + if (bsr_add(g, bsrp, (pair6) { 0, 1, 3, 0, 1, 3 })) { + fleprintf0("Fail to add to BSR\n"); + goto cleanup; + } + if (bsr_find(bsrp, g, &error, - 0, 0, 1, 0, 1, 2)) { + (pair6) { 0, 0, 1, 0, 1, 2 })) { printf("Successfully found 0, 0, 1, 0, 1, 2\n"); } else { fleprintf0("Fail to find 0, 0, 1, 0, 1, 2\n"); } if (bsr_find(bsrp, g, &error, - 0, 0, 1, 0, 1, 3)) { + (pair6) { 0, 0, 1, 0, 1, 3 })) { fleprintf0("Accidentally found 0, 0, 1, 0, 1, 3\n"); } else { printf("Indeed cannot find 0, 0, 1, 0, 1, 3\n"); diff --git a/src/test/check_cnp.c b/src/test/check_cnp.c index 91952ef..a21d74e 100644 --- a/src/test/check_cnp.c +++ b/src/test/check_cnp.c @@ -1,9 +1,16 @@ #include "time.h" #include "../cnp.h" -#define TIC do { \ - clock_gettime(CLOCK_MONOTONIC_RAW, &tic); \ - } while (0) +UNUSED +static void +print_label6(pair6 label) +{ + printf("(%ld, %ld, %ld, %ld, %ld, %ld)\n", + label.x, label.y, label.z, label.u, label.v, label.w); +} + +#define TIC struct timespec tic, toc; \ + do { clock_gettime(CLOCK_MONOTONIC_RAW, &tic); } while (0) #define TOC do { \ clock_gettime(CLOCK_MONOTONIC_RAW, &toc); \ @@ -175,13 +182,13 @@ main(int UNUSED argc, char ** UNUSED argv) string = new_utf8("aab", 3); break; case 2: - string = new_utf8("[+-[.,]><][+-[.,]><][+-[.,]><][+-[.,]><][+-[.,]><][+-[.,]><][+-[.,]><][+-[.,]><][+-[.,]><][+-[.,]><]", 100); + string = new_utf8("[[[[[[[[[[[[[[[[[[[[[[[++[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]--]]]]]]]]]]]]]]]]]]]]]]]", 204); break; case 3: - string = new_utf8("bbbbb", 5); + string = new_utf8("bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", 100); break; case 4: - string = new_utf8("aaaaaaaaaaaa", 12); + string = new_utf8("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 48); break; default: fleprintf0("Forgot to assign input!\n"); @@ -190,7 +197,6 @@ main(int UNUSED argc, char ** UNUSED argv) printf("\nPrinting the input...\n%s\n", get_data((str *) string)); - struct timespec tic, toc; TIC; Environment *env = cnp_parse(g, (str *) string); @@ -199,33 +205,22 @@ main(int UNUSED argc, char ** UNUSED argv) if (env) { if (!(env_error_p(env))) { - BOOL error = 0; - ht *result = bsr_lookup - (env_bsrp(env), &error, 0, 0, str_length((str *) string)); - - if (error) { - printf("There are errors!\n"); - goto destroy; - } + BOOL result = bsr_lookup + (env_bsrp(env), 0, 0, str_length((str *) string)); - if (result && ht_size(result)) { + if (result) { printf("\nSuccessfully parsed the input!\n"); - for (NUM i = 0; i < ht_size(result); i++) - printf("(%d, %ld, %d, %ld, %ld)\n", - 0, (*((pair2 **) ht_keys(result)+i))->x, - 0, (*((pair2 **) ht_keys(result)+i))->y, - str_length((str *) string)); } else { printf("\nThe input does not parse!\n"); } printf("\nAll BSRs follow:\n\n"); - bsr_print(env_bsrp(env), env_grammar(env), 1); + if (argc == 1) + bsr_print(env_bsrp(env), env_grammar(env), 1); } else { printf("There are errors!\n"); } - destroy: destroy_env(env); } diff --git a/src/test/check_crf.c b/src/test/check_crf.c index d99a838..72931f8 100644 --- a/src/test/check_crf.c +++ b/src/test/check_crf.c @@ -2,9 +2,116 @@ #include "../util.h" #include "../crf.h" +#define READ_INTO_CPA(N, U, I, VA, VI, CP) do { \ + U = new_utf8(N, 1); \ + I = get_info((str *)U, 0); \ + VA = MYALLOC(NUM, 1); \ + VI = 0; \ + for (NUM index = 0; \ + I.value >= 0 && index < str_length((str *) U); \ + index += I.step, VI++) { \ + I = get_info((str *)U, index); \ + *(VA+VI) = I.value; \ + } \ + CP = MYALLOC(cpa, 1); \ + CP->array = VA; \ + CP->size = VI; \ + add_to_list(names, CP); \ + destroy_str((str *)U, 1); \ + } while (0) + +#define READ_TNT_STRING(LEFT, FORMAT, LEN, ...) do { \ + tnt_string = new_tnt_string(FORMAT, LEN, __VA_ARGS__); \ + if (!tnt_string) { \ + fleprintf("left = %d, f = %s, l = %d, " \ + "cannot create tnt string\n", \ + LEFT, FORMAT, LEN); \ + map_list(rules, destroy_rule_and_free_all); \ + destroy_list(rules, 0); \ + map_list(names, destroy_cpa_and_free_all); \ + destroy_list(names, 0); \ + return 1; \ + } \ + rule = new_rule(LEFT, tnt_string); \ + add_to_list(rules, rule); \ + } while (0) + +static void +print_label3(pair3 label) +{ + printf("(%ld, %ld, %ld)\n", label.x, label.y, label.z); +} + int main(int UNUSED argc, char ** UNUSED argv) { + /* Have a grammar for testing */ + + List *tnt_string = NULL; + Rule *rule = NULL; + List *rules = new_list(); + List *names = new_list(); + + char *name = MYALLOC(char, 2); + *(name+1) = 0; + *name = 'E'; + + utf8* uname = NULL; + + str_info info = EMPTY_STR_INFO; + + NUM *varray = NULL; + NUM vindex = 0; + cpa *cpap = NULL; + + READ_INTO_CPA(name, uname, info, varray, vindex, cpap); + + name = MYALLOC(char, 2); + *(name+1) = 0; + *name = 'T'; + + READ_INTO_CPA(name, uname, info, varray, vindex, cpap); + + name = MYALLOC(char, 2); + *(name+1) = 0; + *name = 'F'; + + READ_INTO_CPA(name, uname, info, varray, vindex, cpap); + + name = MYALLOC(char, 2); + *(name+1) = 0; + *name = 'D'; + + READ_INTO_CPA(name, uname, info, varray, vindex, cpap); + + READ_TNT_STRING(0, "n", 1, (NT) 1); + READ_TNT_STRING(0, "ntn", 3, (NT) 1, (T) 43, (NT) 1); + + READ_TNT_STRING(1, "n", 1, (NT) 2); + READ_TNT_STRING(1, "ntn", 3, (NT) 2, (T) 42, (NT) 2); + + READ_TNT_STRING(2, "n", 1, (NT) 3); + READ_TNT_STRING(2, "tnt", 3, (T) 40, (NT) 0, (T) 41); + + READ_TNT_STRING(3, "tn", 2, (T) 48, (NT) 3); + READ_TNT_STRING(3, "tn", 2, (T) 49, (NT) 3); + READ_TNT_STRING(3, "tn", 2, (T) 50, (NT) 3); + READ_TNT_STRING(3, "tn", 2, (T) 51, (NT) 3); + READ_TNT_STRING(3, "tn", 2, (T) 52, (NT) 3); + READ_TNT_STRING(3, "tn", 2, (T) 53, (NT) 3); + READ_TNT_STRING(3, "tn", 2, (T) 54, (NT) 3); + READ_TNT_STRING(3, "tn", 2, (T) 55, (NT) 3); + READ_TNT_STRING(3, "tn", 2, (T) 56, (NT) 3); + READ_TNT_STRING(3, "tn", 2, (T) 57, (NT) 3); + rule = new_rule(3, new_list()); + add_to_list(rules, rule); + + Grammar *g = new_grammar(); + + build_grammar(g, rules, names, NULL); + + print_grammar(g); + procr *pr = NULL; procu *pu = NULL; @@ -12,14 +119,21 @@ main(int UNUSED argc, char ** UNUSED argv) prodecor *prod = NULL; + NUM input_len = 10; + /* check crf */ - crf *crfp = new_crf(); + crf *crfp = new_crf(g, input_len); + if (crfp == NULL) { + fleprintf0("Fail to create new CRF\n"); + goto cleanup; + } + pair2 p = { 0 }; p.x = 0; p.y = 1; - if (crf_find_node(crfp, p) == NULL) { + if (!(crf_find_node(crfp, p))) { if (crf_add_node(crfp, p)) { fleprintf0("fail to add node\n"); goto cleanup; @@ -28,7 +142,7 @@ main(int UNUSED argc, char ** UNUSED argv) } } - if (crf_find_node(crfp, p) == NULL) { + if (!(crf_find_node(crfp, p))) { fleprintf0("Fail to find node\n"); goto cleanup; } else { @@ -41,39 +155,46 @@ main(int UNUSED argc, char ** UNUSED argv) fleprintf0("Fail to add edge\n"); goto cleanup; } else { - printf("Successfully add edge from (0, 1) to zero\n"); + printf("Successfully add edge from (0, 1) to (0, 0, 0, 0)\n"); } - if (crf_find_node(crfp, p)) { - printf("Printing edges for (0, 1)...\n"); + p4.u = 1; -#define TABLE (crf_find_node(crfp, p)) + if (crf_add_edge(crfp, p, p4)) { + fleprintf0("Fail to add edge\n"); + goto cleanup; + } else { + printf("Successfully add edge from (0, 1) to (0, 0, 0, 1)\n"); + } - for (NUM i = 0; i < ht_size(TABLE); i++) { -#define KEY (*((pair4**) ht_keys(TABLE)+i)) - printf("(%ld, %ld, %ld, %ld)", - KEY->x, KEY->y, KEY->z, KEY->w); - if (i+1x, prod->y, prod->z, prod->w, grade); + prod->x, prod->y, prod->z, prod->u, grade); cleanup: - destroy_crf(crfp); + if (crfp) destroy_crf(crfp); + if (spap) destroy_spa(spap); + + destroy_grammar(g, 1); + destroy_list(rules, 1); if (pr) destroy_procr(pr); if (pu) destroy_procu(pu); - if (spap) destroy_spa(spap); if (prod) free(prod); return 0; diff --git a/src/test/check_ht.c b/src/test/check_ht.c index 62a7bee..dd94eab 100644 --- a/src/test/check_ht.c +++ b/src/test/check_ht.c @@ -192,7 +192,7 @@ int main(int UNUSED argc, char ** UNUSED argv) printf("Success!\n"); - pair4 testk4 = { .x = 0, .y = 0, .z = 0, .w = 1 }; + pair4 testk4 = { .x = 0, .y = 0, .z = 0, .u = 1 }; if (ht_find(htp, &testk4) == NULL) { fleprintf0("Fail to find zero pair\n"); @@ -206,7 +206,7 @@ int main(int UNUSED argc, char ** UNUSED argv) testk4.x = 1; testk4.y = 2; testk4.z = -1; - testk4.w = 2; + testk4.u = 2; if (ht_find(htp, &testk4) == NULL) { fleprintf0("Fail to find one two minus one pair\n"); @@ -220,7 +220,7 @@ int main(int UNUSED argc, char ** UNUSED argv) testk4.x = 1; testk4.y = 2; testk4.z = 1; - testk4.w = 2; + testk4.u = 2; if (ht_find(htp, &testk4)) { fleprintf0("Accidentally find one two one two pair\n"); diff --git a/src/test/check_splist.c b/src/test/check_splist.c new file mode 100644 index 0000000..335b383 --- /dev/null +++ b/src/test/check_splist.c @@ -0,0 +1,62 @@ +#include "../splist.h" +#include + +int +main(U_ATTR int argc, U_ATTR char **argv) +{ + splist *s = new_splist(); + + if (!s) { + eprintf("failed to create a splist!\n"); + exit(1); + } + + unsigned char result = 0; + + result = init_splist(s, 10); + + if (result) { + eprintf("failed to init splist\n"); + exit(1); + } + + if (add_to_splist(s, 2) || + add_to_splist(s, 4) || + add_to_splist(s, 8)) { + eprintf("failed to add to splist!\n"); + exit(1); + } + + print_splist(s); + + /* eprintf("Successfully printed splist\n"); */ + + if (splist_is_member(s, 8)) + eprintf("8 is indeed a member\n"); + else { + eprintf("8 should be a member!\n"); + exit(1); + } + + if (splist_is_member(s, 1)) { + eprintf("1 should not be a member\n"); + exit(1); + } else { + eprintf("1 indeed is not a member!\n"); + } + + reset_splist(s); + + if (splist_is_member(s, 8)) { + eprintf("8 should not be a member now!\n"); + exit(1); + } else { + eprintf("8 is indeed not anymore a member!\n"); + } + + destroy_splist(s); + + eprintf("Successfully destroyed splist\n"); + + return 0; +} diff --git a/src/test/check_tuple.c b/src/test/check_tuple.c new file mode 100644 index 0000000..74a60b2 --- /dev/null +++ b/src/test/check_tuple.c @@ -0,0 +1,244 @@ +#include +#include "../util.h" +#include "../tuple.h" + +void print_label3(pair3 label) +{ + printf("(%ld, %ld, %ld)\n", label.x, label.y, label.z); +} + +void print_label5(pair5 label) +{ + printf("(%ld, %ld, %ld, %ld, %ld)\n", + label.x, label.y, label.z, label.u, label.v); +} + +void print_label6(pair6 label) +{ + printf("(%ld, %ld, %ld, %ld, %ld, %ld)\n", + label.x, label.y, label.z, label.u, label.v, label.w); +} + +int +main(int UNUSED argc, char ** UNUSED argv) +{ + NUM *lengths = NULL; + + SAFE_MALLOC(NUM, lengths, 3, return 1;); + + *lengths = 2; + *(lengths+1) = 2; + *(lengths+2) = 6; + + luple3 *l3 = new_luple3(lengths); + + printf("Successfully created 3 tuple!\n"); + + if (add_to_luple3(l3, (pair3) { 0 }, 1)) { + fleprintf0("Fail to add to L3\n"); + destroy_luple3(l3); + goto cleanup; + } + + if (add_to_luple3(l3, (pair3) { 0, 0, 1 }, 1)) { + fleprintf0("Fail to add to L3\n"); + destroy_luple3(l3); + goto cleanup; + } + + if (add_to_luple3(l3, (pair3) { 0, 0, 5 }, 1)) { + fleprintf0("Fail to add to L3\n"); + destroy_luple3(l3); + goto cleanup; + } + + printf("Successfully added to L3\n"); + + NUM *result = luple3_find(l3, (pair3) { 0 }); + + if (result) { + printf("Successfully found value %ld for (0, 0, 0)\n", + *result); + } else { + fleprintf0("fail to find value for (0, 0, 0)\n"); + destroy_luple3(l3); + goto cleanup; + } + + /* the first is one and the rest are all zero */ + result = luple3_find(l3, (pair3) { 1 }); + + if (result) { + fleprintf("Accidentally found value %ld for (1, 0, 0)\n", + *result); + destroy_luple3(l3); + goto cleanup; + } else { + printf("Fail to find value for (1, 0, 0), as expected\n"); + } + + printf("Printing values starting from (0, 0)...\n"); + luple3_map_2(l3, (pair2) { 0 }, print_label3); + + destroy_luple3(l3); + + SAFE_MALLOC(NUM, lengths, 5, goto cleanup;); + + for (int i = 0; i < 4;) *(lengths+i++) = 2; + *(lengths+4) = 10; + + luple5 *l5 = new_luple5(lengths); + + printf("Successfully created 5 tuple!\n"); + + pair5 p5 = (pair5) { 1 }; + + if (add_to_luple5(l5, p5, 1)) { + fleprintf0("Fail to add to L5\n"); + destroy_luple5(l5); + goto cleanup; + } + + p5.v = 2; + + if (add_to_luple5(l5, p5, 1)) { + fleprintf0("Fail to add to L5\n"); + destroy_luple5(l5); + goto cleanup; + } + + p5.v = 9; + + if (add_to_luple5(l5, p5, 1)) { + fleprintf0("Fail to add to L5\n"); + destroy_luple5(l5); + goto cleanup; + } + + printf("Successfully added to L5\n"); + + printf("Printing values starting from (1, 0, 0)...\n"); + luple5_map_3(l5, (pair3) { 1 }, print_label5); + + destroy_luple5(l5); + + SAFE_MALLOC(NUM, lengths, 6, return 1;); + + for (int i = 0; i < 5;) *(lengths+i++) = 2; + *(lengths+5) = 10; + + luple6 *l6 = new_luple6(lengths); + + printf("Successfully created 6 tuple!\n"); + + pair6 p6 = (pair6) { 1, 0, 1 }; + + if (add_to_luple6(l6, p6, 1)) { + fleprintf0("Fail to add to L6\n"); + destroy_luple6(l6); + goto cleanup; + } + + p6.w = 2; + + if (add_to_luple6(l6, p6, 1)) { + fleprintf0("Fail to add to L6\n"); + destroy_luple6(l6); + goto cleanup; + } + + p6.w = 9; + + if (add_to_luple6(l6, p6, 1)) { + fleprintf0("Fail to add to L6\n"); + destroy_luple6(l6); + goto cleanup; + } + + p6.w = 7; + + if (add_to_luple6(l6, p6, 1)) { + fleprintf0("Fail to add to L6\n"); + destroy_luple6(l6); + goto cleanup; + } + + printf("Successfully added to L6\n"); + + if (add_to_luple6_pt_2(l6, (pair2) { 0 })) { + fleprintf0("Fail to add partially to L6\n"); + destroy_luple6(l6); + goto cleanup; + } + + printf("Successfully added partially to L6\n"); + + result = luple6_find(l6, p6); + + if (result) { + printf("Successfully found value %ld for " + "(1, 0, 1, 0, 0, 0)\n", + *result); + } else { + fleprintf0("fail to find value for " + "(1, 0, 1, 0, 0, 0)\n"); + destroy_luple6(l6); + goto cleanup; + } + + result = luple6_find(l6, (pair6) { 1, 4 }); + + if (result) { + fleprintf("Accidentally found value %ld for " + "(1, 4, 0, 0, 0, 0)\n", + *result); + destroy_luple6(l6); + goto cleanup; + } else { + printf("Fail to find value for " + "(1, 4, 0, 0, 0, 0), as expected\n"); + } + + if (luple6_pf_2(l6, (pair2) { 1 })) { + printf("Successfully detected existence of (1, 0)\n"); + } else { + fleprintf0("fail to detect existence of (1, 0)\n"); + destroy_luple6(l6); + goto cleanup; + } + + if (luple6_pf_2(l6, (pair2) { 1, 1 })) { + fleprintf0("Accidentally detected existence of (1, 1)\n"); + destroy_luple6(l6); + goto cleanup; + } else { + printf("Fail to detect existence of (1, 0), as expected\n"); + } + + if (luple6_pf_2(l6, (pair2) { 0 })) { + printf("Successfully detected partial existence of (0, 0)\n"); + } else { + fleprintf0("fail to detect partial existence of (0, 0)\n"); + destroy_luple6(l6); + goto cleanup; + } + + printf("Printing values starting from (1, 0) under 7...\n"); + luple6_map_2(l6, (pair2) { 1 }, 7, print_label6); + + printf("Printing values starting from (1, 0) under 2...\n"); + luple6_map_2(l6, (pair2) { 1 }, 2, print_label6); + + printf("Printing values starting from (1, 0) under 9...\n"); + luple6_map_2(l6, (pair2) { 1 }, 9, print_label6); + + printf("Printing values starting from (1, 0, 1, 0, 0)...\n"); + luple6_map_5(l6, (pair5) { 1, 0, 1 }, print_label6); + + destroy_luple6(l6); + + return 0; + + cleanup: + return 1; +} diff --git a/src/tuple.c b/src/tuple.c new file mode 100644 index 0000000..92f2e6a --- /dev/null +++ b/src/tuple.c @@ -0,0 +1,629 @@ +#include +#include "tuple.h" + +#define TUP(N, M) struct PASTER(tuple, N, _s) { \ + struct PASTER(tuple, M, _s) *array; \ + BOOL initialized; \ + } + +/* "lengthed" tuple */ +#define LTUP(N) struct PASTER(luple, N, _s) { \ + struct PASTER(tuple, N, _s) tuple; \ + NUM *lengths; \ + } + +#define CONSTRUCT(N, M) UNUSED static \ + PASTER(tuple, N,) *PASTER \ + (new, _tuple, N)() { \ + PASTER(tuple, N,) *result = NULL; \ + SAFE_CALLOC(PASTER(tuple, N,), result, 1, return NULL;); \ + return result; \ + } \ + MY_Static_MES("To require a semicolon!") + +#define LONSTRUCT(N) PASTER(luple, N,) *PASTER \ + (new_luple, N,)(NUM *len) { \ + PASTER(luple, N,) *result = NULL; \ + SAFE_CALLOC(PASTER(luple, N,), result, 1, return NULL;); \ + result->lengths = len; \ + return result; \ + } \ + MY_Static_MES("To require a semicolon!") + +#define DESTRUCT(N, M) static void PASTER(destroy_tuple, N,) \ + (PASTER(tuple, N,) tup, NUM *len) { \ + if (!(tup.initialized)) return; \ + for (NUM PASTER(i, N,) = 0; \ + PASTER(i, N,) < *len; \ + PASTER(i, N,)++) { \ + PASTER(destroy_tuple, M,)(*(tup.array+PASTER(i, N,)), \ + len+1); \ + } \ + free(tup.array); \ + } \ + MY_Static_MES("To require a semicolon!") + +#define LESTRUCT(N) void PASTER(destroy_luple, N,) \ + (PASTER(luple, N,) *lup) { \ + if (lup == NULL) return; \ + PASTER(destroy_tuple, N,)(lup->tuple, lup->lengths); \ + free(lup->lengths); \ + free(lup); \ + } \ + MY_Static_MES("To require a semicolon!") + +/* NOTE: The function call return should be the last thing in the + function body, so that GCC knows to perform tail-call optimization, + with optimization level at least 2 I guess. */ +#define ADDT(N, M) static BOOL PASTER(add_to_tuple, N,) \ + (PASTER(tuple, N,) *tup, NUM *len, \ + PASTER(pair, N,) label, NUM val) { \ + if (label.x < 0 || label.x >= *len) { \ + fleprintf("Invalid label = %ld, len = %ld\n", \ + label.x, *len); \ + goto cleanup; \ + } \ + if (!(tup->initialized)) { \ + SAFE_CALLOC \ + (PASTER(tuple, M,), tup->array, *len, goto cleanup;); \ + tup->initialized = 1; \ + } \ + PASTER(pair, M,) newlabel = (PASTER(pair, M,)) { 0 }; \ + PASTER(COPY_PAIR, M,)(newlabel, label); \ + goto success; \ + cleanup: \ + return 1; \ + success: \ + return PASTER(add_to_tuple, M,) \ + (tup->array+label.x, len+1, newlabel, val); \ + } \ + MY_Static_MES("To require a semicolon!") + +#define ADDL(N) BOOL PASTER(add_to_luple, N,) \ + (PASTER(luple, N,) *lup, PASTER(pair, N,) label, \ + NUM val) { \ + return PASTER(add_to_tuple, N,) \ + ((PASTER(tuple, N,) *) lup, lup->lengths, label, val); \ + } \ + MY_Static_MES("To require a semicolon!") + +#define INDEXT(N, M) HP_ATTR static NUM * \ + PASTER(tuple, N, _find) \ + (PASTER(tuple, N,) *tup, NUM *len, \ + PASTER(pair, N,) label) { \ + if (!(tup->initialized)) return NULL; \ + if (label.x < 0 || label.x >= *len) return NULL; \ + PASTER(pair, M,) newlabel = (PASTER(pair, M,)) { 0 }; \ + PASTER(COPY_PAIR, M,)(newlabel, label); \ + return PASTER(tuple, M, _find) \ + (tup->array+label.x, len+1, newlabel); \ + } \ + MY_Static_MES("To require a semicolon!") + +#define INDEXL(N) HP_ATTR NUM *PASTER(luple, N, _find) \ + (PASTER(luple, N,) *lup, PASTER(pair, N,) label) { \ + return PASTER(tuple, N, _find) \ + ((PASTER(tuple, N,) *) lup, lup->lengths, label); \ + } \ + MY_Static_MES("To require a semicolon!") + +#define INDEXPT(N, M, P, Q) HP_ATTR static BOOL \ + PASTER(PASTER(tuple, N, _pf), _, M) \ + (PASTER(tuple, N,) *tup, NUM *len, \ + PASTER(pair, M,) label) { \ + if (!(tup->initialized)) return 0; \ + if (label.x < 0 || label.x >= *len) return 0; \ + PASTER(pair, Q,) newlabel = (PASTER(pair, Q,)) { 0 }; \ + PASTER(COPY_PAIR, Q,)(newlabel, label); \ + return PASTER(PASTER(tuple, P, _pf), _, Q) \ + (tup->array+label.x, len+1, newlabel); \ + } \ + MY_Static_MES("To require a semicolon!") + +#define INDEXPL(N, M) HP_ATTR BOOL \ + PASTER(PASTER(luple, N, _pf), _, M) \ + (PASTER(luple, N,) *lup, PASTER(pair, M,) label) { \ + return PASTER(PASTER(tuple, N, _pf), _, M) \ + ((PASTER(tuple, N,) *) lup, lup->lengths, label); \ + } \ + MY_Static_MES("To require a semicolon!") + +#define LENGHL(N) P_ATTR NUM *PASTER(luple, N, _len) \ + (PASTER(luple, N,) *lup) { \ + return lup->lengths; \ + } \ + MY_Static_MES("To require a semicolon!") + +/* TODO: Memory report */ + +struct tuple1_s { + NUM *array; + BOOL initialized; +}; + +TUP(2, 1); +TUP(3, 2); +TUP(4, 3); +TUP(5, 4); +TUP(6, 5); + +LTUP(2); +LTUP(3); +LTUP(4); +LTUP(5); +LTUP(6); + +UNUSED +static tuple1 * +new_tuple1(NUM len) +{ + tuple1 *result = NULL; + SAFE_CALLOC(tuple1, result, 1, return NULL;); + SAFE_CALLOC(NUM, result->array, len, free(result); return NULL;); + return result; +} + +CONSTRUCT(2, 1); +CONSTRUCT(3, 2); + +tuple4 * +new_tuple4() +{ + tuple4 *result = NULL; + SAFE_CALLOC(tuple4, result, 1, return NULL;); + return result; +} + +CONSTRUCT(5, 4); +CONSTRUCT(6, 5); + +LONSTRUCT(2); +LONSTRUCT(3); +LONSTRUCT(4); +LONSTRUCT(5); +LONSTRUCT(6); + +static void +destroy_tuple1(tuple1 tup, NUM * UNUSED len) { + if (!(tup.initialized)) return; + free(tup.array); +} + +DESTRUCT(2, 1); +DESTRUCT(3, 2); + +void +destroy_tuple4(tuple4 tup, NUM *len) +{ + if (!(tup.initialized)) return; + for (NUM i4 = 0; i4 < *len; i4++) + destroy_tuple3(*(tup.array+i4), len+1); + free(tup.array); +} + +DESTRUCT(5, 4); +DESTRUCT(6, 5); + +LESTRUCT(2); +LESTRUCT(3); +LESTRUCT(4); +LESTRUCT(5); +LESTRUCT(6); + +static BOOL +add_to_tuple1(tuple1 *tup, NUM *len, pair1 label, NUM val) +{ + if (!(tup->initialized)) { + SAFE_CALLOC(NUM, tup->array, *len, goto cleanup;); + } + tup->initialized = 1; + + if (label < 0 || label >= *len) { + fleprintf("Invalid label = %ld, len = %ld\n", + label, *len); + goto cleanup; + } + + *(tup->array+label) = val; + + return 0; + + cleanup: + return 1; +} + +static BOOL +add_to_tuple2(tuple2 *tup, NUM *len, pair2 label, NUM val) +{ + if (label.x < 0 || label.x >= *len) { + fleprintf("Invalid label = %ld, len = %ld\n", + label.x, *len); + goto cleanup; + } + + if (!(tup->initialized)) { + SAFE_CALLOC(tuple1, tup->array, *len, goto cleanup;); + tup->initialized = 1; + } + + pair1 newlabel = label.y; + + goto success; + + cleanup: + return 1; + + success: + return add_to_tuple1(tup->array+label.x, len+1, newlabel, val); +} + +ADDT(3, 2); +ADDT(4, 3); +ADDT(5, 4); +ADDT(6, 5); + +ADDL(2); +ADDL(3); +ADDL(4); +ADDL(5); +ADDL(6); + +H_ATTR +static BOOL +add_to_tuple_6_pt_2(tuple6 *tup, NUM *len, pair2 label) +{ + if (label.x < 0 || label.x >= *len) { + fleprintf("Invalid label = %ld, len = %ld\n", label.x, *len); + goto cleanup; + } + + if (!(tup->initialized)) { + SAFE_CALLOC(tuple5, tup->array, *len, goto cleanup;); + tup->initialized = 1; + } + + if (!((tup->array+label.x)->initialized)) { + SAFE_CALLOC(tuple4, (tup->array+label.x)->array, *(len+1), + goto cleanup;); + (tup->array+label.x)->initialized = 1; + } + + if (label.y < 0 || label.y >= *(len+1)) { + fleprintf("Invalid label = %ld, len = %ld\n", label.y, *(len+1)); + goto cleanup; + } + + if (!(((tup->array+label.x)->array+label.y)->initialized)) { + SAFE_CALLOC(tuple3, + ((tup->array+label.x)->array+label.y)->array, + *(len+1), goto cleanup;); + ((tup->array+label.x)->array+label.y)->initialized = 1; + } + + goto success; + + cleanup: + return 1; + + success: + return 0; +} + +H_ATTR +BOOL +add_to_luple6_pt_2(luple6 *lup, pair2 label) +{ + return add_to_tuple_6_pt_2((tuple6 *) lup, lup->lengths, label); +} + +HP_ATTR +static NUM * +tuple1_find(tuple1 *tup, NUM *len, NUM label) +{ + if (!(tup->initialized)) return NULL; + if (label < 0 || label >= *len) return NULL; + return tup->array+label; +} + +HP_ATTR +static NUM * +tuple2_find(tuple2 *tup, NUM *len, pair2 label) +{ + if (!(tup->initialized)) return NULL; + if (label.x < 0 || label.x >= *len) return NULL; + + return tuple1_find(tup->array+label.x, len+1, label.y); +} + +INDEXT(3, 2); +INDEXT(4, 3); +INDEXT(5, 4); +INDEXT(6, 5); + +INDEXL(2); +INDEXL(3); +INDEXL(4); +INDEXL(5); +INDEXL(6); + +HP_ATTR +static BOOL +tuple5_pf_1(tuple5 *tup, NUM *len, NUM label) +{ + if (!(tup->initialized)) return 0; + if (label < 0 || label >= *len) return 0; + + return (tup->array+label)->initialized; +} + +HP_ATTR +static BOOL +tuple6_pf_2(tuple6 *tup, NUM *len, pair2 label) +{ + if (!(tup->initialized)) return 0; + if (label.x < 0 || label.x >= *len) return 0; + NUM newlabel = label.y; + + return tuple5_pf_1(tup->array+label.x, len+1, newlabel); +} + +INDEXPL(6, 2); + +HP_ATTR +static BOOL +tuple3_pf_1(tuple3 *tup, NUM *len, NUM label) +{ + if (!(tup->initialized)) return 0; + if (label < 0 || label >= *len) return 0; + + return (tup->array+label)->initialized; +} + +HP_ATTR +static BOOL +tuple4_pf_2(tuple4 *tup, NUM *len, pair2 label) +{ + if (!(tup->initialized)) return 0; + if (label.x < 0 || label.x >= *len) return 0; + NUM newlabel = label.y; + + return tuple3_pf_1(tup->array+label.x, len+1, newlabel); +} + +INDEXPT(5, 3, 4, 2); + +INDEXPL(5, 3); + +HP_ATTR +static BOOL +tuple2_pf_1(tuple2 *tup, NUM *len, NUM label) +{ + if (!(tup->initialized)) return 0; + if (label < 0 || label >= *len) return 0; + + return (tup->array+label)->initialized; +} + +HP_ATTR +static BOOL +tuple3_pf_2(tuple3 *tup, NUM *len, pair2 label) +{ + if (!(tup->initialized)) return 0; + if (label.x < 0 || label.x >= *len) return 0; + NUM newlabel = label.y; + + return tuple2_pf_1(tup->array+label.x, len+1, newlabel); +} + +INDEXPL(3, 2); + +LENGHL(2); +LENGHL(3); +LENGHL(4); +LENGHL(5); +LENGHL(6); + +H_ATTR +static void +tuple3_map_2(tuple3 *tup, NUM *len, pair2 label, map_pair3_t fn) +{ + if (!(tup->initialized)) return; + if (label.x < 0 || label.x >= *len) return; + +#define AR (tup->array+label.x) +#define ARR (AR->array+label.y) +#define ARRR (ARR->array+i) + + if (!(AR->initialized)) return; + + if (label.y < 0 || label.y >= *(len+1)) return; + + if (!(ARR->initialized)) return; + + for (NUM i = 0; i < *(len+2); i++) + if (*ARRR) fn((pair3) { label.x, label.y, i }); + +#undef AR +#undef ARR +#undef ARRR + +} + +H_ATTR +static void +tuple6_map_2(tuple6 *tup, NUM *len, pair2 label, + NUM max, map_pair6_t fn) +{ + if (!(tup->initialized)) return; + if (label.x < 0 || label.x >= *len) return; + + tuple5 *AR1 = (tup->array+label.x); + + if (!(AR1->initialized)) return; + + if (label.y < 0 || label.y >= *(len+1)) return; + + tuple4 *AR2 = (AR1->array+label.y); + + if (!(AR2->initialized)) return; + + max = (max < 0 || max+1 >= *(len+5)) ? *(len+5) : max+1; + + for (NUM i = 0; i < *(len+2); i++) { + tuple3 *AR3 = (AR2->array+i); + if (!(AR3->initialized)) continue; + + for (NUM j = 0; j < *(len+3); j++) { + tuple2 *AR4 = (AR3->array+j); + if (!(AR4->initialized)) continue; + + for (NUM k = 0; k < *(len+4); k++) { + tuple1 *AR5 = (AR4->array+k); + if (!(AR5->initialized)) continue; + + for (NUM ell = 0; ell < max; ell++) { + if (*(AR5->array+ell)) + fn((pair6) { label.x, label.y, i, j, k, ell }); + } + } + } + } +} + +H_ATTR +static void +tuple6_map_5(tuple6 *tup, NUM *len, pair5 label, map_pair6_t fn) +{ + if (!(tup->initialized)) return; + if (label.x < 0 || label.x >= *len) return; + + tuple5 *AR1 = (tup->array+label.x); + + if (!(AR1->initialized)) return; + + if (label.y < 0 || label.y >= *(len+1)) return; + + tuple4 *AR2 = (AR1->array+label.y); + + if (!(AR2->initialized)) return; + + if (label.z < 0 || label.z >= *(len+2)) return; + + tuple3 *AR3 = (AR2->array+label.z); + + if (!(AR3->initialized)) return; + + if (label.u < 0 || label.u >= *(len+3)) return; + + tuple2 *AR4 = (AR3->array+label.u); + + if (!(AR4->initialized)) return; + + if (label.v < 0 || label.v >= *(len+4)) return; + + tuple1 *AR5 = (AR4->array+label.v); + + if (!(AR5->initialized)) return; + + for (NUM i = 0; i < *(len+5); i++) { + if (*(AR5->array+i)) + fn((pair6) { label.x, label.y, label.z, label.u, label.v, i }); + } +} + +H_ATTR +void +tuple5_map_3(tuple5 *tup, NUM *len, pair3 label, map_pair5_t fn) +{ + if (!(tup->initialized)) return; + + if (label.x < 0 || label.x >= *len) return; + + tuple4 *AR1 = tup->array+label.x; + + if (!(AR1->initialized)) return; + + if (label.y < 0 || label.y >= *(len+1)) return; + + tuple3 *AR2 = AR1->array+label.y; + + if (!(AR2->initialized)) return; + + if (label.z < 0 || label.z >= *(len+2)) return; + + tuple2 *AR3 = AR2->array+label.z; + + if (!(AR3->initialized)) return; + + for (NUM i = 0; i < *(len+3); i++) { + tuple1 *AR4 = AR3->array+i; + + if (!(AR4->initialized)) continue; + + for (NUM j = 0; j < *(len+4); j++) { + if (*(AR4->array+j)) + fn((pair5) { label.x, label.y, label.z, i, j }); + } + } +} + +H_ATTR +void +luple5_map_3(luple5 *lup, pair3 label, map_pair5_t fn) +{ + tuple5_map_3((tuple5 *) lup, lup->lengths, label, fn); +} + +H_ATTR +void +luple3_map_2(luple3 *lup, pair2 label, map_pair3_t fn) +{ + tuple3_map_2((tuple3 *) lup, lup->lengths, label, fn); +} + +H_ATTR +void +luple6_map_2(luple6 *lup, pair2 label, NUM max, map_pair6_t fn) +{ + tuple6_map_2((tuple6 *) lup, lup->lengths, label, max, fn); +} + +H_ATTR +void +luple6_map_5(luple6 *lup, pair5 label, map_pair6_t fn) +{ + tuple6_map_5((tuple6 *) lup, lup->lengths, label, fn); +} + +H_ATTR +void +luple5_free_1(luple5 *lup, NUM label) +{ + if (lup == NULL) return; + + tuple5 *tuple = (tuple5 *) lup; + + if (!(tuple->initialized)) return; + + if (label < 0 || label >= *(lup->lengths)) return; + + if (!((tuple->array+label)->initialized)) return; + + destroy_tuple4(*(tuple->array+label), lup->lengths+1); + + (tuple->array+label)->initialized = 0; +} + +#undef TUP +#undef LTUP +#undef CONSTRUCT +#undef LONSTRUCT +#undef DESTRUCT +#undef LESTRUCT +#undef ADDT +#undef ADDL +#undef INDEXT +#undef INDEXL +#undef INDEXPT +#undef INDEXPL +#undef LENTHL diff --git a/src/tuple.h b/src/tuple.h new file mode 100644 index 0000000..3c3f0fa --- /dev/null +++ b/src/tuple.h @@ -0,0 +1,116 @@ +#ifndef TUPLE_H +#define TUPLE_H +#include "util.h" +#include "pair.h" + +/* This file implements the tuples that we need. A tuple is an array + of arrays of arrays, ..., ad infinitum. + + Since the lengths of each layer of the arrays are different, but + uniform within each layer, we store the lengths in an array at the + top level. So there are two kinds of data types: one with an array + of lengths and the other without. */ + +#define TUP(N) typedef struct PASTER(tuple, N, _s) PASTER(tuple, N,) +#define LTUP(N) typedef struct PASTER(luple, N, _s) PASTER(luple, N,) + +#define LONSTRUCT(N) PASTER(luple, N,) *PASTER(new, _luple, N) \ + (NUM *len) + +#define LESTRUCT(N) void PASTER(destroy_luple, N,) \ + (PASTER(luple, N,) *tup) + +#define ADDL(N) BOOL PASTER(add_to_luple, N,) \ + (PASTER(luple, N,) *lup, PASTER(pair, N,) label, \ + NUM val) + +#define INDEXL(N) HP_ATTR \ + NUM *PASTER(luple, N, _find) \ + (PASTER(luple, N,) *lup, PASTER(pair, N,) label) + +#define LENTHL(N) P_ATTR NUM *PASTER(luple, N, _len) \ + (PASTER(luple, N,) *lup) + +typedef struct PASTER(tuple, 1, _s) PASTER(tuple, 1,); + +TUP(2); +TUP(3); +TUP(4); +TUP(5); +TUP(6); + +LTUP(2); +LTUP(3); +LTUP(4); +LTUP(5); +LTUP(6); + +tuple4 *new_tuple4(); + +LONSTRUCT(2); +LONSTRUCT(3); +LONSTRUCT(4); +LONSTRUCT(5); +LONSTRUCT(6); + +void destroy_tuple4(tuple4 tup, NUM *len); + +LESTRUCT(2); +LESTRUCT(3); +LESTRUCT(4); +LESTRUCT(5); +LESTRUCT(6); + +ADDL(2); +ADDL(3); +ADDL(4); +ADDL(5); +ADDL(6); + +H_ATTR BOOL add_to_luple6_pt_2(luple6 *lup, pair2 label); + +INDEXL(2); +INDEXL(3); +INDEXL(4); +INDEXL(5); +INDEXL(6); + +LENTHL(2); +LENTHL(3); +LENTHL(4); +LENTHL(5); +LENTHL(6); + +HP_ATTR BOOL luple3_pf_2(luple3 *lup, pair2 label); +HP_ATTR BOOL luple5_pf_3(luple5 *lup, pair3 label); +HP_ATTR BOOL luple6_pf_2(luple6 *lup, pair2 label); + +typedef void (* map_pair3_t) (pair3 label); +typedef void (* map_pair5_t) (pair5 label); +typedef void (* map_pair6_t) (pair6 label); + +H_ATTR void luple3_map_2(luple3 *lup, pair2 label, map_pair3_t fn); + +/* H_ATTR void tuple5_map_3(tuple5 *lup, pair3 label, map_pair5_t fn); */ + +H_ATTR void luple5_map_3(luple5 *lup, pair3 label, map_pair5_t fn); + +H_ATTR void luple5_free_1(luple5 *lup, NUM label); + +/* MAX bounds the last coordinate, i.e. the W coordinate. If -1 then + there is no bound. */ +H_ATTR void luple6_map_2(luple6 *lup, pair2 label, + NUM max, map_pair6_t fn); + +H_ATTR void luple6_map_5(luple6 *tup, pair5 label, map_pair6_t fn); + +#undef LONSTRUCT +#undef LESTRUCT +#undef LTUP +#undef TUP +#undef ADDL +#undef INDEXL +#undef INDEXPL +#undef LENTHL + +#endif diff --git a/src/util.h b/src/util.h index d83e40c..88aee89 100644 --- a/src/util.h +++ b/src/util.h @@ -46,6 +46,14 @@ typedef unsigned char BOOL; } \ } while (0) +#define SAFE_CALLOC(TYPE, VAR, LEN, CLEAN) do { \ + VAR = calloc((LEN), sizeof (TYPE)); \ + if (VAR == NULL) { \ + fleprintf0("Fail to calloc\n"); \ + { CLEAN }; \ + } \ + } while (0) + #define SAFE_REALLOC(TYPE, VAR, LEN, CLEAN) do { \ TYPE *macro_new_var = \ realloc(VAR, sizeof (TYPE) * (LEN)); \ @@ -57,6 +65,16 @@ typedef unsigned char BOOL; } \ } while (0) +/* very convenient macro */ +#define PASTER2(X, Y, Z) X ## Y ## Z +#define PASTER(X, Y, Z) PASTER2(X, Y, Z) + BOOL read_entire_file(const char *file_name, char **str, NUM *len); +/* The purpose of this macro is to be used at the end of a macro + defining a function, so that we can add a semicolon when calling + that macro. I was using the keyword _Static_assert, but that + requires C11, which sounds kind of absurd to me. */ +#define MY_Static_MES(MES) struct STATIC_STRUCT_s + #endif -- cgit v1.2.3-18-g5258