diff options
author | JSDurand <mmemmew@gmail.com> | 2022-01-28 23:21:00 +0800 |
---|---|---|
committer | JSDurand <mmemmew@gmail.com> | 2022-01-28 23:22:22 +0800 |
commit | e555c88b8107caf886da229444c2bed1aaef6c2c (patch) | |
tree | 239894914a881800e244f10090fa97822b0352a3 /src | |
parent | 8ace61933130416a0b8a6b250de681a606439f48 (diff) |
rename cnp to crf
THat file implements support functions for the CNP algorithm, not the
algorithm itself.
Diffstat (limited to 'src')
-rw-r--r-- | src/Makefile.am | 8 | ||||
-rw-r--r-- | src/cnp.c | 361 | ||||
-rw-r--r-- | src/cnp.h | 142 | ||||
-rw-r--r-- | src/crf.c | 487 | ||||
-rw-r--r-- | src/crf.h | 163 | ||||
-rw-r--r-- | src/test/check_crf.c (renamed from src/test/check_cnp.c) | 48 |
6 files changed, 702 insertions, 507 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index e4eae9e..3ec4f19 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -2,9 +2,9 @@ AM_CFLAGS = -Wall -Wextra noinst_LIBRARIES = libeps.a libeps_a_SOURCES = grammar.c list.c util.c reader.c str.c \ -utf8.c dfa.c ht.c bsr.c cnp.c \ +utf8.c dfa.c ht.c bsr.c crf.c \ grammar.h list.h util.h reader.h str_partial.h \ -str.h utf8.h dfa.h ht.h bsr.h cnp.h +str.h utf8.h dfa.h ht.h bsr.h crf.h libeps_a_CFLAGS = $(AM_CFLAGS) --pedantic @@ -22,7 +22,7 @@ CLEANFILES = TAGS # tests check_PROGRAMS = check_list check_grammar check_reader check_dfa \ -check_ht check_bsr check_cnp +check_ht check_bsr check_crf check_list_SOURCES = test/check_list.c list.c @@ -39,7 +39,7 @@ check_ht_SOURCES = test/check_ht.c ht.c check_bsr_SOURCES = test/check_bsr.c bsr.c grammar.c list.c util.c \ ht.c utf8.c str.c -check_cnp_SOURCES = test/check_cnp.c cnp.c grammar.c list.c util.c \ +check_crf_SOURCES = test/check_crf.c crf.c grammar.c list.c util.c \ ht.c utf8.c str.c TESTS = $(check_PROGRAMS) @@ -1,362 +1 @@ -#include <stdio.h> -#include <string.h> -#include "util.h" -#include "list.h" #include "cnp.h" - -struct crf_s { - ht *table; -}; - -crf * -new_crf() -{ - crf *crfp = NULL; - SAFE_MALLOC(crf, crfp, 1, goto cleanup;); - - crfp->table = new_ht2(HT_INIT_CAP); - - if (crfp->table == NULL) goto cleanup; - - return crfp; - - cleanup: - - if (crfp) free(crfp); - - return NULL; -} - -BOOL -crf_add_node(crf * const restrict crfp, pair2 node) -{ - if (ht_find(crfp->table, &node)) return 0; - - pair2 *p2 = NULL; - - NEW_P2(p2, node.x, node.y, return 1;); - - ht *value_table = new_ht4(HT_INIT_CAP); - - if (value_table == NULL) { - free(p2); - return 1; - } - - if (ht_insert(crfp->table, p2, value_table)) { - fleprintf0("Fail to insert\n"); - destroy_ht(value_table, DESTROY_NONE_SELF); - free(p2); - return 1; - } - - return 0; -} - -ht * -crf_find_node(crf * const restrict crfp, pair2 node) -{ - return ht_find(crfp->table, &node); -} - -BOOL -crf_add_edge(crf * const restrict crfp, pair2 source, pair4 label) -{ - if (ht_find(crfp->table, &source) == NULL) { - fleprintf0("add the node before adding its edges\n"); - return 1; - } - - if (ht_find(ht_find(crfp->table, &source), &label)) return 0; - - pair4 *p = NULL; - - NEW_P4(p, label.x, label.y, label.z, label.w, return 1;); - - if (ht_insert(ht_find(crfp->table, &source), p, (void*)1)) { - fleprintf0("Fail to insert\n"); - free(p); - return 1; - } - - return 0; -} - -void -destroy_crf(crf * const restrict crfp) -{ - -#define SIZE (ht_size(crfp->table)) -#define VALUES ((ht**) ht_values(crfp->table)) - - for (NUM i = 0; i < SIZE;) - destroy_ht(*(VALUES+i++), DESTROY_KEY_SELF); - - destroy_ht(crfp->table, DESTROY_KEY_SELF); - - free(crfp); - -#undef SIZE -#undef VALUES - -} - -typedef struct procr_array_element_s procr_array_element; - -/* The list at index k is the set R_k. */ -struct procr_array_element_s { - BOOL initialized; - List *ls; -}; - -struct procr_s { - /* input length */ - NUM len; - /* an array of lists */ - procr_array_element *array; - /* minimal grade that is non-empty */ - NUM mg; -}; - -typedef struct procu_array_element_s procu_array_element; - -/* The table at index k represents the set U_k. Its keys are those - (X, a, i, j) such that (X, a, i, j, k) belongs to U_k. */ -struct procu_array_element_s { - BOOL initialized; - ht *table; -}; - -struct procu_s { - /* input length */ - NUM len; - /* an array of hash tables */ - procu_array_element *array; -}; - -procr * -new_procr(NUM size, BOOL * const restrict errorp) -{ - *errorp = 0; - - procr *pr = NULL; - - SAFE_MALLOC(procr, pr, 1, goto cleanup;); - - pr->len = size; - pr->array = NULL; - - SAFE_MALLOC(procr_array_element, pr->array, - sizeof(procr_array_element) * size, - goto cleanup;); - - memset(pr->array, 0, sizeof(procr_array_element) * size); - - goto success; - - cleanup: - - *errorp = 1; - - if (pr && pr->array) free(pr->array); - - if (pr) free(pr); - - success: - - return pr; -} - -procu * -new_procu(NUM size, BOOL * const restrict errorp) -{ - *errorp = 0; - - procu *pu = NULL; - - SAFE_MALLOC(procu, pu, 1, goto cleanup;); - - pu->len = size; - pu->array = NULL; - - SAFE_MALLOC(procu_array_element, pu->array, - sizeof(procu_array_element) * size, - goto cleanup;); - - memset(pu->array, 0, sizeof(procu_array_element) * size); - - goto success; - - cleanup: - - *errorp = 1; - - if (pu && pu->array) free(pu->array); - - if (pu) free(pu); - - success: - - return pu; -} - -void -destroy_procr(procr * const restrict pr) -{ - for (NUM i = 0; i < pr->len; i++) - if ((pr->array+i)->initialized) - destroy_list((pr->array+i)->ls, 1); - - free(pr->array); - - free(pr); -} - -void -destroy_procu(procu * const restrict pu) -{ - for (NUM i = 0; i < pu->len; i++) - if ((pu->array+i)->initialized) - destroy_ht((pu->array+i)->table, DESTROY_KEY_SELF); - - free(pu->array); - - free(pu); -} - -BOOL -desc_add(procr * const restrict pr, procu * const restrict pu, - NUM grade, prodecor dec) -{ - if (grade < 0 || grade >= pu->len) { - fleprintf("Invalid grade: %ld\n", grade); - goto cleanup; - } - - pair4 *p4 = NULL; - -#define HELE (pu->array+grade) -#define LELE (pr->array+grade) - - if (HELE->initialized) { - /* If HELE is initialized then LELE should also be initialized. */ - if (ht_find(HELE->table, &dec) != NULL) goto success; - - NEW_P4(p4, dec.x, dec.y, dec.z, dec.w, goto cleanup;); - - if (ht_insert(HELE->table, p4, (void*)1)) { - fleprintf0("Fail to insert\n"); - goto cleanup; - } - - NEW_P4(p4, dec.x, dec.y, dec.z, dec.w, - ht_delete(HELE->table, &dec, DELETE_KEY); goto cleanup;); - - if (add_to_list(LELE->ls, p4)) { - fleprintf0("Fail to add to list\n"); - ht_delete(HELE->table, &dec, DELETE_KEY); - goto cleanup; - } - - goto success; - } - - HELE->table = new_ht4(HT_INIT_CAP); - - if (HELE->table == NULL) { - fleprintf0("Fail to have new hash table\n"); - goto cleanup; - } - - HELE->initialized = 1; - - NEW_P4(p4, dec.x, dec.y, dec.z, dec.w, - destroy_ht(HELE->table, DESTROY_KEY_SELF); - HELE->initialized = 0; - goto cleanup;); - - if (ht_insert(HELE->table, p4, (void*)1)) { - fleprintf0("Fail to insert\n"); - destroy_ht(HELE->table, DESTROY_KEY_SELF); - HELE->initialized = 0; - goto cleanup; - } - - LELE->ls = new_list(); - - if (LELE->ls == NULL) { - fleprintf0("Fail to have new list\n"); - destroy_ht(HELE->table, DESTROY_KEY_SELF); - HELE->initialized = 0; - goto cleanup; - } - - LELE->initialized = 1; - - NEW_P4(p4, dec.x, dec.y, dec.z, dec.w, - destroy_ht(HELE->table, DESTROY_KEY_SELF); - HELE->initialized = 0; - destroy_list(LELE->ls, 1); - LELE->initialized = 0; - goto cleanup;); - - if (add_to_list(LELE->ls, p4)) { - fleprintf0("Fail to add to list\n"); - destroy_ht(HELE->table, DESTROY_KEY_SELF); - HELE->initialized = 0; - destroy_list(LELE->ls, 1); - LELE->initialized = 0; - goto cleanup; - } - -#undef HELE -#undef LELE - - goto success; - - cleanup: - - return 1; - - success: - - return 0; -} - -prodecor * -pop_procr(procr * pr, procu * pu, NUM *gradep) -{ - prodecor *result = NULL; - - if (pr->mg >= pr->len) goto success; - -#define ELEMENT (pr->array+pr->mg) - - if (!(ELEMENT->initialized)) goto reset_mg; - - *gradep = pr->mg; - - result = pop_from_list(ELEMENT->ls); - - if (list_length(ELEMENT->ls) == 0) { - destroy_ht((pu->array+pr->mg)->table, DESTROY_KEY_SELF); - (pu->array+pr->mg)->initialized = 0; - (pu->array+pr->mg)->table = NULL; - - destroy_list(ELEMENT->ls, 1); - ELEMENT->initialized = 0; - ELEMENT->ls = NULL; - - /* REVIEW: Maybe we can just increment the minimal grade, as we - won't jump over two steps? */ - reset_mg: - while (++(pr->mg) < pr->len) { - if (ELEMENT->initialized) break; - } - } - -#undef ELEMENT - - success: - return result; -} @@ -1,144 +1,4 @@ #ifndef CNP_H #define CNP_H -#include "ht.h" - -/* This file implements the Clustered Non-terminal Parsing algorithm. - See the following paper for details: - - Derivation representation using binary subtree sets - - by - - Elizabeth Scott, Adrian Johnstone and L.Thomas van Binsbergen. */ - -/* First we implement a "Call Return Forest". - - A CRF has two types of nodes. - - The first type is the non-leaf nodes. It is labelled as - - (X, i), - - where X is a non-terminal and i is an input position. - - The second type is the leaf nodes. It is labelled as - - (X, a, b, i), - - where X is a non-terminal, a is an index of a rule in the array of - rules corresponding to X, b is the length of a prefix of the - right-hand side of the rule corresponding to a, and i is an input - position. The triple (X, a, b) is termed a "grammar slot" in the - parlance. - - We need constant look-up for both types of nodes. So we implement - these using hash tables. - - For the first type, we implement these as a hash table, whose keys - are those (X, i) in the set. The values are described below. - - Actually we don't need those second type nodes. What we care about - are the edges between the two types of nodes. So we simply don't - store the second type nodes at all. We only store edges, or more - precisely, edge labels. Since we need to know if an edge exists - efficiently, we implement the edges as a hash table. (This seems - to be termed a "disctionary of keys" in computer science jargon.) - So the values of the hash table of the first type node are hash - tables whose keys are (X, a, b, i) that are in the edge set of the - first type node, and whose values are meaningless. */ - -typedef struct crf_s crf; - -crf *new_crf(); - -/* on error return non-zero */ -BOOL crf_add_node(crf * const restrict crfp, pair2 node); - -/* if not found return NULL. - - if found but the node has no edges return an empty hash table. */ -ht *crf_find_node(crf * const restrict crfp, pair2 node); - -BOOL crf_add_edge(crf * const restrict crfp, pair2 source, - pair4 label); - -void destroy_crf(crf * const restrict crfp); - -/* We also need to implement the set of pending actions. These are - triples of the form: - - (X, k, j). - - Since we need to find all j such that (X, k, j) belongs to the set, - we implement the set as a hash table whose keys are (X, k) and - whose values are those j such that (X, k, j) belongs to the set. */ - -/* TODO: Pending actions */ - -/* We also implement "process descriptors" for the CNP algorithm. Since - a process descriptor is of the form - - (X := beta . gamma, b, j, k), - - where X := beta gamma is a rule and b is the length of beta, we can - represent a process descriptor as - - (X, a, b, j, k). - - However, we shall notice that there is a natural "grading" on the - process descriptors. The k-th grading consists of the descriptors - with the last element in the tuple equal to k. In the clustered - nonterminal parsing algorithm, the process descriptors that are - added as a consequence of dealing with a process descriptor of - grading k must be of grade greater than or equal to k. This means - that if all process descriptors of grade k have been processed and - the descriptors awaiting to be processed all have grades > k, then - we won't see any process descriptors of grade <= k again. So we - can actually keep an array of process descriptors that consist of - descriptors of degree k. This way we don't need to store the k in - the descriptor. Therefore a process descriptor is represented as - - (X, a, i, j). - - Since we have no other requirements than almost constant-time - lookup, we simply represent the set of descriptors using a hash - table with keys (X, a, i, j). - - The main benefit of this is that after we find that all process - descriptors in R_k have been processed, we can free those - descriptors in U_k. I think this is a good way to lower the space - requirements of this algorithm. */ - -typedef pair4 prodecor; - -/* The set named R in the paper */ -typedef struct procr_s procr; - -/* The set named U in the paper */ -typedef struct procu_s procu; - -/* constructor */ -procr *new_procr(NUM size, BOOL * const restrict errorp); -procu *new_procu(NUM size, BOOL * const restrict errorp); - -/* insert */ - -/* the function dscAdd in the paper */ -BOOL desc_add(procr * const restrict pr, procu * const restrict pu, - NUM grade, prodecor dec); - -/* pop */ - -/* gradep will be set to the grade */ - -/* if all descriptors of grade k have been poped out, then the - corresponding slot in pu will be freed automatically. */ - -/* if no more elements are left, return NULL */ -prodecor *pop_procr(procr * pr, procu * pu, NUM *gradep); - -/* destructor */ -void destroy_procr(procr * const restrict pr); -void destroy_procu(procu * const restrict pu); - +/* TODO */ #endif diff --git a/src/crf.c b/src/crf.c new file mode 100644 index 0000000..dbcb050 --- /dev/null +++ b/src/crf.c @@ -0,0 +1,487 @@ +#include <stdio.h> +#include <string.h> +#include "util.h" +#include "list.h" +#include "crf.h" + +struct crf_s { + ht *table; +}; + +crf * +new_crf() +{ + crf *crfp = NULL; + SAFE_MALLOC(crf, crfp, 1, goto cleanup;); + + crfp->table = new_ht2(HT_INIT_CAP); + + if (crfp->table == NULL) goto cleanup; + + return crfp; + + cleanup: + + if (crfp) free(crfp); + + return NULL; +} + +BOOL +crf_add_node(crf * const restrict crfp, pair2 node) +{ + if (ht_find(crfp->table, &node)) return 0; + + pair2 *p2 = NULL; + + NEW_P2(p2, node.x, node.y, return 1;); + + ht *value_table = new_ht4(HT_INIT_CAP); + + if (value_table == NULL) { + free(p2); + return 1; + } + + if (ht_insert(crfp->table, p2, value_table)) { + fleprintf0("Fail to insert\n"); + destroy_ht(value_table, DESTROY_NONE_SELF); + free(p2); + return 1; + } + + return 0; +} + +ht * +crf_find_node(crf * const restrict crfp, pair2 node) +{ + return ht_find(crfp->table, &node); +} + +BOOL +crf_add_edge(crf * const restrict crfp, pair2 source, pair4 label) +{ + if (ht_find(crfp->table, &source) == NULL) { + fleprintf0("add the node before adding its edges\n"); + return 1; + } + + if (ht_find(ht_find(crfp->table, &source), &label)) return 0; + + pair4 *p = NULL; + + NEW_P4(p, label.x, label.y, label.z, label.w, return 1;); + + if (ht_insert(ht_find(crfp->table, &source), p, (void*)1)) { + fleprintf0("Fail to insert\n"); + free(p); + return 1; + } + + return 0; +} + +void +destroy_crf(crf * const restrict crfp) +{ + +#define SIZE (ht_size(crfp->table)) +#define VALUES ((ht**) ht_values(crfp->table)) + + for (NUM i = 0; i < SIZE;) + destroy_ht(*(VALUES+i++), DESTROY_KEY_SELF); + + destroy_ht(crfp->table, DESTROY_KEY_SELF); + + free(crfp); + +#undef SIZE +#undef VALUES + +} + +struct spa_s { + ht *table; +}; + +spa * +new_spa() +{ + spa *spap = NULL; + SAFE_MALLOC(spa, spap, 1, return NULL;); + + spap->table = new_ht2(HT_INIT_CAP); + + if (spap->table == NULL) { + free(spap); + return NULL; + } + + return spap; +} + +void +destroy_spa(spa * const restrict spap) +{ + for (NUM i = 0; i < ht_size(spap->table); i++) + destroy_ht(*((ht **) ht_values(spap->table)+i), DESTROY_KEY_SELF); + + /* fleprintf0("ho\n"); */ + + destroy_ht(spap->table, DESTROY_KEY_SELF); + + free(spap); +} + +BOOL +spa_insert(spa * const restrict spap, pair3 label) +{ + pair2 first_two = (pair2) { 0 }, *p2 = NULL; + + first_two.x = label.x; + first_two.y = label.y; + + NUM j = label.z, *np = NULL; + + ht *value_table = NULL; + + if (ht_find(spap->table, &first_two) == NULL) { + value_table = new_ht(HT_INIT_CAP, 0); + + if (value_table == NULL) { + fleprintf0("Fail to have new hash table\n"); + goto cleanup; + } + + SAFE_MALLOC(NUM, np, 1, goto cleanup;); + *np = label.z; + + if (ht_insert(value_table, np, (void*)1)) { + fleprintf0("Fail to insert\n"); + goto cleanup; + } + + np = NULL; + + NEW_P2(p2, label.x, label.y, goto cleanup;); + + if (ht_insert(spap->table, p2, value_table)) { + fleprintf0("Fail to insert\n"); + goto cleanup; + } + + p2 = NULL; + + goto success; + } + + if (ht_find(ht_find(spap->table, &first_two), &j) == NULL) { + SAFE_MALLOC(NUM, np, 1, goto cleanup;); + *np = label.z; + + if (ht_insert(ht_find(spap->table, &first_two), np, (void*)1)) { + fleprintf0("Fail to insert\n"); + goto cleanup; + } + + np = NULL; + } + + goto success; + + cleanup: + if (value_table) destroy_ht(value_table, DESTROY_KEY_SELF); + if (p2) free(p2); + if (np) free(np); + + return 1; + + success: + return 0; +} + +P_ATTR +BOOL +spa_belong(CCR_MOD(spa *) spap, pair3 label) +{ + pair2 first_two = (pair2) { 0 }; + + first_two.x = label.x; + first_two.y = label.y; + + if (ht_find(spap->table, &first_two) == NULL) return 0; + + NUM j = label.z; + + if (ht_find(ht_find(spap->table, &first_two), &j) == NULL) return 0; + + return 1; +} + +P_ATTR +ht * +spa_find(CCR_MOD(spa *) spap, pair2 label) +{ + return ht_find(spap->table, &label); +} + +typedef struct procr_array_element_s procr_array_element; + +/* The list at index k is the set R_k. */ +struct procr_array_element_s { + BOOL initialized; + List *ls; +}; + +struct procr_s { + /* input length */ + NUM len; + /* an array of lists */ + procr_array_element *array; + /* minimal grade that is non-empty */ + NUM mg; +}; + +typedef struct procu_array_element_s procu_array_element; + +/* The table at index k represents the set U_k. Its keys are those + (X, a, i, j) such that (X, a, i, j, k) belongs to U_k. */ +struct procu_array_element_s { + BOOL initialized; + ht *table; +}; + +struct procu_s { + /* input length */ + NUM len; + /* an array of hash tables */ + procu_array_element *array; +}; + +procr * +new_procr(NUM size, BOOL * const restrict errorp) +{ + *errorp = 0; + + procr *pr = NULL; + + SAFE_MALLOC(procr, pr, 1, goto cleanup;); + + pr->len = size; + pr->array = NULL; + + SAFE_MALLOC(procr_array_element, pr->array, + sizeof(procr_array_element) * size, + goto cleanup;); + + memset(pr->array, 0, sizeof(procr_array_element) * size); + + goto success; + + cleanup: + + *errorp = 1; + + if (pr && pr->array) free(pr->array); + + if (pr) free(pr); + + success: + + return pr; +} + +procu * +new_procu(NUM size, BOOL * const restrict errorp) +{ + *errorp = 0; + + procu *pu = NULL; + + SAFE_MALLOC(procu, pu, 1, goto cleanup;); + + pu->len = size; + pu->array = NULL; + + SAFE_MALLOC(procu_array_element, pu->array, + sizeof(procu_array_element) * size, + goto cleanup;); + + memset(pu->array, 0, sizeof(procu_array_element) * size); + + goto success; + + cleanup: + + *errorp = 1; + + if (pu && pu->array) free(pu->array); + + if (pu) free(pu); + + success: + + return pu; +} + +void +destroy_procr(procr * const restrict pr) +{ + for (NUM i = 0; i < pr->len; i++) + if ((pr->array+i)->initialized) + destroy_list((pr->array+i)->ls, 1); + + free(pr->array); + + free(pr); +} + +void +destroy_procu(procu * const restrict pu) +{ + for (NUM i = 0; i < pu->len; i++) + if ((pu->array+i)->initialized) + destroy_ht((pu->array+i)->table, DESTROY_KEY_SELF); + + free(pu->array); + + free(pu); +} + +BOOL +desc_add(procr * const restrict pr, procu * const restrict pu, + NUM grade, prodecor dec) +{ + if (grade < 0 || grade >= pu->len) { + fleprintf("Invalid grade: %ld\n", grade); + goto cleanup; + } + + pair4 *p4 = NULL; + +#define HELE (pu->array+grade) +#define LELE (pr->array+grade) + + if (HELE->initialized) { + /* If HELE is initialized then LELE should also be initialized. */ + if (ht_find(HELE->table, &dec) != NULL) goto success; + + NEW_P4(p4, dec.x, dec.y, dec.z, dec.w, goto cleanup;); + + if (ht_insert(HELE->table, p4, (void*)1)) { + fleprintf0("Fail to insert\n"); + goto cleanup; + } + + NEW_P4(p4, dec.x, dec.y, dec.z, dec.w, + ht_delete(HELE->table, &dec, DELETE_KEY); goto cleanup;); + + if (add_to_list(LELE->ls, p4)) { + fleprintf0("Fail to add to list\n"); + ht_delete(HELE->table, &dec, DELETE_KEY); + goto cleanup; + } + + goto success; + } + + HELE->table = new_ht4(HT_INIT_CAP); + + if (HELE->table == NULL) { + fleprintf0("Fail to have new hash table\n"); + goto cleanup; + } + + HELE->initialized = 1; + + NEW_P4(p4, dec.x, dec.y, dec.z, dec.w, + destroy_ht(HELE->table, DESTROY_KEY_SELF); + HELE->initialized = 0; + goto cleanup;); + + if (ht_insert(HELE->table, p4, (void*)1)) { + fleprintf0("Fail to insert\n"); + destroy_ht(HELE->table, DESTROY_KEY_SELF); + HELE->initialized = 0; + goto cleanup; + } + + LELE->ls = new_list(); + + if (LELE->ls == NULL) { + fleprintf0("Fail to have new list\n"); + destroy_ht(HELE->table, DESTROY_KEY_SELF); + HELE->initialized = 0; + goto cleanup; + } + + LELE->initialized = 1; + + NEW_P4(p4, dec.x, dec.y, dec.z, dec.w, + destroy_ht(HELE->table, DESTROY_KEY_SELF); + HELE->initialized = 0; + destroy_list(LELE->ls, 1); + LELE->initialized = 0; + goto cleanup;); + + if (add_to_list(LELE->ls, p4)) { + fleprintf0("Fail to add to list\n"); + destroy_ht(HELE->table, DESTROY_KEY_SELF); + HELE->initialized = 0; + destroy_list(LELE->ls, 1); + LELE->initialized = 0; + goto cleanup; + } + +#undef HELE +#undef LELE + + goto success; + + cleanup: + + return 1; + + success: + + return 0; +} + +prodecor * +pop_procr(procr * pr, procu * pu, NUM *gradep) +{ + prodecor *result = NULL; + + if (pr->mg >= pr->len) goto success; + +#define ELEMENT (pr->array+pr->mg) + + if (!(ELEMENT->initialized)) goto reset_mg; + + *gradep = pr->mg; + + result = pop_from_list(ELEMENT->ls); + + if (list_length(ELEMENT->ls) == 0) { + destroy_ht((pu->array+pr->mg)->table, DESTROY_KEY_SELF); + (pu->array+pr->mg)->initialized = 0; + (pu->array+pr->mg)->table = NULL; + + destroy_list(ELEMENT->ls, 1); + ELEMENT->initialized = 0; + ELEMENT->ls = NULL; + + /* REVIEW: Maybe we can just increment the minimal grade, as we + won't jump over two steps? */ + reset_mg: + while (++(pr->mg) < pr->len) { + if (ELEMENT->initialized) break; + } + } + +#undef ELEMENT + + success: + return result; +} diff --git a/src/crf.h b/src/crf.h new file mode 100644 index 0000000..7ff5a3f --- /dev/null +++ b/src/crf.h @@ -0,0 +1,163 @@ +#ifndef CNP_H +#define CNP_H +#include "ht.h" + +/* This file implements some support functions for the Clustered + Non-terminal Parsing algorithm. See the following paper for + details: + + Derivation representation using binary subtree sets + + by + + Elizabeth Scott, Adrian Johnstone and L.Thomas van Binsbergen. */ + +/* First we implement a "Call Return Forest". + + A CRF has two types of nodes. + + The first type is the non-leaf nodes. It is labelled as + + (X, i), + + where X is a non-terminal and i is an input position. + + The second type is the leaf nodes. It is labelled as + + (X, a, b, i), + + where X is a non-terminal, a is an index of a rule in the array of + rules corresponding to X, b is the length of a prefix of the + right-hand side of the rule corresponding to a, and i is an input + position. The triple (X, a, b) is termed a "grammar slot" in the + parlance. + + We need constant look-up for both types of nodes. So we implement + these using hash tables. + + For the first type, we implement these as a hash table, whose keys + are those (X, i) in the set. The values are described below. + + Actually we don't need those second type nodes. What we care about + are the edges between the two types of nodes. So we simply don't + store the second type nodes at all. We only store edges, or more + precisely, edge labels. Since we need to know if an edge exists + efficiently, we implement the edges as a hash table. (This seems + to be termed a "disctionary of keys" in computer science jargon.) + So the values of the hash table of the first type node are hash + tables whose keys are (X, a, b, i) that are in the edge set of the + first type node, and whose values are meaningless. */ + +typedef struct crf_s crf; + +crf *new_crf(); + +/* on error return non-zero */ +BOOL crf_add_node(crf * const restrict crfp, pair2 node); + +/* if not found return NULL. + + if found but the node has no edges return an empty hash table. */ +ht *crf_find_node(crf * const restrict crfp, pair2 node); + +BOOL crf_add_edge(crf * const restrict crfp, pair2 source, + pair4 label); + +void destroy_crf(crf * const restrict crfp); + +/* We also need to implement the set of pending actions. These are + triples of the form: + + (X, k, j). + + Since we need to find all j such that (X, k, j) belongs to the set, + we implement the set as a hash table whose keys are (X, k) and + whose values are those j such that (X, k, j) belongs to the set. + Precisely speaking, the values are hash tables whose keys are those + j and whose values are meaningless non-zero values. */ + +/* Set of Pending Actions */ +typedef struct spa_s spa; + +spa *new_spa(); + +void destroy_spa(spa * const restrict spap); + +/* On error return non-zero */ +BOOL spa_insert(spa * const restrict spap, pair3 label); + +/* Find if a triple belongs to the set */ +P_ATTR BOOL spa_belong(CCR_MOD(spa *) spap, pair3 label); + +/* Find if for a pair (X, k) there exist some j such that (X, k, j) + belong to the set. If so, return the hash table whose keys are + those j; otherwise return NULL. */ +P_ATTR ht *spa_find(CCR_MOD(spa *) spap, pair2 label); + +/* We also implement "process descriptors" for the CNP algorithm. Since + a process descriptor is of the form + + (X := beta . gamma, b, j, k), + + where X := beta gamma is a rule and b is the length of beta, we can + represent a process descriptor as + + (X, a, b, j, k). + + However, we shall notice that there is a natural "grading" on the + process descriptors. The k-th grading consists of the descriptors + with the last element in the tuple equal to k. In the clustered + nonterminal parsing algorithm, the process descriptors that are + added as a consequence of dealing with a process descriptor of + grading k must be of grade greater than or equal to k. This means + that if all process descriptors of grade k have been processed and + the descriptors awaiting to be processed all have grades > k, then + we won't see any process descriptors of grade <= k again. So we + can actually keep an array of process descriptors that consist of + descriptors of degree k. This way we don't need to store the k in + the descriptor. Therefore a process descriptor is represented as + + (X, a, i, j). + + Since we have no other requirements than almost constant-time + lookup, we simply represent the set of descriptors using a hash + table with keys (X, a, i, j). + + The main benefit of this is that after we find that all process + descriptors in R_k have been processed, we can free those + descriptors in U_k. I think this is a good way to lower the space + requirements of this algorithm. */ + +typedef pair4 prodecor; + +/* The set named R in the paper */ +typedef struct procr_s procr; + +/* The set named U in the paper */ +typedef struct procu_s procu; + +/* constructor */ +procr *new_procr(NUM size, BOOL * const restrict errorp); +procu *new_procu(NUM size, BOOL * const restrict errorp); + +/* insert */ + +/* the function dscAdd in the paper */ +BOOL desc_add(procr * const restrict pr, procu * const restrict pu, + NUM grade, prodecor dec); + +/* pop */ + +/* gradep will be set to the grade */ + +/* if all descriptors of grade k have been poped out, then the + corresponding slot in pu will be freed automatically. */ + +/* if no more elements are left, return NULL */ +prodecor *pop_procr(procr * pr, procu * pu, NUM *gradep); + +/* destructor */ +void destroy_procr(procr * const restrict pr); +void destroy_procu(procu * const restrict pu); + +#endif diff --git a/src/test/check_cnp.c b/src/test/check_crf.c index 2fbaac7..d99a838 100644 --- a/src/test/check_cnp.c +++ b/src/test/check_crf.c @@ -1,6 +1,6 @@ #include <stdio.h> #include "../util.h" -#include "../cnp.h" +#include "../crf.h" int main(int UNUSED argc, char ** UNUSED argv) @@ -8,8 +8,12 @@ main(int UNUSED argc, char ** UNUSED argv) procr *pr = NULL; procu *pu = NULL; + spa *spap = NULL; + prodecor *prod = NULL; + /* check crf */ + crf *crfp = new_crf(); pair2 p = { 0 }; p.x = 0; @@ -57,6 +61,47 @@ main(int UNUSED argc, char ** UNUSED argv) #undef TABLE #undef KEY + /* check spa */ + + spap = new_spa(); + + if (spap == NULL) { + fleprintf0("fail to have new spa\n"); + goto cleanup; + } + + pair3 p3 = (pair3) { 0 }; + p3.x = 0; + p3.y = 1; + p3.z = 10; + + if (spa_insert(spap, p3)) { + fleprintf0("Fail to insert\n"); + goto cleanup; + } + + printf("Successfully inserted into SPA\n"); + + if (spa_belong(spap, p3)) { + printf("Successfully found p3\n"); + } else { + fleprintf0("Fail to find p3\n"); + goto cleanup; + } + + pair2 first_two = (pair2) { 0 }; + first_two.x = 0; + first_two.y = 1; + + if (spa_find(spap, first_two)) { + printf("Successfully found p2\n"); + } else { + fleprintf0("Fail to find p2\n"); + goto cleanup; + } + + /* check prodecor */ + BOOL error = 0; pr = new_procr(10, &error); @@ -97,6 +142,7 @@ main(int UNUSED argc, char ** UNUSED argv) if (pr) destroy_procr(pr); if (pu) destroy_procu(pu); + if (spap) destroy_spa(spap); if (prod) free(prod); return 0; |