summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Makefile.am8
-rw-r--r--src/cnp.c361
-rw-r--r--src/cnp.h142
-rw-r--r--src/crf.c487
-rw-r--r--src/crf.h163
-rw-r--r--src/test/check_crf.c (renamed from src/test/check_cnp.c)48
6 files changed, 702 insertions, 507 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index e4eae9e..3ec4f19 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -2,9 +2,9 @@ AM_CFLAGS = -Wall -Wextra
noinst_LIBRARIES = libeps.a
libeps_a_SOURCES = grammar.c list.c util.c reader.c str.c \
-utf8.c dfa.c ht.c bsr.c cnp.c \
+utf8.c dfa.c ht.c bsr.c crf.c \
grammar.h list.h util.h reader.h str_partial.h \
-str.h utf8.h dfa.h ht.h bsr.h cnp.h
+str.h utf8.h dfa.h ht.h bsr.h crf.h
libeps_a_CFLAGS = $(AM_CFLAGS) --pedantic
@@ -22,7 +22,7 @@ CLEANFILES = TAGS
# tests
check_PROGRAMS = check_list check_grammar check_reader check_dfa \
-check_ht check_bsr check_cnp
+check_ht check_bsr check_crf
check_list_SOURCES = test/check_list.c list.c
@@ -39,7 +39,7 @@ check_ht_SOURCES = test/check_ht.c ht.c
check_bsr_SOURCES = test/check_bsr.c bsr.c grammar.c list.c util.c \
ht.c utf8.c str.c
-check_cnp_SOURCES = test/check_cnp.c cnp.c grammar.c list.c util.c \
+check_crf_SOURCES = test/check_crf.c crf.c grammar.c list.c util.c \
ht.c utf8.c str.c
TESTS = $(check_PROGRAMS)
diff --git a/src/cnp.c b/src/cnp.c
index fe8212e..d9a4978 100644
--- a/src/cnp.c
+++ b/src/cnp.c
@@ -1,362 +1 @@
-#include <stdio.h>
-#include <string.h>
-#include "util.h"
-#include "list.h"
#include "cnp.h"
-
-struct crf_s {
- ht *table;
-};
-
-crf *
-new_crf()
-{
- crf *crfp = NULL;
- SAFE_MALLOC(crf, crfp, 1, goto cleanup;);
-
- crfp->table = new_ht2(HT_INIT_CAP);
-
- if (crfp->table == NULL) goto cleanup;
-
- return crfp;
-
- cleanup:
-
- if (crfp) free(crfp);
-
- return NULL;
-}
-
-BOOL
-crf_add_node(crf * const restrict crfp, pair2 node)
-{
- if (ht_find(crfp->table, &node)) return 0;
-
- pair2 *p2 = NULL;
-
- NEW_P2(p2, node.x, node.y, return 1;);
-
- ht *value_table = new_ht4(HT_INIT_CAP);
-
- if (value_table == NULL) {
- free(p2);
- return 1;
- }
-
- if (ht_insert(crfp->table, p2, value_table)) {
- fleprintf0("Fail to insert\n");
- destroy_ht(value_table, DESTROY_NONE_SELF);
- free(p2);
- return 1;
- }
-
- return 0;
-}
-
-ht *
-crf_find_node(crf * const restrict crfp, pair2 node)
-{
- return ht_find(crfp->table, &node);
-}
-
-BOOL
-crf_add_edge(crf * const restrict crfp, pair2 source, pair4 label)
-{
- if (ht_find(crfp->table, &source) == NULL) {
- fleprintf0("add the node before adding its edges\n");
- return 1;
- }
-
- if (ht_find(ht_find(crfp->table, &source), &label)) return 0;
-
- pair4 *p = NULL;
-
- NEW_P4(p, label.x, label.y, label.z, label.w, return 1;);
-
- if (ht_insert(ht_find(crfp->table, &source), p, (void*)1)) {
- fleprintf0("Fail to insert\n");
- free(p);
- return 1;
- }
-
- return 0;
-}
-
-void
-destroy_crf(crf * const restrict crfp)
-{
-
-#define SIZE (ht_size(crfp->table))
-#define VALUES ((ht**) ht_values(crfp->table))
-
- for (NUM i = 0; i < SIZE;)
- destroy_ht(*(VALUES+i++), DESTROY_KEY_SELF);
-
- destroy_ht(crfp->table, DESTROY_KEY_SELF);
-
- free(crfp);
-
-#undef SIZE
-#undef VALUES
-
-}
-
-typedef struct procr_array_element_s procr_array_element;
-
-/* The list at index k is the set R_k. */
-struct procr_array_element_s {
- BOOL initialized;
- List *ls;
-};
-
-struct procr_s {
- /* input length */
- NUM len;
- /* an array of lists */
- procr_array_element *array;
- /* minimal grade that is non-empty */
- NUM mg;
-};
-
-typedef struct procu_array_element_s procu_array_element;
-
-/* The table at index k represents the set U_k. Its keys are those
- (X, a, i, j) such that (X, a, i, j, k) belongs to U_k. */
-struct procu_array_element_s {
- BOOL initialized;
- ht *table;
-};
-
-struct procu_s {
- /* input length */
- NUM len;
- /* an array of hash tables */
- procu_array_element *array;
-};
-
-procr *
-new_procr(NUM size, BOOL * const restrict errorp)
-{
- *errorp = 0;
-
- procr *pr = NULL;
-
- SAFE_MALLOC(procr, pr, 1, goto cleanup;);
-
- pr->len = size;
- pr->array = NULL;
-
- SAFE_MALLOC(procr_array_element, pr->array,
- sizeof(procr_array_element) * size,
- goto cleanup;);
-
- memset(pr->array, 0, sizeof(procr_array_element) * size);
-
- goto success;
-
- cleanup:
-
- *errorp = 1;
-
- if (pr && pr->array) free(pr->array);
-
- if (pr) free(pr);
-
- success:
-
- return pr;
-}
-
-procu *
-new_procu(NUM size, BOOL * const restrict errorp)
-{
- *errorp = 0;
-
- procu *pu = NULL;
-
- SAFE_MALLOC(procu, pu, 1, goto cleanup;);
-
- pu->len = size;
- pu->array = NULL;
-
- SAFE_MALLOC(procu_array_element, pu->array,
- sizeof(procu_array_element) * size,
- goto cleanup;);
-
- memset(pu->array, 0, sizeof(procu_array_element) * size);
-
- goto success;
-
- cleanup:
-
- *errorp = 1;
-
- if (pu && pu->array) free(pu->array);
-
- if (pu) free(pu);
-
- success:
-
- return pu;
-}
-
-void
-destroy_procr(procr * const restrict pr)
-{
- for (NUM i = 0; i < pr->len; i++)
- if ((pr->array+i)->initialized)
- destroy_list((pr->array+i)->ls, 1);
-
- free(pr->array);
-
- free(pr);
-}
-
-void
-destroy_procu(procu * const restrict pu)
-{
- for (NUM i = 0; i < pu->len; i++)
- if ((pu->array+i)->initialized)
- destroy_ht((pu->array+i)->table, DESTROY_KEY_SELF);
-
- free(pu->array);
-
- free(pu);
-}
-
-BOOL
-desc_add(procr * const restrict pr, procu * const restrict pu,
- NUM grade, prodecor dec)
-{
- if (grade < 0 || grade >= pu->len) {
- fleprintf("Invalid grade: %ld\n", grade);
- goto cleanup;
- }
-
- pair4 *p4 = NULL;
-
-#define HELE (pu->array+grade)
-#define LELE (pr->array+grade)
-
- if (HELE->initialized) {
- /* If HELE is initialized then LELE should also be initialized. */
- if (ht_find(HELE->table, &dec) != NULL) goto success;
-
- NEW_P4(p4, dec.x, dec.y, dec.z, dec.w, goto cleanup;);
-
- if (ht_insert(HELE->table, p4, (void*)1)) {
- fleprintf0("Fail to insert\n");
- goto cleanup;
- }
-
- NEW_P4(p4, dec.x, dec.y, dec.z, dec.w,
- ht_delete(HELE->table, &dec, DELETE_KEY); goto cleanup;);
-
- if (add_to_list(LELE->ls, p4)) {
- fleprintf0("Fail to add to list\n");
- ht_delete(HELE->table, &dec, DELETE_KEY);
- goto cleanup;
- }
-
- goto success;
- }
-
- HELE->table = new_ht4(HT_INIT_CAP);
-
- if (HELE->table == NULL) {
- fleprintf0("Fail to have new hash table\n");
- goto cleanup;
- }
-
- HELE->initialized = 1;
-
- NEW_P4(p4, dec.x, dec.y, dec.z, dec.w,
- destroy_ht(HELE->table, DESTROY_KEY_SELF);
- HELE->initialized = 0;
- goto cleanup;);
-
- if (ht_insert(HELE->table, p4, (void*)1)) {
- fleprintf0("Fail to insert\n");
- destroy_ht(HELE->table, DESTROY_KEY_SELF);
- HELE->initialized = 0;
- goto cleanup;
- }
-
- LELE->ls = new_list();
-
- if (LELE->ls == NULL) {
- fleprintf0("Fail to have new list\n");
- destroy_ht(HELE->table, DESTROY_KEY_SELF);
- HELE->initialized = 0;
- goto cleanup;
- }
-
- LELE->initialized = 1;
-
- NEW_P4(p4, dec.x, dec.y, dec.z, dec.w,
- destroy_ht(HELE->table, DESTROY_KEY_SELF);
- HELE->initialized = 0;
- destroy_list(LELE->ls, 1);
- LELE->initialized = 0;
- goto cleanup;);
-
- if (add_to_list(LELE->ls, p4)) {
- fleprintf0("Fail to add to list\n");
- destroy_ht(HELE->table, DESTROY_KEY_SELF);
- HELE->initialized = 0;
- destroy_list(LELE->ls, 1);
- LELE->initialized = 0;
- goto cleanup;
- }
-
-#undef HELE
-#undef LELE
-
- goto success;
-
- cleanup:
-
- return 1;
-
- success:
-
- return 0;
-}
-
-prodecor *
-pop_procr(procr * pr, procu * pu, NUM *gradep)
-{
- prodecor *result = NULL;
-
- if (pr->mg >= pr->len) goto success;
-
-#define ELEMENT (pr->array+pr->mg)
-
- if (!(ELEMENT->initialized)) goto reset_mg;
-
- *gradep = pr->mg;
-
- result = pop_from_list(ELEMENT->ls);
-
- if (list_length(ELEMENT->ls) == 0) {
- destroy_ht((pu->array+pr->mg)->table, DESTROY_KEY_SELF);
- (pu->array+pr->mg)->initialized = 0;
- (pu->array+pr->mg)->table = NULL;
-
- destroy_list(ELEMENT->ls, 1);
- ELEMENT->initialized = 0;
- ELEMENT->ls = NULL;
-
- /* REVIEW: Maybe we can just increment the minimal grade, as we
- won't jump over two steps? */
- reset_mg:
- while (++(pr->mg) < pr->len) {
- if (ELEMENT->initialized) break;
- }
- }
-
-#undef ELEMENT
-
- success:
- return result;
-}
diff --git a/src/cnp.h b/src/cnp.h
index c7d3965..5944ca3 100644
--- a/src/cnp.h
+++ b/src/cnp.h
@@ -1,144 +1,4 @@
#ifndef CNP_H
#define CNP_H
-#include "ht.h"
-
-/* This file implements the Clustered Non-terminal Parsing algorithm.
- See the following paper for details:
-
- Derivation representation using binary subtree sets
-
- by
-
- Elizabeth Scott, Adrian Johnstone and L.Thomas van Binsbergen. */
-
-/* First we implement a "Call Return Forest".
-
- A CRF has two types of nodes.
-
- The first type is the non-leaf nodes. It is labelled as
-
- (X, i),
-
- where X is a non-terminal and i is an input position.
-
- The second type is the leaf nodes. It is labelled as
-
- (X, a, b, i),
-
- where X is a non-terminal, a is an index of a rule in the array of
- rules corresponding to X, b is the length of a prefix of the
- right-hand side of the rule corresponding to a, and i is an input
- position. The triple (X, a, b) is termed a "grammar slot" in the
- parlance.
-
- We need constant look-up for both types of nodes. So we implement
- these using hash tables.
-
- For the first type, we implement these as a hash table, whose keys
- are those (X, i) in the set. The values are described below.
-
- Actually we don't need those second type nodes. What we care about
- are the edges between the two types of nodes. So we simply don't
- store the second type nodes at all. We only store edges, or more
- precisely, edge labels. Since we need to know if an edge exists
- efficiently, we implement the edges as a hash table. (This seems
- to be termed a "disctionary of keys" in computer science jargon.)
- So the values of the hash table of the first type node are hash
- tables whose keys are (X, a, b, i) that are in the edge set of the
- first type node, and whose values are meaningless. */
-
-typedef struct crf_s crf;
-
-crf *new_crf();
-
-/* on error return non-zero */
-BOOL crf_add_node(crf * const restrict crfp, pair2 node);
-
-/* if not found return NULL.
-
- if found but the node has no edges return an empty hash table. */
-ht *crf_find_node(crf * const restrict crfp, pair2 node);
-
-BOOL crf_add_edge(crf * const restrict crfp, pair2 source,
- pair4 label);
-
-void destroy_crf(crf * const restrict crfp);
-
-/* We also need to implement the set of pending actions. These are
- triples of the form:
-
- (X, k, j).
-
- Since we need to find all j such that (X, k, j) belongs to the set,
- we implement the set as a hash table whose keys are (X, k) and
- whose values are those j such that (X, k, j) belongs to the set. */
-
-/* TODO: Pending actions */
-
-/* We also implement "process descriptors" for the CNP algorithm. Since
- a process descriptor is of the form
-
- (X := beta . gamma, b, j, k),
-
- where X := beta gamma is a rule and b is the length of beta, we can
- represent a process descriptor as
-
- (X, a, b, j, k).
-
- However, we shall notice that there is a natural "grading" on the
- process descriptors. The k-th grading consists of the descriptors
- with the last element in the tuple equal to k. In the clustered
- nonterminal parsing algorithm, the process descriptors that are
- added as a consequence of dealing with a process descriptor of
- grading k must be of grade greater than or equal to k. This means
- that if all process descriptors of grade k have been processed and
- the descriptors awaiting to be processed all have grades > k, then
- we won't see any process descriptors of grade <= k again. So we
- can actually keep an array of process descriptors that consist of
- descriptors of degree k. This way we don't need to store the k in
- the descriptor. Therefore a process descriptor is represented as
-
- (X, a, i, j).
-
- Since we have no other requirements than almost constant-time
- lookup, we simply represent the set of descriptors using a hash
- table with keys (X, a, i, j).
-
- The main benefit of this is that after we find that all process
- descriptors in R_k have been processed, we can free those
- descriptors in U_k. I think this is a good way to lower the space
- requirements of this algorithm. */
-
-typedef pair4 prodecor;
-
-/* The set named R in the paper */
-typedef struct procr_s procr;
-
-/* The set named U in the paper */
-typedef struct procu_s procu;
-
-/* constructor */
-procr *new_procr(NUM size, BOOL * const restrict errorp);
-procu *new_procu(NUM size, BOOL * const restrict errorp);
-
-/* insert */
-
-/* the function dscAdd in the paper */
-BOOL desc_add(procr * const restrict pr, procu * const restrict pu,
- NUM grade, prodecor dec);
-
-/* pop */
-
-/* gradep will be set to the grade */
-
-/* if all descriptors of grade k have been poped out, then the
- corresponding slot in pu will be freed automatically. */
-
-/* if no more elements are left, return NULL */
-prodecor *pop_procr(procr * pr, procu * pu, NUM *gradep);
-
-/* destructor */
-void destroy_procr(procr * const restrict pr);
-void destroy_procu(procu * const restrict pu);
-
+/* TODO */
#endif
diff --git a/src/crf.c b/src/crf.c
new file mode 100644
index 0000000..dbcb050
--- /dev/null
+++ b/src/crf.c
@@ -0,0 +1,487 @@
+#include <stdio.h>
+#include <string.h>
+#include "util.h"
+#include "list.h"
+#include "crf.h"
+
+struct crf_s {
+ ht *table;
+};
+
+crf *
+new_crf()
+{
+ crf *crfp = NULL;
+ SAFE_MALLOC(crf, crfp, 1, goto cleanup;);
+
+ crfp->table = new_ht2(HT_INIT_CAP);
+
+ if (crfp->table == NULL) goto cleanup;
+
+ return crfp;
+
+ cleanup:
+
+ if (crfp) free(crfp);
+
+ return NULL;
+}
+
+BOOL
+crf_add_node(crf * const restrict crfp, pair2 node)
+{
+ if (ht_find(crfp->table, &node)) return 0;
+
+ pair2 *p2 = NULL;
+
+ NEW_P2(p2, node.x, node.y, return 1;);
+
+ ht *value_table = new_ht4(HT_INIT_CAP);
+
+ if (value_table == NULL) {
+ free(p2);
+ return 1;
+ }
+
+ if (ht_insert(crfp->table, p2, value_table)) {
+ fleprintf0("Fail to insert\n");
+ destroy_ht(value_table, DESTROY_NONE_SELF);
+ free(p2);
+ return 1;
+ }
+
+ return 0;
+}
+
+ht *
+crf_find_node(crf * const restrict crfp, pair2 node)
+{
+ return ht_find(crfp->table, &node);
+}
+
+BOOL
+crf_add_edge(crf * const restrict crfp, pair2 source, pair4 label)
+{
+ if (ht_find(crfp->table, &source) == NULL) {
+ fleprintf0("add the node before adding its edges\n");
+ return 1;
+ }
+
+ if (ht_find(ht_find(crfp->table, &source), &label)) return 0;
+
+ pair4 *p = NULL;
+
+ NEW_P4(p, label.x, label.y, label.z, label.w, return 1;);
+
+ if (ht_insert(ht_find(crfp->table, &source), p, (void*)1)) {
+ fleprintf0("Fail to insert\n");
+ free(p);
+ return 1;
+ }
+
+ return 0;
+}
+
+void
+destroy_crf(crf * const restrict crfp)
+{
+
+#define SIZE (ht_size(crfp->table))
+#define VALUES ((ht**) ht_values(crfp->table))
+
+ for (NUM i = 0; i < SIZE;)
+ destroy_ht(*(VALUES+i++), DESTROY_KEY_SELF);
+
+ destroy_ht(crfp->table, DESTROY_KEY_SELF);
+
+ free(crfp);
+
+#undef SIZE
+#undef VALUES
+
+}
+
+struct spa_s {
+ ht *table;
+};
+
+spa *
+new_spa()
+{
+ spa *spap = NULL;
+ SAFE_MALLOC(spa, spap, 1, return NULL;);
+
+ spap->table = new_ht2(HT_INIT_CAP);
+
+ if (spap->table == NULL) {
+ free(spap);
+ return NULL;
+ }
+
+ return spap;
+}
+
+void
+destroy_spa(spa * const restrict spap)
+{
+ for (NUM i = 0; i < ht_size(spap->table); i++)
+ destroy_ht(*((ht **) ht_values(spap->table)+i), DESTROY_KEY_SELF);
+
+ /* fleprintf0("ho\n"); */
+
+ destroy_ht(spap->table, DESTROY_KEY_SELF);
+
+ free(spap);
+}
+
+BOOL
+spa_insert(spa * const restrict spap, pair3 label)
+{
+ pair2 first_two = (pair2) { 0 }, *p2 = NULL;
+
+ first_two.x = label.x;
+ first_two.y = label.y;
+
+ NUM j = label.z, *np = NULL;
+
+ ht *value_table = NULL;
+
+ if (ht_find(spap->table, &first_two) == NULL) {
+ value_table = new_ht(HT_INIT_CAP, 0);
+
+ if (value_table == NULL) {
+ fleprintf0("Fail to have new hash table\n");
+ goto cleanup;
+ }
+
+ SAFE_MALLOC(NUM, np, 1, goto cleanup;);
+ *np = label.z;
+
+ if (ht_insert(value_table, np, (void*)1)) {
+ fleprintf0("Fail to insert\n");
+ goto cleanup;
+ }
+
+ np = NULL;
+
+ NEW_P2(p2, label.x, label.y, goto cleanup;);
+
+ if (ht_insert(spap->table, p2, value_table)) {
+ fleprintf0("Fail to insert\n");
+ goto cleanup;
+ }
+
+ p2 = NULL;
+
+ goto success;
+ }
+
+ if (ht_find(ht_find(spap->table, &first_two), &j) == NULL) {
+ SAFE_MALLOC(NUM, np, 1, goto cleanup;);
+ *np = label.z;
+
+ if (ht_insert(ht_find(spap->table, &first_two), np, (void*)1)) {
+ fleprintf0("Fail to insert\n");
+ goto cleanup;
+ }
+
+ np = NULL;
+ }
+
+ goto success;
+
+ cleanup:
+ if (value_table) destroy_ht(value_table, DESTROY_KEY_SELF);
+ if (p2) free(p2);
+ if (np) free(np);
+
+ return 1;
+
+ success:
+ return 0;
+}
+
+P_ATTR
+BOOL
+spa_belong(CCR_MOD(spa *) spap, pair3 label)
+{
+ pair2 first_two = (pair2) { 0 };
+
+ first_two.x = label.x;
+ first_two.y = label.y;
+
+ if (ht_find(spap->table, &first_two) == NULL) return 0;
+
+ NUM j = label.z;
+
+ if (ht_find(ht_find(spap->table, &first_two), &j) == NULL) return 0;
+
+ return 1;
+}
+
+P_ATTR
+ht *
+spa_find(CCR_MOD(spa *) spap, pair2 label)
+{
+ return ht_find(spap->table, &label);
+}
+
+typedef struct procr_array_element_s procr_array_element;
+
+/* The list at index k is the set R_k. */
+struct procr_array_element_s {
+ BOOL initialized;
+ List *ls;
+};
+
+struct procr_s {
+ /* input length */
+ NUM len;
+ /* an array of lists */
+ procr_array_element *array;
+ /* minimal grade that is non-empty */
+ NUM mg;
+};
+
+typedef struct procu_array_element_s procu_array_element;
+
+/* The table at index k represents the set U_k. Its keys are those
+ (X, a, i, j) such that (X, a, i, j, k) belongs to U_k. */
+struct procu_array_element_s {
+ BOOL initialized;
+ ht *table;
+};
+
+struct procu_s {
+ /* input length */
+ NUM len;
+ /* an array of hash tables */
+ procu_array_element *array;
+};
+
+procr *
+new_procr(NUM size, BOOL * const restrict errorp)
+{
+ *errorp = 0;
+
+ procr *pr = NULL;
+
+ SAFE_MALLOC(procr, pr, 1, goto cleanup;);
+
+ pr->len = size;
+ pr->array = NULL;
+
+ SAFE_MALLOC(procr_array_element, pr->array,
+ sizeof(procr_array_element) * size,
+ goto cleanup;);
+
+ memset(pr->array, 0, sizeof(procr_array_element) * size);
+
+ goto success;
+
+ cleanup:
+
+ *errorp = 1;
+
+ if (pr && pr->array) free(pr->array);
+
+ if (pr) free(pr);
+
+ success:
+
+ return pr;
+}
+
+procu *
+new_procu(NUM size, BOOL * const restrict errorp)
+{
+ *errorp = 0;
+
+ procu *pu = NULL;
+
+ SAFE_MALLOC(procu, pu, 1, goto cleanup;);
+
+ pu->len = size;
+ pu->array = NULL;
+
+ SAFE_MALLOC(procu_array_element, pu->array,
+ sizeof(procu_array_element) * size,
+ goto cleanup;);
+
+ memset(pu->array, 0, sizeof(procu_array_element) * size);
+
+ goto success;
+
+ cleanup:
+
+ *errorp = 1;
+
+ if (pu && pu->array) free(pu->array);
+
+ if (pu) free(pu);
+
+ success:
+
+ return pu;
+}
+
+void
+destroy_procr(procr * const restrict pr)
+{
+ for (NUM i = 0; i < pr->len; i++)
+ if ((pr->array+i)->initialized)
+ destroy_list((pr->array+i)->ls, 1);
+
+ free(pr->array);
+
+ free(pr);
+}
+
+void
+destroy_procu(procu * const restrict pu)
+{
+ for (NUM i = 0; i < pu->len; i++)
+ if ((pu->array+i)->initialized)
+ destroy_ht((pu->array+i)->table, DESTROY_KEY_SELF);
+
+ free(pu->array);
+
+ free(pu);
+}
+
+BOOL
+desc_add(procr * const restrict pr, procu * const restrict pu,
+ NUM grade, prodecor dec)
+{
+ if (grade < 0 || grade >= pu->len) {
+ fleprintf("Invalid grade: %ld\n", grade);
+ goto cleanup;
+ }
+
+ pair4 *p4 = NULL;
+
+#define HELE (pu->array+grade)
+#define LELE (pr->array+grade)
+
+ if (HELE->initialized) {
+ /* If HELE is initialized then LELE should also be initialized. */
+ if (ht_find(HELE->table, &dec) != NULL) goto success;
+
+ NEW_P4(p4, dec.x, dec.y, dec.z, dec.w, goto cleanup;);
+
+ if (ht_insert(HELE->table, p4, (void*)1)) {
+ fleprintf0("Fail to insert\n");
+ goto cleanup;
+ }
+
+ NEW_P4(p4, dec.x, dec.y, dec.z, dec.w,
+ ht_delete(HELE->table, &dec, DELETE_KEY); goto cleanup;);
+
+ if (add_to_list(LELE->ls, p4)) {
+ fleprintf0("Fail to add to list\n");
+ ht_delete(HELE->table, &dec, DELETE_KEY);
+ goto cleanup;
+ }
+
+ goto success;
+ }
+
+ HELE->table = new_ht4(HT_INIT_CAP);
+
+ if (HELE->table == NULL) {
+ fleprintf0("Fail to have new hash table\n");
+ goto cleanup;
+ }
+
+ HELE->initialized = 1;
+
+ NEW_P4(p4, dec.x, dec.y, dec.z, dec.w,
+ destroy_ht(HELE->table, DESTROY_KEY_SELF);
+ HELE->initialized = 0;
+ goto cleanup;);
+
+ if (ht_insert(HELE->table, p4, (void*)1)) {
+ fleprintf0("Fail to insert\n");
+ destroy_ht(HELE->table, DESTROY_KEY_SELF);
+ HELE->initialized = 0;
+ goto cleanup;
+ }
+
+ LELE->ls = new_list();
+
+ if (LELE->ls == NULL) {
+ fleprintf0("Fail to have new list\n");
+ destroy_ht(HELE->table, DESTROY_KEY_SELF);
+ HELE->initialized = 0;
+ goto cleanup;
+ }
+
+ LELE->initialized = 1;
+
+ NEW_P4(p4, dec.x, dec.y, dec.z, dec.w,
+ destroy_ht(HELE->table, DESTROY_KEY_SELF);
+ HELE->initialized = 0;
+ destroy_list(LELE->ls, 1);
+ LELE->initialized = 0;
+ goto cleanup;);
+
+ if (add_to_list(LELE->ls, p4)) {
+ fleprintf0("Fail to add to list\n");
+ destroy_ht(HELE->table, DESTROY_KEY_SELF);
+ HELE->initialized = 0;
+ destroy_list(LELE->ls, 1);
+ LELE->initialized = 0;
+ goto cleanup;
+ }
+
+#undef HELE
+#undef LELE
+
+ goto success;
+
+ cleanup:
+
+ return 1;
+
+ success:
+
+ return 0;
+}
+
+prodecor *
+pop_procr(procr * pr, procu * pu, NUM *gradep)
+{
+ prodecor *result = NULL;
+
+ if (pr->mg >= pr->len) goto success;
+
+#define ELEMENT (pr->array+pr->mg)
+
+ if (!(ELEMENT->initialized)) goto reset_mg;
+
+ *gradep = pr->mg;
+
+ result = pop_from_list(ELEMENT->ls);
+
+ if (list_length(ELEMENT->ls) == 0) {
+ destroy_ht((pu->array+pr->mg)->table, DESTROY_KEY_SELF);
+ (pu->array+pr->mg)->initialized = 0;
+ (pu->array+pr->mg)->table = NULL;
+
+ destroy_list(ELEMENT->ls, 1);
+ ELEMENT->initialized = 0;
+ ELEMENT->ls = NULL;
+
+ /* REVIEW: Maybe we can just increment the minimal grade, as we
+ won't jump over two steps? */
+ reset_mg:
+ while (++(pr->mg) < pr->len) {
+ if (ELEMENT->initialized) break;
+ }
+ }
+
+#undef ELEMENT
+
+ success:
+ return result;
+}
diff --git a/src/crf.h b/src/crf.h
new file mode 100644
index 0000000..7ff5a3f
--- /dev/null
+++ b/src/crf.h
@@ -0,0 +1,163 @@
+#ifndef CNP_H
+#define CNP_H
+#include "ht.h"
+
+/* This file implements some support functions for the Clustered
+ Non-terminal Parsing algorithm. See the following paper for
+ details:
+
+ Derivation representation using binary subtree sets
+
+ by
+
+ Elizabeth Scott, Adrian Johnstone and L.Thomas van Binsbergen. */
+
+/* First we implement a "Call Return Forest".
+
+ A CRF has two types of nodes.
+
+ The first type is the non-leaf nodes. It is labelled as
+
+ (X, i),
+
+ where X is a non-terminal and i is an input position.
+
+ The second type is the leaf nodes. It is labelled as
+
+ (X, a, b, i),
+
+ where X is a non-terminal, a is an index of a rule in the array of
+ rules corresponding to X, b is the length of a prefix of the
+ right-hand side of the rule corresponding to a, and i is an input
+ position. The triple (X, a, b) is termed a "grammar slot" in the
+ parlance.
+
+ We need constant look-up for both types of nodes. So we implement
+ these using hash tables.
+
+ For the first type, we implement these as a hash table, whose keys
+ are those (X, i) in the set. The values are described below.
+
+ Actually we don't need those second type nodes. What we care about
+ are the edges between the two types of nodes. So we simply don't
+ store the second type nodes at all. We only store edges, or more
+ precisely, edge labels. Since we need to know if an edge exists
+ efficiently, we implement the edges as a hash table. (This seems
+ to be termed a "disctionary of keys" in computer science jargon.)
+ So the values of the hash table of the first type node are hash
+ tables whose keys are (X, a, b, i) that are in the edge set of the
+ first type node, and whose values are meaningless. */
+
+typedef struct crf_s crf;
+
+crf *new_crf();
+
+/* on error return non-zero */
+BOOL crf_add_node(crf * const restrict crfp, pair2 node);
+
+/* if not found return NULL.
+
+ if found but the node has no edges return an empty hash table. */
+ht *crf_find_node(crf * const restrict crfp, pair2 node);
+
+BOOL crf_add_edge(crf * const restrict crfp, pair2 source,
+ pair4 label);
+
+void destroy_crf(crf * const restrict crfp);
+
+/* We also need to implement the set of pending actions. These are
+ triples of the form:
+
+ (X, k, j).
+
+ Since we need to find all j such that (X, k, j) belongs to the set,
+ we implement the set as a hash table whose keys are (X, k) and
+ whose values are those j such that (X, k, j) belongs to the set.
+ Precisely speaking, the values are hash tables whose keys are those
+ j and whose values are meaningless non-zero values. */
+
+/* Set of Pending Actions */
+typedef struct spa_s spa;
+
+spa *new_spa();
+
+void destroy_spa(spa * const restrict spap);
+
+/* On error return non-zero */
+BOOL spa_insert(spa * const restrict spap, pair3 label);
+
+/* Find if a triple belongs to the set */
+P_ATTR BOOL spa_belong(CCR_MOD(spa *) spap, pair3 label);
+
+/* Find if for a pair (X, k) there exist some j such that (X, k, j)
+ belong to the set. If so, return the hash table whose keys are
+ those j; otherwise return NULL. */
+P_ATTR ht *spa_find(CCR_MOD(spa *) spap, pair2 label);
+
+/* We also implement "process descriptors" for the CNP algorithm. Since
+ a process descriptor is of the form
+
+ (X := beta . gamma, b, j, k),
+
+ where X := beta gamma is a rule and b is the length of beta, we can
+ represent a process descriptor as
+
+ (X, a, b, j, k).
+
+ However, we shall notice that there is a natural "grading" on the
+ process descriptors. The k-th grading consists of the descriptors
+ with the last element in the tuple equal to k. In the clustered
+ nonterminal parsing algorithm, the process descriptors that are
+ added as a consequence of dealing with a process descriptor of
+ grading k must be of grade greater than or equal to k. This means
+ that if all process descriptors of grade k have been processed and
+ the descriptors awaiting to be processed all have grades > k, then
+ we won't see any process descriptors of grade <= k again. So we
+ can actually keep an array of process descriptors that consist of
+ descriptors of degree k. This way we don't need to store the k in
+ the descriptor. Therefore a process descriptor is represented as
+
+ (X, a, i, j).
+
+ Since we have no other requirements than almost constant-time
+ lookup, we simply represent the set of descriptors using a hash
+ table with keys (X, a, i, j).
+
+ The main benefit of this is that after we find that all process
+ descriptors in R_k have been processed, we can free those
+ descriptors in U_k. I think this is a good way to lower the space
+ requirements of this algorithm. */
+
+typedef pair4 prodecor;
+
+/* The set named R in the paper */
+typedef struct procr_s procr;
+
+/* The set named U in the paper */
+typedef struct procu_s procu;
+
+/* constructor */
+procr *new_procr(NUM size, BOOL * const restrict errorp);
+procu *new_procu(NUM size, BOOL * const restrict errorp);
+
+/* insert */
+
+/* the function dscAdd in the paper */
+BOOL desc_add(procr * const restrict pr, procu * const restrict pu,
+ NUM grade, prodecor dec);
+
+/* pop */
+
+/* gradep will be set to the grade */
+
+/* if all descriptors of grade k have been poped out, then the
+ corresponding slot in pu will be freed automatically. */
+
+/* if no more elements are left, return NULL */
+prodecor *pop_procr(procr * pr, procu * pu, NUM *gradep);
+
+/* destructor */
+void destroy_procr(procr * const restrict pr);
+void destroy_procu(procu * const restrict pu);
+
+#endif
diff --git a/src/test/check_cnp.c b/src/test/check_crf.c
index 2fbaac7..d99a838 100644
--- a/src/test/check_cnp.c
+++ b/src/test/check_crf.c
@@ -1,6 +1,6 @@
#include <stdio.h>
#include "../util.h"
-#include "../cnp.h"
+#include "../crf.h"
int
main(int UNUSED argc, char ** UNUSED argv)
@@ -8,8 +8,12 @@ main(int UNUSED argc, char ** UNUSED argv)
procr *pr = NULL;
procu *pu = NULL;
+ spa *spap = NULL;
+
prodecor *prod = NULL;
+ /* check crf */
+
crf *crfp = new_crf();
pair2 p = { 0 };
p.x = 0;
@@ -57,6 +61,47 @@ main(int UNUSED argc, char ** UNUSED argv)
#undef TABLE
#undef KEY
+ /* check spa */
+
+ spap = new_spa();
+
+ if (spap == NULL) {
+ fleprintf0("fail to have new spa\n");
+ goto cleanup;
+ }
+
+ pair3 p3 = (pair3) { 0 };
+ p3.x = 0;
+ p3.y = 1;
+ p3.z = 10;
+
+ if (spa_insert(spap, p3)) {
+ fleprintf0("Fail to insert\n");
+ goto cleanup;
+ }
+
+ printf("Successfully inserted into SPA\n");
+
+ if (spa_belong(spap, p3)) {
+ printf("Successfully found p3\n");
+ } else {
+ fleprintf0("Fail to find p3\n");
+ goto cleanup;
+ }
+
+ pair2 first_two = (pair2) { 0 };
+ first_two.x = 0;
+ first_two.y = 1;
+
+ if (spa_find(spap, first_two)) {
+ printf("Successfully found p2\n");
+ } else {
+ fleprintf0("Fail to find p2\n");
+ goto cleanup;
+ }
+
+ /* check prodecor */
+
BOOL error = 0;
pr = new_procr(10, &error);
@@ -97,6 +142,7 @@ main(int UNUSED argc, char ** UNUSED argv)
if (pr) destroy_procr(pr);
if (pu) destroy_procu(pu);
+ if (spap) destroy_spa(spap);
if (prod) free(prod);
return 0;