rename cnp to crf

THat file implements support functions for the CNP algorithm, not the algorithm itself.
author: JSDurand <mmemmew@gmail.com> 2022-01-28 23:21:00 +0800
committer: JSDurand <mmemmew@gmail.com> 2022-01-28 23:22:22 +0800
commit: e555c88b8107caf886da229444c2bed1aaef6c2c (patch)
tree: 239894914a881800e244f10090fa97822b0352a3
parent: 8ace61933130416a0b8a6b250de681a606439f48 (diff)
6 files changed, 702 insertions, 507 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index e4eae9e..3ec4f19 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -2,9 +2,9 @@ AM_CFLAGS = -Wall -Wextra
 
 noinst_LIBRARIES = libeps.a
 libeps_a_SOURCES = grammar.c list.c util.c reader.c str.c \
-utf8.c dfa.c ht.c bsr.c cnp.c \
+utf8.c dfa.c ht.c bsr.c crf.c \
 grammar.h list.h util.h reader.h str_partial.h \
-str.h utf8.h dfa.h ht.h bsr.h cnp.h
+str.h utf8.h dfa.h ht.h bsr.h crf.h
 
 libeps_a_CFLAGS = $(AM_CFLAGS) --pedantic
 
@@ -22,7 +22,7 @@ CLEANFILES = TAGS
 # tests
 
 check_PROGRAMS = check_list check_grammar check_reader check_dfa \
-check_ht check_bsr check_cnp
+check_ht check_bsr check_crf
 
 check_list_SOURCES = test/check_list.c list.c
 
@@ -39,7 +39,7 @@ check_ht_SOURCES = test/check_ht.c ht.c
 check_bsr_SOURCES = test/check_bsr.c bsr.c grammar.c list.c util.c \
 ht.c utf8.c str.c
 
-check_cnp_SOURCES = test/check_cnp.c cnp.c grammar.c list.c util.c \
+check_crf_SOURCES = test/check_crf.c crf.c grammar.c list.c util.c \
 ht.c utf8.c str.c
 
 TESTS = $(check_PROGRAMS)
diff --git a/src/cnp.c b/src/cnp.c
index fe8212e..d9a4978 100644
--- a/src/cnp.c
+++ b/src/cnp.c
@@ -1,362 +1 @@
-#include <stdio.h>
-#include <string.h>
-#include "util.h"
-#include "list.h"
 #include "cnp.h"
-
-struct crf_s {
-  ht *table;
-};
-
-crf *
-new_crf()
-{
-  crf *crfp = NULL;
-  SAFE_MALLOC(crf, crfp, 1, goto cleanup;);
-
-  crfp->table = new_ht2(HT_INIT_CAP);
-
-  if (crfp->table == NULL) goto cleanup;
-
-  return crfp;
-
- cleanup:
-
-  if (crfp) free(crfp);
-
-  return NULL;
-}
-
-BOOL
-crf_add_node(crf * const restrict crfp, pair2 node)
-{
-  if (ht_find(crfp->table, &node)) return 0;
-
-  pair2 *p2 = NULL;
-
-  NEW_P2(p2, node.x, node.y, return 1;);
-
-  ht *value_table = new_ht4(HT_INIT_CAP);
-
-  if (value_table == NULL) {
-    free(p2);
-    return 1;
-  }
-
-  if (ht_insert(crfp->table, p2, value_table)) {
-    fleprintf0("Fail to insert\n");
-    destroy_ht(value_table, DESTROY_NONE_SELF);
-    free(p2);
-    return 1;
-  }
-
-  return 0;
-}
-
-ht *
-crf_find_node(crf * const restrict crfp, pair2 node)
-{
-  return ht_find(crfp->table, &node);
-}
-
-BOOL
-crf_add_edge(crf * const restrict crfp, pair2 source, pair4 label)
-{
-  if (ht_find(crfp->table, &source) == NULL) {
-    fleprintf0("add the node before adding its edges\n");
-    return 1;
-  }
-
-  if (ht_find(ht_find(crfp->table, &source), &label)) return 0;
-
-  pair4 *p = NULL;
-
-  NEW_P4(p, label.x, label.y, label.z, label.w, return 1;);
-
-  if (ht_insert(ht_find(crfp->table, &source), p, (void*)1)) {
-    fleprintf0("Fail to insert\n");
-    free(p);
-    return 1;
-  }
-
-  return 0;
-}
-
-void
-destroy_crf(crf * const restrict crfp)
-{
-
-#define SIZE (ht_size(crfp->table))
-#define VALUES ((ht**) ht_values(crfp->table))
-
-  for (NUM i = 0; i < SIZE;)
-    destroy_ht(*(VALUES+i++), DESTROY_KEY_SELF);
-
-  destroy_ht(crfp->table, DESTROY_KEY_SELF);
-
-  free(crfp);
-
-#undef SIZE
-#undef VALUES
-
-}
-
-typedef struct procr_array_element_s procr_array_element;
-
-/* The list at index k is the set R_k. */
-struct procr_array_element_s {
-  BOOL initialized;
-  List *ls;
-};
-
-struct procr_s {
-  /* input length */
-  NUM len;
-  /* an array of lists */
-  procr_array_element *array;
-  /* minimal grade that is non-empty */
-  NUM mg;
-};
-
-typedef struct procu_array_element_s procu_array_element;
-
-/* The table at index k represents the set U_k.  Its keys are those
-   (X, a, i, j) such that (X, a, i, j, k) belongs to U_k. */
-struct procu_array_element_s {
-  BOOL initialized;
-  ht *table;
-};
-
-struct procu_s {
-  /* input length */
-  NUM len;
-  /* an array of hash tables */
-  procu_array_element *array;
-};
-
-procr *
-new_procr(NUM size, BOOL * const restrict errorp)
-{
-  *errorp = 0;
-
-  procr *pr = NULL;
-
-  SAFE_MALLOC(procr, pr, 1, goto cleanup;);
-
-  pr->len = size;
-  pr->array = NULL;
-
-  SAFE_MALLOC(procr_array_element, pr->array,
-              sizeof(procr_array_element) * size,
-              goto cleanup;);
-
-  memset(pr->array, 0, sizeof(procr_array_element) * size);
-
-  goto success;
-
- cleanup:
-
-  *errorp = 1;
-
-  if (pr && pr->array) free(pr->array);
-
-  if (pr) free(pr);
-
- success:
-
-  return pr;
-}
-
-procu *
-new_procu(NUM size, BOOL * const restrict errorp)
-{
-  *errorp = 0;
-
-  procu *pu = NULL;
-
-  SAFE_MALLOC(procu, pu, 1, goto cleanup;);
-
-  pu->len = size;
-  pu->array = NULL;
-
-  SAFE_MALLOC(procu_array_element, pu->array,
-              sizeof(procu_array_element) * size,
-              goto cleanup;);
-
-  memset(pu->array, 0, sizeof(procu_array_element) * size);
-
-  goto success;
-
- cleanup:
-
-  *errorp = 1;
-
-  if (pu && pu->array) free(pu->array);
-
-  if (pu) free(pu);
-
- success:
-
-  return pu;
-}
-
-void
-destroy_procr(procr * const restrict pr)
-{
-  for (NUM i = 0; i < pr->len; i++)
-    if ((pr->array+i)->initialized)
-      destroy_list((pr->array+i)->ls, 1);
-
-  free(pr->array);
-
-  free(pr);
-}
-
-void
-destroy_procu(procu * const restrict pu)
-{
-  for (NUM i = 0; i < pu->len; i++)
-    if ((pu->array+i)->initialized)
-      destroy_ht((pu->array+i)->table, DESTROY_KEY_SELF);
-
-  free(pu->array);
-
-  free(pu);
-}
-
-BOOL
-desc_add(procr * const restrict pr, procu * const restrict pu,
-         NUM grade, prodecor dec)
-{
-  if (grade < 0 || grade >= pu->len) {
-    fleprintf("Invalid grade: %ld\n", grade);
-    goto cleanup;
-  }
-
-  pair4 *p4 = NULL;
-
-#define HELE (pu->array+grade)
-#define LELE (pr->array+grade)
-
-  if (HELE->initialized) {
-    /* If HELE is initialized then LELE should also be initialized. */
-    if (ht_find(HELE->table, &dec) != NULL) goto success;
-
-    NEW_P4(p4, dec.x, dec.y, dec.z, dec.w, goto cleanup;);
-
-    if (ht_insert(HELE->table, p4, (void*)1)) {
-      fleprintf0("Fail to insert\n");
-      goto cleanup;
-    }
-
-    NEW_P4(p4, dec.x, dec.y, dec.z, dec.w,
-           ht_delete(HELE->table, &dec, DELETE_KEY); goto cleanup;);
-
-    if (add_to_list(LELE->ls, p4)) {
-      fleprintf0("Fail to add to list\n");
-      ht_delete(HELE->table, &dec, DELETE_KEY);
-      goto cleanup;
-    }
-
-    goto success;
-  }
-
-  HELE->table = new_ht4(HT_INIT_CAP);
-
-  if (HELE->table == NULL) {
-    fleprintf0("Fail to have new hash table\n");
-    goto cleanup;
-  }
-
-  HELE->initialized = 1;
-
-  NEW_P4(p4, dec.x, dec.y, dec.z, dec.w,
-         destroy_ht(HELE->table, DESTROY_KEY_SELF);
-         HELE->initialized = 0;
-         goto cleanup;);
-
-  if (ht_insert(HELE->table, p4, (void*)1)) {
-    fleprintf0("Fail to insert\n");
-    destroy_ht(HELE->table, DESTROY_KEY_SELF);
-    HELE->initialized = 0;
-    goto cleanup;
-  }
-
-  LELE->ls = new_list();
-
-  if (LELE->ls == NULL) {
-    fleprintf0("Fail to have new list\n");
-    destroy_ht(HELE->table, DESTROY_KEY_SELF);
-    HELE->initialized = 0;
-    goto cleanup;
-  }
-
-  LELE->initialized = 1;
-
-  NEW_P4(p4, dec.x, dec.y, dec.z, dec.w,
-         destroy_ht(HELE->table, DESTROY_KEY_SELF);
-         HELE->initialized = 0;
-         destroy_list(LELE->ls, 1);
-         LELE->initialized = 0;
-         goto cleanup;);
-
-  if (add_to_list(LELE->ls, p4)) {
-    fleprintf0("Fail to add to list\n");
-    destroy_ht(HELE->table, DESTROY_KEY_SELF);
-    HELE->initialized = 0;
-    destroy_list(LELE->ls, 1);
-    LELE->initialized = 0;
-    goto cleanup;
-  }
-
-#undef HELE
-#undef LELE
-
-  goto success;
-
- cleanup:
-
-  return 1;
-
- success:
-
-  return 0;
-}
-
-prodecor *
-pop_procr(procr * pr, procu * pu, NUM *gradep)
-{
-  prodecor *result = NULL;
-
-  if (pr->mg >= pr->len) goto success;
-
-#define ELEMENT (pr->array+pr->mg)
-
-  if (!(ELEMENT->initialized)) goto reset_mg;
-
-  *gradep = pr->mg;
-
-  result = pop_from_list(ELEMENT->ls);
-
-  if (list_length(ELEMENT->ls) == 0) {
-    destroy_ht((pu->array+pr->mg)->table, DESTROY_KEY_SELF);
-    (pu->array+pr->mg)->initialized = 0;
-    (pu->array+pr->mg)->table = NULL;
-
-    destroy_list(ELEMENT->ls, 1);
-    ELEMENT->initialized = 0;
-    ELEMENT->ls = NULL;
-
-    /* REVIEW: Maybe we can just increment the minimal grade, as we
-       won't jump over two steps? */
-  reset_mg:
-    while (++(pr->mg) < pr->len) {
-      if (ELEMENT->initialized) break;
-    }
-  }
-
-#undef ELEMENT
-
- success:
-  return result;
-}
diff --git a/src/cnp.h b/src/cnp.h
index c7d3965..5944ca3 100644
--- a/src/cnp.h
+++ b/src/cnp.h
@@ -1,144 +1,4 @@
 #ifndef CNP_H
 #define CNP_H
-#include "ht.h"
-
-/* This file implements the Clustered Non-terminal Parsing algorithm.
-   See the following paper for details:
-
-   Derivation representation using binary subtree sets
-
-   by
-
-   Elizabeth Scott, Adrian Johnstone and L.Thomas van Binsbergen. */
-
-/* First we implement a "Call Return Forest".
-
-   A CRF has two types of nodes.
-
-   The first type is the non-leaf nodes.  It is labelled as
-
-   (X, i),
-
-   where X is a non-terminal and i is an input position.
-
-   The second type is the leaf nodes.  It is labelled as
-
-   (X, a, b, i),
-
-   where X is a non-terminal, a is an index of a rule in the array of
-   rules corresponding to X, b is the length of a prefix of the
-   right-hand side of the rule corresponding to a, and i is an input
-   position.  The triple (X, a, b) is termed a "grammar slot" in the
-   parlance.
-
-   We need constant look-up for both types of nodes.  So we implement
-   these using hash tables.
-
-   For the first type, we implement these as a hash table, whose keys
-   are those (X, i) in the set.  The values are described below.
-
-   Actually we don't need those second type nodes.  What we care about
-   are the edges between the two types of nodes.  So we simply don't
-   store the second type nodes at all.  We only store edges, or more
-   precisely, edge labels.  Since we need to know if an edge exists
-   efficiently, we implement the edges as a hash table.  (This seems
-   to be termed a "disctionary of keys" in computer science jargon.)
-   So the values of the hash table of the first type node are hash
-   tables whose keys are (X, a, b, i) that are in the edge set of the
-   first type node, and whose values are meaningless. */
-
-typedef struct crf_s crf;
-
-crf *new_crf();
-
-/* on error return non-zero */
-BOOL crf_add_node(crf * const restrict crfp, pair2 node);
-
-/* if not found return NULL.
-
-   if found but the node has no edges return an empty hash table. */
-ht *crf_find_node(crf * const restrict crfp, pair2 node);
-
-BOOL crf_add_edge(crf * const restrict crfp, pair2 source,
-                  pair4 label);
-
-void destroy_crf(crf * const restrict crfp);
-
-/* We also need to implement the set of pending actions.  These are
-   triples of the form:
-
-   (X, k, j).
-
-   Since we need to find all j such that (X, k, j) belongs to the set,
-   we implement the set as a hash table whose keys are (X, k) and
-   whose values are those j such that (X, k, j) belongs to the set. */
-
-/* TODO: Pending actions */
-
-/* We also implement "process descriptors" for the CNP algorithm.  Since
-   a process descriptor is of the form
-
-   (X := beta . gamma, b, j, k),
-
-   where X := beta gamma is a rule and b is the length of beta, we can
-   represent a process descriptor as
-
-   (X, a, b, j, k).
-
-   However, we shall notice that there is a natural "grading" on the
-   process descriptors.  The k-th grading consists of the descriptors
-   with the last element in the tuple equal to k.  In the clustered
-   nonterminal parsing algorithm, the process descriptors that are
-   added as a consequence of dealing with a process descriptor of
-   grading k must be of grade greater than or equal to k.  This means
-   that if all process descriptors of grade k have been processed and
-   the descriptors awaiting to be processed all have grades > k, then
-   we won't see any process descriptors of grade <= k again.  So we
-   can actually keep an array of process descriptors that consist of
-   descriptors of degree k.  This way we don't need to store the k in
-   the descriptor.  Therefore a process descriptor is represented as
-
-   (X, a, i, j).
-
-   Since we have no other requirements than almost constant-time
-   lookup, we simply represent the set of descriptors using a hash
-   table with keys (X, a, i, j).
-
-   The main benefit of this is that after we find that all process
-   descriptors in R_k have been processed, we can free those
-   descriptors in U_k.  I think this is a good way to lower the space
-   requirements of this algorithm. */
-
-typedef pair4 prodecor;
-
-/* The set named R in the paper */
-typedef struct procr_s procr;
-
-/* The set named U in the paper */
-typedef struct procu_s procu;
-
-/* constructor */
-procr *new_procr(NUM size, BOOL * const restrict errorp);
-procu *new_procu(NUM size, BOOL * const restrict errorp);
-
-/* insert */
-
-/* the function dscAdd in the paper */
-BOOL desc_add(procr * const restrict pr, procu * const restrict pu,
-              NUM grade, prodecor dec);
-
-/* pop */
-
-/* gradep will be set to the grade */
-
-/* if all descriptors of grade k have been poped out, then the
-   corresponding slot in pu will be freed automatically. */
-
-/* if no more elements are left, return NULL */
-prodecor *pop_procr(procr * pr, procu * pu, NUM *gradep);
-
-/* destructor */
-void destroy_procr(procr * const restrict pr);
-void destroy_procu(procu * const restrict pu);
-
+/* TODO */
 #endif
diff --git a/src/crf.c b/src/crf.c
new file mode 100644
index 0000000..dbcb050
--- /dev/null
+++ b/src/crf.c
@@ -0,0 +1,487 @@
+#include <stdio.h>
+#include <string.h>
+#include "util.h"
+#include "list.h"
+#include "crf.h"
+
+struct crf_s {
+  ht *table;
+};
+
+crf *
+new_crf()
+{
+  crf *crfp = NULL;
+  SAFE_MALLOC(crf, crfp, 1, goto cleanup;);
+
+  crfp->table = new_ht2(HT_INIT_CAP);
+
+  if (crfp->table == NULL) goto cleanup;
+
+  return crfp;
+
+ cleanup:
+
+  if (crfp) free(crfp);
+
+  return NULL;
+}
+
+BOOL
+crf_add_node(crf * const restrict crfp, pair2 node)
+{
+  if (ht_find(crfp->table, &node)) return 0;
+
+  pair2 *p2 = NULL;
+
+  NEW_P2(p2, node.x, node.y, return 1;);
+
+  ht *value_table = new_ht4(HT_INIT_CAP);
+
+  if (value_table == NULL) {
+    free(p2);
+    return 1;
+  }
+
+  if (ht_insert(crfp->table, p2, value_table)) {
+    fleprintf0("Fail to insert\n");
+    destroy_ht(value_table, DESTROY_NONE_SELF);
+    free(p2);
+    return 1;
+  }
+
+  return 0;
+}
+
+ht *
+crf_find_node(crf * const restrict crfp, pair2 node)
+{
+  return ht_find(crfp->table, &node);
+}
+
+BOOL
+crf_add_edge(crf * const restrict crfp, pair2 source, pair4 label)
+{
+  if (ht_find(crfp->table, &source) == NULL) {
+    fleprintf0("add the node before adding its edges\n");
+    return 1;
+  }
+
+  if (ht_find(ht_find(crfp->table, &source), &label)) return 0;
+
+  pair4 *p = NULL;
+
+  NEW_P4(p, label.x, label.y, label.z, label.w, return 1;);
+
+  if (ht_insert(ht_find(crfp->table, &source), p, (void*)1)) {
+    fleprintf0("Fail to insert\n");
+    free(p);
+    return 1;
+  }
+
+  return 0;
+}
+
+void
+destroy_crf(crf * const restrict crfp)
+{
+
+#define SIZE (ht_size(crfp->table))
+#define VALUES ((ht**) ht_values(crfp->table))
+
+  for (NUM i = 0; i < SIZE;)
+    destroy_ht(*(VALUES+i++), DESTROY_KEY_SELF);
+
+  destroy_ht(crfp->table, DESTROY_KEY_SELF);
+
+  free(crfp);
+
+#undef SIZE
+#undef VALUES
+
+}
+
+struct spa_s {
+  ht *table;
+};
+
+spa *
+new_spa()
+{
+  spa *spap = NULL;
+  SAFE_MALLOC(spa, spap, 1, return NULL;);
+
+  spap->table = new_ht2(HT_INIT_CAP);
+
+  if (spap->table == NULL) {
+    free(spap);
+    return NULL;
+  }
+
+  return spap;
+}
+
+void
+destroy_spa(spa * const restrict spap)
+{
+  for (NUM i = 0; i < ht_size(spap->table); i++)
+    destroy_ht(*((ht **) ht_values(spap->table)+i), DESTROY_KEY_SELF);
+
+  /* fleprintf0("ho\n"); */
+
+  destroy_ht(spap->table, DESTROY_KEY_SELF);
+
+  free(spap);
+}
+
+BOOL
+spa_insert(spa * const restrict spap, pair3 label)
+{
+  pair2 first_two = (pair2) { 0 }, *p2 = NULL;
+
+  first_two.x = label.x;
+  first_two.y = label.y;
+
+  NUM j = label.z, *np = NULL;
+
+  ht *value_table = NULL;
+
+  if (ht_find(spap->table, &first_two) == NULL) {
+    value_table = new_ht(HT_INIT_CAP, 0);
+
+    if (value_table == NULL) {
+      fleprintf0("Fail to have new hash table\n");
+      goto cleanup;
+    }
+
+    SAFE_MALLOC(NUM, np, 1, goto cleanup;);
+    *np = label.z;
+
+    if (ht_insert(value_table, np, (void*)1)) {
+      fleprintf0("Fail to insert\n");
+      goto cleanup;
+    }
+
+    np = NULL;
+
+    NEW_P2(p2, label.x, label.y, goto cleanup;);
+
+    if (ht_insert(spap->table, p2, value_table)) {
+      fleprintf0("Fail to insert\n");
+      goto cleanup;
+    }
+
+    p2 = NULL;
+
+    goto success;
+  }
+
+  if (ht_find(ht_find(spap->table, &first_two), &j) == NULL) {
+    SAFE_MALLOC(NUM, np, 1, goto cleanup;);
+    *np = label.z;
+
+    if (ht_insert(ht_find(spap->table, &first_two), np, (void*)1)) {
+      fleprintf0("Fail to insert\n");
+      goto cleanup;
+    }
+
+    np = NULL;
+  }
+
+  goto success;
+
+ cleanup:
+  if (value_table) destroy_ht(value_table, DESTROY_KEY_SELF);
+  if (p2) free(p2);
+  if (np) free(np);
+
+  return 1;
+
+ success:
+  return 0;
+}
+
+P_ATTR
+BOOL
+spa_belong(CCR_MOD(spa *) spap, pair3 label)
+{
+  pair2 first_two = (pair2) { 0 };
+
+  first_two.x = label.x;
+  first_two.y = label.y;
+
+  if (ht_find(spap->table, &first_two) == NULL) return 0;
+
+  NUM j = label.z;
+
+  if (ht_find(ht_find(spap->table, &first_two), &j) == NULL) return 0;
+
+  return 1;
+}
+
+P_ATTR
+ht *
+spa_find(CCR_MOD(spa *) spap, pair2 label)
+{
+  return ht_find(spap->table, &label);
+}
+
+typedef struct procr_array_element_s procr_array_element;
+
+/* The list at index k is the set R_k. */
+struct procr_array_element_s {
+  BOOL initialized;
+  List *ls;
+};
+
+struct procr_s {
+  /* input length */
+  NUM len;
+  /* an array of lists */
+  procr_array_element *array;
+  /* minimal grade that is non-empty */
+  NUM mg;
+};
+
+typedef struct procu_array_element_s procu_array_element;
+
+/* The table at index k represents the set U_k.  Its keys are those
+   (X, a, i, j) such that (X, a, i, j, k) belongs to U_k. */
+struct procu_array_element_s {
+  BOOL initialized;
+  ht *table;
+};
+
+struct procu_s {
+  /* input length */
+  NUM len;
+  /* an array of hash tables */
+  procu_array_element *array;
+};
+
+procr *
+new_procr(NUM size, BOOL * const restrict errorp)
+{
+  *errorp = 0;
+
+  procr *pr = NULL;
+
+  SAFE_MALLOC(procr, pr, 1, goto cleanup;);
+
+  pr->len = size;
+  pr->array = NULL;
+
+  SAFE_MALLOC(procr_array_element, pr->array,
+              sizeof(procr_array_element) * size,
+              goto cleanup;);
+
+  memset(pr->array, 0, sizeof(procr_array_element) * size);
+
+  goto success;
+
+ cleanup:
+
+  *errorp = 1;
+
+  if (pr && pr->array) free(pr->array);
+
+  if (pr) free(pr);
+
+ success:
+
+  return pr;
+}
+
+procu *
+new_procu(NUM size, BOOL * const restrict errorp)
+{
+  *errorp = 0;
+
+  procu *pu = NULL;
+
+  SAFE_MALLOC(procu, pu, 1, goto cleanup;);
+
+  pu->len = size;
+  pu->array = NULL;
+
+  SAFE_MALLOC(procu_array_element, pu->array,
+              sizeof(procu_array_element) * size,
+              goto cleanup;);
+
+  memset(pu->array, 0, sizeof(procu_array_element) * size);
+
+  goto success;
+
+ cleanup:
+
+  *errorp = 1;
+
+  if (pu && pu->array) free(pu->array);
+
+  if (pu) free(pu);
+
+ success:
+
+  return pu;
+}
+
+void
+destroy_procr(procr * const restrict pr)
+{
+  for (NUM i = 0; i < pr->len; i++)
+    if ((pr->array+i)->initialized)
+      destroy_list((pr->array+i)->ls, 1);
+
+  free(pr->array);
+
+  free(pr);
+}
+
+void
+destroy_procu(procu * const restrict pu)
+{
+  for (NUM i = 0; i < pu->len; i++)
+    if ((pu->array+i)->initialized)
+      destroy_ht((pu->array+i)->table, DESTROY_KEY_SELF);
+
+  free(pu->array);
+
+  free(pu);
+}
+
+BOOL
+desc_add(procr * const restrict pr, procu * const restrict pu,
+         NUM grade, prodecor dec)
+{
+  if (grade < 0 || grade >= pu->len) {
+    fleprintf("Invalid grade: %ld\n", grade);
+    goto cleanup;
+  }
+
+  pair4 *p4 = NULL;
+
+#define HELE (pu->array+grade)
+#define LELE (pr->array+grade)
+
+  if (HELE->initialized) {
+    /* If HELE is initialized then LELE should also be initialized. */
+    if (ht_find(HELE->table, &dec) != NULL) goto success;
+
+    NEW_P4(p4, dec.x, dec.y, dec.z, dec.w, goto cleanup;);
+
+    if (ht_insert(HELE->table, p4, (void*)1)) {
+      fleprintf0("Fail to insert\n");
+      goto cleanup;
+    }
+
+    NEW_P4(p4, dec.x, dec.y, dec.z, dec.w,
+           ht_delete(HELE->table, &dec, DELETE_KEY); goto cleanup;);
+
+    if (add_to_list(LELE->ls, p4)) {
+      fleprintf0("Fail to add to list\n");
+      ht_delete(HELE->table, &dec, DELETE_KEY);
+      goto cleanup;
+    }
+
+    goto success;
+  }
+
+  HELE->table = new_ht4(HT_INIT_CAP);
+
+  if (HELE->table == NULL) {
+    fleprintf0("Fail to have new hash table\n");
+    goto cleanup;
+  }
+
+  HELE->initialized = 1;
+
+  NEW_P4(p4, dec.x, dec.y, dec.z, dec.w,
+         destroy_ht(HELE->table, DESTROY_KEY_SELF);
+         HELE->initialized = 0;
+         goto cleanup;);
+
+  if (ht_insert(HELE->table, p4, (void*)1)) {
+    fleprintf0("Fail to insert\n");
+    destroy_ht(HELE->table, DESTROY_KEY_SELF);
+    HELE->initialized = 0;
+    goto cleanup;
+  }
+
+  LELE->ls = new_list();
+
+  if (LELE->ls == NULL) {
+    fleprintf0("Fail to have new list\n");
+    destroy_ht(HELE->table, DESTROY_KEY_SELF);
+    HELE->initialized = 0;
+    goto cleanup;
+  }
+
+  LELE->initialized = 1;
+
+  NEW_P4(p4, dec.x, dec.y, dec.z, dec.w,
+         destroy_ht(HELE->table, DESTROY_KEY_SELF);
+         HELE->initialized = 0;
+         destroy_list(LELE->ls, 1);
+         LELE->initialized = 0;
+         goto cleanup;);
+
+  if (add_to_list(LELE->ls, p4)) {
+    fleprintf0("Fail to add to list\n");
+    destroy_ht(HELE->table, DESTROY_KEY_SELF);
+    HELE->initialized = 0;
+    destroy_list(LELE->ls, 1);
+    LELE->initialized = 0;
+    goto cleanup;
+  }
+
+#undef HELE
+#undef LELE
+
+  goto success;
+
+ cleanup:
+
+  return 1;
+
+ success:
+
+  return 0;
+}
+
+prodecor *
+pop_procr(procr * pr, procu * pu, NUM *gradep)
+{
+  prodecor *result = NULL;
+
+  if (pr->mg >= pr->len) goto success;
+
+#define ELEMENT (pr->array+pr->mg)
+
+  if (!(ELEMENT->initialized)) goto reset_mg;
+
+  *gradep = pr->mg;
+
+  result = pop_from_list(ELEMENT->ls);
+
+  if (list_length(ELEMENT->ls) == 0) {
+    destroy_ht((pu->array+pr->mg)->table, DESTROY_KEY_SELF);
+    (pu->array+pr->mg)->initialized = 0;
+    (pu->array+pr->mg)->table = NULL;
+
+    destroy_list(ELEMENT->ls, 1);
+    ELEMENT->initialized = 0;
+    ELEMENT->ls = NULL;
+
+    /* REVIEW: Maybe we can just increment the minimal grade, as we
+       won't jump over two steps? */
+  reset_mg:
+    while (++(pr->mg) < pr->len) {
+      if (ELEMENT->initialized) break;
+    }
+  }
+
+#undef ELEMENT
+
+ success:
+  return result;
+}
diff --git a/src/crf.h b/src/crf.h
new file mode 100644
index 0000000..7ff5a3f
--- /dev/null
+++ b/src/crf.h
@@ -0,0 +1,163 @@
+#ifndef CNP_H
+#define CNP_H
+#include "ht.h"
+
+/* This file implements some support functions for the Clustered
+   Non-terminal Parsing algorithm.  See the following paper for
+   details:
+
+   Derivation representation using binary subtree sets
+
+   by
+
+   Elizabeth Scott, Adrian Johnstone and L.Thomas van Binsbergen. */
+
+/* First we implement a "Call Return Forest".
+
+   A CRF has two types of nodes.
+
+   The first type is the non-leaf nodes.  It is labelled as
+
+   (X, i),
+
+   where X is a non-terminal and i is an input position.
+
+   The second type is the leaf nodes.  It is labelled as
+
+   (X, a, b, i),
+
+   where X is a non-terminal, a is an index of a rule in the array of
+   rules corresponding to X, b is the length of a prefix of the
+   right-hand side of the rule corresponding to a, and i is an input
+   position.  The triple (X, a, b) is termed a "grammar slot" in the
+   parlance.
+
+   We need constant look-up for both types of nodes.  So we implement
+   these using hash tables.
+
+   For the first type, we implement these as a hash table, whose keys
+   are those (X, i) in the set.  The values are described below.
+
+   Actually we don't need those second type nodes.  What we care about
+   are the edges between the two types of nodes.  So we simply don't
+   store the second type nodes at all.  We only store edges, or more
+   precisely, edge labels.  Since we need to know if an edge exists
+   efficiently, we implement the edges as a hash table.  (This seems
+   to be termed a "disctionary of keys" in computer science jargon.)
+   So the values of the hash table of the first type node are hash
+   tables whose keys are (X, a, b, i) that are in the edge set of the
+   first type node, and whose values are meaningless. */
+
+typedef struct crf_s crf;
+
+crf *new_crf();
+
+/* on error return non-zero */
+BOOL crf_add_node(crf * const restrict crfp, pair2 node);
+
+/* if not found return NULL.
+
+   if found but the node has no edges return an empty hash table. */
+ht *crf_find_node(crf * const restrict crfp, pair2 node);
+
+BOOL crf_add_edge(crf * const restrict crfp, pair2 source,
+                  pair4 label);
+
+void destroy_crf(crf * const restrict crfp);
+
+/* We also need to implement the set of pending actions.  These are
+   triples of the form:
+
+   (X, k, j).
+
+   Since we need to find all j such that (X, k, j) belongs to the set,
+   we implement the set as a hash table whose keys are (X, k) and
+   whose values are those j such that (X, k, j) belongs to the set.
+   Precisely speaking, the values are hash tables whose keys are those
+   j and whose values are meaningless non-zero values. */
+
+/* Set of Pending Actions */
+typedef struct spa_s spa;
+
+spa *new_spa();
+
+void destroy_spa(spa * const restrict spap);
+
+/* On error return non-zero */
+BOOL spa_insert(spa * const restrict spap, pair3 label);
+
+/* Find if a triple belongs to the set */
+P_ATTR BOOL spa_belong(CCR_MOD(spa *) spap, pair3 label);
+
+/* Find if for a pair (X, k) there exist some j such that (X, k, j)
+   belong to the set.  If so, return the hash table whose keys are
+   those j; otherwise return NULL. */
+P_ATTR ht *spa_find(CCR_MOD(spa *) spap, pair2 label);
+
+/* We also implement "process descriptors" for the CNP algorithm.  Since
+   a process descriptor is of the form
+
+   (X := beta . gamma, b, j, k),
+
+   where X := beta gamma is a rule and b is the length of beta, we can
+   represent a process descriptor as
+
+   (X, a, b, j, k).
+
+   However, we shall notice that there is a natural "grading" on the
+   process descriptors.  The k-th grading consists of the descriptors
+   with the last element in the tuple equal to k.  In the clustered
+   nonterminal parsing algorithm, the process descriptors that are
+   added as a consequence of dealing with a process descriptor of
+   grading k must be of grade greater than or equal to k.  This means
+   that if all process descriptors of grade k have been processed and
+   the descriptors awaiting to be processed all have grades > k, then
+   we won't see any process descriptors of grade <= k again.  So we
+   can actually keep an array of process descriptors that consist of
+   descriptors of degree k.  This way we don't need to store the k in
+   the descriptor.  Therefore a process descriptor is represented as
+
+   (X, a, i, j).
+
+   Since we have no other requirements than almost constant-time
+   lookup, we simply represent the set of descriptors using a hash
+   table with keys (X, a, i, j).
+
+   The main benefit of this is that after we find that all process
+   descriptors in R_k have been processed, we can free those
+   descriptors in U_k.  I think this is a good way to lower the space
+   requirements of this algorithm. */
+
+typedef pair4 prodecor;
+
+/* The set named R in the paper */
+typedef struct procr_s procr;
+
+/* The set named U in the paper */
+typedef struct procu_s procu;
+
+/* constructor */
+procr *new_procr(NUM size, BOOL * const restrict errorp);
+procu *new_procu(NUM size, BOOL * const restrict errorp);
+
+/* insert */
+
+/* the function dscAdd in the paper */
+BOOL desc_add(procr * const restrict pr, procu * const restrict pu,
+              NUM grade, prodecor dec);
+
+/* pop */
+
+/* gradep will be set to the grade */
+
+/* if all descriptors of grade k have been poped out, then the
+   corresponding slot in pu will be freed automatically. */
+
+/* if no more elements are left, return NULL */
+prodecor *pop_procr(procr * pr, procu * pu, NUM *gradep);
+
+/* destructor */
+void destroy_procr(procr * const restrict pr);
+void destroy_procu(procu * const restrict pu);
+
+#endif
diff --git a/src/test/check_cnp.c b/src/test/check_crf.c
index 2fbaac7..d99a838 100644
--- a/src/test/check_cnp.c
+++ b/src/test/check_crf.c
@@ -1,6 +1,6 @@
 #include <stdio.h>
 #include "../util.h"
-#include "../cnp.h"
+#include "../crf.h"
 
 int
 main(int UNUSED argc, char ** UNUSED argv)
@@ -8,8 +8,12 @@ main(int UNUSED argc, char ** UNUSED argv)
   procr *pr = NULL;
   procu *pu = NULL;
 
+  spa *spap = NULL;
+
   prodecor *prod = NULL;
 
+  /* check crf */
+
   crf *crfp = new_crf();
   pair2 p = { 0 };
   p.x = 0;
@@ -57,6 +61,47 @@ main(int UNUSED argc, char ** UNUSED argv)
 #undef TABLE
 #undef KEY
 
+  /* check spa */
+
+  spap = new_spa();
+
+  if (spap == NULL) {
+    fleprintf0("fail to have new spa\n");
+    goto cleanup;
+  }
+
+  pair3 p3 = (pair3) { 0 };
+  p3.x = 0;
+  p3.y = 1;
+  p3.z = 10;
+
+  if (spa_insert(spap, p3)) {
+    fleprintf0("Fail to insert\n");
+    goto cleanup;
+  }
+
+  printf("Successfully inserted into SPA\n");
+
+  if (spa_belong(spap, p3)) {
+    printf("Successfully found p3\n");
+  } else {
+    fleprintf0("Fail to find p3\n");
+    goto cleanup;
+  }
+
+  pair2 first_two = (pair2) { 0 };
+  first_two.x = 0;
+  first_two.y = 1;
+
+  if (spa_find(spap, first_two)) {
+    printf("Successfully found p2\n");
+  } else {
+    fleprintf0("Fail to find p2\n");
+    goto cleanup;
+  }
+
+  /* check prodecor */
+
   BOOL error = 0;
 
   pr = new_procr(10, &error);
@@ -97,6 +142,7 @@ main(int UNUSED argc, char ** UNUSED argv)
 
   if (pr) destroy_procr(pr);
   if (pu) destroy_procu(pu);
+  if (spap) destroy_spa(spap);
   if (prod) free(prod);
 
   return 0;
author	JSDurand <mmemmew@gmail.com>	2022-01-28 23:21:00 +0800
committer	JSDurand <mmemmew@gmail.com>	2022-01-28 23:22:22 +0800
commit	e555c88b8107caf886da229444c2bed1aaef6c2c (patch)
tree	239894914a881800e244f10090fa97822b0352a3
parent	8ace61933130416a0b8a6b250de681a606439f48 (diff)