From e555c88b8107caf886da229444c2bed1aaef6c2c Mon Sep 17 00:00:00 2001
From: JSDurand <mmemmew@gmail.com>
Date: Fri, 28 Jan 2022 23:21:00 +0800
Subject: rename cnp to crf

THat file implements support functions for the CNP algorithm, not the
algorithm itself.
---
 src/cnp.h | 142 +-------------------------------------------------------------
 1 file changed, 1 insertion(+), 141 deletions(-)

(limited to 'src/cnp.h')

diff --git a/src/cnp.h b/src/cnp.h
index c7d3965..5944ca3 100644
--- a/src/cnp.h
+++ b/src/cnp.h
@@ -1,144 +1,4 @@
 #ifndef CNP_H
 #define CNP_H
-#include "ht.h"
-
-/* This file implements the Clustered Non-terminal Parsing algorithm.
-   See the following paper for details:
-
-   Derivation representation using binary subtree sets
-
-   by
-
-   Elizabeth Scott, Adrian Johnstone and L.Thomas van Binsbergen. */
-
-/* First we implement a "Call Return Forest".
-
-   A CRF has two types of nodes.
-
-   The first type is the non-leaf nodes.  It is labelled as
-
-   (X, i),
-
-   where X is a non-terminal and i is an input position.
-
-   The second type is the leaf nodes.  It is labelled as
-
-   (X, a, b, i),
-
-   where X is a non-terminal, a is an index of a rule in the array of
-   rules corresponding to X, b is the length of a prefix of the
-   right-hand side of the rule corresponding to a, and i is an input
-   position.  The triple (X, a, b) is termed a "grammar slot" in the
-   parlance.
-
-   We need constant look-up for both types of nodes.  So we implement
-   these using hash tables.
-
-   For the first type, we implement these as a hash table, whose keys
-   are those (X, i) in the set.  The values are described below.
-
-   Actually we don't need those second type nodes.  What we care about
-   are the edges between the two types of nodes.  So we simply don't
-   store the second type nodes at all.  We only store edges, or more
-   precisely, edge labels.  Since we need to know if an edge exists
-   efficiently, we implement the edges as a hash table.  (This seems
-   to be termed a "disctionary of keys" in computer science jargon.)
-   So the values of the hash table of the first type node are hash
-   tables whose keys are (X, a, b, i) that are in the edge set of the
-   first type node, and whose values are meaningless. */
-
-typedef struct crf_s crf;
-
-crf *new_crf();
-
-/* on error return non-zero */
-BOOL crf_add_node(crf * const restrict crfp, pair2 node);
-
-/* if not found return NULL.
-
-   if found but the node has no edges return an empty hash table. */
-ht *crf_find_node(crf * const restrict crfp, pair2 node);
-
-BOOL crf_add_edge(crf * const restrict crfp, pair2 source,
-                  pair4 label);
-
-void destroy_crf(crf * const restrict crfp);
-
-/* We also need to implement the set of pending actions.  These are
-   triples of the form:
-
-   (X, k, j).
-
-   Since we need to find all j such that (X, k, j) belongs to the set,
-   we implement the set as a hash table whose keys are (X, k) and
-   whose values are those j such that (X, k, j) belongs to the set. */
-
-/* TODO: Pending actions */
-
-/* We also implement "process descriptors" for the CNP algorithm.  Since
-   a process descriptor is of the form
-
-   (X := beta . gamma, b, j, k),
-
-   where X := beta gamma is a rule and b is the length of beta, we can
-   represent a process descriptor as
-
-   (X, a, b, j, k).
-
-   However, we shall notice that there is a natural "grading" on the
-   process descriptors.  The k-th grading consists of the descriptors
-   with the last element in the tuple equal to k.  In the clustered
-   nonterminal parsing algorithm, the process descriptors that are
-   added as a consequence of dealing with a process descriptor of
-   grading k must be of grade greater than or equal to k.  This means
-   that if all process descriptors of grade k have been processed and
-   the descriptors awaiting to be processed all have grades > k, then
-   we won't see any process descriptors of grade <= k again.  So we
-   can actually keep an array of process descriptors that consist of
-   descriptors of degree k.  This way we don't need to store the k in
-   the descriptor.  Therefore a process descriptor is represented as
-
-   (X, a, i, j).
-
-   Since we have no other requirements than almost constant-time
-   lookup, we simply represent the set of descriptors using a hash
-   table with keys (X, a, i, j).
-
-   The main benefit of this is that after we find that all process
-   descriptors in R_k have been processed, we can free those
-   descriptors in U_k.  I think this is a good way to lower the space
-   requirements of this algorithm. */
-
-typedef pair4 prodecor;
-
-/* The set named R in the paper */
-typedef struct procr_s procr;
-
-/* The set named U in the paper */
-typedef struct procu_s procu;
-
-/* constructor */
-procr *new_procr(NUM size, BOOL * const restrict errorp);
-procu *new_procu(NUM size, BOOL * const restrict errorp);
-
-/* insert */
-
-/* the function dscAdd in the paper */
-BOOL desc_add(procr * const restrict pr, procu * const restrict pu,
-              NUM grade, prodecor dec);
-
-/* pop */
-
-/* gradep will be set to the grade */
-
-/* if all descriptors of grade k have been poped out, then the
-   corresponding slot in pu will be freed automatically. */
-
-/* if no more elements are left, return NULL */
-prodecor *pop_procr(procr * pr, procu * pu, NUM *gradep);
-
-/* destructor */
-void destroy_procr(procr * const restrict pr);
-void destroy_procu(procu * const restrict pu);
-
+/* TODO */
 #endif
-- 
cgit v1.2.3-18-g5258