summaryrefslogtreecommitdiff
path: root/src/cnp.h
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2022-01-28 23:21:00 +0800
committerJSDurand <mmemmew@gmail.com>2022-01-28 23:22:22 +0800
commite555c88b8107caf886da229444c2bed1aaef6c2c (patch)
tree239894914a881800e244f10090fa97822b0352a3 /src/cnp.h
parent8ace61933130416a0b8a6b250de681a606439f48 (diff)
rename cnp to crf
THat file implements support functions for the CNP algorithm, not the algorithm itself.
Diffstat (limited to 'src/cnp.h')
-rw-r--r--src/cnp.h142
1 files changed, 1 insertions, 141 deletions
diff --git a/src/cnp.h b/src/cnp.h
index c7d3965..5944ca3 100644
--- a/src/cnp.h
+++ b/src/cnp.h
@@ -1,144 +1,4 @@
#ifndef CNP_H
#define CNP_H
-#include "ht.h"
-
-/* This file implements the Clustered Non-terminal Parsing algorithm.
- See the following paper for details:
-
- Derivation representation using binary subtree sets
-
- by
-
- Elizabeth Scott, Adrian Johnstone and L.Thomas van Binsbergen. */
-
-/* First we implement a "Call Return Forest".
-
- A CRF has two types of nodes.
-
- The first type is the non-leaf nodes. It is labelled as
-
- (X, i),
-
- where X is a non-terminal and i is an input position.
-
- The second type is the leaf nodes. It is labelled as
-
- (X, a, b, i),
-
- where X is a non-terminal, a is an index of a rule in the array of
- rules corresponding to X, b is the length of a prefix of the
- right-hand side of the rule corresponding to a, and i is an input
- position. The triple (X, a, b) is termed a "grammar slot" in the
- parlance.
-
- We need constant look-up for both types of nodes. So we implement
- these using hash tables.
-
- For the first type, we implement these as a hash table, whose keys
- are those (X, i) in the set. The values are described below.
-
- Actually we don't need those second type nodes. What we care about
- are the edges between the two types of nodes. So we simply don't
- store the second type nodes at all. We only store edges, or more
- precisely, edge labels. Since we need to know if an edge exists
- efficiently, we implement the edges as a hash table. (This seems
- to be termed a "disctionary of keys" in computer science jargon.)
- So the values of the hash table of the first type node are hash
- tables whose keys are (X, a, b, i) that are in the edge set of the
- first type node, and whose values are meaningless. */
-
-typedef struct crf_s crf;
-
-crf *new_crf();
-
-/* on error return non-zero */
-BOOL crf_add_node(crf * const restrict crfp, pair2 node);
-
-/* if not found return NULL.
-
- if found but the node has no edges return an empty hash table. */
-ht *crf_find_node(crf * const restrict crfp, pair2 node);
-
-BOOL crf_add_edge(crf * const restrict crfp, pair2 source,
- pair4 label);
-
-void destroy_crf(crf * const restrict crfp);
-
-/* We also need to implement the set of pending actions. These are
- triples of the form:
-
- (X, k, j).
-
- Since we need to find all j such that (X, k, j) belongs to the set,
- we implement the set as a hash table whose keys are (X, k) and
- whose values are those j such that (X, k, j) belongs to the set. */
-
-/* TODO: Pending actions */
-
-/* We also implement "process descriptors" for the CNP algorithm. Since
- a process descriptor is of the form
-
- (X := beta . gamma, b, j, k),
-
- where X := beta gamma is a rule and b is the length of beta, we can
- represent a process descriptor as
-
- (X, a, b, j, k).
-
- However, we shall notice that there is a natural "grading" on the
- process descriptors. The k-th grading consists of the descriptors
- with the last element in the tuple equal to k. In the clustered
- nonterminal parsing algorithm, the process descriptors that are
- added as a consequence of dealing with a process descriptor of
- grading k must be of grade greater than or equal to k. This means
- that if all process descriptors of grade k have been processed and
- the descriptors awaiting to be processed all have grades > k, then
- we won't see any process descriptors of grade <= k again. So we
- can actually keep an array of process descriptors that consist of
- descriptors of degree k. This way we don't need to store the k in
- the descriptor. Therefore a process descriptor is represented as
-
- (X, a, i, j).
-
- Since we have no other requirements than almost constant-time
- lookup, we simply represent the set of descriptors using a hash
- table with keys (X, a, i, j).
-
- The main benefit of this is that after we find that all process
- descriptors in R_k have been processed, we can free those
- descriptors in U_k. I think this is a good way to lower the space
- requirements of this algorithm. */
-
-typedef pair4 prodecor;
-
-/* The set named R in the paper */
-typedef struct procr_s procr;
-
-/* The set named U in the paper */
-typedef struct procu_s procu;
-
-/* constructor */
-procr *new_procr(NUM size, BOOL * const restrict errorp);
-procu *new_procu(NUM size, BOOL * const restrict errorp);
-
-/* insert */
-
-/* the function dscAdd in the paper */
-BOOL desc_add(procr * const restrict pr, procu * const restrict pu,
- NUM grade, prodecor dec);
-
-/* pop */
-
-/* gradep will be set to the grade */
-
-/* if all descriptors of grade k have been poped out, then the
- corresponding slot in pu will be freed automatically. */
-
-/* if no more elements are left, return NULL */
-prodecor *pop_procr(procr * pr, procu * pu, NUM *gradep);
-
-/* destructor */
-void destroy_procr(procr * const restrict pr);
-void destroy_procu(procu * const restrict pu);
-
+/* TODO */
#endif