summaryrefslogtreecommitdiff
path: root/src/crf.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/crf.h')
-rw-r--r--src/crf.h43
1 files changed, 43 insertions, 0 deletions
diff --git a/src/crf.h b/src/crf.h
new file mode 100644
index 0000000..9f86178
--- /dev/null
+++ b/src/crf.h
@@ -0,0 +1,43 @@
+#ifndef CRF_H
+#define CRF_H
+
+/* This file implements the "Call Return Forest" for use in a GLL
+ Clustered Non-terminal Parser. See the following paper for
+ details:
+
+ Derivation representation using binary subtree sets
+
+ by
+
+ Elizabeth Scott, Adrian Johnstone and L.Thomas van Binsbergen. */
+
+/* A CRF has two types of nodes.
+
+ The first type is the non-leaf nodes. It is labelled as
+
+ (X, i),
+
+ where X is a non-terminal and i is an input position.
+
+ The second type is the leaf nodes. It is labelled as
+
+ (X, a, b, i),
+
+ where X is a non-terminal, a is an index of a rule in the array of
+ rules corresponding to X, b is the length of a prefix of the
+ right-hand side of the rule corresponding to a, and i is an input
+ position. The triple (X, a, b) is termed a "grammar slot" in the
+ parlance.
+
+ We don't need constant look-up for the second type of nodes. So we
+ implement these as a simple dynamic array. We do need constant
+ look-up for the first type of nodes. Thus we implement these as an
+ array of hash tables, where the index of the array represents the
+ index of the non-terminals, and the hash-table corresponding to a
+ non-terminal X contains the input positions i such that (X, i) is a
+ first-type node. Additionally, the first-type nodes should also
+ carry around an index into the array of second-type nodes. */
+
+int place(int x);
+
+#endif