summaryrefslogtreecommitdiff
path: root/src/crf.h
blob: 9f861788e28643e0dc76760f7081c25ef9e1489d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#ifndef CRF_H
#define CRF_H

/* This file implements the "Call Return Forest" for use in a GLL
   Clustered Non-terminal Parser.  See the following paper for
   details:

   Derivation representation using binary subtree sets
   
   by
   
   Elizabeth Scott, Adrian Johnstone and L.Thomas van Binsbergen. */

/* A CRF has two types of nodes.

   The first type is the non-leaf nodes.  It is labelled as

   (X, i),

   where X is a non-terminal and i is an input position.

   The second type is the leaf nodes.  It is labelled as

   (X, a, b, i),

   where X is a non-terminal, a is an index of a rule in the array of
   rules corresponding to X, b is the length of a prefix of the
   right-hand side of the rule corresponding to a, and i is an input
   position.  The triple (X, a, b) is termed a "grammar slot" in the
   parlance.

   We don't need constant look-up for the second type of nodes.  So we
   implement these as a simple dynamic array.  We do need constant
   look-up for the first type of nodes.  Thus we implement these as an
   array of hash tables, where the index of the array represents the
   index of the non-terminals, and the hash-table corresponding to a
   non-terminal X contains the input positions i such that (X, i) is a
   first-type node.  Additionally, the first-type nodes should also
   carry around an index into the array of second-type nodes. */

int place(int x);

#endif