1 files changed, 44 insertions, 9 deletions
diff --git a/src/grammar.h b/src/grammar.h
index 05bc1fc..28f8fd8 100644
--- a/src/grammar.h
+++ b/src/grammar.h
@@ -8,14 +8,45 @@
 
 /* The class file for grammars */
 
+/* TODO: Terminals should be extended to incorporate the "predicate
+   terminals", such as character classes. */
+
 /* A grammar is a list of rules.  A rule replaces a non-terminal with
    a finite string of terminals and non-terminals.
 
-   For us, a terminal is a positive integer, and a non-terminal is a
-   negative integer.  Since the signs of these two types is fixed, we
-   represent them as unsigned integers.  Since we want to deal with
-   unicode points, the terminal is stored as unsigned long, so that we
-   can include all unicode poitns. */
+   For us, a terminal is a positive integer, and a non-terminal is
+   also an integer.  We represent them as unsigned integers.  Since we
+   want to deal with unicode points, the terminal is stored as
+   unsigned long, so that we can include all unicode poitns.
+
+   Moreover, a terminal might be a "predicate terminal", that is, it
+   does not refer to one character only.  Instead, it refers to a set
+   of characters.  The most efficient way of recognizing a set of
+   characters, of which I know, is to use a deterministic finite
+   automaton.  So we represent a predicate terminal as a DFA.
+
+   Whereas the DFA's are equivalent to regular expressions in the
+   expressive power, I do not need full support for regular
+   expressions for the predicate terminals.  In particular, I only
+   want to match a character for one predicate terminal, so there are
+   no such things as repititive and optional constructs in the DFA.
+   Rather, each predicate terminal is a simple table, or a matrix,
+   which must have exactly 256 columns and an indefinite number of
+   rows.  The columns represent the input character's bytes to match,
+   and the rows are the states of the DFA.  The table must have at
+   least one row: the starting state is always the zeroth row and the
+   accepting state is any state that is out of bounds: either greater
+   than or equal to the number of rows or negative.
+
+   Of course, it is inefficient to keep the whoe table within the
+   memory, especially since the most part of the table would be
+   unused.  So we store a DFA in a compressed format.  I have
+   considered whether to apply some more advanced compression
+   techniques, such as using BW transoform followed by the MTF
+   algorithm, but in the end I think that the DFA's that will be used
+   are oft times just small tables, at least I hope so, thus I decided
+   to simply use the Run-Length-Encoding.  See the file dfa.h for
+   implementation details. */
 
 typedef unsigned long T;
 typedef unsigned NT;
@@ -41,7 +72,9 @@ typedef struct Grammar_s Grammar;
 /* G is expected to be allocated before this function.
 
    NAMES is a list of CPA pointers. */
-unsigned char build_grammar(Grammar *g, List *rules, List *names);
+BOOL
+build_grammar(Grammar *g,
+              const List *rules, const List * const names);
 
 /* This accepts one and only one optional argument, which is of type
    either T or NT, depending on TYPE being 0 or not. */
@@ -56,7 +89,9 @@ struct cpa_s {
 
 typedef struct cpa_s cpa;
 
-NUM find_in_cpa_list(NUM *string, NUM size, List *list);
+NUM
+find_in_cpa_list(const NUM * const restrict string, NUM size,
+                 const List * const restrict list);
 
 /* For debugging purposes, we need to print out rules to examine their
    values. */
@@ -69,7 +104,7 @@ void print_tnt(void *element);
 void print_rule(void *r);
 
 /* This will generate errors if G is not a list of rules. */
-void print_grammar(Grammar *g);
+void print_grammar(const Grammar * const g);
 
 /* constructors */
 
@@ -85,7 +120,7 @@ TNT *new_tnt_pointer(size_t size);
 /* RIGHT should be created by new_tnt_string or something alike.
 
    If RIGHT is NULL, NULL is returned. */
-Rule *new_rule(NT left, List *right);
+Rule *new_rule(NT left, const List * const right);
 
 Grammar *new_grammar();