18 files changed, 1019 insertions, 87 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index 4690876..f43fbee 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -2,9 +2,9 @@ AM_CFLAGS = -Wall -Wextra
 
 noinst_LIBRARIES = libeps.a
 libeps_a_SOURCES = grammar.c list.c util.c reader.c \
-str.c utf8.c lr_table.c \
+str.c utf8.c dfa.c \
 grammar.h list.h util.h reader.h \
-str_partial.h str.h utf8.h lr_table.h
+str_partial.h str.h utf8.h dfa.h
 
 libeps_a_CFLAGS = $(AM_CFLAGS) --pedantic
 
@@ -21,7 +21,7 @@ CLEANFILES = TAGS
 
 # tests
 
-check_PROGRAMS = check_list check_grammar check_reader
+check_PROGRAMS = check_list check_grammar check_reader check_dfa
 
 check_list_SOURCES = test/check_list.c list.c
 
@@ -31,6 +31,8 @@ utf8.c
 check_reader_SOURCES = test/check_reader.c list.c grammar.c reader.c \
 str.c utf8.c util.c
 
+check_dfa_SOURCES = test/check_dfa.c dfa.c list.c str.c utf8.c
+
 TESTS = $(check_PROGRAMS)
 
 AM_COLOR_TESTS = always
diff --git a/src/dfa.c b/src/dfa.c
new file mode 100644
index 0000000..e3bcdf0
--- /dev/null
+++ b/src/dfa.c
@@ -0,0 +1,692 @@
+#include "utf8.h"
+#include "str.h"
+#include <stdio.h>
+#include "list.h"
+#include "dfa.h"
+
+#define INIT_TRIE(X, ID) do {                                           \
+    X->id = ID;                                                         \
+    X->children = MYALLOC(int, MAX_CHAR_BYTES_NUM);                     \
+    for (int macro_index = 0; macro_index < MAX_CHAR_BYTES_NUM;)        \
+      *(X->children+macro_index++) = DFA_STATE_UNKNOWN;                 \
+  } while (0)
+
+/* The complete, uncompressed table is represented as an array of
+   arrays, each array of which has size MAX_CHAR_BYTES_NUM.  The
+   compressed table is represented as run-lengths.  The ROW_N is the
+   size of the total table.  And the TABLE stores the data.  It is an
+   array of arrays.  The format of the array of a row is as follows:
+
+   VALUE1 LEN1 VALUE2 LEN2
+
+   This means VALUE1 appears LEN1 times, and then VALUE2 appears LEN2
+   times, and so on.  Each array of a row ends when the lengths sum to
+   MAX_CHAR_BYTES_NUM. */
+struct compressed_table_s {
+  int row_n;
+  int **table;
+};
+
+enum DFA_TYPE {
+  DFA_TYPE_POSITIVE,
+  DFA_TYPE_NEGATIVE,
+  DFA_TYPE_BOTH,
+  DFA_TYPE_SPECIAL,
+};
+
+struct dfa_s {
+  enum DFA_TYPE type;
+  union {
+    compressed_table table;
+    special_dfa fn;
+  } data;
+};
+
+dfa *
+new_dfa()
+{
+  dfa *dfap = MYALLOC(dfa, 1);
+
+  dfap->type = 0;
+
+  dfap->data.table.row_n = 0;
+  dfap->data.table.table = NULL;
+
+  return dfap;
+}
+
+void
+destroy_dfa(dfa *table)
+{
+  /* function pointers need no destructors */
+  if (table->type == DFA_TYPE_SPECIAL) return;
+
+  for (int i = 0; i < table->data.table.row_n;)
+    free(*(table->data.table.table+i++));
+
+  free(table->data.table.table);
+  free(table);
+}
+
+/* a trie structure */
+
+typedef struct trie_s trie;
+
+/* REVIEW: Maybe we don't need ID? */
+struct trie_s {
+  /* It is guaranteed that the number of children is
+     MAX_CHAR_BYTES_NUM. */
+  int *children;
+  int id;
+};
+
+static void
+destroy_trie(void *t)
+{
+  trie *tp = (trie *)t;
+  free(tp->children);
+  free(tp);
+}
+
+/* Print a list of Trie's. */
+UNUSED
+static void
+print_tries(const List * const restrict ls)
+{
+  NUM len = list_length(ls);
+  for (NUM i = 0; i < len; i++) {
+    trie *t = (trie *) list_nth(ls, i);
+    printf("Trie ID: %d:\n", t->id);
+
+    BOOL has_children_p = 0;
+
+    for (int j = 0; j < MAX_CHAR_BYTES_NUM; j++) {
+      if (*(t->children+j) != -1) {
+        has_children_p = 1;
+        printf("    "
+               "Char %d => State %d\n", j, *(t->children+j));
+      }
+    }
+
+    if (!has_children_p) printf("    " "No children!\n");
+
+    printf("\n");
+  }
+}
+
+void
+print_dfa(const dfa * const restrict table)
+{
+  switch (table->type) {
+  case DFA_TYPE_POSITIVE:
+    printf("Printing a positive table:\n");
+    break;
+  case DFA_TYPE_NEGATIVE:
+    printf("Printing a negative table:\n");
+    break;
+  case DFA_TYPE_BOTH:
+    printf("Printing a mixed table:\n");
+    break;
+  default:
+    printf("A special table cannot be printed\n");
+    return;
+    break;
+  }
+
+  printf("Printing a table:\n");
+
+  for (int i = 0; i < table->data.table.row_n; i++) {
+    printf("Row %d: ", i);
+    int size = 0;
+    for (int j = 0; size < MAX_CHAR_BYTES_NUM; j++) {
+      printf("%d %d", *(*(table->data.table.table+i)+(j<<1)),
+             *(*(table->data.table.table+i)+(j<<1)+1));
+      size += *(*(table->data.table.table+i)+(j<<1)+1);
+
+      if (size < MAX_CHAR_BYTES_NUM) printf(", ");
+      else printf("\n");
+    }
+  }
+  printf("\n");
+}
+
+/* REVIEW: Maybe we don't want to generate the complete trie in the
+   process of generating the compressed table? */
+dfa *
+dfa_from_bytes(int sequence_size,
+               const NUM * const restrict data)
+{
+  List *states = new_list();
+
+  trie *state_pointer = MYALLOC(trie, 1);
+
+  INIT_TRIE(state_pointer, 0);
+
+  if (add_to_list(states, state_pointer)) {
+    fleprintf0("Cannot add to list\n");
+    destroy_trie(state_pointer);
+    map_list(states, destroy_trie);
+    destroy_list(states, 0);
+    return NULL;
+  }
+
+  char *s = MYALLOC(char, 5);
+
+  str *strp = new_str(s, 5);
+
+  for (int i = 0; i < sequence_size; i++) {
+    trie current_state = *((trie *) list_nth(states, 0));
+    NUM current_value = *(data+i);
+
+    if (encode(current_value, strp)) {
+      fleprintf("Fail to encode %ld at i = %d\n", current_value, i);
+      /* cleanup */
+      destroy_str(strp, 1);
+      map_list(states, destroy_trie);
+      destroy_list(states, 0);
+      return NULL;
+    }
+
+    int str_len = (int) str_length(strp);
+    s = get_data(strp);
+    unsigned char c = 0;
+    for (int j = 0; j < (str_len-1); j++) {
+      c = (unsigned char) *(s+j);
+      if (*(current_state.children+c) == DFA_STATE_UNKNOWN) {
+        /* a new state should be created */
+        state_pointer = MYALLOC(trie, 1);
+        INIT_TRIE(state_pointer, list_length(states));
+        *(current_state.children+c) = list_length(states);
+        if (add_to_list(states, state_pointer)) {
+          /* cleanup */
+          destroy_str(strp, 1);
+          destroy_trie(state_pointer);
+          map_list(states, destroy_trie);
+          destroy_list(states, 0);
+          return NULL;
+        }
+      }
+
+      current_state =
+        *((trie *) list_nth(states, *(current_state.children+c)));
+    }
+
+    if (str_len) {
+      /* the last byte leads to the accepting state */
+      c = (unsigned char) *(s+str_len-1);
+
+      /* In the trie's that we will be forming, there will be no leaf
+         that is also the parent of another node, by the design of
+         UTF-8. */
+
+      *(current_state.children+c) = DFA_STATE_ACCEPT;
+    }
+
+    str_set_length(strp, 5);
+  }
+
+  destroy_str(strp, 1);
+
+  /* For debugging */
+
+  /* print_tries(states); */
+
+  /* construct DFA from the trie */
+
+  NUM states_len = list_length(states);
+
+  dfa *dfap = new_dfa();
+
+  dfap->data.table.table = MYALLOC(int *, states_len);
+  dfap->data.table.row_n = states_len;
+
+  for (NUM i = 0; i < states_len; i++) {
+    int size = 1, current = -1, last = -1, current_length = 0;
+
+    for (int j = 0; j < MAX_CHAR_BYTES_NUM; j++) {
+      current = *(((trie *) list_nth(states, i))->children+j);
+      if (current != last) size++;
+      last = current;
+    }
+
+    *(dfap->data.table.table+i) = MYALLOC(int, size<<1);
+
+    current = (last = -1);
+
+    size = 1;
+
+    for (int j = 0; j < MAX_CHAR_BYTES_NUM; j++) {
+      current = *(((trie *) list_nth(states, i))->children+j);
+      if (current != last) {
+        *(*(dfap->data.table.table+i)+(size<<1)-2) = last;
+        *(*(dfap->data.table.table+i)+(size<<1)-1) = current_length;
+
+        size++;
+        last = current;
+        current_length = 1;
+
+        if (j == MAX_CHAR_BYTES_NUM-1) {
+          *(*(dfap->data.table.table+i)+(size<<1)-2) = last;
+          *(*(dfap->data.table.table+i)+(size<<1)-1) = 1;
+        }
+      } else if (j == MAX_CHAR_BYTES_NUM-1) {
+        *(*(dfap->data.table.table+i)+(size<<1)-2) = current;
+        *(*(dfap->data.table.table+i)+(size<<1)-1) = current_length+1;
+      } else {
+        current_length++;
+      }
+    }
+  }
+
+  /* cleanup the trie */
+  map_list(states, destroy_trie);
+  destroy_list(states, 0);
+
+  return dfap;
+}
+
+dfa *
+dfa_from_bytes_neg(int sequence_size,
+                   const NUM * const restrict data)
+{
+  List *states = new_list();
+
+  trie *state_pointer = MYALLOC(trie, 1);
+
+  INIT_TRIE(state_pointer, 0);
+
+  if (add_to_list(states, state_pointer)) {
+    fleprintf0("Cannot add to list\n");
+    destroy_trie(state_pointer);
+    map_list(states, destroy_trie);
+    destroy_list(states, 0);
+    return NULL;
+  }
+
+  char *s = MYALLOC(char, 5);
+
+  str *strp = new_str(s, 5);
+
+  for (int i = 0; i < sequence_size; i++) {
+    trie current_state = *((trie *) list_nth(states, 0));
+    NUM current_value = *(data+i);
+
+    if (encode(current_value, strp)) {
+      fleprintf("Fail to encode %ld at i = %d\n", current_value, i);
+      /* cleanup */
+      destroy_str(strp, 1);
+      map_list(states, destroy_trie);
+      destroy_list(states, 0);
+      return NULL;
+    }
+
+    int str_len = (int) str_length(strp);
+    s = get_data(strp);
+    unsigned char c = 0;
+    for (int j = 0; j < (str_len-1); j++) {
+      c = (unsigned char) *(s+j);
+      if (*(current_state.children+c) == DFA_STATE_UNKNOWN) {
+        /* a new state should be created */
+        state_pointer = MYALLOC(trie, 1);
+        INIT_TRIE(state_pointer, list_length(states));
+        *(current_state.children+c) = list_length(states);
+        if (add_to_list(states, state_pointer)) {
+          /* cleanup */
+          destroy_str(strp, 1);
+          destroy_trie(state_pointer);
+          map_list(states, destroy_trie);
+          destroy_list(states, 0);
+          return NULL;
+        }
+      }
+
+      current_state =
+        *((trie *) list_nth(states, *(current_state.children+c)));
+    }
+
+    if (str_len) {
+      /* the last byte leads to the rejecting state */
+      c = (unsigned char) *(s+str_len-1);
+
+      /* In the trie's that we will be forming, there will be no leaf
+         that is also the parent of another node, by the design of
+         UTF-8. */
+
+      *(current_state.children+c) = DFA_STATE_REJECT;
+    }
+
+    str_set_length(strp, 5);
+  }
+
+  destroy_str(strp, 1);
+
+  /* For debugging */
+
+  /* print_tries(states); */
+
+  /* construct DFA from the trie */
+
+  NUM states_len = list_length(states);
+
+  dfa *dfap = new_dfa();
+
+  dfap->data.table.table = MYALLOC(int *, states_len);
+  dfap->data.table.row_n = states_len;
+  dfap->type = 1;
+
+  for (NUM i = 0; i < states_len; i++) {
+    int size = 1, current = -1, last = -1, current_length = 0;
+
+    for (int j = 0; j < MAX_CHAR_BYTES_NUM; j++) {
+      current = *(((trie *) list_nth(states, i))->children+j);
+      if (current != last) size++;
+      last = current;
+    }
+
+    *(dfap->data.table.table+i) = MYALLOC(int, size<<1);
+
+    current = (last = -1);
+
+    size = 1;
+
+    for (int j = 0; j < MAX_CHAR_BYTES_NUM; j++) {
+      current = *(((trie *) list_nth(states, i))->children+j);
+      if (current != last) {
+        *(*(dfap->data.table.table+i)+(size<<1)-2) = last;
+        *(*(dfap->data.table.table+i)+(size<<1)-1) = current_length;
+
+        size++;
+        last = current;
+        current_length = 1;
+
+        if (j == MAX_CHAR_BYTES_NUM-1) {
+          *(*(dfap->data.table.table+i)+(size<<1)-2) = last;
+          *(*(dfap->data.table.table+i)+(size<<1)-1) = 1;
+        }
+      } else if (j == MAX_CHAR_BYTES_NUM-1) {
+        *(*(dfap->data.table.table+i)+(size<<1)-2) = current;
+        *(*(dfap->data.table.table+i)+(size<<1)-1) = current_length+1;
+      } else {
+        current_length++;
+      }
+    }
+  }
+
+  /* cleanup the trie */
+  map_list(states, destroy_trie);
+  destroy_list(states, 0);
+
+  return dfap;
+}
+
+dfa *dfa_from_bytes_both(int sequence_size,
+                         const NUM * const restrict data,
+                         int neg_sequence_size,
+                         const NUM * const restrict negdata)
+{
+  List *states = new_list();
+
+  trie *state_pointer = MYALLOC(trie, 1);
+
+  INIT_TRIE(state_pointer, 0);
+
+  if (add_to_list(states, state_pointer)) {
+    fleprintf0("Cannot add to list\n");
+    destroy_trie(state_pointer);
+    map_list(states, destroy_trie);
+    destroy_list(states, 0);
+    return NULL;
+  }
+
+  char *s = MYALLOC(char, 5);
+
+  str *strp = new_str(s, 5);
+
+  for (int i = 0; i < sequence_size; i++) {
+    trie current_state = *((trie *) list_nth(states, 0));
+    NUM current_value = *(data+i);
+
+    if (encode(current_value, strp)) {
+      fleprintf("Fail to encode %ld at i = %d\n", current_value, i);
+      /* cleanup */
+      destroy_str(strp, 1);
+      map_list(states, destroy_trie);
+      destroy_list(states, 0);
+      return NULL;
+    }
+
+    int str_len = (int) str_length(strp);
+    s = get_data(strp);
+    unsigned char c = 0;
+    for (int j = 0; j < (str_len-1); j++) {
+      c = (unsigned char) *(s+j);
+      if (*(current_state.children+c) == DFA_STATE_UNKNOWN) {
+        /* a new state should be created */
+        state_pointer = MYALLOC(trie, 1);
+        INIT_TRIE(state_pointer, list_length(states));
+        *(current_state.children+c) = list_length(states);
+        if (add_to_list(states, state_pointer)) {
+          /* cleanup */
+          destroy_str(strp, 1);
+          destroy_trie(state_pointer);
+          map_list(states, destroy_trie);
+          destroy_list(states, 0);
+          return NULL;
+        }
+      }
+
+      current_state =
+        *((trie *) list_nth(states, *(current_state.children+c)));
+    }
+
+    if (str_len) {
+      /* the last byte leads to the accepting state */
+      c = (unsigned char) *(s+str_len-1);
+
+      /* In the trie's that we will be forming, there will be no leaf
+         that is also the parent of another node, by the design of
+         UTF-8. */
+
+      *(current_state.children+c) = DFA_STATE_ACCEPT;
+    }
+
+    str_set_length(strp, 5);
+  }
+
+  for (int i = 0; i < neg_sequence_size; i++) {
+    trie current_state = *((trie *) list_nth(states, 0));
+    NUM current_value = *(negdata+i);
+
+    if (encode(current_value, strp)) {
+      fleprintf("Fail to encode %ld at i = %d\n", current_value, i);
+      /* cleanup */
+      destroy_str(strp, 1);
+      map_list(states, destroy_trie);
+      destroy_list(states, 0);
+      return NULL;
+    }
+
+    int str_len = (int) str_length(strp);
+    s = get_data(strp);
+    unsigned char c = 0;
+    for (int j = 0; j < (str_len-1); j++) {
+      c = (unsigned char) *(s+j);
+      if (*(current_state.children+c) == DFA_STATE_UNKNOWN) {
+        /* a new state should be created */
+        state_pointer = MYALLOC(trie, 1);
+        INIT_TRIE(state_pointer, list_length(states));
+        *(current_state.children+c) = list_length(states);
+        if (add_to_list(states, state_pointer)) {
+          /* cleanup */
+          fleprintf("Fail to add to list when i = %d, j = %d, "
+                    "and current value = %ld\n",
+                    i, j, current_value);
+          destroy_str(strp, 1);
+          destroy_trie(state_pointer);
+          map_list(states, destroy_trie);
+          destroy_list(states, 0);
+          return NULL;
+        }
+      }
+
+      current_state =
+        *((trie *) list_nth(states, *(current_state.children+c)));
+    }
+
+    if (str_len) {
+      /* the last byte leads to the rejecting state */
+      c = (unsigned char) *(s+str_len-1);
+
+      /* In the trie's that we will be forming, there will be no leaf
+         that is also the parent of another node, by the design of
+         UTF-8. */
+
+      *(current_state.children+c) = DFA_STATE_REJECT;
+    }
+
+    str_set_length(strp, 5);
+  }
+
+  destroy_str(strp, 1);
+
+  /* For debugging */
+
+  /* print_tries(states); */
+
+  /* construct DFA from the trie */
+
+  NUM states_len = list_length(states);
+
+  dfa *dfap = new_dfa();
+
+  dfap->data.table.table = MYALLOC(int *, states_len);
+  dfap->data.table.row_n = states_len;
+
+  for (NUM i = 0; i < states_len; i++) {
+    int size = 1, current = -1, last = -1, current_length = 0;
+
+    for (int j = 0; j < MAX_CHAR_BYTES_NUM; j++) {
+      current = *(((trie *) list_nth(states, i))->children+j);
+      if (current != last) size++;
+      last = current;
+    }
+
+    *(dfap->data.table.table+i) = MYALLOC(int, size<<1);
+
+    current = (last = -1);
+
+    size = 1;
+
+    for (int j = 0; j < MAX_CHAR_BYTES_NUM; j++) {
+      current = *(((trie *) list_nth(states, i))->children+j);
+      if (current != last) {
+        *(*(dfap->data.table.table+i)+(size<<1)-2) = last;
+        *(*(dfap->data.table.table+i)+(size<<1)-1) = current_length;
+
+        size++;
+        last = current;
+        current_length = 1;
+
+        if (j == MAX_CHAR_BYTES_NUM-1) {
+          *(*(dfap->data.table.table+i)+(size<<1)-2) = last;
+          *(*(dfap->data.table.table+i)+(size<<1)-1) = 1;
+        }
+      } else if (j == MAX_CHAR_BYTES_NUM-1) {
+        *(*(dfap->data.table.table+i)+(size<<1)-2) = current;
+        *(*(dfap->data.table.table+i)+(size<<1)-1) = current_length+1;
+      } else {
+        current_length++;
+      }
+    }
+  }
+
+  /* cleanup the trie */
+  map_list(states, destroy_trie);
+  destroy_list(states, 0);
+
+  return dfap;
+}
+
+BOOL
+run_dfa(const dfa * const restrict table, const NUM code)
+{
+  switch (table->type) {
+  case DFA_TYPE_POSITIVE:
+  case DFA_TYPE_NEGATIVE:
+  case DFA_TYPE_BOTH:
+    break;
+  default:
+    return table->data.fn(code);
+    break;
+  }
+
+  char *s = MYALLOC(char, 5);
+  str *strp = new_str(s, 5);
+
+  if (encode(code, strp)) {
+    destroy_str(strp, 1);
+    fleprintf0("Fail to encode\n");
+    return 0;
+  }
+
+  int current_row = 0;
+
+  for (NUM i = 0; i < str_length(strp); i++) {
+    unsigned char c = (unsigned char) *(s+i);
+    int lookup_value = 0;
+
+    for (int j = 0, len = 0; len <= c;) {
+      len += *(*(table->data.table.table+current_row)+(j<<1)+1);
+      lookup_value = *(*(table->data.table.table+current_row)
+                       +(j++<<1));
+    }
+
+    /* fleprintf("lookup_value = %d\n", lookup_value); */
+
+    if (lookup_value > 0) {
+      current_row = lookup_value;
+    } else {
+      switch (lookup_value) {
+      case DFA_STATE_REJECT:
+        destroy_str(strp, 1);
+        return 0;
+        break;
+      case DFA_STATE_UNKNOWN:
+        switch (table->type) {
+        case DFA_TYPE_BOTH:
+        case DFA_TYPE_POSITIVE:
+          destroy_str(strp, 1);
+          return 0;
+          break;
+        case DFA_TYPE_NEGATIVE:
+          destroy_str(strp, 1);
+          return 1;
+          break;
+        default:
+          /* This should not be possible, as we won't reach here if
+             the table is of the special type.  But I am apparently
+             paranoia. */
+          fleprintf0("Special table in the wrong place\n");
+          destroy_str(strp, 1);
+          return 0;
+          break;
+        }
+        break;
+      case DFA_STATE_ACCEPT:
+        destroy_str(strp, 1);
+        return 1;
+        break;
+      default:
+        fleprintf("Unknown state number for char %d: %d\n",
+                  c, lookup_value);
+        destroy_str(strp, 1);
+        return 0;
+        break;
+      }
+    }
+  }
+
+  destroy_str(strp, 1);
+
+  return 0;
+}
diff --git a/src/dfa.h b/src/dfa.h
new file mode 100644
index 0000000..a22409f
--- /dev/null
+++ b/src/dfa.h
@@ -0,0 +1,64 @@
+#include "util.h"
+#ifndef DFA_H
+#define DFA_H
+
+/* See the comments at the beginning of grammar.h for some backgrounds
+   about this file. */
+
+/* See the following Wikipedia link for details on Run-Length
+   Encoding.  <https://en.wikipedia.org/wiki/Run-length_encoding> */
+
+/* Maximal character bytes value */
+
+enum { MAX_CHAR_BYTES_NUM = 256 };
+
+/* Hard-coded state numbers */
+
+enum {
+  DFA_STATE_UNKNOWN = -1,
+  DFA_STATE_ACCEPT = -2,
+  DFA_STATE_REJECT = -3,
+};
+
+/* dfa type */
+
+typedef BOOL (* special_dfa) (const NUM code);
+
+typedef struct dfa_s dfa;
+
+typedef struct compressed_table_s compressed_table;
+
+dfa *new_dfa();
+
+void destroy_dfa(dfa *table);
+
+void print_dfa(const dfa * const restrict table);
+
+dfa *dfa_from_bytes(int sequence_size,
+                    const NUM * const restrict data);
+
+dfa *dfa_from_bytes_neg(int sequence_size,
+                        const NUM * const restrict data);
+
+dfa *dfa_from_bytes_both(int sequence_size,
+                         const NUM * const restrict data,
+                         int neg_sequence_size,
+                         const NUM * const restrict negdata);
+
+/* TODO: Reject character bytes from a given DFA. */
+
+/* NOTE: Add all unicode valid points to a DFA, so that we can
+   represent the ANY class.
+
+   After having done so, this costs around 16K memory.  This is not so
+   satisfactory, as all these memory are just to serve as a ANY
+   character class, which is too excessive.  So I extend the DFA by a
+   special type. */
+
+/* TODO: Construct some basic frequently used character classes. */
+
+inline BOOL dfa_any_fun(const NUM UNUSED code) { return 1; }
+
+BOOL run_dfa(const dfa * const restrict table, const NUM code);
+
+#endif
diff --git a/src/grammar.c b/src/grammar.c
index 4052510..508c8d8 100644
--- a/src/grammar.c
+++ b/src/grammar.c
@@ -104,14 +104,14 @@ new_tnt(int type, ...)
 }
 
 Rule *
-new_rule(NT left, List *right)
+new_rule(NT left, const List * const right)
 {
   if (!right) return NULL;
 
   Rule *rule = MYALLOC(Rule, 1);
 
   rule->left = left;
-  rule->right = right;
+  rule->right = (List *) right;
 
   return rule;
 }
@@ -124,10 +124,10 @@ new_grammar()
 }
 
 /* We classify the rules into different rule groups. */
-unsigned char
-build_grammar(Grammar *g, List *rules, List *names)
+BOOL
+build_grammar(Grammar *g, const List *rules, const List * const names)
 {
-  g->names = names;
+  g->names = (List *) names;
 
   NUM len = list_length(names);
   NUM rule_len = list_length(rules);
@@ -246,11 +246,12 @@ print_rule(void *r)
 }
 
 NUM
-find_in_cpa_list(NUM *string, NUM size, List *list)
+find_in_cpa_list(const NUM * const restrict string, NUM size,
+                 const List * const restrict list)
 {
   NUM len = list_length(list), index = 0;
 
-  unsigned char foundp = 0;
+  BOOL foundp = 0;
 
   for (; !foundp && index < len;) {
     cpa *cpap = list_nth(list, index);
@@ -304,7 +305,7 @@ print_name(void *element)
 /* REVIEW: Print the names of non-terminals out, instead of printing
    the numbers? */
 void
-print_grammar(Grammar *g)
+print_grammar(const Grammar * const g)
 {
   printf("Printing a grammar:\n");
   map_list_between(g->names, print_name, print_sep);
diff --git a/src/grammar.h b/src/grammar.h
index 05bc1fc..28f8fd8 100644
--- a/src/grammar.h
+++ b/src/grammar.h
@@ -8,14 +8,45 @@
 
 /* The class file for grammars */
 
+/* TODO: Terminals should be extended to incorporate the "predicate
+   terminals", such as character classes. */
+
 /* A grammar is a list of rules.  A rule replaces a non-terminal with
    a finite string of terminals and non-terminals.
 
-   For us, a terminal is a positive integer, and a non-terminal is a
-   negative integer.  Since the signs of these two types is fixed, we
-   represent them as unsigned integers.  Since we want to deal with
-   unicode points, the terminal is stored as unsigned long, so that we
-   can include all unicode poitns. */
+   For us, a terminal is a positive integer, and a non-terminal is
+   also an integer.  We represent them as unsigned integers.  Since we
+   want to deal with unicode points, the terminal is stored as
+   unsigned long, so that we can include all unicode poitns.
+
+   Moreover, a terminal might be a "predicate terminal", that is, it
+   does not refer to one character only.  Instead, it refers to a set
+   of characters.  The most efficient way of recognizing a set of
+   characters, of which I know, is to use a deterministic finite
+   automaton.  So we represent a predicate terminal as a DFA.
+
+   Whereas the DFA's are equivalent to regular expressions in the
+   expressive power, I do not need full support for regular
+   expressions for the predicate terminals.  In particular, I only
+   want to match a character for one predicate terminal, so there are
+   no such things as repititive and optional constructs in the DFA.
+   Rather, each predicate terminal is a simple table, or a matrix,
+   which must have exactly 256 columns and an indefinite number of
+   rows.  The columns represent the input character's bytes to match,
+   and the rows are the states of the DFA.  The table must have at
+   least one row: the starting state is always the zeroth row and the
+   accepting state is any state that is out of bounds: either greater
+   than or equal to the number of rows or negative.
+
+   Of course, it is inefficient to keep the whoe table within the
+   memory, especially since the most part of the table would be
+   unused.  So we store a DFA in a compressed format.  I have
+   considered whether to apply some more advanced compression
+   techniques, such as using BW transoform followed by the MTF
+   algorithm, but in the end I think that the DFA's that will be used
+   are oft times just small tables, at least I hope so, thus I decided
+   to simply use the Run-Length-Encoding.  See the file dfa.h for
+   implementation details. */
 
 typedef unsigned long T;
 typedef unsigned NT;
@@ -41,7 +72,9 @@ typedef struct Grammar_s Grammar;
 /* G is expected to be allocated before this function.
 
    NAMES is a list of CPA pointers. */
-unsigned char build_grammar(Grammar *g, List *rules, List *names);
+BOOL
+build_grammar(Grammar *g,
+              const List *rules, const List * const names);
 
 /* This accepts one and only one optional argument, which is of type
    either T or NT, depending on TYPE being 0 or not. */
@@ -56,7 +89,9 @@ struct cpa_s {
 
 typedef struct cpa_s cpa;
 
-NUM find_in_cpa_list(NUM *string, NUM size, List *list);
+NUM
+find_in_cpa_list(const NUM * const restrict string, NUM size,
+                 const List * const restrict list);
 
 /* For debugging purposes, we need to print out rules to examine their
    values. */
@@ -69,7 +104,7 @@ void print_tnt(void *element);
 void print_rule(void *r);
 
 /* This will generate errors if G is not a list of rules. */
-void print_grammar(Grammar *g);
+void print_grammar(const Grammar * const g);
 
 /* constructors */
 
@@ -85,7 +120,7 @@ TNT *new_tnt_pointer(size_t size);
 /* RIGHT should be created by new_tnt_string or something alike.
 
    If RIGHT is NULL, NULL is returned. */
-Rule *new_rule(NT left, List *right);
+Rule *new_rule(NT left, const List * const right);
 
 Grammar *new_grammar();
 
diff --git a/src/list.c b/src/list.c
index 143c693..3c430f6 100644
--- a/src/list.c
+++ b/src/list.c
@@ -32,7 +32,7 @@ new_list()
 }
 
 H_ATTR
-unsigned char
+BOOL
 add_to_list(List *ls, void *element)
 {
   if (!(ls->capacity)) {
@@ -141,10 +141,10 @@ copy_num(void *p)
   return pointer;
 }
 
-unsigned char
+BOOL
 copy_list(List *dest, List *source, copyer copyf)
 {
-  unsigned char sign = 0;
+  BOOL sign = 0;
 
   if ((sign = list_assure_size(dest, source->len)))
     return sign;
@@ -162,19 +162,19 @@ copy_list(List *dest, List *source, copyer copyf)
 
 H_ATTR
 void *
-list_nth(List *ls, NUM n)
+list_nth(const List * const ls, NUM n)
 {
   return *(ls->array+n);
 }
 
 H_ATTR
 NUM
-list_length(List *ls)
+list_length(const List * const restrict ls)
 {
   return ls->len;
 }
 
-unsigned char
+BOOL
 list_assure_size(List *ls, NUM size)
 {
   if (ls->capacity >= size) return 0;  /* we are good */
@@ -203,7 +203,7 @@ list_assure_size(List *ls, NUM size)
   return 0;
 }
 
-unsigned char
+BOOL
 list_set_length(List *ls, NUM len)
 {
   list_assure_size(ls, len);
@@ -254,7 +254,7 @@ array_to_list(void **array, NUM size)
 }
 
 void
-destroy_list(List *ls, unsigned char all_free_p)
+destroy_list(List *ls, BOOL all_free_p)
 {
   if (all_free_p == 1)
     for (NUM i = 0; i < ls->len; i++)
diff --git a/src/list.h b/src/list.h
index ff401f4..6ace213 100644
--- a/src/list.h
+++ b/src/list.h
@@ -17,7 +17,7 @@ List *new_list();
 
 /* add an element to the end of the list */
 /* upon failure return non-zero */
-unsigned char add_to_list(List *ls, void *element);
+BOOL add_to_list(List *ls, void *element);
 
 /* pop an element from the end of the list, and return that element */
 /* upon failure return NULL */
@@ -43,23 +43,23 @@ typedef void *(*copyer)(void *);
 void *copy_num(void *);
 
 /* upon failure return 1 */
-unsigned char copy_list(List *dest, List *source, copyer copyf);
+BOOL copy_list(List *dest, List *source, copyer copyf);
 
-void *list_nth(List *ls, NUM n);
+void *list_nth(const List * const ls, NUM n);
 
-NUM list_length(List *ls);
+NUM list_length(const List * const restrict ls);
 
 /* Make sure the list has at least SIZE slots to use.  This should
    only be used to create fixed capacity arrays, otherwise we risk
    frequently re-allocating and hence losing performance. */
 /* Upon failure return non-zero. */
-unsigned char list_assure_size(List *ls, NUM size);
+BOOL list_assure_size(List *ls, NUM size);
 
 /* This is mainly used to set the length of a sparse list, since only
    when dealing with sparse lists do we not need to care about the
    elements. */
 /* Upon failure return non-zero. */
-unsigned char list_set_length(List *ls, NUM len);
+BOOL list_set_length(List *ls, NUM len);
 
 /* Convert a list to an array.  
 
@@ -79,7 +79,7 @@ List *array_to_list(void ** array, NUM size);
 /* destroy the list: If ALL_FREE_P is 1, this frees every void
    pointers contained in the list; if it is 2, this frees the first
    pointer.  In any case, the list is de-allocated. */
-void destroy_list(List *ls, unsigned char all_free_p);
+void destroy_list(List *ls, BOOL all_free_p);
 
 void destroy_list_free_all(void *ls);
 void destroy_list_free_first(void *ls);
diff --git a/src/reader.c b/src/reader.c
index 18435a9..d715726 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -21,14 +21,15 @@
 
 /* Read a string of TNT's into TNTS.  TNTS is expected to be already
    allocated. */
-static unsigned char
-read_tnt_string(str *s, NUM start, NUM end, TNT *tnts, List *cpa_list)
+static BOOL
+read_tnt_string(const str * const restrict s, NUM start, NUM end,
+                TNT *tnts, List *cpa_list)
 {
   str_info info;
 
   /* fleprintf("S = %lu, E = %lu\n", start, end); */
 
-  unsigned char readingp = 0, stop = 0;
+  BOOL readingp = 0, stop = 0, double_quote_p = 0;
 
   NUM *string = NULL, string_size = 0, nt_index = 0, tnt_count = 0;
 
@@ -73,14 +74,15 @@ read_tnt_string(str *s, NUM start, NUM end, TNT *tnts, List *cpa_list)
       }
       break;
     case 34:
+      double_quote_p = 1;
     case 39:
       /* read a string */
       strindex = index + info.step;
       strinfo = get_info(s, strindex);
 
       for (;strindex < end &&
-             strinfo.value != 34 &&
-             strinfo.value != 39 &&
+             ((double_quote_p && strinfo.value != 34) ||
+              (!double_quote_p && strinfo.value != 39)) &&
              strinfo.value != 10 &&
              strinfo.value != 13;) {
         strinfo = get_info(s, strindex);
@@ -88,7 +90,7 @@ read_tnt_string(str *s, NUM start, NUM end, TNT *tnts, List *cpa_list)
         if (strinfo.value == 92) {
           strindex += info.step;
 
-          unsigned char local_exit = 0;
+          BOOL local_exit = 0;
 
           /* The following errors should not happen.  I am just
              paranoid. */
@@ -118,6 +120,8 @@ read_tnt_string(str *s, NUM start, NUM end, TNT *tnts, List *cpa_list)
       strindex -= strinfo.step;
       tnt_count--;
 
+      double_quote_p = 0;
+
       index = strindex;
       info = get_info(s, index);
       break;
@@ -166,6 +170,7 @@ read_tnt_string(str *s, NUM start, NUM end, TNT *tnts, List *cpa_list)
   return 0;
 }
 
+UNUSED
 static void
 print_int(void *p)
 {
@@ -174,7 +179,7 @@ print_int(void *p)
 
 /* Read a grammar from a string in the format of BNF notation. */
 Grammar *
-read_grammar_from_bnf(str *s)
+read_grammar_from_bnf(const str * const restrict s)
 {
   NUM len = str_length(s);
 
@@ -186,7 +191,7 @@ read_grammar_from_bnf(str *s)
 
   List *line_indices = new_list();
 
-  unsigned char definitionp = 1, readingp = 0;
+  BOOL definitionp = 1, readingp = 0;
 
   /* last index is used to collect strings */
   for (NUM index = 0, last_index = 0; index < len;) {
@@ -205,7 +210,7 @@ read_grammar_from_bnf(str *s)
     case 9:
       readingp = 0;
 
-      if (definitionp) {
+      if (definitionp && readingp) {
         fleprintf("%lu, A non-terminal cannot have spaces in "
                   "the name\n", index);
         /* cleanup */
@@ -257,9 +262,9 @@ read_grammar_from_bnf(str *s)
     index += info.step;
   }
 
-  printf("%s:%d, print indices\n", __FILE__, __LINE__);
-  map_list(line_indices, print_int);
-  printf("\n");
+  /* printf("%s:%d, print indices\n", __FILE__, __LINE__);
+   * map_list(line_indices, print_int);
+   * printf("\n"); */
 
   /* second round collects all the rules */
 
@@ -267,7 +272,7 @@ read_grammar_from_bnf(str *s)
 
   NUM line_count = 0;
 
-  unsigned char right_start_p = 0;
+  BOOL right_start_p = 0, double_quote_p = 0;
 
   definitionp = 1, readingp = 0;
 
@@ -286,6 +291,7 @@ read_grammar_from_bnf(str *s)
     switch (info.value) {
       /* the quote characters */
     case 34:                    /* double quote */
+      double_quote_p = 1;
     case 39:                    /* single quote */
       if (definitionp) {
         fleprintf("%lu, A non-terminal cannot have quotes in "
@@ -330,8 +336,8 @@ read_grammar_from_bnf(str *s)
         for (;strindex < len &&
                strinfo.value != 13 &&
                strinfo.value != 10 &&
-               strinfo.value != 34 &&
-               strinfo.value != 39;) {
+               ((double_quote_p && strinfo.value != 34) ||
+                (!double_quote_p && strinfo.value != 39));) {
           strinfo = get_info(s, strindex);
 
           /* escape character */
@@ -339,7 +345,7 @@ read_grammar_from_bnf(str *s)
             strindex += strinfo.step;
             if (strindex < len) strinfo = get_info(s, strindex);
 
-            unsigned char local_error_p = 0;
+            BOOL local_error_p = 0;
 
             if (strindex >= len) {
               local_error_p = 1;
@@ -395,8 +401,8 @@ read_grammar_from_bnf(str *s)
             return NULL;
           }
         } else {
-          if (strinfo.value != 34 &&
-              strinfo.value != 39) {
+          if ((double_quote_p && strinfo.value != 34) ||
+              (!double_quote_p && strinfo.value != 39)) {
             fleprintf0("input ended before string is left\n");
             /* cleanup */
             map_list(names, destroy_cpa_and_free_all);
@@ -408,15 +414,16 @@ read_grammar_from_bnf(str *s)
             destroy_list(rules, 0);
             return NULL;
           }
-
         }
       }
 
+      double_quote_p = 0;
+
       break;
       /* spaces and tabs are ignored */
     case ' ':
     case 9:
-      if (definitionp) {
+      if (definitionp && readingp) {
         fleprintf("%lu, A non-terminal cannot have spaces or "
                   "tabs in the name\n", index);
         /* cleanup */
@@ -435,6 +442,11 @@ read_grammar_from_bnf(str *s)
       break;
     case '\n':
     case 13:
+      if (definitionp && !readingp) {
+        /* empty line */
+        break;
+      }
+
       definitionp = 1;
       readingp = 0;
       line_count++;
@@ -461,13 +473,13 @@ read_grammar_from_bnf(str *s)
           return NULL;
         }
 
-        printf("%s:%d:%lu, Printing a TNT string: ",
-               __FILE__, __LINE__, index);
-        for (int i = 0; i < (int) tnt_count;) {
-          print_tnt(current_tnt_string+i++);
-          if (i<tnt_count) printf(", ");
-        }
-        printf("\n");
+        /* printf("%s:%d:%lu, Printing a TNT string: ",
+         *        __FILE__, __LINE__, index); */
+        /* for (int i = 0; i < (int) tnt_count;) {
+         *   print_tnt(current_tnt_string+i++);
+         *   if (i<tnt_count) printf(", ");
+         * }
+         * printf("\n"); */
 
         temp_pointers = MYALLOC(void *, tnt_count);
 
diff --git a/src/reader.h b/src/reader.h
index 65ee7ef..4351055 100644
--- a/src/reader.h
+++ b/src/reader.h
@@ -5,6 +5,6 @@
 #include "utf8.h"
 #include "grammar.h"
 
-Grammar *read_grammar_from_bnf(str *s);
+Grammar *read_grammar_from_bnf(const str * const restrict s);
 
 #endif
diff --git a/src/str.c b/src/str.c
index 46dc4a8..fdff99a 100644
--- a/src/str.c
+++ b/src/str.c
@@ -4,7 +4,7 @@
 
 H_ATTR
 str_info
-_info_getter(str *s, UNUM n)
+_info_getter(const str * const restrict s, UNUM n)
 {
   return (str_info) { (NUM) *(s->data+n), 1 };
 }
@@ -41,14 +41,14 @@ destroy_str(str *s, int flag)
 
 H_ATTR
 str_info
-get_info(str *s, UNUM n)
+get_info(const str * const restrict s, UNUM n)
 {
   return s->getter(s, n);
 }
 
 H_ATTR
 NUM
-str_length(str *s)
+str_length(const str * const restrict s)
 {
   return s->size;
 }
@@ -60,7 +60,7 @@ change_str_content(str *s, char *str, NUM size)
   s->size = size;
 }
 
-unsigned char
+BOOL
 copy_str(str *dest, str *source)
 {
   if (str_length(dest) < str_length(source))
@@ -74,7 +74,7 @@ copy_str(str *dest, str *source)
 }
 
 char *
-get_data(str *s)
+get_data(const str * const restrict s)
 {
   return s->data;
 }
diff --git a/src/str.h b/src/str.h
index 39bddf6..bfe5867 100644
--- a/src/str.h
+++ b/src/str.h
@@ -20,16 +20,16 @@ typedef struct str_info_s str_info;
 
 #define EMPTY_STR_INFO ((str_info) { 0, 1 })
 
-str_info get_info(str *, UNUM);
+str_info get_info(const str * const restrict s, UNUM n);
 
-char *get_data(str *s);
+char *get_data(const str * const restrict s);
 
-NUM str_length(str *);
+NUM str_length(const str * const restrict s);
 
 void str_set_length(str *s, UNUM size);
 
 void change_str_content(str *, char *, NUM);
 
-unsigned char copy_str(str *dest, str *source);
+BOOL copy_str(str *dest, str *source);
 
 #endif
diff --git a/src/str_partial.h b/src/str_partial.h
index 9d4fbd6..2373555 100644
--- a/src/str_partial.h
+++ b/src/str_partial.h
@@ -3,7 +3,7 @@
 /* This is meant to be extended, and only has minimal necessary
    fields. */
 
-typedef str_info (*info_getter) (str *, UNUM);
+typedef str_info (*info_getter) (const str * const restrict , UNUM);
 
 struct str_s {
   UNUM size;                    /* the size in bytes, not in chars */
diff --git a/src/test.txt b/src/test.txt
index be9b3ba..27af3ad 100644
--- a/src/test.txt
+++ b/src/test.txt
@@ -1,9 +1,25 @@
-S: A A LONG C 奎佑
-A: B B B
-B: A
-A:
-LONG: B A
-C: B C A
-B: B B
-奎佑: "handsome"
-A: A B A
+S: S "+" T
+S: T
+T: T "*" F
+T: F
+F: "(" S ")"
+F: "0"
+F: "1"
+F: "2"
+F: "3"
+F: "4"
+F: "5"
+F: "6"
+F: "7"
+F: "8"
+F: "9"
+F: "0" F
+F: "1" F
+F: "2" F
+F: "3" F
+F: "4" F
+F: "5" F
+F: "6" F
+F: "7" F
+F: "8" F
+F: "9" F
diff --git a/src/test/check_dfa.c b/src/test/check_dfa.c
new file mode 100644
index 0000000..a728fb8
--- /dev/null
+++ b/src/test/check_dfa.c
@@ -0,0 +1,108 @@
+#include "../utf8.h"
+#include "../str.h"
+#include <stdio.h>
+#include "../util.h"
+#include "../dfa.h"
+
+#define ADD_RANGE(X, Y) do {                    \
+    for (int i = 0; i + (X) <= (Y); i++)        \
+      *(v+vlen++) = (NUM) i + (X);              \
+  } while (0)
+
+int
+main(int UNUSED argc, char ** UNUSED argv)
+{
+  UNUM vlen = 0;
+  NUM *v = MYALLOC(NUM, 1<<21);
+
+  ADD_RANGE(0x0020, 0x007f);
+  ADD_RANGE(0x2580, 0x259f);
+  ADD_RANGE(0x00a0, 0x00ff);
+  ADD_RANGE(0x25a0, 0x25ff);
+  ADD_RANGE(0x0100, 0x017f);
+  ADD_RANGE(0x2600, 0x26ff);
+  ADD_RANGE(0x0180, 0x024f);
+  ADD_RANGE(0x2700, 0x27bf);
+  ADD_RANGE(0x0250, 0x02af);
+  ADD_RANGE(0x27c0, 0x27ef);
+  ADD_RANGE(0x02b0, 0x02ff);
+  ADD_RANGE(0x27f0, 0x27ff);
+  ADD_RANGE(0x0300, 0x036f);
+  ADD_RANGE(0x2800, 0x28ff);
+  ADD_RANGE(0x0370, 0x03ff);
+  ADD_RANGE(0x2900, 0x297f);
+  ADD_RANGE(0x0400, 0x04ff);
+  ADD_RANGE(0x2980, 0x29ff);
+  ADD_RANGE(0x0500, 0x052f);
+  ADD_RANGE(0x2a00, 0x2aff);
+  ADD_RANGE(0x0530, 0x058f);
+  ADD_RANGE(0x2b00, 0x2bff);
+  ADD_RANGE(0x0590, 0x05ff);
+  ADD_RANGE(0x2e80, 0x2eff);
+  ADD_RANGE(0x0600, 0x06ff);
+  ADD_RANGE(0x2f00, 0x2fdf);
+  ADD_RANGE(0x0700, 0x074f);
+  ADD_RANGE(0x2ff0, 0x2fff);
+  ADD_RANGE(0x0780, 0x07bf);
+  ADD_RANGE(0x3000, 0x303f);
+  ADD_RANGE(0x0900, 0x097f);
+  ADD_RANGE(0x3040, 0x309f);
+  ADD_RANGE(0x0980, 0x09ff);
+  ADD_RANGE(0x30a0, 0x30ff);
+  ADD_RANGE(0x0a00, 0x0a7f);
+  ADD_RANGE(0x3100, 0x312f);
+  ADD_RANGE(0x0a80, 0x0aff);
+  ADD_RANGE(0x3130, 0x318f);
+  ADD_RANGE(0x0b00, 0x0b7f);
+  ADD_RANGE(0x3190, 0x319f);
+  ADD_RANGE(0x0b80, 0x0bff);
+  ADD_RANGE(0x31a0, 0x31bf);
+  ADD_RANGE(0x0c00, 0x0c7f);
+  ADD_RANGE(0x31f0, 0x31ff);
+  ADD_RANGE(0x0c80, 0x0cff);
+  ADD_RANGE(0x3200, 0x32ff);
+  ADD_RANGE(0x0d00, 0x0d7f);
+  ADD_RANGE(0x3300, 0x33ff);
+  ADD_RANGE(0x0d80, 0x0dff);
+  ADD_RANGE(0x3400, 0x4dbf);
+  ADD_RANGE(0x0e00, 0x0e7f);
+  ADD_RANGE(0x4dc0, 0x4dff);
+  ADD_RANGE(0x0e80, 0x0eff);
+  ADD_RANGE(0x4e00, 0x9fff);
+  ADD_RANGE(0x0f00, 0x0fff);
+
+  /* for (int i = 0; i < 190; i++)
+   *   printf("v [%d] = %d\n", i, *(v+i)); */
+
+  dfa *dfap = dfa_from_bytes(vlen, v);
+
+  free(v);
+
+  if (dfap == NULL)
+    printf("returned null\n");
+  else
+    print_dfa(dfap);
+
+  /* char *s = MYALLOC(char, 5);
+   * str *strp = new_str(s, 5);
+   *
+   * for (int i = 0; i < 3; i++) {
+   *   printf("\n\n");
+   *
+   *   encode(v[i], strp);
+   *
+   *   for (int j = 0; j < str_length(strp)-1;)
+   *     printf("%d, ", *(s+j++)+(1<<8));
+   *
+   *   printf("%d\n", *(s+str_length(strp)-1)+(1<<8));
+   * }
+   *
+   * destroy_str(strp, 1); */
+
+  /* BOOL result = run_dfa(dfap, 32239);
+   *
+   * printf("the result = %d\n", result); */
+
+  if (dfap) destroy_dfa(dfap);
+  return 0;
+}
diff --git a/src/utf8.c b/src/utf8.c
index f840600..ce47faf 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -41,7 +41,7 @@ decode(UTF8_State *state, uint32_t *codep, uint8_t byte)
 }
 
 str_info
-utf8_get_info(str *s, UNUM n)
+utf8_get_info(const str * const restrict s, UNUM n)
 {
   UTF8_State state = UTF8_STATE_ACCEPT;
   uint32_t code = 0;
@@ -85,7 +85,7 @@ new_utf8(char *string, UNUM size)
 }
   
 /* result should be long enough to hold the data */
-unsigned char
+BOOL
 encode(NUM code_point, str *result)
 {
   /* calculate the number of bits of CODE_POINT */
diff --git a/src/utf8.h b/src/utf8.h
index 5b3c9f2..f5e7be1 100644
--- a/src/utf8.h
+++ b/src/utf8.h
@@ -20,6 +20,6 @@ typedef enum UTF8_State_e UTF8_State;
 
 utf8 *new_utf8(char *string, UNUM size);
 
-unsigned char encode(NUM code_point, str *result);
+BOOL encode(NUM code_point, str *result);
 
 #endif
diff --git a/src/util.c b/src/util.c
index a04ef77..1af8e5e 100644
--- a/src/util.c
+++ b/src/util.c
@@ -12,7 +12,7 @@
 
    Note that the length of the string str is actually 1+length, and
    the last character is the null character. */
-unsigned char
+BOOL
 read_entire_file(const char *file_name, char **str, NUM *len)
 {
   long file_size = 0;
diff --git a/src/util.h b/src/util.h
index 940b7e4..6d6072a 100644
--- a/src/util.h
+++ b/src/util.h
@@ -10,6 +10,8 @@ typedef long DATA;
 typedef long NUM;
 typedef unsigned long long UNUM; /* definitely bigger than size_t */
 
+typedef unsigned char BOOL;
+
 
 #define HC_ATTR __attribute__((__hot__, __const__))
 #define H_ATTR __attribute__((__hot__))
@@ -31,6 +33,6 @@ typedef unsigned long long UNUM; /* definitely bigger than size_t */
 #define fleprintf0(M) eprintf("%s:%d, " M, __FILE__, __LINE__)
 #define fleprintf(M, ...) eprintf("%s:%d, " M, __FILE__, __LINE__, __VA_ARGS__)
 
-unsigned char read_entire_file(const char *file_name, char **str, NUM *len);
+BOOL read_entire_file(const char *file_name, char **str, NUM *len);
 
 #endif