From 55dc897da6e81f2a26cfc7e66ac942824773498b Mon Sep 17 00:00:00 2001
From: JSDurand <mmemmew@gmail.com>
Date: Tue, 4 Jan 2022 11:51:58 +0800
Subject: temporary commit

Now we can read grammars from a file.

But we need to check if it works for reading strings still.
---
 src/Makefile.am          |  12 +-
 src/grammar.c            | 280 +++++++++++++++++++++++++---
 src/grammar.h            |  39 +++-
 src/list.c               |  44 ++++-
 src/list.h               |  11 ++
 src/reader.c             | 466 +++++++++++++++++++++++++++++++++++++++++++++--
 src/reader.h             |  30 +--
 src/str.c                |  86 +++++++++
 src/str.h                |  35 ++++
 src/str_partial.h        |  13 ++
 src/test.txt             |   7 +
 src/test/check_grammar.c |  36 +++-
 src/test/check_list.c    |  29 ++-
 src/test/check_reader.c  |  32 +++-
 src/utf8.c               | 150 +++++++++++++++
 src/utf8.h               |  25 +++
 src/util.c               |  49 +++++
 src/util.h               |   4 +-
 18 files changed, 1250 insertions(+), 98 deletions(-)
 create mode 100644 src/str.c
 create mode 100644 src/str.h
 create mode 100644 src/str_partial.h
 create mode 100644 src/test.txt
 create mode 100644 src/utf8.c
 create mode 100644 src/utf8.h
 create mode 100644 src/util.c

diff --git a/src/Makefile.am b/src/Makefile.am
index ffb9a5c..5eff3c5 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -1,8 +1,10 @@
 AM_CFLAGS = -Wall -Wextra
 
 noinst_LIBRARIES = libeps.a
-libeps_a_SOURCES = grammar.c list.c reader.c \
-grammar.h list.h util.h reader.h
+libeps_a_SOURCES = grammar.c list.c util.c reader.c \
+str.c utf8.c \
+grammar.h list.h util.h reader.h \
+str_partial.h str.h utf8.h
 
 libeps_a_CFLAGS = $(AM_CFLAGS) --pedantic
 
@@ -23,9 +25,11 @@ check_PROGRAMS = check_list check_grammar check_reader
 
 check_list_SOURCES = test/check_list.c list.c
 
-check_grammar_SOURCES = test/check_grammar.c list.c grammar.c
+check_grammar_SOURCES = test/check_grammar.c list.c grammar.c str.c \
+utf8.c
 
-check_reader_SOURCES = test/check_reader.c list.c grammar.c reader.c
+check_reader_SOURCES = test/check_reader.c list.c grammar.c reader.c \
+str.c utf8.c util.c
 
 TESTS = $(check_PROGRAMS)
 
diff --git a/src/grammar.c b/src/grammar.c
index fd38ac8..118a10e 100644
--- a/src/grammar.c
+++ b/src/grammar.c
@@ -1,21 +1,85 @@
 #include "grammar.h"
 
-struct TNT_s {
-  int type;                     /* 0 => T and NT otherwise */
-  union { T t; NT nt; } data;
-};
-
 struct Rule_s {
   NT left;
-  List *right;
+  List *right;                  /* a list of TNT's */
 };
 
+typedef struct {
+  /* this group holds rules for this non-terminal */
+  NT left;
+  /* a list of lists of TNT's */
+  List *rights;
+} Rule_group;
+
+/* rule_grps and names should have the same length */
 struct Grammar_s {
-  List *rules;
-  /* a list of names of non-terminals */
+  /* a list of Rule_group's */
+  List *rule_grps;
+  /* a list of names of non-terminals
+
+     Each element is an array of code-points.
+
+     To print them, first encode them into normal strings. */
   List *names;
 };
 
+static void
+destroy_rule_group(void *rule_grp, int flag)
+{
+  Rule_group *rg = (Rule_group *) rule_grp;
+
+  switch (flag) {
+  case 1:
+    map_list(rg->rights, destroy_list_free_all);
+    break;
+  case 2:
+    map_list(rg->rights, destroy_list_free_first);
+    break;
+  default:
+    map_list(rg->rights, destroy_list_no_free);
+    break;
+  }
+
+  destroy_list(rg->rights, 0);
+
+  free(rule_grp);
+}
+
+UNUSED
+static void
+destroy_rule_group_no_free(void *rule_grp)
+{
+  destroy_rule_group(rule_grp, 0);
+}
+
+UNUSED
+static void
+destroy_rule_group_free_all(void *rule_grp)
+{
+  destroy_rule_group(rule_grp, 1);
+}
+
+UNUSED
+static void
+destroy_rule_group_free_first(void *rule_grp)
+{
+  destroy_rule_group(rule_grp, 2);
+}
+
+static void
+print_rule_group(void *rule_grp)
+{
+  Rule_group *rg = (Rule_group *) rule_grp;
+
+  for (int i = 0; i < list_length(rg->rights); i++) {
+    List *str = (List *) list_nth(rg->rights, i);
+    printf("Rule %u => ", rg->left);
+    map_list(str, print_tnt);
+    printf("\n");
+  }
+}
+
 TNT *
 new_tnt(int type, ...)
 {
@@ -53,11 +117,104 @@ new_grammar()
   return g;
 }
 
-void
+/* We classify the rules into different rule groups. */
+unsigned char
 build_grammar(Grammar *g, List *rules, List *names)
 {
-  g->rules = rules;
   g->names = names;
+
+  NUM len = list_length(names);
+  NUM rule_len = list_length(rules);
+
+  /* If a rule corresponds to a non-terminal not on the list, signal
+     an error. */
+  for (int i = 0; i < rule_len; i++) {
+    if (((Rule *) list_nth(rules, i))->left >= len) {
+      fleprintf("%s:%d:%d, Rule contains weird non-terminal\n", i);
+      return 1;
+    }
+  }
+
+  List *rule_grps = new_list();
+
+  if (rule_grps == NULL) {
+    fleprintf0("%s:%d, Cannot create list\n");
+    return 1;
+  }
+
+  if (list_assure_size(rule_grps, len)) {
+    fleprintf0("%s:%d, Cannot assure size of rule groups\n");
+    return 1;
+  }
+
+  List *temp_ls = NULL;
+  void *temp_pointer = NULL;
+
+  /* Initialize the list of rule groups */
+  for (int i = 0; i < len; i++) {
+    if ((temp_ls = new_list()) == NULL) {
+      fleprintf("%s:%d:%d, Cannot create list\n", i);
+
+      map_list(rule_grps, destroy_rule_group_no_free);
+      destroy_list(rule_grps, 0);
+      return 1;
+    }
+
+    temp_pointer = MYALLOC(Rule_group, 1);
+
+    if (temp_pointer == NULL) {
+      fleprintf0("%s:%d, Cannot malloc\n");
+      map_list(rule_grps, destroy_rule_group_no_free);
+      destroy_list(rule_grps, 0);
+      destroy_list(temp_ls, 0);
+      return 1;
+    }
+
+    ((Rule_group *) temp_pointer)->left = i;
+    ((Rule_group *) temp_pointer)->rights = temp_ls;
+
+    int result = add_to_list(rule_grps, temp_pointer);
+
+    if (result) {
+      fleprintf("%s:%d:%d, Cannot add to list\n", i);
+
+      map_list(rule_grps, destroy_rule_group_no_free);
+      destroy_list(rule_grps, 0);
+      destroy_list(temp_ls, 0);
+      free(temp_pointer);
+      return 1;
+    }
+  }
+
+  /* Now fill in rule groups */
+
+  for (int i = 0; i < rule_len; i++) {
+    Rule *r = (Rule *) list_nth(rules, i);
+
+    int result = add_to_list
+      (((Rule_group *) list_nth(rule_grps, r->left))->rights,
+       r->right);
+
+    if (result) {
+      fleprintf("%s:%d:%d, Cannot add to list\n", i);
+
+      for (int j = 0; j < list_length(rule_grps); j++) {
+        Rule_group *rg = (Rule_group *) list_nth(rule_grps, j);
+
+        map_list(rg->rights, destroy_list_free_all);
+        destroy_list(rg->rights, 0);
+        free(rg);
+      }
+
+      destroy_list(rule_grps, 0);
+
+      return 1;
+    }
+  }
+
+  g->rule_grps = rule_grps;
+
+  return 0;
 }
 
 void
@@ -82,12 +239,61 @@ print_rule(void *r)
   printf("\n");
 }
 
+NUM
+find_in_cpa_list(NUM *string, NUM size, List *list)
+{
+  NUM len = list_length(list), index = 0;
+
+  unsigned char foundp = 0;
+
+  for (; !foundp && index < len;) {
+    cpa *cpap = list_nth(list, index);
+
+    if (cpap->size != (UNUM) size) {
+      index++;
+      continue;
+    }
+
+    foundp = 1;
+
+    for (NUM j = 0; j < size; j++) {
+      if (*(string+j) != *(cpap->array+j)) {
+        foundp = 0;
+        break;
+      }
+    }
+
+    if (!foundp) index++;
+  }
+
+  return (foundp) ? index : -1;
+}
+
 /* a local function pointer */
 void
 print_name(void *element)
 {
-  char *str = (char *) element;
-  printf("%s", str);
+  cpa *array = (cpa *) element;
+  char *carray = MYALLOC(char, 5);
+  str *string = new_str(carray, 5);
+
+  for (UNUM i = 0; i < array->size; i++) {
+    int result = encode(*(array->array+i), string);
+
+    if (result) {
+      fleprintf("%s:%d:%llu, fail to encode\n", i);
+      str_set_length(string, 5);
+      continue;
+    }
+
+    carray = get_data(string);
+    *(carray+str_length(string)) = 0;
+
+    printf("%s", carray);
+    str_set_length(string, 5);
+  }
+
+  destroy_str(string, 1);
 }
 
 /* a local function pointer */
@@ -103,7 +309,12 @@ print_grammar(Grammar *g)
   printf("Printing a grammar:\n");
   map_list_between(g->names, print_name, print_sep);
   printf("\n");
-  map_list(g->rules, print_rule);
+
+  for (int i = 0; i < list_length(g->rule_grps); i++) {
+    print_rule_group(list_nth(g->rule_grps, i));
+    printf("\n");
+  }
+
   printf("\n");
 }
 
@@ -142,6 +353,12 @@ new_tnt_string(char *format, int format_len, ...)
   return result;
 }
 
+TNT *
+new_tnt_pointer(size_t size)
+{
+  return MYALLOC(TNT, size);
+}
+
 void
 destroy_rule(void *rule, int flag)
 {
@@ -168,23 +385,32 @@ destroy_rule_no_free(void *rule)
   destroy_rule(rule, 0);
 }
 
+void
+destroy_cpa_and_free_all(void *element)
+{
+  cpa *c = (cpa *) element;
+  free(c->array);
+  free(c);
+}
+
 void
 destroy_grammar(void *grammar, int flag)
 {
   Grammar *g = (Grammar *) grammar;
 
-  switch (flag) {
-  case 0:
-    map_list(g->rules, destroy_rule_no_free);
-    break;
-  case 1:
-    map_list(g->rules, destroy_rule_and_free_all);
-    break;
-  default:
-    map_list(g->rules, destroy_rule_and_free_first);
-    break;    
-  }
-
-  destroy_list(g->rules, 0);
-  destroy_list(g->names, flag);
+  if (flag == 1)
+    map_list(g->rule_grps, destroy_rule_group_free_all);
+  else if (flag == 2)
+    map_list(g->rule_grps, destroy_rule_group_free_first);
+  
+  destroy_list(g->rule_grps, 0);
+  
+  /* CPA structure has no concept of freeing first.  So if flag is
+     non-zero, we just free everything, and hope no disaster
+     ensues. */
+  if (flag) map_list(g->names, destroy_cpa_and_free_all);
+
+  destroy_list(g->names, 0);
+
+  free(grammar);
 }
diff --git a/src/grammar.h b/src/grammar.h
index 8353dd5..05bc1fc 100644
--- a/src/grammar.h
+++ b/src/grammar.h
@@ -4,6 +4,7 @@
 #include <stdarg.h>
 #include <stdio.h>
 #include "list.h"
+#include "utf8.h"
 
 /* The class file for grammars */
 
@@ -19,6 +20,15 @@
 typedef unsigned long T;
 typedef unsigned NT;
 
+/* T or NT
+
+   Don't worry, it is not explosive. */
+
+struct TNT_s {
+  int type;                     /* 0 => T and NT otherwise */
+  union { T t; NT nt; } data;
+};
+
 typedef struct TNT_s TNT;
 typedef struct Rule_s Rule;
 
@@ -28,16 +38,30 @@ typedef struct Rule_s Rule;
    strings as well. */
 typedef struct Grammar_s Grammar;
 
-/* G is expected to be allocated before this function. */
-void build_grammar(Grammar *g, List *rules, List *names);
+/* G is expected to be allocated before this function.
+
+   NAMES is a list of CPA pointers. */
+unsigned char build_grammar(Grammar *g, List *rules, List *names);
 
 /* This accepts one and only one optional argument, which is of type
    either T or NT, depending on TYPE being 0 or not. */
 TNT *new_tnt(int type, ...);
 
+/* An array of code-points. */
+
+struct cpa_s {
+  NUM *array;
+  UNUM size;
+};
+
+typedef struct cpa_s cpa;
+
+NUM find_in_cpa_list(NUM *string, NUM size, List *list);
+
 /* For debugging purposes, we need to print out rules to examine their
    values. */
 
+/* assume element is a cpa pointer */
 void print_name(void *element);
 
 void print_tnt(void *element);
@@ -56,6 +80,8 @@ void print_grammar(Grammar *g);
    Upon failure NULL is returned. */
 List *new_tnt_string(char *format, int format_len, ...);
 
+TNT *new_tnt_pointer(size_t size);
+
 /* RIGHT should be created by new_tnt_string or something alike.
 
    If RIGHT is NULL, NULL is returned. */
@@ -68,6 +94,15 @@ void destroy_rule(void *rule, int flag);
 void destroy_rule_and_free_all(void *rule);
 void destroy_rule_and_free_first(void *rule);
 void destroy_rule_no_free(void *rule);
+void destroy_cpa_and_free_all(void *element);
 void destroy_grammar(void *grammar, int flag);
 
+/* convenient macro */
+
+#define NEW_CPA(X, ARRAY, SIZE) do {            \
+    X = MYALLOC(cpa, 1);                        \
+    X->array = ARRAY;                           \
+    X->size = SIZE;                             \
+  } while (0)
+
 #endif
diff --git a/src/list.c b/src/list.c
index d823100..143c693 100644
--- a/src/list.c
+++ b/src/list.c
@@ -5,8 +5,8 @@
 
 struct List_s {
   void **array;                 /* the array of elements */
-  NUM len;                   /* the length of the array */
-  NUM capacity;              /* the number of pre-allocated bytes
+  NUM len;                      /* the length of the array */
+  NUM capacity;                 /* the number of pre-allocated bytes
                                    for the array */
 };
 
@@ -183,14 +183,14 @@ list_assure_size(List *ls, NUM size)
 
   if (array == NULL) return 1;
 
-  for (NUM i = 0; i < ls->capacity; i++)
+  for (NUM i = 0; i < ls->len; i++)
     *(array+i) = *(ls->array+i);
 
   ls->array = realloc(ls->array, sizeof(void*) * size);
 
   if (ls->array == NULL) return 1;
 
-  for (NUM i = 0; i < ls->capacity; i++)
+  for (NUM i = 0; i < ls->len; i++)
     *(ls->array+i) = *(array+i);
 
   free(array);
@@ -206,6 +206,8 @@ list_assure_size(List *ls, NUM size)
 unsigned char
 list_set_length(List *ls, NUM len)
 {
+  list_assure_size(ls, len);
+
   if (ls->len >= len) return 0;
 
   NUM *array = MYALLOC(NUM, len - ls->len);
@@ -240,6 +242,17 @@ list_to_array(List *ls, NUM element_bytes, NUM *num)
   return array;
 }
 
+List *
+array_to_list(void **array, NUM size)
+{
+  List *ls = MYALLOC(List, 1);
+
+  ls->array = array;
+  ls->len = ls->capacity = size;
+
+  return ls;
+}
+
 void
 destroy_list(List *ls, unsigned char all_free_p)
 {
@@ -250,8 +263,29 @@ destroy_list(List *ls, unsigned char all_free_p)
   if (all_free_p == 2)
     free(*(ls->array));
 
+  ls->len = (ls->capacity = 0);
+
   free(ls->array);
   free(ls);
+}
 
-  ls->len = (ls->capacity = 0);
+void
+destroy_list_free_all(void *ls)
+{
+  List *list = (List *) ls;
+  destroy_list(list, 1);
+}
+
+void
+destroy_list_free_first(void *ls)
+{
+  List *list = (List *) ls;
+  destroy_list(list, 2);
+}
+
+void
+destroy_list_no_free(void *ls)
+{
+  List *list = (List *) ls;
+  destroy_list(list, 0);
 }
diff --git a/src/list.h b/src/list.h
index b710aa2..ff401f4 100644
--- a/src/list.h
+++ b/src/list.h
@@ -69,10 +69,21 @@ unsigned char list_set_length(List *ls, NUM len);
    The number of elements of the array will be stored in *NUM. */
 void *list_to_array(List *ls, NUM element_bytes, NUM *num);
 
+/* Create a list from an array.
+
+   The array will be re-used, so it will be destroyed when the list is
+   destroyed. */
+List *array_to_list(void ** array, NUM size);
+
+
 /* destroy the list: If ALL_FREE_P is 1, this frees every void
    pointers contained in the list; if it is 2, this frees the first
    pointer.  In any case, the list is de-allocated. */
 void destroy_list(List *ls, unsigned char all_free_p);
 
+void destroy_list_free_all(void *ls);
+void destroy_list_free_first(void *ls);
+void destroy_list_no_free(void *ls);
+
 
 #endif
diff --git a/src/reader.c b/src/reader.c
index 4bbcf5a..1e7c49c 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -1,20 +1,462 @@
+#include "util.h"
+#include "str.h"
 #include "reader.h"
 
-struct Reader_s {
-  reader_func_t func;
-  List *args;                   /* the list of arguments; could be
-                                   NULL. */
-};
+#define COUNT_CPA(S, E, Y, L) {                         \
+    L = 0;                                              \
+    for (NUM macroind = S; macroind < E; L++) {         \
+      str_info macroinfo = get_info(Y, macroind);       \
+      macroind += macroinfo.step;                       \
+    }                                                   \
+  }
 
-void build_reader(Reader *reader, reader_func_t func, List *args)
+#define READ_CPA(X, S, E, Y, L) {                       \
+    L = 0;                                              \
+    for (NUM macroind = S; macroind < E;) {             \
+      str_info macroinfo = get_info(Y, macroind);       \
+      *(X+(L++)) = macroinfo.value;                     \
+      macroind += macroinfo.step;                       \
+    }                                                   \
+  }
+
+/* Read a string of TNT's into TNTS.  TNTS is expected to be already
+   allocated. */
+static void
+read_tnt_string(str *s, NUM start, NUM end, TNT *tnts, List *cpa_list)
 {
-  reader->func = func;
-  reader->args = args;
+  str_info info;
+
+  /* fleprintf("%s:%d, S = %lu, E = %lu\n", start, end); */
+
+  unsigned char readingp = 0, stop = 0;
+
+  NUM *string = NULL, string_size = 0, nt_index = 0, tnt_count = 0;
+
+  NUM strindex = 0;
+  str_info strinfo = EMPTY_STR_INFO;
+
+  for (NUM index = start, last_index = start;
+       index < end && !stop;) {
+    info = get_info(s, index);
+    switch (info.value) {
+    case 10:
+    case 13:
+      /* if a newline is somehow encountered, we stop reading  */
+      fleprintf0("%s:%d, hi\n");
+      stop = 1;
+      break;
+    case 9:
+    case 32:
+      /* space separated */
+      if (readingp) {
+        COUNT_CPA(last_index, index, s, string_size);
+        string = MYALLOC(NUM, string_size+1);
+        READ_CPA(string, last_index, index, s, string_size);
+
+        nt_index = find_in_cpa_list(string, string_size, cpa_list);
+
+        if (nt_index < 0) {
+          fleprintf("%s:%d:%lu, Undefined non-terminal: ", index);
+          print_name(&((cpa) { string, string_size }));
+          printf("\n");
+          free(string);
+          string = NULL;
+          return;
+        }
+
+        free(string);
+        string = NULL;
+
+        *(tnts+tnt_count++) = (TNT) { 1, { .nt = nt_index } };
+
+        readingp = 0;
+      }
+      break;
+    case 34:
+    case 39:
+      /* read a string */
+      strindex = index + info.step;
+      strinfo = get_info(s, strindex);
+
+      for (;strindex < end &&
+             strinfo.value != 34 &&
+             strinfo.value != 39 &&
+             strinfo.value != 10 &&
+             strinfo.value != 13;) {
+
+        if (strinfo.value == 92) {
+          strindex += info.step;
+
+          unsigned char local_exit = 0;
+
+          /* The following errors should not happen.  I am just
+             paranoid. */
+          if (strindex >= end) {
+            local_exit = 1;
+            fleprintf("%s:%d:%lu, Escape end\n", strindex);
+          }
+
+          strinfo = get_info(s, strindex);
+
+          if (strinfo.value == 10 || strinfo.value == 13) {
+            local_exit = 1;
+            fleprintf("%s:%d:%lu, Escape newline not allowed\n",
+                      strindex);
+          }
+
+          /* cleanup */
+          /* no cleanup needed */
+          if (local_exit) return;
+        }
+
+        *(tnts+tnt_count++) = (TNT) { 0, { .t = strinfo.value } };
+
+        strindex += strinfo.step;
+      }
+      break;
+    default:
+      if (!readingp) {
+        last_index = index;
+        readingp = 1;
+      }
+
+      /* if we are at the end, we need to process one additional
+         non-terminal */
+      if (index < end && index + (NUM) info.step >= end) {
+        /* fleprintf0("%s:%d, hello\n"); */
+        COUNT_CPA(last_index, index+(NUM) info.step,
+                  s, string_size);
+        /* fleprintf0("%s:%d, hello\n"); */
+        string = MYALLOC(NUM, string_size+1);
+        READ_CPA(string, last_index, index+(NUM) info.step,
+                 s, string_size);
+        /* fleprintf0("%s:%d, hello\n"); */
+
+        nt_index = find_in_cpa_list(string, string_size, cpa_list);
+        /* fleprintf0("%s:%d, hello\n"); */
+
+        if (nt_index < 0) {
+          fleprintf("%s:%d:%lu, Undefined non-terminal: ", index);
+          print_name(&((cpa) { string, string_size }));
+          printf("\n");
+          eprintf("END = %lu\n", end);
+          free(string);
+          string = NULL;
+          return;
+        }
+
+        free(string);
+        string = NULL;
+
+        *(tnts+tnt_count++) = (TNT) { 1, { .nt = nt_index } };
+      }
+      break;
+    }
+
+    index += info.step;
+  }
+}
+
+static void
+print_int(void *p)
+{
+  printf("%d\n", *((int *)p));
 }
 
-void destroy_reader(Reader *reader, int flag)
+/* Read a grammar from a string in the format of BNF notation. */
+Grammar *
+read_grammar_from_bnf(str *s)
 {
-  destroy_list(reader->args, flag);
-  reader->args = NULL;
-  reader->func = NULL;
+  NUM len = str_length(s);
+
+  /* fleprintf("%s:%d, len = %lu\n", len); */
+
+  /* The first loop collects names of non-terminals */
+
+  List *names = new_list();
+
+  List *line_indices = new_list();
+
+  unsigned char definitionp = 1, readingp = 0;
+
+  /* last index is used to collect strings */
+  for (NUM index = 0, last_index = 0; index < len;) {
+    str_info info = get_info(s, index);
+
+    /* This is used to collect indices of definitions per line. */
+    NUM *line_index = NULL;
+
+    NUM *string = NULL, string_size = 0;
+
+    cpa *cpap = NULL;
+
+    switch (info.value) {
+      /* spaces and tabs are ignored */
+    case ' ':
+    case 9:
+      readingp = 0;
+
+      if (definitionp) {
+        fleprintf("%s:%d:%lu, A non-terminal cannot have spaces in "
+                  "the name\n", index);
+        /* cleanup */
+        map_list(names, destroy_cpa_and_free_all);
+        destroy_list(names, 0);
+        destroy_list(line_indices, 1);
+        return NULL;
+      }
+      break;
+    case '\n':
+    case 13:
+      definitionp = 1;
+      readingp = 0;
+      break;
+    case ':':
+      if (definitionp) {
+        definitionp = 0;
+        readingp = 0;
+        /* read a non-terminal */
+        COUNT_CPA(last_index, index, s, string_size);
+        string = MYALLOC(NUM, string_size+1);
+        READ_CPA(string, last_index, index, s, string_size);
+        NEW_CPA(cpap, string, string_size);
+
+        line_index = MYALLOC(NUM, 1);
+        *line_index = find_in_cpa_list(string, string_size, names);
+
+        if (*line_index < 0) {
+          *line_index = list_length(names);
+          add_to_list(names, cpap);
+        } else {
+          destroy_cpa_and_free_all(cpap);
+          string = NULL;
+        }
+
+        add_to_list(line_indices, line_index);
+        /* fleprintf0("%s:%d, Definition should be over\n"); */
+      }
+      break;
+    default:
+      if (!readingp) {
+        last_index = index;
+        readingp = 1;
+        /* fleprintf0("%s:%d, Start reading\n"); */
+      }
+      break;
+    }
+
+    index += info.step;
+  }
+
+  printf("%s:%d, print indices\n", __FILE__, __LINE__);
+  map_list(line_indices, print_int);
+  printf("\n");
+
+  /* second round collects all the rules */
+
+  List *rules = new_list();
+
+  NUM line_count = 0;
+
+  unsigned char right_start_p = 0;
+
+  definitionp = 1, readingp = 0;
+
+  NUM tnt_count = 0;
+  NT current_nt = 0;
+
+  /* last index is used to collect strings */
+  for (NUM index = 0, last_index = 0; index < len;) {
+    str_info info = get_info(s, index);
+    /* fleprintf("%s:%d, index = %lu, value = %lu\n", index, info.value); */
+
+    TNT *current_tnt_string = NULL;
+
+    void **temp_pointers = NULL;
+
+    switch (info.value) {
+      /* the quote characters */
+    case 34:                    /* double quote */
+    case 39:                    /* single quote */
+      if (definitionp) {
+        fleprintf("%s:%d:%lu, A non-terminal cannot have quotes in "
+                  "the name\n", index);
+        /* cleanup */
+        map_list(names, destroy_cpa_and_free_all);
+        destroy_list(names, 0);
+
+        destroy_list(line_indices, 1);
+
+        map_list(rules, destroy_rule_and_free_first);
+        destroy_list(rules, 0);
+        return NULL;
+      } else {
+        /* read a string of terminals */
+
+        NUM strindex = index + info.step;
+        str_info strinfo = get_info(s, strindex);
+
+        for (;strindex < len &&
+               strinfo.value != 13 &&
+               strinfo.value != 10 &&
+               strinfo.value != 34 &&
+               strinfo.value != 39;) {
+          strinfo = get_info(s, strindex);
+
+          /* escape character */
+          if (strinfo.value == 92) {
+            strindex += strinfo.step;
+            if (strindex < len) strinfo = get_info(s, strindex);
+
+            unsigned char local_error_p = 0;
+
+            if (strindex >= len) {
+              local_error_p = 1;
+              fleprintf("%s:%d:%ld, Escape at the end not allowed\n",
+                        strindex);
+            } else if (strinfo.value == 13 || strinfo.value == 10) {
+              local_error_p = 1;
+              fleprintf("%s:%d:%ld, Escape newline in string not "
+                        "allowed\n", strindex);
+            }
+
+            if (local_error_p) {
+              /* cleanup */
+              map_list(names, destroy_cpa_and_free_all);
+              destroy_list(names, 0);
+
+              destroy_list(line_indices, 1);
+
+              map_list(rules, destroy_rule_and_free_first);
+              destroy_list(rules, 0);
+              return NULL;
+            }
+          }
+          /* count a terminal */
+          tnt_count++;
+
+          strindex += strinfo.step;
+        }
+
+        if (strinfo.value == 13 || strinfo.value == 10) {
+          fleprintf("%s:%d:%ld, Newline encountered before string "
+                    "ended\n", strindex);
+          /* cleanup */
+          map_list(names, destroy_cpa_and_free_all);
+          destroy_list(names, 0);
+
+          destroy_list(line_indices, 1);
+
+          map_list(rules, destroy_rule_and_free_first);
+          destroy_list(rules, 0);
+          return NULL;
+        }
+
+        index = strindex + 1;
+      }
+
+      break;
+      /* spaces and tabs are ignored */
+    case ' ':
+    case 9:
+      if (definitionp) {
+        fleprintf("%s:%d:%lu, A non-terminal cannot have spaces or "
+                  "tabs in the name\n", index);
+        /* cleanup */
+        map_list(names, destroy_cpa_and_free_all);
+        destroy_list(names, 0);
+
+        destroy_list(line_indices, 1);
+
+        map_list(rules, destroy_rule_and_free_first);
+        destroy_list(rules, 0);
+        return NULL;
+      }
+
+      readingp = 0;
+
+      break;
+    case '\n':
+    case 13:
+      definitionp = 1;
+      readingp = 0;
+      line_count++;
+
+      /* fleprintf("%s:%d, tnt count = %lu\n", tnt_count); */
+
+      if (right_start_p) {
+        right_start_p = 0;
+
+        current_tnt_string = new_tnt_pointer(tnt_count);
+        read_tnt_string(s, last_index, index, current_tnt_string,
+                        names);
+
+        printf("%s:%d:%lu, Printing a TNT string: ",
+               __FILE__, __LINE__, index);
+        for (int i = 0; i < (int) tnt_count;)
+          print_tnt(current_tnt_string+i++);
+        printf("\n");
+
+        temp_pointers = MYALLOC(void *, tnt_count);
+
+        for (NUM tempindex = 0; tempindex < tnt_count; tempindex++)
+          *(temp_pointers + tempindex) = current_tnt_string+tempindex;
+
+        add_to_list
+          (rules,
+           new_rule
+           (current_nt, array_to_list(temp_pointers, tnt_count)));
+      } else {
+        /* empty rule */
+        add_to_list(rules, new_rule(current_nt, new_list()));
+      }
+
+      tnt_count = 0;
+
+      break;
+    case ':':
+      definitionp = 0;
+      readingp = 0;
+      right_start_p = 0;
+      /* the non-terminal at this line is cached by line_indices */
+      current_nt = *((NUM *) list_nth(line_indices, line_count));
+      /* fleprintf("%s:%d, current nt = %u, count = %lu\n",
+       *           current_nt, line_count); */
+      break;
+    default:
+      if (!readingp && !definitionp) tnt_count++;
+
+      if (!readingp &&
+          (definitionp || !right_start_p)) {
+        last_index = index;
+        right_start_p = 1;
+      }
+
+      readingp = 1;
+      break;
+    }
+
+    index += info.step;
+  }
+
+  destroy_list(line_indices, 1);
+
+  /* We build the grammar after the string is processed, so as to
+     avoid de-allocating the grammar should errors occur. */
+
+  Grammar *g = new_grammar();
+
+  if (build_grammar(g, rules, names)) {
+    fleprintf0("%s:%d, Failed to build the grammar\n");
+    map_list(names, destroy_cpa_and_free_all);
+    destroy_list(names, 0);
+    map_list(rules, destroy_rule_and_free_first);
+    destroy_list(rules, 0);
+    destroy_grammar(g, 0);
+    return NULL;
+  }
+
+  destroy_list(rules, 1);
+
+  return g;
 }
diff --git a/src/reader.h b/src/reader.h
index 77e7b84..65ee7ef 100644
--- a/src/reader.h
+++ b/src/reader.h
@@ -1,34 +1,10 @@
 #ifndef INPUT_H
 #define INPUT_H
 
+#include "str.h"
+#include "utf8.h"
 #include "grammar.h"
 
-/* The class of a general reader, which can read a grammar, either
-   from a file, or elsewhere, such as from an Emacs buffer, the
-   standard input, or even from some other places.
-
-   The only expectation, or requirement, of such a reader is to
-   provide a function with a variable number and type of arguments
-   which returns a grammar.  Then the package provides a function to
-   properly read a grammar, according to the specifications of the
-   reader. */
-
-typedef Grammar *(*reader_func_t)(List *args);
-
-typedef struct Reader_s Reader;
-
-/* contructor */
-
-/* Build a reader and assign to *reader.  It is expected to be
-   allocated before this function. */
-void build_reader(Reader *reader, reader_func_t func, List *args);
-
-/* destructor */
-
-/* Does not free READER.
-
-   FLAG is passed to destroy_list, which see. */
-void destroy_reader(Reader *reader, int flag);
-
+Grammar *read_grammar_from_bnf(str *s);
 
 #endif
diff --git a/src/str.c b/src/str.c
new file mode 100644
index 0000000..46dc4a8
--- /dev/null
+++ b/src/str.c
@@ -0,0 +1,86 @@
+#include "str.h"
+#include "str_partial.h"
+#include <string.h>
+
+H_ATTR
+str_info
+_info_getter(str *s, UNUM n)
+{
+  return (str_info) { (NUM) *(s->data+n), 1 };
+}
+
+/* H_ATTR
+ * NUM
+ * _lenner(str *s)
+ * {
+ *   return s->size;
+ * } */
+
+str *
+new_str(char *s, NUM size)
+{
+  str *str_pointer = MYALLOC(str, 1);
+
+  str_pointer->size = size;
+  str_pointer->data = s;
+  str_pointer->getter = _info_getter;
+  /* str_pointer->lenner = _lenner; */
+
+  return str_pointer;
+}
+
+void
+destroy_str(str *s, int flag)
+{
+  if (flag) free(s->data);
+
+  s->size = 0;
+
+  free(s);
+}
+
+H_ATTR
+str_info
+get_info(str *s, UNUM n)
+{
+  return s->getter(s, n);
+}
+
+H_ATTR
+NUM
+str_length(str *s)
+{
+  return s->size;
+}
+
+void
+change_str_content(str *s, char *str, NUM size)
+{
+  s->data = str;
+  s->size = size;
+}
+
+unsigned char
+copy_str(str *dest, str *source)
+{
+  if (str_length(dest) < str_length(source))
+    return 1;
+
+  dest->size = source->size;
+
+  memcpy(dest->data, source->data, source->size);
+
+  return 0;
+}
+
+char *
+get_data(str *s)
+{
+  return s->data;
+}
+
+void
+str_set_length(str *s, UNUM size)
+{
+  s->size = size;
+}
diff --git a/src/str.h b/src/str.h
new file mode 100644
index 0000000..39bddf6
--- /dev/null
+++ b/src/str.h
@@ -0,0 +1,35 @@
+#ifndef STR_H
+#define STR_H
+
+#include <util.h>
+
+typedef struct str_s str;
+
+str *new_str(char *s, NUM size);
+
+/* flag non-zero => free the stored character array as well */
+void destroy_str(str *s, int flag);
+
+/* deliberately expose this struct */
+struct str_info_s {
+  NUM value;
+  size_t step;
+};
+
+typedef struct str_info_s str_info;
+
+#define EMPTY_STR_INFO ((str_info) { 0, 1 })
+
+str_info get_info(str *, UNUM);
+
+char *get_data(str *s);
+
+NUM str_length(str *);
+
+void str_set_length(str *s, UNUM size);
+
+void change_str_content(str *, char *, NUM);
+
+unsigned char copy_str(str *dest, str *source);
+
+#endif
diff --git a/src/str_partial.h b/src/str_partial.h
new file mode 100644
index 0000000..9d4fbd6
--- /dev/null
+++ b/src/str_partial.h
@@ -0,0 +1,13 @@
+#include "str.h"
+
+/* This is meant to be extended, and only has minimal necessary
+   fields. */
+
+typedef str_info (*info_getter) (str *, UNUM);
+
+struct str_s {
+  UNUM size;                    /* the size in bytes, not in chars */
+  char *data;                   /* a void pointer is too general */
+  /* polymorphic behaviour */
+  info_getter getter;
+};
diff --git a/src/test.txt b/src/test.txt
new file mode 100644
index 0000000..71e125b
--- /dev/null
+++ b/src/test.txt
@@ -0,0 +1,7 @@
+S: A A C
+A: B B B
+B: A
+A:
+C: B C A
+B: B B
+A: A B A
diff --git a/src/test/check_grammar.c b/src/test/check_grammar.c
index 39f66ee..98a07cd 100644
--- a/src/test/check_grammar.c
+++ b/src/test/check_grammar.c
@@ -37,8 +37,7 @@ main(U_ATTR int argc, U_ATTR char **argv)
   List *rules = new_list();
   List *names = new_list();
 
-
-  for (int i = 0; i < 3; i++) {
+  for (int i = 0; i < 4; i++) {
     tnt_string = new_tnt_string("ntn", 3,
                                 (NT) 3*(i+1),
                                 (T) 3*(i+1)*(i+1),
@@ -48,18 +47,41 @@ main(U_ATTR int argc, U_ATTR char **argv)
       eprintf("i = %d, cannot create tnt string\n", i);
 
       map_list(rules, destroy_rule_and_free_all);
-
       destroy_list(rules, 0);
-      destroy_list(names, 1);
+
+      map_list(names, destroy_cpa_and_free_all);
+      destroy_list(names, 0);
       return 1;
     }
 
-    rule = new_rule(i, tnt_string);
+    rule = new_rule(i%3, tnt_string);
     add_to_list(rules, rule);
 
     char *name = MYALLOC(char, 7);
     snprintf(name, 7, "Rule %d", i);
-    add_to_list(names, name);
+
+    utf8* uname = new_utf8(name, 6);
+
+    str_info info = get_info((str *)uname, 0);
+
+    NUM *varray = MYALLOC(NUM, 6);
+    NUM vindex = 0;
+
+    for (NUM index = 0;
+         info.value >= 0 && index < str_length((str *) uname);
+         index += info.step, vindex++) {
+      info = get_info((str *)uname, index);
+
+      *(varray+vindex) = info.value;
+    }
+
+    destroy_str((str *) uname, 1);
+
+    cpa *cpap = MYALLOC(cpa, 1);
+    cpap->array = varray;
+    cpap->size = vindex;
+
+    add_to_list(names, cpap);
   }
 
   Grammar *g = new_grammar();
@@ -70,7 +92,7 @@ main(U_ATTR int argc, U_ATTR char **argv)
 
   destroy_grammar(g, 1);
 
-  free(g);
+  destroy_list(rules, 1);
 
   return 0;
 }
diff --git a/src/test/check_list.c b/src/test/check_list.c
index 8677cd3..fe23b2b 100644
--- a/src/test/check_list.c
+++ b/src/test/check_list.c
@@ -2,7 +2,7 @@
 #include <stdlib.h>
 #include "../list.h"
 
-void
+static void
 print_int(void *p)
 {
   printf("%d\n", *((int *)p));
@@ -155,6 +155,8 @@ main(U_ATTR int argc, U_ATTR char **argv)
     exit(1);
   }
 
+  eprintf("Successfully created LS2!\n");
+
   for (NUM i = 0; i < 5; i++) {
     int *pointer = malloc(sizeof *pointer * 1);
 
@@ -197,7 +199,7 @@ main(U_ATTR int argc, U_ATTR char **argv)
   }
 
   NUM arr[10];
-  
+
   for (NUM i = 0; i < 10; i++) {
     arr[i] = i*i-i+1;
     if (add_to_list(ls, arr+i)) {
@@ -211,7 +213,28 @@ main(U_ATTR int argc, U_ATTR char **argv)
 
   destroy_list(ls, 0);
 
+  /* test array_to_list */
+  int *arr3 = MYALLOC(int, 20);
+
+  void **arr2 = NULL;
+
+  for (NUM i = 0; i < 20; i++) {
+    *(arr3+i) = i*i+i+1;
+  }
+
+  arr2 = MYALLOC(void *, 20);
+
+  for (NUM i = 0; i < 20; i++) *(arr2+i) = arr3+i;
+
+  ls = array_to_list(arr2, 20);
+
+  printf("Printing a list from an array\n");
+
+  map_list(ls, print_int);
+
+  destroy_list(ls, 2);
+
   printf("Every test is successful!\n");
-  
+
   return 0;
 }
diff --git a/src/test/check_reader.c b/src/test/check_reader.c
index 1d73b3f..8769cd7 100644
--- a/src/test/check_reader.c
+++ b/src/test/check_reader.c
@@ -6,17 +6,29 @@
 #include "../grammar.h"
 #include "../reader.h"
 
-/* The test of reading grammars should be postponed till later. */
-
-/* Grammar *
- * read_grammar(List *args)
- * {
- * 
- * } */
-
-
+/* TODO: check string */
 int
 main(U_ATTR int argc, U_ATTR char **argv)
 {
-  return 77;
+  /* return 77; */
+
+  char *file_name = "test.txt";
+  char *buffer = MYALLOC(char, 512);
+  NUM buffer_size = 0;
+
+  if (read_entire_file(file_name, &buffer, &buffer_size)) {
+    fleprintf("%s:%d, Cannot read file %s", file_name);
+    free(buffer);
+    return 1;
+  }
+
+  utf8 *s = new_utf8(buffer, buffer_size);
+
+  Grammar *g = read_grammar_from_bnf((str *) s);
+
+  print_grammar(g);
+  destroy_grammar(g, 2);
+  destroy_str((str *)s, 1);
+
+  return 0;
 }
diff --git a/src/utf8.c b/src/utf8.c
new file mode 100644
index 0000000..f840600
--- /dev/null
+++ b/src/utf8.c
@@ -0,0 +1,150 @@
+#include "utf8.h"
+#include "str_partial.h"
+#include <stdint.h>
+#include <stdio.h>
+
+struct utf8_s {
+  str s;
+};
+
+/* The classification and the transition table. */
+
+static const
+uint8_t utf8d[] = {
+  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,  9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,  7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+  8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+  10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
+
+  0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
+  12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
+  12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
+  12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
+  12,36,12,12,12,12,12,12,12,12,12,12,
+};
+
+static UTF8_State
+decode(UTF8_State *state, uint32_t *codep, uint8_t byte)
+{
+  uint32_t type = utf8d[byte];
+
+  *codep = (*state != UTF8_STATE_ACCEPT) ?
+    (byte & 0x3fu) | (*codep << 6) :
+    (0xff >> type) & ((uint32_t) byte);
+
+  *state = utf8d[256 + *state + type];
+  return *state;
+}
+
+str_info
+utf8_get_info(str *s, UNUM n)
+{
+  UTF8_State state = UTF8_STATE_ACCEPT;
+  uint32_t code = 0;
+  NUM orig_n = n;
+
+  /* error */
+  if (n >= s->size) return (str_info) { -1, 0 };
+
+  decode(&state, &code, *(s->data+n));
+
+  if (state == UTF8_STATE_ACCEPT)
+    return (str_info) { code, 1 };
+
+  if (state == UTF8_STATE_REJECT)
+    return (str_info) { -1, 1 };
+
+  n++;
+
+  if (n >= s->size) return (str_info) { -1, 1 };
+
+  for (; n < s->size
+         && state > UTF8_STATE_REJECT;
+       n++) {
+    decode(&state, &code, *(s->data+n));
+  }
+
+  if (state == UTF8_STATE_ACCEPT)
+    return (str_info) { code, n - orig_n };
+
+  return (str_info) { -1, n - orig_n - 1};
+}
+
+utf8 *
+new_utf8(char *string, UNUM size)
+{
+  str *s = new_str(string, size);
+
+  s->getter = utf8_get_info;
+
+  return (utf8 *)s;
+}
+  
+/* result should be long enough to hold the data */
+unsigned char
+encode(NUM code_point, str *result)
+{
+  /* calculate the number of bits of CODE_POINT */
+  int number_of_bits = 0;
+
+  for (;code_point >> number_of_bits;) number_of_bits++;
+
+  if (!number_of_bits) {
+    result->size = 0;
+    return 1;
+  }
+
+  if (number_of_bits <= 7) {
+    if (!(result->size)) {
+      eprintf("%s:%d, Result cannot hold the encoded value\n",
+              __FILE__, __LINE__);
+      return 1;
+    }
+
+    result->size = 1;
+    *(result->data) = (char) code_point;
+  } else if (number_of_bits <= 11) {
+    if (result->size < 2) {
+      eprintf("%s:%d, Result cannot hold the encoded value",
+              __FILE__, __LINE__);
+      return 1;
+    }
+
+    result->size = 2;
+    *(result->data) = (char) (0xc0 | (code_point >> 6));
+    *(result->data+1) = (char) (0x80 | (code_point & 0x3f));
+  } else if (number_of_bits <= 16) {
+    if (result->size < 3) {
+      eprintf("%s:%d, Result cannot hold the encoded value",
+              __FILE__, __LINE__);
+      return 1;
+    }
+
+    result->size = 3;
+    *(result->data) = (char) (0xe0 | (code_point >> 12));
+    *(result->data+1) = (char) (0x80 | ((code_point >> 6) & 0x3f));
+    *(result->data+2) = (char) (0x80 | (code_point & 0x3f));
+  } else if (number_of_bits <= 21) {
+    if (result->size < 4) {
+      eprintf("%s:%d, Result cannot hold the encoded value",
+              __FILE__, __LINE__);
+      return 1;
+    }
+
+    result->size = 4;
+    *(result->data) = (char) (0xf0 | (code_point >> 18));
+    *(result->data+1) = (char) (0x80 | ((code_point >> 12) & 0x3f));
+    *(result->data+2) = (char) (0x80 | ((code_point >> 6) & 0x3f));
+    *(result->data+3) = (char) (0x80 | (code_point & 0x3f));
+  } else {
+    eprintf("%s:%d, Invalid code point to encode",
+            __FILE__, __LINE__);
+    return 1;
+  }
+
+  return 0;
+}
diff --git a/src/utf8.h b/src/utf8.h
new file mode 100644
index 0000000..5b3c9f2
--- /dev/null
+++ b/src/utf8.h
@@ -0,0 +1,25 @@
+#ifndef UTF8_H
+#define UTF8_H
+#include "str.h"
+
+typedef struct utf8_s utf8;
+
+enum UTF8_State_e {
+  UTF8_STATE_ACCEPT = 0,
+  UTF8_STATE_REJECT = 12,
+  UTF8_STATE_1 = 24,
+  UTF8_STATE_2 = 36,
+  UTF8_STATE_3 = 48,
+  UTF8_STATE_4 = 60,
+  UTF8_STATE_5 = 72,
+  UTF8_STATE_6 = 84,
+  UTF8_STATE_7 = 96
+};
+
+typedef enum UTF8_State_e UTF8_State;
+
+utf8 *new_utf8(char *string, UNUM size);
+
+unsigned char encode(NUM code_point, str *result);
+
+#endif
diff --git a/src/util.c b/src/util.c
new file mode 100644
index 0000000..e228fbb
--- /dev/null
+++ b/src/util.c
@@ -0,0 +1,49 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include "util.h"
+
+/* Put the entire file content into the string str and the file size
+   into *len.
+
+   If errors occur, return non-zero; otherwise the return value is
+   zero.
+   
+   Note that *str should be already allocated.
+
+   Note that the length of the string str is actually 1+length, and
+   the last character is the null character. */
+unsigned char
+read_entire_file(const char *file_name, char **str, NUM *len)
+{
+  long file_size = 0;
+
+  FILE *file = fopen(file_name, "r");
+
+  if (!file) {
+    eprintf("%s:%d, Cannot open file \"%s\": ",
+            __FILE__, __LINE__, file_name);
+    perror(NULL);
+    return 1;
+  }
+
+  fseek(file, 0, SEEK_END);
+  file_size = ftell(file);
+  fseek(file, 0, SEEK_SET);
+  
+  *len = 1+file_size;
+
+  *str = realloc(*str, 1+file_size);
+  
+  if (*str == NULL) {
+    fleprintf0("%s:%d, Cannot realloc\n");
+    return 1;
+  }
+
+  fread(*str, sizeof(char), file_size, file);
+
+  fclose(file);
+
+  *(*str+file_size) = 0;
+
+  return 0;
+}
diff --git a/src/util.h b/src/util.h
index d657f0e..9120e9b 100644
--- a/src/util.h
+++ b/src/util.h
@@ -28,7 +28,9 @@ typedef unsigned long long UNUM; /* definitely bigger than size_t */
 
 
 #define eprintf(...) fprintf(stderr, __VA_ARGS__)
+#define fleprintf0(M) eprintf(M, __FILE__, __LINE__)
+#define fleprintf(M, ...) eprintf(M, __FILE__, __LINE__, __VA_ARGS__)
 
-
+unsigned char read_entire_file(const char *file_name, char **str, NUM *len);
 
 #endif
-- 
cgit v1.2.3-18-g5258