summaryrefslogtreecommitdiff
path: root/src/reader.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/reader.c')
-rw-r--r--src/reader.c466
1 files changed, 454 insertions, 12 deletions
diff --git a/src/reader.c b/src/reader.c
index 4bbcf5a..1e7c49c 100644
--- a/src/reader.c
+++ b/src/reader.c
@@ -1,20 +1,462 @@
+#include "util.h"
+#include "str.h"
#include "reader.h"
-struct Reader_s {
- reader_func_t func;
- List *args; /* the list of arguments; could be
- NULL. */
-};
+#define COUNT_CPA(S, E, Y, L) { \
+ L = 0; \
+ for (NUM macroind = S; macroind < E; L++) { \
+ str_info macroinfo = get_info(Y, macroind); \
+ macroind += macroinfo.step; \
+ } \
+ }
-void build_reader(Reader *reader, reader_func_t func, List *args)
+#define READ_CPA(X, S, E, Y, L) { \
+ L = 0; \
+ for (NUM macroind = S; macroind < E;) { \
+ str_info macroinfo = get_info(Y, macroind); \
+ *(X+(L++)) = macroinfo.value; \
+ macroind += macroinfo.step; \
+ } \
+ }
+
+/* Read a string of TNT's into TNTS. TNTS is expected to be already
+ allocated. */
+static void
+read_tnt_string(str *s, NUM start, NUM end, TNT *tnts, List *cpa_list)
{
- reader->func = func;
- reader->args = args;
+ str_info info;
+
+ /* fleprintf("%s:%d, S = %lu, E = %lu\n", start, end); */
+
+ unsigned char readingp = 0, stop = 0;
+
+ NUM *string = NULL, string_size = 0, nt_index = 0, tnt_count = 0;
+
+ NUM strindex = 0;
+ str_info strinfo = EMPTY_STR_INFO;
+
+ for (NUM index = start, last_index = start;
+ index < end && !stop;) {
+ info = get_info(s, index);
+ switch (info.value) {
+ case 10:
+ case 13:
+ /* if a newline is somehow encountered, we stop reading */
+ fleprintf0("%s:%d, hi\n");
+ stop = 1;
+ break;
+ case 9:
+ case 32:
+ /* space separated */
+ if (readingp) {
+ COUNT_CPA(last_index, index, s, string_size);
+ string = MYALLOC(NUM, string_size+1);
+ READ_CPA(string, last_index, index, s, string_size);
+
+ nt_index = find_in_cpa_list(string, string_size, cpa_list);
+
+ if (nt_index < 0) {
+ fleprintf("%s:%d:%lu, Undefined non-terminal: ", index);
+ print_name(&((cpa) { string, string_size }));
+ printf("\n");
+ free(string);
+ string = NULL;
+ return;
+ }
+
+ free(string);
+ string = NULL;
+
+ *(tnts+tnt_count++) = (TNT) { 1, { .nt = nt_index } };
+
+ readingp = 0;
+ }
+ break;
+ case 34:
+ case 39:
+ /* read a string */
+ strindex = index + info.step;
+ strinfo = get_info(s, strindex);
+
+ for (;strindex < end &&
+ strinfo.value != 34 &&
+ strinfo.value != 39 &&
+ strinfo.value != 10 &&
+ strinfo.value != 13;) {
+
+ if (strinfo.value == 92) {
+ strindex += info.step;
+
+ unsigned char local_exit = 0;
+
+ /* The following errors should not happen. I am just
+ paranoid. */
+ if (strindex >= end) {
+ local_exit = 1;
+ fleprintf("%s:%d:%lu, Escape end\n", strindex);
+ }
+
+ strinfo = get_info(s, strindex);
+
+ if (strinfo.value == 10 || strinfo.value == 13) {
+ local_exit = 1;
+ fleprintf("%s:%d:%lu, Escape newline not allowed\n",
+ strindex);
+ }
+
+ /* cleanup */
+ /* no cleanup needed */
+ if (local_exit) return;
+ }
+
+ *(tnts+tnt_count++) = (TNT) { 0, { .t = strinfo.value } };
+
+ strindex += strinfo.step;
+ }
+ break;
+ default:
+ if (!readingp) {
+ last_index = index;
+ readingp = 1;
+ }
+
+ /* if we are at the end, we need to process one additional
+ non-terminal */
+ if (index < end && index + (NUM) info.step >= end) {
+ /* fleprintf0("%s:%d, hello\n"); */
+ COUNT_CPA(last_index, index+(NUM) info.step,
+ s, string_size);
+ /* fleprintf0("%s:%d, hello\n"); */
+ string = MYALLOC(NUM, string_size+1);
+ READ_CPA(string, last_index, index+(NUM) info.step,
+ s, string_size);
+ /* fleprintf0("%s:%d, hello\n"); */
+
+ nt_index = find_in_cpa_list(string, string_size, cpa_list);
+ /* fleprintf0("%s:%d, hello\n"); */
+
+ if (nt_index < 0) {
+ fleprintf("%s:%d:%lu, Undefined non-terminal: ", index);
+ print_name(&((cpa) { string, string_size }));
+ printf("\n");
+ eprintf("END = %lu\n", end);
+ free(string);
+ string = NULL;
+ return;
+ }
+
+ free(string);
+ string = NULL;
+
+ *(tnts+tnt_count++) = (TNT) { 1, { .nt = nt_index } };
+ }
+ break;
+ }
+
+ index += info.step;
+ }
+}
+
+static void
+print_int(void *p)
+{
+ printf("%d\n", *((int *)p));
}
-void destroy_reader(Reader *reader, int flag)
+/* Read a grammar from a string in the format of BNF notation. */
+Grammar *
+read_grammar_from_bnf(str *s)
{
- destroy_list(reader->args, flag);
- reader->args = NULL;
- reader->func = NULL;
+ NUM len = str_length(s);
+
+ /* fleprintf("%s:%d, len = %lu\n", len); */
+
+ /* The first loop collects names of non-terminals */
+
+ List *names = new_list();
+
+ List *line_indices = new_list();
+
+ unsigned char definitionp = 1, readingp = 0;
+
+ /* last index is used to collect strings */
+ for (NUM index = 0, last_index = 0; index < len;) {
+ str_info info = get_info(s, index);
+
+ /* This is used to collect indices of definitions per line. */
+ NUM *line_index = NULL;
+
+ NUM *string = NULL, string_size = 0;
+
+ cpa *cpap = NULL;
+
+ switch (info.value) {
+ /* spaces and tabs are ignored */
+ case ' ':
+ case 9:
+ readingp = 0;
+
+ if (definitionp) {
+ fleprintf("%s:%d:%lu, A non-terminal cannot have spaces in "
+ "the name\n", index);
+ /* cleanup */
+ map_list(names, destroy_cpa_and_free_all);
+ destroy_list(names, 0);
+ destroy_list(line_indices, 1);
+ return NULL;
+ }
+ break;
+ case '\n':
+ case 13:
+ definitionp = 1;
+ readingp = 0;
+ break;
+ case ':':
+ if (definitionp) {
+ definitionp = 0;
+ readingp = 0;
+ /* read a non-terminal */
+ COUNT_CPA(last_index, index, s, string_size);
+ string = MYALLOC(NUM, string_size+1);
+ READ_CPA(string, last_index, index, s, string_size);
+ NEW_CPA(cpap, string, string_size);
+
+ line_index = MYALLOC(NUM, 1);
+ *line_index = find_in_cpa_list(string, string_size, names);
+
+ if (*line_index < 0) {
+ *line_index = list_length(names);
+ add_to_list(names, cpap);
+ } else {
+ destroy_cpa_and_free_all(cpap);
+ string = NULL;
+ }
+
+ add_to_list(line_indices, line_index);
+ /* fleprintf0("%s:%d, Definition should be over\n"); */
+ }
+ break;
+ default:
+ if (!readingp) {
+ last_index = index;
+ readingp = 1;
+ /* fleprintf0("%s:%d, Start reading\n"); */
+ }
+ break;
+ }
+
+ index += info.step;
+ }
+
+ printf("%s:%d, print indices\n", __FILE__, __LINE__);
+ map_list(line_indices, print_int);
+ printf("\n");
+
+ /* second round collects all the rules */
+
+ List *rules = new_list();
+
+ NUM line_count = 0;
+
+ unsigned char right_start_p = 0;
+
+ definitionp = 1, readingp = 0;
+
+ NUM tnt_count = 0;
+ NT current_nt = 0;
+
+ /* last index is used to collect strings */
+ for (NUM index = 0, last_index = 0; index < len;) {
+ str_info info = get_info(s, index);
+ /* fleprintf("%s:%d, index = %lu, value = %lu\n", index, info.value); */
+
+ TNT *current_tnt_string = NULL;
+
+ void **temp_pointers = NULL;
+
+ switch (info.value) {
+ /* the quote characters */
+ case 34: /* double quote */
+ case 39: /* single quote */
+ if (definitionp) {
+ fleprintf("%s:%d:%lu, A non-terminal cannot have quotes in "
+ "the name\n", index);
+ /* cleanup */
+ map_list(names, destroy_cpa_and_free_all);
+ destroy_list(names, 0);
+
+ destroy_list(line_indices, 1);
+
+ map_list(rules, destroy_rule_and_free_first);
+ destroy_list(rules, 0);
+ return NULL;
+ } else {
+ /* read a string of terminals */
+
+ NUM strindex = index + info.step;
+ str_info strinfo = get_info(s, strindex);
+
+ for (;strindex < len &&
+ strinfo.value != 13 &&
+ strinfo.value != 10 &&
+ strinfo.value != 34 &&
+ strinfo.value != 39;) {
+ strinfo = get_info(s, strindex);
+
+ /* escape character */
+ if (strinfo.value == 92) {
+ strindex += strinfo.step;
+ if (strindex < len) strinfo = get_info(s, strindex);
+
+ unsigned char local_error_p = 0;
+
+ if (strindex >= len) {
+ local_error_p = 1;
+ fleprintf("%s:%d:%ld, Escape at the end not allowed\n",
+ strindex);
+ } else if (strinfo.value == 13 || strinfo.value == 10) {
+ local_error_p = 1;
+ fleprintf("%s:%d:%ld, Escape newline in string not "
+ "allowed\n", strindex);
+ }
+
+ if (local_error_p) {
+ /* cleanup */
+ map_list(names, destroy_cpa_and_free_all);
+ destroy_list(names, 0);
+
+ destroy_list(line_indices, 1);
+
+ map_list(rules, destroy_rule_and_free_first);
+ destroy_list(rules, 0);
+ return NULL;
+ }
+ }
+ /* count a terminal */
+ tnt_count++;
+
+ strindex += strinfo.step;
+ }
+
+ if (strinfo.value == 13 || strinfo.value == 10) {
+ fleprintf("%s:%d:%ld, Newline encountered before string "
+ "ended\n", strindex);
+ /* cleanup */
+ map_list(names, destroy_cpa_and_free_all);
+ destroy_list(names, 0);
+
+ destroy_list(line_indices, 1);
+
+ map_list(rules, destroy_rule_and_free_first);
+ destroy_list(rules, 0);
+ return NULL;
+ }
+
+ index = strindex + 1;
+ }
+
+ break;
+ /* spaces and tabs are ignored */
+ case ' ':
+ case 9:
+ if (definitionp) {
+ fleprintf("%s:%d:%lu, A non-terminal cannot have spaces or "
+ "tabs in the name\n", index);
+ /* cleanup */
+ map_list(names, destroy_cpa_and_free_all);
+ destroy_list(names, 0);
+
+ destroy_list(line_indices, 1);
+
+ map_list(rules, destroy_rule_and_free_first);
+ destroy_list(rules, 0);
+ return NULL;
+ }
+
+ readingp = 0;
+
+ break;
+ case '\n':
+ case 13:
+ definitionp = 1;
+ readingp = 0;
+ line_count++;
+
+ /* fleprintf("%s:%d, tnt count = %lu\n", tnt_count); */
+
+ if (right_start_p) {
+ right_start_p = 0;
+
+ current_tnt_string = new_tnt_pointer(tnt_count);
+ read_tnt_string(s, last_index, index, current_tnt_string,
+ names);
+
+ printf("%s:%d:%lu, Printing a TNT string: ",
+ __FILE__, __LINE__, index);
+ for (int i = 0; i < (int) tnt_count;)
+ print_tnt(current_tnt_string+i++);
+ printf("\n");
+
+ temp_pointers = MYALLOC(void *, tnt_count);
+
+ for (NUM tempindex = 0; tempindex < tnt_count; tempindex++)
+ *(temp_pointers + tempindex) = current_tnt_string+tempindex;
+
+ add_to_list
+ (rules,
+ new_rule
+ (current_nt, array_to_list(temp_pointers, tnt_count)));
+ } else {
+ /* empty rule */
+ add_to_list(rules, new_rule(current_nt, new_list()));
+ }
+
+ tnt_count = 0;
+
+ break;
+ case ':':
+ definitionp = 0;
+ readingp = 0;
+ right_start_p = 0;
+ /* the non-terminal at this line is cached by line_indices */
+ current_nt = *((NUM *) list_nth(line_indices, line_count));
+ /* fleprintf("%s:%d, current nt = %u, count = %lu\n",
+ * current_nt, line_count); */
+ break;
+ default:
+ if (!readingp && !definitionp) tnt_count++;
+
+ if (!readingp &&
+ (definitionp || !right_start_p)) {
+ last_index = index;
+ right_start_p = 1;
+ }
+
+ readingp = 1;
+ break;
+ }
+
+ index += info.step;
+ }
+
+ destroy_list(line_indices, 1);
+
+ /* We build the grammar after the string is processed, so as to
+ avoid de-allocating the grammar should errors occur. */
+
+ Grammar *g = new_grammar();
+
+ if (build_grammar(g, rules, names)) {
+ fleprintf0("%s:%d, Failed to build the grammar\n");
+ map_list(names, destroy_cpa_and_free_all);
+ destroy_list(names, 0);
+ map_list(rules, destroy_rule_and_free_first);
+ destroy_list(rules, 0);
+ destroy_grammar(g, 0);
+ return NULL;
+ }
+
+ destroy_list(rules, 1);
+
+ return g;
}