#include "util.h" #include "str.h" #include "reader.h" #define COUNT_CPA(S, E, Y, L) { \ L = 0; \ for (NUM macroind = S; macroind < E; L++) { \ str_info macroinfo = get_info(Y, macroind); \ macroind += macroinfo.step; \ } \ } #define READ_CPA(X, S, E, Y, L) { \ L = 0; \ for (NUM macroind = S; macroind < E;) { \ str_info macroinfo = get_info(Y, macroind); \ *(X+(L++)) = macroinfo.value; \ macroind += macroinfo.step; \ } \ } /* Read a string of TNT's into TNTS. TNTS is expected to be already allocated. */ static void read_tnt_string(str *s, NUM start, NUM end, TNT *tnts, List *cpa_list) { str_info info; /* fleprintf("%s:%d, S = %lu, E = %lu\n", start, end); */ unsigned char readingp = 0, stop = 0; NUM *string = NULL, string_size = 0, nt_index = 0, tnt_count = 0; NUM strindex = 0; str_info strinfo = EMPTY_STR_INFO; for (NUM index = start, last_index = start; index < end && !stop;) { info = get_info(s, index); switch (info.value) { case 10: case 13: /* if a newline is somehow encountered, we stop reading */ fleprintf0("%s:%d, hi\n"); stop = 1; break; case 9: case 32: /* space separated */ if (readingp) { COUNT_CPA(last_index, index, s, string_size); string = MYALLOC(NUM, string_size+1); READ_CPA(string, last_index, index, s, string_size); nt_index = find_in_cpa_list(string, string_size, cpa_list); if (nt_index < 0) { fleprintf("%s:%d:%lu, Undefined non-terminal: ", index); print_name(&((cpa) { string, string_size })); printf("\n"); free(string); string = NULL; return; } free(string); string = NULL; *(tnts+tnt_count++) = (TNT) { 1, { .nt = nt_index } }; readingp = 0; } break; case 34: case 39: /* read a string */ strindex = index + info.step; strinfo = get_info(s, strindex); for (;strindex < end && strinfo.value != 34 && strinfo.value != 39 && strinfo.value != 10 && strinfo.value != 13;) { if (strinfo.value == 92) { strindex += info.step; unsigned char local_exit = 0; /* The following errors should not happen. I am just paranoid. */ if (strindex >= end) { local_exit = 1; fleprintf("%s:%d:%lu, Escape end\n", strindex); } strinfo = get_info(s, strindex); if (strinfo.value == 10 || strinfo.value == 13) { local_exit = 1; fleprintf("%s:%d:%lu, Escape newline not allowed\n", strindex); } /* cleanup */ /* no cleanup needed */ if (local_exit) return; } *(tnts+tnt_count++) = (TNT) { 0, { .t = strinfo.value } }; strindex += strinfo.step; } break; default: if (!readingp) { last_index = index; readingp = 1; } /* if we are at the end, we need to process one additional non-terminal */ if (index < end && index + (NUM) info.step >= end) { /* fleprintf0("%s:%d, hello\n"); */ COUNT_CPA(last_index, index+(NUM) info.step, s, string_size); /* fleprintf0("%s:%d, hello\n"); */ string = MYALLOC(NUM, string_size+1); READ_CPA(string, last_index, index+(NUM) info.step, s, string_size); /* fleprintf0("%s:%d, hello\n"); */ nt_index = find_in_cpa_list(string, string_size, cpa_list); /* fleprintf0("%s:%d, hello\n"); */ if (nt_index < 0) { fleprintf("%s:%d:%lu, Undefined non-terminal: ", index); print_name(&((cpa) { string, string_size })); printf("\n"); eprintf("END = %lu\n", end); free(string); string = NULL; return; } free(string); string = NULL; *(tnts+tnt_count++) = (TNT) { 1, { .nt = nt_index } }; } break; } index += info.step; } } static void print_int(void *p) { printf("%d\n", *((int *)p)); } /* Read a grammar from a string in the format of BNF notation. */ Grammar * read_grammar_from_bnf(str *s) { NUM len = str_length(s); /* fleprintf("%s:%d, len = %lu\n", len); */ /* The first loop collects names of non-terminals */ List *names = new_list(); List *line_indices = new_list(); unsigned char definitionp = 1, readingp = 0; /* last index is used to collect strings */ for (NUM index = 0, last_index = 0; index < len;) { str_info info = get_info(s, index); /* This is used to collect indices of definitions per line. */ NUM *line_index = NULL; NUM *string = NULL, string_size = 0; cpa *cpap = NULL; switch (info.value) { /* spaces and tabs are ignored */ case ' ': case 9: readingp = 0; if (definitionp) { fleprintf("%s:%d:%lu, A non-terminal cannot have spaces in " "the name\n", index); /* cleanup */ map_list(names, destroy_cpa_and_free_all); destroy_list(names, 0); destroy_list(line_indices, 1); return NULL; } break; case '\n': case 13: definitionp = 1; readingp = 0; break; case ':': if (definitionp) { definitionp = 0; readingp = 0; /* read a non-terminal */ COUNT_CPA(last_index, index, s, string_size); string = MYALLOC(NUM, string_size+1); READ_CPA(string, last_index, index, s, string_size); NEW_CPA(cpap, string, string_size); line_index = MYALLOC(NUM, 1); *line_index = find_in_cpa_list(string, string_size, names); if (*line_index < 0) { *line_index = list_length(names); add_to_list(names, cpap); } else { destroy_cpa_and_free_all(cpap); string = NULL; } add_to_list(line_indices, line_index); /* fleprintf0("%s:%d, Definition should be over\n"); */ } break; default: if (!readingp) { last_index = index; readingp = 1; /* fleprintf0("%s:%d, Start reading\n"); */ } break; } index += info.step; } printf("%s:%d, print indices\n", __FILE__, __LINE__); map_list(line_indices, print_int); printf("\n"); /* second round collects all the rules */ List *rules = new_list(); NUM line_count = 0; unsigned char right_start_p = 0; definitionp = 1, readingp = 0; NUM tnt_count = 0; NT current_nt = 0; /* last index is used to collect strings */ for (NUM index = 0, last_index = 0; index < len;) { str_info info = get_info(s, index); /* fleprintf("%s:%d, index = %lu, value = %lu\n", index, info.value); */ TNT *current_tnt_string = NULL; void **temp_pointers = NULL; switch (info.value) { /* the quote characters */ case 34: /* double quote */ case 39: /* single quote */ if (definitionp) { fleprintf("%s:%d:%lu, A non-terminal cannot have quotes in " "the name\n", index); /* cleanup */ map_list(names, destroy_cpa_and_free_all); destroy_list(names, 0); destroy_list(line_indices, 1); map_list(rules, destroy_rule_and_free_first); destroy_list(rules, 0); return NULL; } else { /* read a string of terminals */ NUM strindex = index + info.step; str_info strinfo = get_info(s, strindex); for (;strindex < len && strinfo.value != 13 && strinfo.value != 10 && strinfo.value != 34 && strinfo.value != 39;) { strinfo = get_info(s, strindex); /* escape character */ if (strinfo.value == 92) { strindex += strinfo.step; if (strindex < len) strinfo = get_info(s, strindex); unsigned char local_error_p = 0; if (strindex >= len) { local_error_p = 1; fleprintf("%s:%d:%ld, Escape at the end not allowed\n", strindex); } else if (strinfo.value == 13 || strinfo.value == 10) { local_error_p = 1; fleprintf("%s:%d:%ld, Escape newline in string not " "allowed\n", strindex); } if (local_error_p) { /* cleanup */ map_list(names, destroy_cpa_and_free_all); destroy_list(names, 0); destroy_list(line_indices, 1); map_list(rules, destroy_rule_and_free_first); destroy_list(rules, 0); return NULL; } } /* count a terminal */ tnt_count++; strindex += strinfo.step; } if (strinfo.value == 13 || strinfo.value == 10) { fleprintf("%s:%d:%ld, Newline encountered before string " "ended\n", strindex); /* cleanup */ map_list(names, destroy_cpa_and_free_all); destroy_list(names, 0); destroy_list(line_indices, 1); map_list(rules, destroy_rule_and_free_first); destroy_list(rules, 0); return NULL; } index = strindex + 1; } break; /* spaces and tabs are ignored */ case ' ': case 9: if (definitionp) { fleprintf("%s:%d:%lu, A non-terminal cannot have spaces or " "tabs in the name\n", index); /* cleanup */ map_list(names, destroy_cpa_and_free_all); destroy_list(names, 0); destroy_list(line_indices, 1); map_list(rules, destroy_rule_and_free_first); destroy_list(rules, 0); return NULL; } readingp = 0; break; case '\n': case 13: definitionp = 1; readingp = 0; line_count++; /* fleprintf("%s:%d, tnt count = %lu\n", tnt_count); */ if (right_start_p) { right_start_p = 0; current_tnt_string = new_tnt_pointer(tnt_count); read_tnt_string(s, last_index, index, current_tnt_string, names); printf("%s:%d:%lu, Printing a TNT string: ", __FILE__, __LINE__, index); for (int i = 0; i < (int) tnt_count;) print_tnt(current_tnt_string+i++); printf("\n"); temp_pointers = MYALLOC(void *, tnt_count); for (NUM tempindex = 0; tempindex < tnt_count; tempindex++) *(temp_pointers + tempindex) = current_tnt_string+tempindex; add_to_list (rules, new_rule (current_nt, array_to_list(temp_pointers, tnt_count))); } else { /* empty rule */ add_to_list(rules, new_rule(current_nt, new_list())); } tnt_count = 0; break; case ':': definitionp = 0; readingp = 0; right_start_p = 0; /* the non-terminal at this line is cached by line_indices */ current_nt = *((NUM *) list_nth(line_indices, line_count)); /* fleprintf("%s:%d, current nt = %u, count = %lu\n", * current_nt, line_count); */ break; default: if (!readingp && !definitionp) tnt_count++; if (!readingp && (definitionp || !right_start_p)) { last_index = index; right_start_p = 1; } readingp = 1; break; } index += info.step; } destroy_list(line_indices, 1); /* We build the grammar after the string is processed, so as to avoid de-allocating the grammar should errors occur. */ Grammar *g = new_grammar(); if (build_grammar(g, rules, names)) { fleprintf0("%s:%d, Failed to build the grammar\n"); map_list(names, destroy_cpa_and_free_all); destroy_list(names, 0); map_list(rules, destroy_rule_and_free_first); destroy_list(rules, 0); destroy_grammar(g, 0); return NULL; } destroy_list(rules, 1); return g; }