#include "util.h"
#include "str.h"
#include "reader.h"

/* Walk Y with get_info() from index S up to (but excluding) index E
   and store the number of steps taken in L.

   Kept as a plain brace block so that existing call sites of the form
   COUNT_CPA(...); stay valid.  E is re-evaluated on every iteration. */
#define COUNT_CPA(S, E, Y, L) {                                 \
    (L) = 0;                                                    \
    for (NUM macroind = (S); macroind < (E); (L)++) {           \
      str_info macroinfo = get_info((Y), macroind);             \
      macroind += macroinfo.step;                               \
    }                                                           \
  }

/* Walk Y with get_info() from index S up to (but excluding) index E
   and store each value read into consecutive slots of X.  On exit L
   is the number of values stored.  X must already be large enough;
   callers size it with COUNT_CPA first. */
#define READ_CPA(X, S, E, Y, L) {                               \
    (L) = 0;                                                    \
    for (NUM macroind = (S); macroind < (E);) {                 \
      str_info macroinfo = get_info((Y), macroind);             \
      *((X) + ((L)++)) = macroinfo.value;                       \
      macroind += macroinfo.step;                               \
    }                                                           \
  }

/* Look up the name occupying the index range [FROM, TO) of S in
   CPA_LIST and, if it is found, append the matching non-terminal to
   TNTS at position *TNT_COUNT, advancing *TNT_COUNT.

   REPORT_INDEX is only used in the diagnostic printed for an unknown
   name.

   Return 0 on success, 1 if the name is unknown or the temporary
   buffer cannot be allocated.  The temporary buffer is always
   released before returning. */
static BOOL
append_non_terminal(const str * const restrict s, NUM from, NUM to,
                    NUM report_index, TNT *tnts, NUM *tnt_count,
                    List *cpa_list)
{
  NUM *name = NULL, name_size = 0;

  COUNT_CPA(from, to, s, name_size);

  name = MYALLOC(NUM, name_size+1);

  if (name == NULL) {
    fleprintf0("Cannot allocate memory\n");
    return 1;
  }

  READ_CPA(name, from, to, s, name_size);

  NUM nt_index = find_in_cpa_list(name, name_size, cpa_list);

  if (nt_index < 0) {
    fleprintf("index = %lu, Undefined non-terminal: ", report_index);
    print_name(&((cpa) { name, name_size }));
    printf("\n");
    free(name);
    return 1;
  }

  free(name);

  *(tnts + (*tnt_count)++) = (TNT) { 1, { .nt = nt_index } };

  return 0;
}

/* Read a string of TNT's into TNTS.  TNTS is expected to be already
   allocated.

   The index range [START, END) of S is scanned:

   - names separated by spaces or tabs are looked up in CPA_LIST and
     stored as non-terminals;

   - every character between a pair of single or double quotes is
     stored as a terminal, a backslash making the following character
     be taken as is;

   - a newline or carriage return stops the scan.

   Return 0 on success and 1 on error (unknown non-terminal, or an
   escape followed by the end of the range or by a newline). */
static BOOL
read_tnt_string(const str * const restrict s, NUM start, NUM end,
                TNT *tnts, List *cpa_list)
{
  str_info info;

  BOOL readingp = 0, stop = 0, double_quote_p = 0;

  NUM tnt_count = 0;

  NUM strindex = 0;

  str_info strinfo = EMPTY_STR_INFO;

  for (NUM index = start, last_index = start;
       index < end && !stop;) {
    info = get_info(s, index);

    switch (info.value) {
    case 10:
    case 13:
      /* if a newline is somehow encountered, we stop reading */
      stop = 1;
      break;
    case 9:
    case 32:
      /* A space or tab ends the name being read, if any. */
      if (readingp) {
        if (append_non_terminal(s, last_index, index, index,
                                tnts, &tnt_count, cpa_list))
          return 1;

        readingp = 0;
      }
      break;
    case 34:
      double_quote_p = 1;
      /* fallthrough */
    case 39: {
      /* read a string */
      const int closing_quote = double_quote_p ? 34 : 39;

      strindex = index + info.step;
      strinfo = get_info(s, strindex);

      /* Each iteration stores the character at STRINDEX, so the
         closing quote (or newline) that ends the loop is stored as
         well; this is undone right after the loop. */
      while (strindex < end &&
             strinfo.value != closing_quote &&
             strinfo.value != 10 && strinfo.value != 13) {
        strinfo = get_info(s, strindex);

        if (strinfo.value == 92) {
          /* Skip the backslash by its own width, as
             read_grammar_from_bnf does when counting. */
          strindex += strinfo.step;

          /* The following errors should not happen.  I am just
             paranoid. */

          /* Bail out before reading beyond the range. */
          if (strindex >= end) {
            fleprintf("strindex = %lu, Escape end\n", strindex);
            return 1;
          }

          strinfo = get_info(s, strindex);

          if (strinfo.value == 10 || strinfo.value == 13) {
            fleprintf("strindex = %lu, Escape newline not allowed\n",
                      strindex);
            return 1;
          }
        }

        *(tnts+tnt_count++) = (TNT) { 0, { .t = strinfo.value } };
        strindex += strinfo.step;
      }

      /* Drop the terminator stored by the last iteration and step
         back onto it, so that the increment at the bottom of the
         outer loop moves past it.

         NOTE(review): this undo step also runs when the loop body
         never ran (an empty literal) and when the loop ended because
         an escaped character equals the closing quote, so both cases
         look mishandled.  read_grammar_from_bnf counts terminals with
         the same scheme to size TNTS, hence the two places have to be
         changed together. */
      strindex -= strinfo.step;
      tnt_count--;

      double_quote_p = 0;
      index = strindex;
      info = get_info(s, index);
      break;
    }
    default:
      if (!readingp) {
        last_index = index;
        readingp = 1;
      }

      /* if we are at the end, we need to process one additional
         non-terminal */
      if (index + (NUM) info.step >= end) {
        if (append_non_terminal(s, last_index, index + (NUM) info.step,
                                index, tnts, &tnt_count, cpa_list)) {
          eprintf("END = %lu\n", end);
          return 1;
        }
      }
      break;
    }

    index += info.step;
  }

  return 0;
}

/* Print the int that P points to, followed by a newline.  The only
   reference visible nearby is a commented-out debugging call to
   map_list. */
UNUSED
static void
print_int(void *p)
{
  printf("%d\n", *((int *)p));
}

/* Read a grammar from a string in the format of BNF notation.
*/ Grammar * read_grammar_from_bnf(const str * const restrict s) { NUM len = str_length(s); /* fleprintf("len = %lu\n", len); */ /* The first loop collects names of non-terminals */ List *names = new_list(); List *line_indices = new_list(); BOOL definitionp = 1, readingp = 0; /* last index is used to collect strings */ for (NUM index = 0, last_index = 0; index < len;) { str_info info = get_info(s, index); /* This is used to collect indices of definitions per line. */ NUM *line_index = NULL; NUM *string = NULL, string_size = 0; cpa *cpap = NULL; switch (info.value) { /* spaces and tabs are ignored */ case ' ': case 9: readingp = 0; if (definitionp && readingp) { fleprintf("%lu, A non-terminal cannot have spaces in " "the name\n", index); /* cleanup */ map_list(names, destroy_cpa_and_free_all); destroy_list(names, 0); destroy_list(line_indices, 1); return NULL; } break; case '\n': case 13: definitionp = 1; readingp = 0; break; case ':': if (definitionp) { definitionp = 0; readingp = 0; /* read a non-terminal */ COUNT_CPA(last_index, index, s, string_size); string = MYALLOC(NUM, string_size+1); READ_CPA(string, last_index, index, s, string_size); NEW_CPA(cpap, string, string_size); line_index = MYALLOC(NUM, 1); *line_index = find_in_cpa_list(string, string_size, names); if (*line_index < 0) { *line_index = list_length(names); add_to_list(names, cpap); } else { destroy_cpa_and_free_all(cpap); string = NULL; } add_to_list(line_indices, line_index); /* fleprintf0("Definition should be over\n"); */ } break; default: if (!readingp) { last_index = index; readingp = 1; /* fleprintf0("Start reading\n"); */ } break; } index += info.step; } /* printf("%s:%d, print indices\n", __FILE__, __LINE__); * map_list(line_indices, print_int); * printf("\n"); */ /* second round collects all the rules */ List *rules = new_list(); NUM line_count = 0; BOOL right_start_p = 0, double_quote_p = 0; definitionp = 1, readingp = 0; NUM tnt_count = 0; NT current_nt = 0; /* last index is used to 
collect strings */ for (NUM index = 0, last_index = 0; index < len;) { str_info info = get_info(s, index); /* fleprintf("index = %lu, value = %lu\n", index, info.value); */ TNT *current_tnt_string = NULL; void **temp_pointers = NULL; switch (info.value) { /* the quote characters */ case 34: /* double quote */ double_quote_p = 1; case 39: /* single quote */ if (definitionp) { fleprintf("%lu, A non-terminal cannot have quotes in " "the name\n", index); /* cleanup */ map_list(names, destroy_cpa_and_free_all); destroy_list(names, 0); destroy_list(line_indices, 1); map_list(rules, destroy_rule_and_free_first); destroy_list(rules, 0); return NULL; } else { /* read a string of terminals */ if (readingp) { fleprintf("Name of non-terminal should not contain quotes, " "index = %lu\n", index); /* cleanup */ map_list(names, destroy_cpa_and_free_all); destroy_list(names, 0); destroy_list(line_indices, 1); map_list(rules, destroy_rule_and_free_first); destroy_list(rules, 0); return NULL; } if (!readingp && !right_start_p) { last_index = index; right_start_p = 1; } NUM strindex = index + info.step; str_info strinfo = get_info(s, strindex); /* fleprintf("start reading string at %lu, value = %lu\n", * strindex, strinfo.value); */ for (;strindex < len && strinfo.value != 13 && strinfo.value != 10 && ((double_quote_p && strinfo.value != 34) || (!double_quote_p && strinfo.value != 39));) { strinfo = get_info(s, strindex); /* escape character */ if (strinfo.value == 92) { strindex += strinfo.step; if (strindex < len) strinfo = get_info(s, strindex); BOOL local_error_p = 0; if (strindex >= len) { local_error_p = 1; fleprintf("%ld, Escape at the end not allowed\n", strindex); } else if (strinfo.value == 13 || strinfo.value == 10) { local_error_p = 1; fleprintf("%ld, Escape newline in string not " "allowed\n", strindex); } if (local_error_p) { /* cleanup */ map_list(names, destroy_cpa_and_free_all); destroy_list(names, 0); destroy_list(line_indices, 1); map_list(rules, 
destroy_rule_and_free_first); destroy_list(rules, 0); return NULL; } } /* count a terminal */ tnt_count++; strindex += strinfo.step; /* fleprintf("now strindex = %lu, value = %lu\n", * strindex, strinfo.value); */ } if (strindex < len) { strindex -= strinfo.step; tnt_count--; index = strindex; info = get_info(s, index); /* fleprintf("after reading a string, index = %lu, value = %lu\n", * index, info.value); */ if (strinfo.value == 13 || strinfo.value == 10) { fleprintf("%ld, Newline encountered before string " "ended\n", strindex); /* cleanup */ map_list(names, destroy_cpa_and_free_all); destroy_list(names, 0); destroy_list(line_indices, 1); map_list(rules, destroy_rule_and_free_first); destroy_list(rules, 0); return NULL; } } else { if ((double_quote_p && strinfo.value != 34) || (!double_quote_p && strinfo.value != 39)) { fleprintf0("input ended before string is left\n"); /* cleanup */ map_list(names, destroy_cpa_and_free_all); destroy_list(names, 0); destroy_list(line_indices, 1); map_list(rules, destroy_rule_and_free_first); destroy_list(rules, 0); return NULL; } } } double_quote_p = 0; break; /* spaces and tabs are ignored */ case ' ': case 9: if (definitionp && readingp) { fleprintf("%lu, A non-terminal cannot have spaces or " "tabs in the name\n", index); /* cleanup */ map_list(names, destroy_cpa_and_free_all); destroy_list(names, 0); destroy_list(line_indices, 1); map_list(rules, destroy_rule_and_free_first); destroy_list(rules, 0); return NULL; } readingp = 0; break; case '\n': case 13: if (definitionp && !readingp) { /* empty line */ break; } definitionp = 1; readingp = 0; line_count++; /* fleprintf("tnt count = %lu\n", tnt_count); * fleprintf("right = %u\n", right_start_p); */ if (right_start_p) { right_start_p = 0; current_tnt_string = new_tnt_pointer(tnt_count); if (read_tnt_string (s, last_index, index, current_tnt_string, names)) { /* something went wrong in readnig TNT string */ /* cleanup */ map_list(names, destroy_cpa_and_free_all); 
destroy_list(names, 0); destroy_list(line_indices, 1); map_list(rules, destroy_rule_and_free_first); destroy_list(rules, 0); return NULL; } /* printf("%s:%d:%lu, Printing a TNT string: ", * __FILE__, __LINE__, index); */ /* for (int i = 0; i < (int) tnt_count;) { * print_tnt(current_tnt_string+i++); * if (i