From 949888ad5d0cd0f9f9a87f9938a632b3e32df051 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Tue, 4 Jan 2022 20:24:07 +0800 Subject: Fix problems Now some problems are fixed. It can now read grammar correctly (hopefully) from a file, in the BNF format. And strings (terminals) are handled fine as well. So glad that there are no leak problems now. --- src/Makefile.am | 4 +- src/grammar.c | 38 ++++++------ src/reader.c | 154 +++++++++++++++++++++++++++++++++++------------- src/test.txt | 4 +- src/test/check_reader.c | 15 +++-- src/util.c | 4 +- src/util.h | 4 +- 7 files changed, 150 insertions(+), 73 deletions(-) diff --git a/src/Makefile.am b/src/Makefile.am index 5eff3c5..4690876 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -2,9 +2,9 @@ AM_CFLAGS = -Wall -Wextra noinst_LIBRARIES = libeps.a libeps_a_SOURCES = grammar.c list.c util.c reader.c \ -str.c utf8.c \ +str.c utf8.c lr_table.c \ grammar.h list.h util.h reader.h \ -str_partial.h str.h utf8.h +str_partial.h str.h utf8.h lr_table.h libeps_a_CFLAGS = $(AM_CFLAGS) --pedantic diff --git a/src/grammar.c b/src/grammar.c index 118a10e..4052510 100644 --- a/src/grammar.c +++ b/src/grammar.c @@ -67,6 +67,12 @@ destroy_rule_group_free_first(void *rule_grp) destroy_rule_group(rule_grp, 2); } +static void +print_sep() +{ + printf(", "); +} + static void print_rule_group(void *rule_grp) { @@ -75,7 +81,7 @@ print_rule_group(void *rule_grp) for (int i = 0; i < list_length(rg->rights); i++) { List *str = (List *) list_nth(rg->rights, i); printf("Rule %u => ", rg->left); - map_list(str, print_tnt); + map_list_between(str, print_tnt, print_sep); printf("\n"); } } @@ -130,7 +136,7 @@ build_grammar(Grammar *g, List *rules, List *names) an error. 
*/ for (int i = 0; i < rule_len; i++) { if (((Rule *) list_nth(rules, i))->left >= len) { - fleprintf("%s:%d:%d, Rule contains weird non-terminal\n", i); + fleprintf("%d, Rule contains weird non-terminal\n", i); return 1; } } @@ -138,12 +144,12 @@ build_grammar(Grammar *g, List *rules, List *names) List *rule_grps = new_list(); if (rule_grps == NULL) { - fleprintf0("%s:%d, Cannot create list\n"); + fleprintf0("Cannot create list\n"); return 1; } if (list_assure_size(rule_grps, len)) { - fleprintf0("%s:%d, Cannot assure size of rule groups\n"); + fleprintf0("Cannot assure size of rule groups\n"); return 1; } @@ -153,7 +159,7 @@ build_grammar(Grammar *g, List *rules, List *names) /* Initialize the list of rule groups */ for (int i = 0; i < len; i++) { if ((temp_ls = new_list()) == NULL) { - fleprintf("%s:%d:%d, Cannot create list\n", i); + fleprintf("%d, Cannot create list\n", i); map_list(rule_grps, destroy_rule_group_no_free); destroy_list(rule_grps, 0); @@ -163,7 +169,7 @@ build_grammar(Grammar *g, List *rules, List *names) temp_pointer = MYALLOC(Rule_group, 1); if (temp_pointer == NULL) { - fleprintf0("%s:%d, Cannot malloc\n"); + fleprintf0("Cannot malloc\n"); map_list(rule_grps, destroy_rule_group_no_free); destroy_list(rule_grps, 0); destroy_list(temp_ls, 0); @@ -176,7 +182,7 @@ build_grammar(Grammar *g, List *rules, List *names) int result = add_to_list(rule_grps, temp_pointer); if (result) { - fleprintf("%s:%d:%d, Cannot add to list\n", i); + fleprintf("%d, Cannot add to list\n", i); map_list(rule_grps, destroy_rule_group_no_free); destroy_list(rule_grps, 0); @@ -196,7 +202,7 @@ build_grammar(Grammar *g, List *rules, List *names) r->right); if (result) { - fleprintf("%s:%d:%d, Cannot add to list\n", i); + fleprintf("%d, Cannot add to list\n", i); for (int j = 0; j < list_length(rule_grps); j++) { Rule_group *rg = (Rule_group *) list_nth(rule_grps, j); @@ -223,9 +229,9 @@ print_tnt(void *element) TNT *tnt = (TNT*) element; if (tnt->type) - printf("NT %u, ", 
tnt->data.nt); + printf("NT %u", tnt->data.nt); else - printf("T %lu, ", tnt->data.t); + printf("T %lu", tnt->data.t); } void @@ -269,7 +275,6 @@ find_in_cpa_list(NUM *string, NUM size, List *list) return (foundp) ? index : -1; } -/* a local function pointer */ void print_name(void *element) { @@ -281,7 +286,7 @@ print_name(void *element) int result = encode(*(array->array+i), string); if (result) { - fleprintf("%s:%d:%llu, fail to encode\n", i); + fleprintf("%llu, fail to encode\n", i); str_set_length(string, 5); continue; } @@ -296,13 +301,8 @@ print_name(void *element) destroy_str(string, 1); } -/* a local function pointer */ -void -print_sep() -{ - printf(", "); -} - +/* REVIEW: Print the names of non-terminals out, instead of printing + the numbers? */ void print_grammar(Grammar *g) { diff --git a/src/reader.c b/src/reader.c index 1e7c49c..18435a9 100644 --- a/src/reader.c +++ b/src/reader.c @@ -21,12 +21,12 @@ /* Read a string of TNT's into TNTS. TNTS is expected to be already allocated. 
*/ -static void +static unsigned char read_tnt_string(str *s, NUM start, NUM end, TNT *tnts, List *cpa_list) { str_info info; - /* fleprintf("%s:%d, S = %lu, E = %lu\n", start, end); */ + /* fleprintf("S = %lu, E = %lu\n", start, end); */ unsigned char readingp = 0, stop = 0; @@ -42,7 +42,7 @@ read_tnt_string(str *s, NUM start, NUM end, TNT *tnts, List *cpa_list) case 10: case 13: /* if a newline is somehow encountered, we stop reading */ - fleprintf0("%s:%d, hi\n"); + /* fleprintf0("hi\n"); */ stop = 1; break; case 9: @@ -56,12 +56,12 @@ read_tnt_string(str *s, NUM start, NUM end, TNT *tnts, List *cpa_list) nt_index = find_in_cpa_list(string, string_size, cpa_list); if (nt_index < 0) { - fleprintf("%s:%d:%lu, Undefined non-terminal: ", index); + fleprintf("index = %lu, Undefined non-terminal: ", index); print_name(&((cpa) { string, string_size })); printf("\n"); free(string); string = NULL; - return; + return 1; } free(string); @@ -83,6 +83,7 @@ read_tnt_string(str *s, NUM start, NUM end, TNT *tnts, List *cpa_list) strinfo.value != 39 && strinfo.value != 10 && strinfo.value != 13;) { + strinfo = get_info(s, strindex); if (strinfo.value == 92) { strindex += info.step; @@ -93,26 +94,32 @@ read_tnt_string(str *s, NUM start, NUM end, TNT *tnts, List *cpa_list) paranoid. 
*/ if (strindex >= end) { local_exit = 1; - fleprintf("%s:%d:%lu, Escape end\n", strindex); + fleprintf("strindex = %lu, Escape end\n", strindex); } strinfo = get_info(s, strindex); if (strinfo.value == 10 || strinfo.value == 13) { local_exit = 1; - fleprintf("%s:%d:%lu, Escape newline not allowed\n", + fleprintf("strindex = %lu, Escape newline not allowed\n", strindex); } /* cleanup */ /* no cleanup needed */ - if (local_exit) return; + if (local_exit) return 1; } *(tnts+tnt_count++) = (TNT) { 0, { .t = strinfo.value } }; strindex += strinfo.step; } + + strindex -= strinfo.step; + tnt_count--; + + index = strindex; + info = get_info(s, index); break; default: if (!readingp) { @@ -123,26 +130,26 @@ read_tnt_string(str *s, NUM start, NUM end, TNT *tnts, List *cpa_list) /* if we are at the end, we need to process one additional non-terminal */ if (index < end && index + (NUM) info.step >= end) { - /* fleprintf0("%s:%d, hello\n"); */ + /* fleprintf0("hello\n"); */ COUNT_CPA(last_index, index+(NUM) info.step, s, string_size); - /* fleprintf0("%s:%d, hello\n"); */ + /* fleprintf0("hello\n"); */ string = MYALLOC(NUM, string_size+1); READ_CPA(string, last_index, index+(NUM) info.step, s, string_size); - /* fleprintf0("%s:%d, hello\n"); */ + /* fleprintf0("hello\n"); */ nt_index = find_in_cpa_list(string, string_size, cpa_list); - /* fleprintf0("%s:%d, hello\n"); */ + /* fleprintf0("hello\n"); */ if (nt_index < 0) { - fleprintf("%s:%d:%lu, Undefined non-terminal: ", index); + fleprintf("index = %lu, Undefined non-terminal: ", index); print_name(&((cpa) { string, string_size })); printf("\n"); eprintf("END = %lu\n", end); free(string); string = NULL; - return; + return 1; } free(string); @@ -155,6 +162,8 @@ read_tnt_string(str *s, NUM start, NUM end, TNT *tnts, List *cpa_list) index += info.step; } + + return 0; } static void @@ -169,7 +178,7 @@ read_grammar_from_bnf(str *s) { NUM len = str_length(s); - /* fleprintf("%s:%d, len = %lu\n", len); */ + /* fleprintf("len = 
%lu\n", len); */ /* The first loop collects names of non-terminals */ @@ -197,7 +206,7 @@ read_grammar_from_bnf(str *s) readingp = 0; if (definitionp) { - fleprintf("%s:%d:%lu, A non-terminal cannot have spaces in " + fleprintf("%lu, A non-terminal cannot have spaces in " "the name\n", index); /* cleanup */ map_list(names, destroy_cpa_and_free_all); @@ -233,14 +242,14 @@ read_grammar_from_bnf(str *s) } add_to_list(line_indices, line_index); - /* fleprintf0("%s:%d, Definition should be over\n"); */ + /* fleprintf0("Definition should be over\n"); */ } break; default: if (!readingp) { last_index = index; readingp = 1; - /* fleprintf0("%s:%d, Start reading\n"); */ + /* fleprintf0("Start reading\n"); */ } break; } @@ -268,7 +277,7 @@ read_grammar_from_bnf(str *s) /* last index is used to collect strings */ for (NUM index = 0, last_index = 0; index < len;) { str_info info = get_info(s, index); - /* fleprintf("%s:%d, index = %lu, value = %lu\n", index, info.value); */ + /* fleprintf("index = %lu, value = %lu\n", index, info.value); */ TNT *current_tnt_string = NULL; @@ -279,7 +288,7 @@ read_grammar_from_bnf(str *s) case 34: /* double quote */ case 39: /* single quote */ if (definitionp) { - fleprintf("%s:%d:%lu, A non-terminal cannot have quotes in " + fleprintf("%lu, A non-terminal cannot have quotes in " "the name\n", index); /* cleanup */ map_list(names, destroy_cpa_and_free_all); @@ -293,9 +302,31 @@ read_grammar_from_bnf(str *s) } else { /* read a string of terminals */ + if (readingp) { + fleprintf("Name of non-terminal should not contain quotes, " + "index = %lu\n", index); + /* cleanup */ + map_list(names, destroy_cpa_and_free_all); + destroy_list(names, 0); + + destroy_list(line_indices, 1); + + map_list(rules, destroy_rule_and_free_first); + destroy_list(rules, 0); + return NULL; + } + + if (!readingp && !right_start_p) { + last_index = index; + right_start_p = 1; + } + NUM strindex = index + info.step; str_info strinfo = get_info(s, strindex); + /* 
fleprintf("start reading string at %lu, value = %lu\n", + * strindex, strinfo.value); */ + for (;strindex < len && strinfo.value != 13 && strinfo.value != 10 && @@ -312,11 +343,11 @@ read_grammar_from_bnf(str *s) if (strindex >= len) { local_error_p = 1; - fleprintf("%s:%d:%ld, Escape at the end not allowed\n", + fleprintf("%ld, Escape at the end not allowed\n", strindex); } else if (strinfo.value == 13 || strinfo.value == 10) { local_error_p = 1; - fleprintf("%s:%d:%ld, Escape newline in string not " + fleprintf("%ld, Escape newline in string not " "allowed\n", strindex); } @@ -336,23 +367,49 @@ read_grammar_from_bnf(str *s) tnt_count++; strindex += strinfo.step; + /* fleprintf("now strindex = %lu, value = %lu\n", + * strindex, strinfo.value); */ } - if (strinfo.value == 13 || strinfo.value == 10) { - fleprintf("%s:%d:%ld, Newline encountered before string " - "ended\n", strindex); - /* cleanup */ - map_list(names, destroy_cpa_and_free_all); - destroy_list(names, 0); + if (strindex < len) { + strindex -= strinfo.step; + tnt_count--; - destroy_list(line_indices, 1); + index = strindex; - map_list(rules, destroy_rule_and_free_first); - destroy_list(rules, 0); - return NULL; - } + info = get_info(s, index); + /* fleprintf("after reading a string, index = %lu, value = %lu\n", + * index, info.value); */ + + if (strinfo.value == 13 || strinfo.value == 10) { + fleprintf("%ld, Newline encountered before string " + "ended\n", strindex); + /* cleanup */ + map_list(names, destroy_cpa_and_free_all); + destroy_list(names, 0); + + destroy_list(line_indices, 1); - index = strindex + 1; + map_list(rules, destroy_rule_and_free_first); + destroy_list(rules, 0); + return NULL; + } + } else { + if (strinfo.value != 34 && + strinfo.value != 39) { + fleprintf0("input ended before string is left\n"); + /* cleanup */ + map_list(names, destroy_cpa_and_free_all); + destroy_list(names, 0); + + destroy_list(line_indices, 1); + + map_list(rules, destroy_rule_and_free_first); + 
destroy_list(rules, 0); + return NULL; + } + + } } break; @@ -360,7 +417,7 @@ read_grammar_from_bnf(str *s) case ' ': case 9: if (definitionp) { - fleprintf("%s:%d:%lu, A non-terminal cannot have spaces or " + fleprintf("%lu, A non-terminal cannot have spaces or " "tabs in the name\n", index); /* cleanup */ map_list(names, destroy_cpa_and_free_all); @@ -382,19 +439,34 @@ read_grammar_from_bnf(str *s) readingp = 0; line_count++; - /* fleprintf("%s:%d, tnt count = %lu\n", tnt_count); */ + /* fleprintf("tnt count = %lu\n", tnt_count); + * fleprintf("right = %u\n", right_start_p); */ if (right_start_p) { right_start_p = 0; current_tnt_string = new_tnt_pointer(tnt_count); - read_tnt_string(s, last_index, index, current_tnt_string, - names); + if (read_tnt_string + (s, last_index, index, current_tnt_string, + names)) { + /* something went wrong in reading TNT string */ + /* cleanup */ + map_list(names, destroy_cpa_and_free_all); + destroy_list(names, 0); + + destroy_list(line_indices, 1); + + map_list(rules, destroy_rule_and_free_first); + destroy_list(rules, 0); + return NULL; + } printf("%s:%d:%lu, Printing a TNT string: ", __FILE__, __LINE__, index); - for (int i = 0; i < (int) tnt_count;) + for (int i = 0; i < (int) tnt_count;) { print_tnt(current_tnt_string+i++); + if (i