diff options
author | JSDurand <mmemmew@gmail.com> | 2022-01-04 20:24:07 +0800 |
---|---|---|
committer | JSDurand <mmemmew@gmail.com> | 2022-01-04 20:24:07 +0800 |
commit | 949888ad5d0cd0f9f9a87f9938a632b3e32df051 (patch) | |
tree | ae9155484a3a5b4124c6d3fae14ada9d718c8579 /src | |
parent | 55dc897da6e81f2a26cfc7e66ac942824773498b (diff) |
Fix problems
Some problems are now fixed.
It can now (hopefully) read a grammar correctly from a file in the BNF
format, and strings (terminals) are handled correctly as well. So glad
that there are no leak problems now.
Diffstat (limited to 'src')
-rw-r--r-- | src/Makefile.am | 4 | ||||
-rw-r--r-- | src/grammar.c | 38 | ||||
-rw-r--r-- | src/reader.c | 154 | ||||
-rw-r--r-- | src/test.txt | 4 | ||||
-rw-r--r-- | src/test/check_reader.c | 15 | ||||
-rw-r--r-- | src/util.c | 4 | ||||
-rw-r--r-- | src/util.h | 4 |
7 files changed, 150 insertions, 73 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index 5eff3c5..4690876 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -2,9 +2,9 @@ AM_CFLAGS = -Wall -Wextra noinst_LIBRARIES = libeps.a libeps_a_SOURCES = grammar.c list.c util.c reader.c \ -str.c utf8.c \ +str.c utf8.c lr_table.c \ grammar.h list.h util.h reader.h \ -str_partial.h str.h utf8.h +str_partial.h str.h utf8.h lr_table.h libeps_a_CFLAGS = $(AM_CFLAGS) --pedantic diff --git a/src/grammar.c b/src/grammar.c index 118a10e..4052510 100644 --- a/src/grammar.c +++ b/src/grammar.c @@ -68,6 +68,12 @@ destroy_rule_group_free_first(void *rule_grp) } static void +print_sep() +{ + printf(", "); +} + +static void print_rule_group(void *rule_grp) { Rule_group *rg = (Rule_group *) rule_grp; @@ -75,7 +81,7 @@ print_rule_group(void *rule_grp) for (int i = 0; i < list_length(rg->rights); i++) { List *str = (List *) list_nth(rg->rights, i); printf("Rule %u => ", rg->left); - map_list(str, print_tnt); + map_list_between(str, print_tnt, print_sep); printf("\n"); } } @@ -130,7 +136,7 @@ build_grammar(Grammar *g, List *rules, List *names) an error. 
*/ for (int i = 0; i < rule_len; i++) { if (((Rule *) list_nth(rules, i))->left >= len) { - fleprintf("%s:%d:%d, Rule contains weird non-terminal\n", i); + fleprintf("%d, Rule contains weird non-terminal\n", i); return 1; } } @@ -138,12 +144,12 @@ build_grammar(Grammar *g, List *rules, List *names) List *rule_grps = new_list(); if (rule_grps == NULL) { - fleprintf0("%s:%d, Cannot create list\n"); + fleprintf0("Cannot create list\n"); return 1; } if (list_assure_size(rule_grps, len)) { - fleprintf0("%s:%d, Cannot assure size of rule groups\n"); + fleprintf0("Cannot assure size of rule groups\n"); return 1; } @@ -153,7 +159,7 @@ build_grammar(Grammar *g, List *rules, List *names) /* Initialize the list of rule groups */ for (int i = 0; i < len; i++) { if ((temp_ls = new_list()) == NULL) { - fleprintf("%s:%d:%d, Cannot create list\n", i); + fleprintf("%d, Cannot create list\n", i); map_list(rule_grps, destroy_rule_group_no_free); destroy_list(rule_grps, 0); @@ -163,7 +169,7 @@ build_grammar(Grammar *g, List *rules, List *names) temp_pointer = MYALLOC(Rule_group, 1); if (temp_pointer == NULL) { - fleprintf0("%s:%d, Cannot malloc\n"); + fleprintf0("Cannot malloc\n"); map_list(rule_grps, destroy_rule_group_no_free); destroy_list(rule_grps, 0); destroy_list(temp_ls, 0); @@ -176,7 +182,7 @@ build_grammar(Grammar *g, List *rules, List *names) int result = add_to_list(rule_grps, temp_pointer); if (result) { - fleprintf("%s:%d:%d, Cannot add to list\n", i); + fleprintf("%d, Cannot add to list\n", i); map_list(rule_grps, destroy_rule_group_no_free); destroy_list(rule_grps, 0); @@ -196,7 +202,7 @@ build_grammar(Grammar *g, List *rules, List *names) r->right); if (result) { - fleprintf("%s:%d:%d, Cannot add to list\n", i); + fleprintf("%d, Cannot add to list\n", i); for (int j = 0; j < list_length(rule_grps); j++) { Rule_group *rg = (Rule_group *) list_nth(rule_grps, j); @@ -223,9 +229,9 @@ print_tnt(void *element) TNT *tnt = (TNT*) element; if (tnt->type) - printf("NT %u, ", 
tnt->data.nt); + printf("NT %u", tnt->data.nt); else - printf("T %lu, ", tnt->data.t); + printf("T %lu", tnt->data.t); } void @@ -269,7 +275,6 @@ find_in_cpa_list(NUM *string, NUM size, List *list) return (foundp) ? index : -1; } -/* a local function pointer */ void print_name(void *element) { @@ -281,7 +286,7 @@ print_name(void *element) int result = encode(*(array->array+i), string); if (result) { - fleprintf("%s:%d:%llu, fail to encode\n", i); + fleprintf("%llu, fail to encode\n", i); str_set_length(string, 5); continue; } @@ -296,13 +301,8 @@ print_name(void *element) destroy_str(string, 1); } -/* a local function pointer */ -void -print_sep() -{ - printf(", "); -} - +/* REVIEW: Print the names of non-terminals out, instead of printing + the numbers? */ void print_grammar(Grammar *g) { diff --git a/src/reader.c b/src/reader.c index 1e7c49c..18435a9 100644 --- a/src/reader.c +++ b/src/reader.c @@ -21,12 +21,12 @@ /* Read a string of TNT's into TNTS. TNTS is expected to be already allocated. 
*/ -static void +static unsigned char read_tnt_string(str *s, NUM start, NUM end, TNT *tnts, List *cpa_list) { str_info info; - /* fleprintf("%s:%d, S = %lu, E = %lu\n", start, end); */ + /* fleprintf("S = %lu, E = %lu\n", start, end); */ unsigned char readingp = 0, stop = 0; @@ -42,7 +42,7 @@ read_tnt_string(str *s, NUM start, NUM end, TNT *tnts, List *cpa_list) case 10: case 13: /* if a newline is somehow encountered, we stop reading */ - fleprintf0("%s:%d, hi\n"); + /* fleprintf0("hi\n"); */ stop = 1; break; case 9: @@ -56,12 +56,12 @@ read_tnt_string(str *s, NUM start, NUM end, TNT *tnts, List *cpa_list) nt_index = find_in_cpa_list(string, string_size, cpa_list); if (nt_index < 0) { - fleprintf("%s:%d:%lu, Undefined non-terminal: ", index); + fleprintf("index = %lu, Undefined non-terminal: ", index); print_name(&((cpa) { string, string_size })); printf("\n"); free(string); string = NULL; - return; + return 1; } free(string); @@ -83,6 +83,7 @@ read_tnt_string(str *s, NUM start, NUM end, TNT *tnts, List *cpa_list) strinfo.value != 39 && strinfo.value != 10 && strinfo.value != 13;) { + strinfo = get_info(s, strindex); if (strinfo.value == 92) { strindex += info.step; @@ -93,26 +94,32 @@ read_tnt_string(str *s, NUM start, NUM end, TNT *tnts, List *cpa_list) paranoid. 
*/ if (strindex >= end) { local_exit = 1; - fleprintf("%s:%d:%lu, Escape end\n", strindex); + fleprintf("strindex = %lu, Escape end\n", strindex); } strinfo = get_info(s, strindex); if (strinfo.value == 10 || strinfo.value == 13) { local_exit = 1; - fleprintf("%s:%d:%lu, Escape newline not allowed\n", + fleprintf("strindex = %lu, Escape newline not allowed\n", strindex); } /* cleanup */ /* no cleanup needed */ - if (local_exit) return; + if (local_exit) return 1; } *(tnts+tnt_count++) = (TNT) { 0, { .t = strinfo.value } }; strindex += strinfo.step; } + + strindex -= strinfo.step; + tnt_count--; + + index = strindex; + info = get_info(s, index); break; default: if (!readingp) { @@ -123,26 +130,26 @@ read_tnt_string(str *s, NUM start, NUM end, TNT *tnts, List *cpa_list) /* if we are at the end, we need to process one additional non-terminal */ if (index < end && index + (NUM) info.step >= end) { - /* fleprintf0("%s:%d, hello\n"); */ + /* fleprintf0("hello\n"); */ COUNT_CPA(last_index, index+(NUM) info.step, s, string_size); - /* fleprintf0("%s:%d, hello\n"); */ + /* fleprintf0("hello\n"); */ string = MYALLOC(NUM, string_size+1); READ_CPA(string, last_index, index+(NUM) info.step, s, string_size); - /* fleprintf0("%s:%d, hello\n"); */ + /* fleprintf0("hello\n"); */ nt_index = find_in_cpa_list(string, string_size, cpa_list); - /* fleprintf0("%s:%d, hello\n"); */ + /* fleprintf0("hello\n"); */ if (nt_index < 0) { - fleprintf("%s:%d:%lu, Undefined non-terminal: ", index); + fleprintf("index = %lu, Undefined non-terminal: ", index); print_name(&((cpa) { string, string_size })); printf("\n"); eprintf("END = %lu\n", end); free(string); string = NULL; - return; + return 1; } free(string); @@ -155,6 +162,8 @@ read_tnt_string(str *s, NUM start, NUM end, TNT *tnts, List *cpa_list) index += info.step; } + + return 0; } static void @@ -169,7 +178,7 @@ read_grammar_from_bnf(str *s) { NUM len = str_length(s); - /* fleprintf("%s:%d, len = %lu\n", len); */ + /* fleprintf("len = 
%lu\n", len); */ /* The first loop collects names of non-terminals */ @@ -197,7 +206,7 @@ read_grammar_from_bnf(str *s) readingp = 0; if (definitionp) { - fleprintf("%s:%d:%lu, A non-terminal cannot have spaces in " + fleprintf("%lu, A non-terminal cannot have spaces in " "the name\n", index); /* cleanup */ map_list(names, destroy_cpa_and_free_all); @@ -233,14 +242,14 @@ read_grammar_from_bnf(str *s) } add_to_list(line_indices, line_index); - /* fleprintf0("%s:%d, Definition should be over\n"); */ + /* fleprintf0("Definition should be over\n"); */ } break; default: if (!readingp) { last_index = index; readingp = 1; - /* fleprintf0("%s:%d, Start reading\n"); */ + /* fleprintf0("Start reading\n"); */ } break; } @@ -268,7 +277,7 @@ read_grammar_from_bnf(str *s) /* last index is used to collect strings */ for (NUM index = 0, last_index = 0; index < len;) { str_info info = get_info(s, index); - /* fleprintf("%s:%d, index = %lu, value = %lu\n", index, info.value); */ + /* fleprintf("index = %lu, value = %lu\n", index, info.value); */ TNT *current_tnt_string = NULL; @@ -279,7 +288,7 @@ read_grammar_from_bnf(str *s) case 34: /* double quote */ case 39: /* single quote */ if (definitionp) { - fleprintf("%s:%d:%lu, A non-terminal cannot have quotes in " + fleprintf("%lu, A non-terminal cannot have quotes in " "the name\n", index); /* cleanup */ map_list(names, destroy_cpa_and_free_all); @@ -293,9 +302,31 @@ read_grammar_from_bnf(str *s) } else { /* read a string of terminals */ + if (readingp) { + fleprintf("Name of non-terminal should not contain quotes, " + "index = %lu\n", index); + /* cleanup */ + map_list(names, destroy_cpa_and_free_all); + destroy_list(names, 0); + + destroy_list(line_indices, 1); + + map_list(rules, destroy_rule_and_free_first); + destroy_list(rules, 0); + return NULL; + } + + if (!readingp && !right_start_p) { + last_index = index; + right_start_p = 1; + } + NUM strindex = index + info.step; str_info strinfo = get_info(s, strindex); + /* 
fleprintf("start reading string at %lu, value = %lu\n", + * strindex, strinfo.value); */ + for (;strindex < len && strinfo.value != 13 && strinfo.value != 10 && @@ -312,11 +343,11 @@ read_grammar_from_bnf(str *s) if (strindex >= len) { local_error_p = 1; - fleprintf("%s:%d:%ld, Escape at the end not allowed\n", + fleprintf("%ld, Escape at the end not allowed\n", strindex); } else if (strinfo.value == 13 || strinfo.value == 10) { local_error_p = 1; - fleprintf("%s:%d:%ld, Escape newline in string not " + fleprintf("%ld, Escape newline in string not " "allowed\n", strindex); } @@ -336,23 +367,49 @@ read_grammar_from_bnf(str *s) tnt_count++; strindex += strinfo.step; + /* fleprintf("now strindex = %lu, value = %lu\n", + * strindex, strinfo.value); */ } - if (strinfo.value == 13 || strinfo.value == 10) { - fleprintf("%s:%d:%ld, Newline encountered before string " - "ended\n", strindex); - /* cleanup */ - map_list(names, destroy_cpa_and_free_all); - destroy_list(names, 0); + if (strindex < len) { + strindex -= strinfo.step; + tnt_count--; - destroy_list(line_indices, 1); + index = strindex; - map_list(rules, destroy_rule_and_free_first); - destroy_list(rules, 0); - return NULL; - } + info = get_info(s, index); + /* fleprintf("after reading a string, index = %lu, value = %lu\n", + * index, info.value); */ + + if (strinfo.value == 13 || strinfo.value == 10) { + fleprintf("%ld, Newline encountered before string " + "ended\n", strindex); + /* cleanup */ + map_list(names, destroy_cpa_and_free_all); + destroy_list(names, 0); + + destroy_list(line_indices, 1); - index = strindex + 1; + map_list(rules, destroy_rule_and_free_first); + destroy_list(rules, 0); + return NULL; + } + } else { + if (strinfo.value != 34 && + strinfo.value != 39) { + fleprintf0("input ended before string is left\n"); + /* cleanup */ + map_list(names, destroy_cpa_and_free_all); + destroy_list(names, 0); + + destroy_list(line_indices, 1); + + map_list(rules, destroy_rule_and_free_first); + 
destroy_list(rules, 0); + return NULL; + } + + } } break; @@ -360,7 +417,7 @@ read_grammar_from_bnf(str *s) case ' ': case 9: if (definitionp) { - fleprintf("%s:%d:%lu, A non-terminal cannot have spaces or " + fleprintf("%lu, A non-terminal cannot have spaces or " "tabs in the name\n", index); /* cleanup */ map_list(names, destroy_cpa_and_free_all); @@ -382,19 +439,34 @@ read_grammar_from_bnf(str *s) readingp = 0; line_count++; - /* fleprintf("%s:%d, tnt count = %lu\n", tnt_count); */ + /* fleprintf("tnt count = %lu\n", tnt_count); + * fleprintf("right = %u\n", right_start_p); */ if (right_start_p) { right_start_p = 0; current_tnt_string = new_tnt_pointer(tnt_count); - read_tnt_string(s, last_index, index, current_tnt_string, - names); + if (read_tnt_string + (s, last_index, index, current_tnt_string, + names)) { + /* something went wrong in readnig TNT string */ + /* cleanup */ + map_list(names, destroy_cpa_and_free_all); + destroy_list(names, 0); + + destroy_list(line_indices, 1); + + map_list(rules, destroy_rule_and_free_first); + destroy_list(rules, 0); + return NULL; + } printf("%s:%d:%lu, Printing a TNT string: ", __FILE__, __LINE__, index); - for (int i = 0; i < (int) tnt_count;) + for (int i = 0; i < (int) tnt_count;) { print_tnt(current_tnt_string+i++); + if (i<tnt_count) printf(", "); + } printf("\n"); temp_pointers = MYALLOC(void *, tnt_count); @@ -420,7 +492,7 @@ read_grammar_from_bnf(str *s) right_start_p = 0; /* the non-terminal at this line is cached by line_indices */ current_nt = *((NUM *) list_nth(line_indices, line_count)); - /* fleprintf("%s:%d, current nt = %u, count = %lu\n", + /* fleprintf("current nt = %u, count = %lu\n", * current_nt, line_count); */ break; default: @@ -447,7 +519,7 @@ read_grammar_from_bnf(str *s) Grammar *g = new_grammar(); if (build_grammar(g, rules, names)) { - fleprintf0("%s:%d, Failed to build the grammar\n"); + fleprintf0("Failed to build the grammar\n"); map_list(names, destroy_cpa_and_free_all); destroy_list(names, 
0); map_list(rules, destroy_rule_and_free_first); diff --git a/src/test.txt b/src/test.txt index 71e125b..be9b3ba 100644 --- a/src/test.txt +++ b/src/test.txt @@ -1,7 +1,9 @@ -S: A A C +S: A A LONG C 奎佑 A: B B B B: A A: +LONG: B A C: B C A B: B B +奎佑: "handsome" A: A B A diff --git a/src/test/check_reader.c b/src/test/check_reader.c index 8769cd7..3184e41 100644 --- a/src/test/check_reader.c +++ b/src/test/check_reader.c @@ -6,7 +6,6 @@ #include "../grammar.h" #include "../reader.h" -/* TODO: check string */ int main(U_ATTR int argc, U_ATTR char **argv) { @@ -17,7 +16,7 @@ main(U_ATTR int argc, U_ATTR char **argv) NUM buffer_size = 0; if (read_entire_file(file_name, &buffer, &buffer_size)) { - fleprintf("%s:%d, Cannot read file %s", file_name); + fleprintf("Cannot read file %s", file_name); free(buffer); return 1; } @@ -26,9 +25,13 @@ main(U_ATTR int argc, U_ATTR char **argv) Grammar *g = read_grammar_from_bnf((str *) s); - print_grammar(g); - destroy_grammar(g, 2); - destroy_str((str *)s, 1); + if (g) { + print_grammar(g); + destroy_grammar(g, 2); + destroy_str((str *)s, 1); - return 0; + return 0; + } + + return 1; } @@ -20,7 +20,7 @@ read_entire_file(const char *file_name, char **str, NUM *len) FILE *file = fopen(file_name, "r"); if (!file) { - eprintf("%s:%d, Cannot open file \"%s\": ", + eprintf("%s:%Cannot open file \"%s\": ", __FILE__, __LINE__, file_name); perror(NULL); return 1; @@ -35,7 +35,7 @@ read_entire_file(const char *file_name, char **str, NUM *len) *str = realloc(*str, 1+file_size); if (*str == NULL) { - fleprintf0("%s:%d, Cannot realloc\n"); + fleprintf0("Cannot realloc\n"); return 1; } @@ -28,8 +28,8 @@ typedef unsigned long long UNUM; /* definitely bigger than size_t */ #define eprintf(...) fprintf(stderr, __VA_ARGS__) -#define fleprintf0(M) eprintf(M, __FILE__, __LINE__) -#define fleprintf(M, ...) eprintf(M, __FILE__, __LINE__, __VA_ARGS__) +#define fleprintf0(M) eprintf("%s:%d, " M, __FILE__, __LINE__) +#define fleprintf(M, ...) 
eprintf("%s:%d, " M, __FILE__, __LINE__, __VA_ARGS__) unsigned char read_entire_file(const char *file_name, char **str, NUM *len); |