From 5426d9e2a6b820e34809d639838b26643df9ab17 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Tue, 8 Feb 2022 00:29:10 +0800 Subject: fix errors There are multiple subtle errors in the previous version, both in the codes and in the description of the BNF format. This version should fix some problems now. This version can successfully parse the grammar of its own grammar format, which is quite nice. See test/check_reader.c for parsing this format. --- src/test/check_reader.c | 594 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 578 insertions(+), 16 deletions(-) (limited to 'src/test/check_reader.c') diff --git a/src/test/check_reader.c b/src/test/check_reader.c index a4e184f..45025ff 100644 --- a/src/test/check_reader.c +++ b/src/test/check_reader.c @@ -1,18 +1,517 @@ +#include #include #include +#include +#include -#include "../util.h" -#include "../list.h" -#include "../grammar.h" -#include "../reader.h" +#include "../cnp.h" + +#define TITO struct timespec tic, toc + +#define TIC do { \ + clock_gettime(CLOCK_MONOTONIC_RAW, &tic); \ + } while (0) + +#define TOC do { \ + clock_gettime(CLOCK_MONOTONIC_RAW, &toc); \ + printf("\nTotal time = %f seconds\n", \ + (toc.tv_nsec - tic.tv_nsec) / \ + 1000000000.0 + \ + toc.tv_sec - tic.tv_sec); \ + } while (0) + +#define READ_INTO_CPA(N, U, L, I, VA, VI, CP) do { \ + U = new_utf8(N, L); \ + I = get_info((str *)U, 0); \ + VI = 0; \ + for (NUM index = 0; \ + I.value >= 0 && index < str_length((str*) U); \ + index += I.step, VI++) { \ + I = get_info((str*)U, index); \ + } \ + SAFE_MALLOC(NUM, VA, VI, return 1;); \ + I = get_info((str *)U, 0); \ + VI = 0; \ + for (NUM index = 0; \ + I.value >= 0 && index < str_length((str *) U); \ + index += I.step, VI++) { \ + I = get_info((str *)U, index); \ + *(VA+VI) = I.value; \ + } \ + SAFE_MALLOC(cpa, CP, 1, return 1;); \ + CP->array = VA; \ + CP->size = VI; \ + if (add_to_list(names, CP)) { \ + fleprintf0("Fail to add to names\n"); \ + return 1; \ + } \ + destroy_str((str *)U, 1); \ + } while (0) + +#define READ_TNT_STRING(LEFT, FORMAT, LEN, ...) do { \ + tnt_string = new_tnt_string(FORMAT, LEN, __VA_ARGS__); \ + if (!tnt_string) { \ + fleprintf("left = %d, f = %s, l = %d, " \ + "cannot create tnt string\n", \ + LEFT, FORMAT, LEN); \ + map_list(rules, destroy_rule_and_free_all); \ + destroy_list(rules, 0); \ + map_list(names, destroy_cpa_and_free_all); \ + destroy_list(names, 0); \ + return 1; \ + } \ + rule = new_rule(LEFT, tnt_string); \ + add_to_list(rules, rule); \ + } while (0) + +#define ADD_EMPTY_RULE(N) do { \ + rule = new_rule(N, new_list()); \ + add_to_list(rules, rule); \ + } while (0) int -main(U_ATTR int argc, U_ATTR char **argv) +main(int UNUSED argc, char UNUSED **argv) { - /* return 77; */ + List *tnt_string = NULL; + Rule *rule = NULL; + List *rules = new_list(); + List *names = new_list(); + List *preds = new_list(); + + char *user_name = NULL; + char *raw_name = NULL; + + str *user_name_s = NULL, *raw_name_s = NULL; + + char *name = NULL; + utf8* uname = NULL; + + int name_len = 0; + + str_info info = EMPTY_STR_INFO; + + NUM *varray = NULL; + NUM vindex = 0; + cpa *cpap = NULL; + + SAFE_MALLOC(char, name, 3, return 1;); + + *name = 'B'; + *(name+1) = 'N'; + *(name+2) = 'F'; + + READ_INTO_CPA(name, uname, 3, info, varray, vindex, cpap); + + SAFE_MALLOC(char, name, 6, return 1;); + + *name = 's'; + *(name+1) = 'p'; + *(name+2) = 'a'; + *(name+3) = 'c'; + *(name+4) = 'e'; + *(name+5) = 's'; + + READ_INTO_CPA(name, uname, 6, info, varray, vindex, cpap); + + SAFE_MALLOC(char, name, 15, return 1;); + + *name = 'o'; + *(name+1) = 'p'; + *(name+2) = 't'; + *(name+3) = 'i'; + *(name+4) = 'o'; + *(name+5) = 'n'; + *(name+6) = 'a'; + *(name+7) = 'l'; + *(name+8) = '_'; + *(name+9) = 's'; + *(name+10) = 'p'; + *(name+11) = 'a'; + *(name+12) = 'c'; + *(name+13) = 'e'; + *(name+14) = 's'; + + READ_INTO_CPA(name, uname, 15, info, varray, vindex, cpap); + + SAFE_MALLOC(char, name, 5, return 1;); + + *name = 'e'; + *(name+1) = 'm'; + *(name+2) = 'p'; + *(name+3) = 't'; + *(name+4) = 'y'; + + READ_INTO_CPA(name, uname, 5, info, varray, vindex, cpap); + + SAFE_MALLOC(char, name, 11, return 1;); + + memcpy(name, "notnewlines", 11); + + READ_INTO_CPA(name, uname, 11, info, varray, vindex, cpap); + + SAFE_MALLOC(char, name, 17, return 1;); + + memcpy(name, "predicate_section", 17); + + READ_INTO_CPA(name, uname, 17, info, varray, vindex, cpap); + + SAFE_MALLOC(char, name, 9, return 1;); + + memcpy(name, "predicate", 9); + + READ_INTO_CPA(name, uname, 9, info, varray, vindex, cpap); + + name_len = 3; + SAFE_MALLOC(char, name, name_len, return 1;); + + memcpy(name, "ids", name_len); + + READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap); + + name_len = 5; + SAFE_MALLOC(char, name, name_len, return 1;); + + memcpy(name, "class", name_len); + + READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap); + + name_len = 14; + SAFE_MALLOC(char, name, name_len, return 1;); + + memcpy(name, "positive_class", name_len); + + READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap); + + name_len = 22; + SAFE_MALLOC(char, name, name_len, return 1;); + + memcpy(name, "positive_specification", name_len); + + READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap); + + name_len = 18; + SAFE_MALLOC(char, name, name_len, return 1;); + + memcpy(name, "class_single_chars", name_len); + + READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap); + + name_len = 17; + SAFE_MALLOC(char, name, name_len, return 1;); + + memcpy(name, "class_range_chars", name_len); + + READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap); + + name_len = 13; + SAFE_MALLOC(char, name, name_len, return 1;); + + memcpy(name, "rules_section", name_len); + + READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap); + + name_len = 4; + SAFE_MALLOC(char, name, name_len, return 1;); + + memcpy(name, "rule", name_len); + + READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap); + + name_len = 9; + SAFE_MALLOC(char, name, name_len, return 1;); + + memcpy(name, "rule_name", name_len); + + READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap); + + name_len = 25; + SAFE_MALLOC(char, name, name_len, return 1;); + + memcpy(name, "spaces-or-escaped-newline", name_len); + + READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap); + + name_len = 8; + SAFE_MALLOC(char, name, name_len, return 1;); + + memcpy(name, "rule_rhs", name_len); + + READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap); + + name_len = 19; + SAFE_MALLOC(char, name, name_len, return 1;); + + memcpy(name, "double_string_chars", name_len); + + READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap); + + name_len = 19; + SAFE_MALLOC(char, name, name_len, return 1;); + + memcpy(name, "single_string_chars", name_len); + + READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap); + + READ_TNT_STRING(0, "tnnn", 4, (T) '#', (NT) 4, (NT) 3, (NT) 0); + READ_TNT_STRING(0, "ntttnn", 6, (NT) 5, + (T) '-', (T) '-', (T) 0xa, (NT) 3, (NT) 13); + READ_TNT_STRING(0, "n", 1, (NT) 13); + ADD_EMPTY_RULE(0); + + READ_TNT_STRING(1, "pn", 2, (PT) 0, (NT) 1); + READ_TNT_STRING(1, "p", 1, (PT) 0); + + READ_TNT_STRING(2, "pn", 2, (PT) 0, (NT) 2); + ADD_EMPTY_RULE(2); + + READ_TNT_STRING(3, "pn", 2, (PT) 0, (NT) 3); + READ_TNT_STRING(3, "tn", 2, (T) 0xa, (NT) 3); + READ_TNT_STRING(3, "tn", 2, (T) 0xd, (NT) 3); + READ_TNT_STRING(3, "ttnn", 4, (T) 0xa, (T) 0x23, + (NT) 4, (NT) 3); + ADD_EMPTY_RULE(3); + + READ_TNT_STRING(4, "pn", 2, (PT) 3, (NT) 4); + ADD_EMPTY_RULE(4); + + READ_TNT_STRING(5, "nntn", 4, (NT) 6, (NT) 3, (T) 0xa, (NT) 5); + ADD_EMPTY_RULE(5); + + READ_TNT_STRING(6, "tnttnn", 6, (T) '[', (NT) 7, + (T) ']', (T) ':', (NT) 2, (NT) 8); + + READ_TNT_STRING(7, "pn", 2, (PT) 1, (NT) 7); + READ_TNT_STRING(7, "p", 1, (PT) 1); + + READ_TNT_STRING(8, "n", 1, (NT) 9); + READ_TNT_STRING(8, "tn", 2, (T) '^', (NT) 9); + + READ_TNT_STRING(9, "nn", 2, (NT) 10, (NT) 9); + ADD_EMPTY_RULE(9); + + READ_TNT_STRING(10, "n", 1, (NT) 11); + READ_TNT_STRING(10, "ntn", 3, (NT) 12, (T) '-', (NT) 12); + + READ_TNT_STRING(11, "p", 1, (PT) 5); + READ_TNT_STRING(11, "tp", 2, (T) 0x5c, (PT) 6); + + READ_TNT_STRING(12, "p", 1, (PT) 4); + READ_TNT_STRING(12, "tp", 2, (T) 0x5c, (PT) 6); + + READ_TNT_STRING(13, "nntn", 4, (NT) 14, (NT) 3, (T) 0xa, (NT) 13); + ADD_EMPTY_RULE(13); + + READ_TNT_STRING(14, "nntnn", 5, + (NT) 15, (NT) 2, (T) ':', + (NT) 2, (NT) 17); + + READ_TNT_STRING(15, "pn", 2, (PT) 2, (NT) 15); + ADD_EMPTY_RULE(15); + + READ_TNT_STRING(16, "n", 1, (NT) 2); + READ_TNT_STRING(16, "tt", 2, (T) 0x5c, (T) 0xa); + + READ_TNT_STRING(17, "nnn", 3, (NT) 7, (NT) 16, (NT) 17); + READ_TNT_STRING(17, "tntnn", 5, + (T) '[', (NT) 7, (T) ']', (NT) 16, (NT) 17); + READ_TNT_STRING(17, "tntnn", 5, + (T) 0x22, (NT) 18, (T) 0x22, (NT) 16, (NT) 17); + READ_TNT_STRING(17, "tntnn", 5, + (T) 0x27, (NT) 19, (T) 0x27, (NT) 16, (NT) 17); + ADD_EMPTY_RULE(17); + + READ_TNT_STRING(18, "p", 1, (PT) 7); + READ_TNT_STRING(18, "tp", 2, (T) '\\', (PT) 6); + READ_TNT_STRING(18, "pn", 2, (PT) 7, (NT) 18); + READ_TNT_STRING(18, "tpn", 3, (T) '\\', (PT) 6, (NT) 18); + + READ_TNT_STRING(19, "p", 1, (PT) 8); + READ_TNT_STRING(19, "tp", 2, (T) '\\', (PT) 6); + READ_TNT_STRING(19, "pn", 2, (PT) 8, (NT) 19); + READ_TNT_STRING(19, "tpn", 3, (T) '\\', (PT) 6, (NT) 19); + + SAFE_MALLOC(char, user_name, 6, return 1;); + memcpy(user_name, "space", 6); + user_name_s = (str *) new_utf8(user_name, 6); + + SAFE_MALLOC(char, raw_name, 7, return 1;); + memcpy(raw_name, "%x20\\t", 7); + raw_name_s = (str *) new_utf8(raw_name, 7); + + if (add_to_list(preds, + new_ptd(user_name_s, raw_name_s, + dfa_from_ranges + (2, (NUM[]) { ' ', ' ', 9, 9 })))) { + fleprintf0("Fail to add a predicate\n"); + return 1; + } + + SAFE_MALLOC(char, user_name, 3, return 1;); + memcpy(user_name, "id", 3); + user_name_s = (str *) new_utf8(user_name, 3); + + SAFE_MALLOC(char, raw_name, 12, return 1;); + memcpy(raw_name, "a-zA-Z-_+*@", 12); + raw_name_s = (str *) new_utf8(raw_name, 12); + + if (add_to_list(preds, + new_ptd(user_name_s, raw_name_s, + dfa_from_ranges + (6, (NUM[]) { + 'a', 'z', 'A', 'Z', + '-', '-', '_', '_', + '*', '+', '@', '@' + })))) { + fleprintf0("Fail to add a predicate\n"); + return 1; + } + + SAFE_MALLOC(char, user_name, 15, return 1;); + memcpy(user_name, "rule_name_char", 15); + user_name_s = (str *) new_utf8(user_name, 15); + + SAFE_MALLOC(char, raw_name, 12, return 1;); + memcpy(raw_name, "^[]:%x20\\n#", 12); + raw_name_s = (str *) new_utf8(raw_name, 12); + + if (add_to_list(preds, + new_ptd(user_name_s, raw_name_s, + dfa_from_ranges_neg + (6, (NUM[]) { + '[', '[', ']', ']', + ':', ':', ' ', ' ', + 0xa, 0xa, 0x23, 0x23 + })))) { + fleprintf0("Fail to add a predicate\n"); + return 1; + } + + SAFE_MALLOC(char, user_name, 11, return 1;); + memcpy(user_name, "notnewline", 11); + user_name_s = (str *) new_utf8(user_name, 11); + + SAFE_MALLOC(char, raw_name, 6, return 1;); + memcpy(raw_name, "^\\n\\r", 6); + raw_name_s = (str *) new_utf8(raw_name, 6); + + if (add_to_list(preds, + new_ptd(user_name_s, raw_name_s, + dfa_from_ranges_neg + (2, (NUM[]) { + '\n', '\n', '\r', '\r' + })))) { + fleprintf0("Fail to add a predicate\n"); + return 1; + } + + SAFE_MALLOC(char, user_name, 17, return 1;); + memcpy(user_name, "class_range_char", 17); + user_name_s = (str *) new_utf8(user_name, 17); + + SAFE_MALLOC(char, raw_name, 10, return 1;); + memcpy(raw_name, "^\\n\\r\\^\\\\", 10); + raw_name_s = (str *) new_utf8(raw_name, 10); + + if (add_to_list(preds, + new_ptd(user_name_s, raw_name_s, + dfa_from_ranges_neg + (4, (NUM[]) { + '\n', '\n', '\r', '\r', + '^', '^', '\\', '\\' + })))) { + fleprintf0("Fail to add a predicate\n"); + return 1; + } + + SAFE_MALLOC(char, user_name, 18, return 1;); + memcpy(user_name, "class_single_char", 18); + user_name_s = (str *) new_utf8(user_name, 18); + + SAFE_MALLOC(char, raw_name, 11, return 1;); + memcpy(raw_name, "^\\n\\r\\^\\\\-", 11); + raw_name_s = (str *) new_utf8(raw_name, 11); + + if (add_to_list(preds, + new_ptd(user_name_s, raw_name_s, + dfa_from_ranges_neg + (5, (NUM[]) { + '\n', '\n', '\r', '\r', + '^', '^', '\\', '\\', + '-', '-' + })))) { + fleprintf0("Fail to add a predicate\n"); + return 1; + } + + SAFE_MALLOC(char, user_name, 4, return 1;); + memcpy(user_name, "any", 4); + user_name_s = (str *) new_utf8(user_name, 4); + + SAFE_MALLOC(char, raw_name, 2, return 2;); + memcpy(raw_name, " ", 2); + raw_name_s = (str *) new_utf8(raw_name, 2); + + if (add_to_list(preds, + new_ptd(user_name_s, raw_name_s, + dfa_from_ranges + (1, (NUM[]) { + 0, 0x10ffff + })))) { + fleprintf0("Fail to add a predicate\n"); + return 1; + } + + SAFE_MALLOC(char, user_name, 19, return 1;); + memcpy(user_name, "double_string_char", 19); + user_name_s = (str *) new_utf8(user_name, 19); + + SAFE_MALLOC(char, raw_name, 5, return 1;); + memcpy(raw_name, "^\"\\n", 5); + raw_name_s = (str *) new_utf8(raw_name, 5); + + if (add_to_list(preds, + new_ptd(user_name_s, raw_name_s, + dfa_from_ranges_neg + (2, (NUM[]) { + 0x22, 0x22, 0xa, 0xa + })))) { + fleprintf0("Fail to add a predicate\n"); + return 1; + } + + SAFE_MALLOC(char, user_name, 19, return 1;); + memcpy(user_name, "single_string_char", 19); + user_name_s = (str *) new_utf8(user_name, 19); + + SAFE_MALLOC(char, raw_name, 5, return 1;); + memcpy(raw_name, "^'\\n", 5); + raw_name_s = (str *) new_utf8(raw_name, 5); + + if (add_to_list(preds, + new_ptd(user_name_s, raw_name_s, + dfa_from_ranges_neg + (2, (NUM[]) { + 0x27, 0x27, 0xa, 0xa + })))) { + fleprintf0("Fail to add a predicate\n"); + return 1; + } + + Grammar *g = new_grammar(); + + build_grammar(g, rules, names, preds); + + print_grammar(g); + + /* utf8 *string = new_utf8("# comment\n\n[P]: a-z\n\n--\n\nS : AB\n" + * "# another comment\nA: \"S\"\nB : [P]\n", + * 65); */ + + char *file_name = "bnf.bnf"; + char *buffer = NULL; + + SAFE_MALLOC(char, buffer, 1<<9, return 1;); - char *file_name = "brainfuck.bnf"; - char *buffer = MYALLOC(char, 512); NUM buffer_size = 0; if (read_entire_file(file_name, &buffer, &buffer_size)) { @@ -21,17 +520,80 @@ main(U_ATTR int argc, U_ATTR char **argv) return 1; } - utf8 *s = new_utf8(buffer, buffer_size); + /* The size includes the trailing null byte, so we shall exclude + it. */ + buffer_size--; + + str *string = (str *) new_utf8(buffer, buffer_size); + + printf("\nPrinting the input...\n%s\n", get_data(string)); + + printf("Input size = %ld", buffer_size); + + TITO; + + TIC; - Grammar *g = read_grammar_from_bnf((str *) s); + Environment *env = cnp_parse(g, string); - if (g) { - print_grammar(g); - destroy_grammar(g, 2); - destroy_str((str *)s, 1); + TOC; - return 0; + if (env) { + if (!(env_error_p(env))) { + BOOL result = bsr_lookup + (env_bsrp(env), 0, 0, str_length((str *) string)); + + if (result) { + printf("\nSuccessfully parsed the input!\n"); + } else { + printf("\nThe input does not parse!\n"); + } + + printf("\nAll BSRs follow:\n\n"); + if (argc == 1) + bsr_print(env_bsrp(env), env_grammar(env), 1); + } else { + printf("There are errors!\n"); + } + + destroy_env(env); } - return 1; + destroy_grammar(g, 1); + + destroy_list(rules, 1); + + destroy_str(string, 1); + + return 0; } + + +/* archives */ + +/* char *file_name = "brainfuck.bnf"; + * char *buffer = NULL; + * + * SAFE_MALLOC(char, buffer, 1<<9, return 1;); + * + * NUM buffer_size = 0; + * + * if (read_entire_file(file_name, &buffer, &buffer_size)) { + * fleprintf("Cannot read file %s", file_name); + * free(buffer); + * return 1; + * } + * + * utf8 *s = new_utf8(buffer, buffer_size); + * + * Grammar *g = read_grammar_from_bnf((str *) s); + * + * if (g) { + * print_grammar(g); + * destroy_grammar(g, 2); + * destroy_str((str *)s, 1); + * + * return 0; + * } + * + * return 1; */ -- cgit v1.2.3-18-g5258