From a8bd5e9d85ac9928bd29add82e887f82642af893 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Mon, 31 Jan 2022 09:23:20 +0800 Subject: test/check_cnp: working algorithm I now have a working algorithm in test/check_cnp. It can correctly parse the grammar for an esoteric language called "Brainfuck". This language does not matter. What matters is that it contains parentheses. So this shows that at least for grammars as complex as parentheses, this parser works well. Haha. --- src/test/check_cnp.c | 291 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 291 insertions(+) create mode 100644 src/test/check_cnp.c (limited to 'src/test/check_cnp.c') diff --git a/src/test/check_cnp.c b/src/test/check_cnp.c new file mode 100644 index 0000000..00c8a64 --- /dev/null +++ b/src/test/check_cnp.c @@ -0,0 +1,291 @@ +#include "../cnp.h" + +#define READ_INTO_CPA(N, U, L, I, VA, VI, CP) do { \ + U = new_utf8(N, L); \ + I = get_info((str *)U, 0); \ + VA = MYALLOC(NUM, 2); \ + VI = 0; \ + for (NUM index = 0; \ + I.value >= 0 && index < str_length((str *) U); \ + index += I.step, VI++) { \ + I = get_info((str *)U, index); \ + *(VA+VI) = I.value; \ + } \ + CP = MYALLOC(cpa, 1); \ + CP->array = VA; \ + CP->size = VI; \ + add_to_list(names, CP); \ + destroy_str((str *)U, 1); \ + } while (0) + +#define READ_TNT_STRING(LEFT, FORMAT, LEN, ...) do { \ + tnt_string = new_tnt_string(FORMAT, LEN, __VA_ARGS__); \ + if (!tnt_string) { \ + fleprintf("left = %d, f = %s, l = %d, " \ + "cannot create tnt string\n", \ + LEFT, FORMAT, LEN); \ + map_list(rules, destroy_rule_and_free_all); \ + destroy_list(rules, 0); \ + map_list(names, destroy_cpa_and_free_all); \ + destroy_list(names, 0); \ + return 1; \ + } \ + rule = new_rule(LEFT, tnt_string); \ + add_to_list(rules, rule); \ + } while (0) + +int +main(int UNUSED argc, char ** UNUSED argv) +{ + /* have a grammar for testing */ + List *tnt_string = NULL; + Rule *rule = NULL; + List *rules = new_list(); + List *names = new_list(); + + char *name = MYALLOC(char, 2); + *(name+1) = 0; + *name = 'B'; + + utf8* uname = NULL; + + str_info info = EMPTY_STR_INFO; + + NUM *varray = NULL; + NUM vindex = 0; + cpa *cpap = NULL; + + READ_INTO_CPA(name, uname, 1, info, varray, vindex, cpap); + + name = MYALLOC(char, 3); + *(name+1) = 0; + *name = 'U'; + + READ_INTO_CPA(name, uname, 1, info, varray, vindex, cpap); + + /* name = MYALLOC(char, 2); + * *(name+1) = 0; + * *name = 'F'; + * + * READ_INTO_CPA(name, uname, 1, info, varray, vindex, cpap); + * + * name = MYALLOC(char, 2); + * *(name+1) = 0; + * *name = 'D'; + * + * READ_INTO_CPA(name, uname, 1, info, varray, vindex, cpap); */ + + READ_TNT_STRING(0, "nn", 2, (NT) 1, (NT) 0); + rule = new_rule(0, new_list()); + add_to_list(rules, rule); + + READ_TNT_STRING(1, "t", 1, (T) '>'); + READ_TNT_STRING(1, "t", 1, (T) '<'); + READ_TNT_STRING(1, "t", 1, (T) '+'); + READ_TNT_STRING(1, "t", 1, (T) '-'); + READ_TNT_STRING(1, "t", 1, (T) ','); + READ_TNT_STRING(1, "t", 1, (T) '.'); + READ_TNT_STRING(1, "tnt", 3, (T) '[', (NT) 0, (T) ']'); + + Grammar *g = new_grammar(); + + build_grammar(g, rules, names); + + print_grammar(g); + + utf8 *string = new_utf8("[,[.[+]]-]", 10); + + printf("\nPrinting the input...\n%s\n", get_data((str *) string)); + + Environment *env = new_env(g, (str *) string); + + if (crf_add_node(env_crfp(env), (pair2) { 0 })) goto cleanup; + + nt_add(env, 0, 0); + + CHECK_ENV_ERROR(env, goto cleanup;); + + NUM current_grade = 0; + + prodecor *current_prodecor = NULL; + + Rule_group *rg = NULL; + List *right = NULL, *tnts = NULL; + + NUM *num_string = NULL, num_string_len = 0; + BOOL errorp = 0; + + env_str(env, &num_string, &num_string_len); + + while ((current_prodecor = + pop_procr(env_r(env), env_u(env), ¤t_grade))) { + /* fleprintf("{ %ld, %ld, %ld, %ld, %ld }\n", + * current_prodecor->x, current_prodecor->y, + * current_prodecor->z, current_prodecor->w, + * current_grade); + * print_procr(env_r(env)); */ + + /* different cases */ + + rg = grammar_rule(env_grammar(env), current_prodecor->x); + right = rg_nth(rg, current_prodecor->y); + + /* separate the empty rules out */ + if (list_length(right) == 0) { + errorp = + bsr_add(env_grammar(env), env_bsrp(env), + current_prodecor->x, current_prodecor->y, 0, + current_grade, current_grade, current_grade); + + if (errorp) goto cleanup; + + if (env_follow_p(env, current_prodecor->x, + *(num_string+current_grade))) { + rtn(env, current_prodecor->x, + current_prodecor->w, current_grade); + } + + goto continue_label; + } + + /* fleprintf0("non empty rule\n"); */ + + /* if at the end of a rule, return if possible */ + if (current_prodecor->z == list_length(right)) { + if (env_follow_p(env, current_prodecor->x, + *(num_string+current_grade))) { + rtn(env, current_prodecor->x, + current_prodecor->w, current_grade); + } + + goto continue_label; + } + + /* fleprintf0("not at the end of the rule\n"); */ + + /* if at the start of a rule, no need for test_select */ + if (current_prodecor->z == 0) { + /* fleprintf0("at the start of the rule\n"); */ + current_prodecor->z = 1; + + /* otherwise test select */ + } else { + /* fleprintf0("start by test select\n"); */ + tnts = + array_to_list(list_array(right)+current_prodecor->z, + list_length(right)-current_prodecor->z); + if (test_select(env, *(num_string+current_grade), + current_prodecor->x, tnts)) { + /* fleprintf0("successful test\n"); */ + free(tnts); + tnts = NULL; + ++(current_prodecor->z); + } else { + /* fleprintf0("failed test\n"); */ + free(tnts); + tnts = NULL; + goto continue_label; + } + } + + CHECK_ENV_ERROR(env, goto cleanup;); + + /* while there are terminals */ + while (((TNT *) list_nth(right, current_prodecor->z-1))->type == + TERMINAL) { + /* fleprintf0("found terminal\n"); */ + /* add to BSR set */ + errorp = + bsr_add(env_grammar(env), env_bsrp(env), + current_prodecor->x, current_prodecor->y, + current_prodecor->z, current_prodecor->w, + current_grade, current_grade+1); + + if (errorp) goto cleanup; + + current_grade++; + + /* if at the end of the rule, return if possible */ + if (current_prodecor->z == list_length(right)) { + if (env_follow_p(env, current_prodecor->x, + *(num_string+current_grade))) { + /* fleprintf0("we choose to return\n"); */ + rtn(env, current_prodecor->x, + current_prodecor->w, current_grade); + /* fleprintf0("hi\n"); */ + } + + goto continue_label; + } + /* fleprintf0("terminal not at the end\n"); */ + /* else test select */ + tnts = + array_to_list(list_array(right)+current_prodecor->z, + list_length(right)-current_prodecor->z); + if (test_select(env, *(num_string+current_grade), + current_prodecor->x, tnts)) { + /* fleprintf0("Successful test\n"); */ + free(tnts); + tnts = NULL; + (current_prodecor->z)++; + } else { + /* fleprintf0("Failed test\n"); */ + /* printf("next thing is "); */ + /* print_tnt(list_nth(right, current_prodecor->z)); */ + /* printf("\n"); */ + free(tnts); + tnts = NULL; + goto continue_label; + } + } + + /* fleprintf0("encountered non-terminal\n"); */ + + /* when a nonterminal is encountered, we call it */ + cnp_call(env, (pair3) { + .x = current_prodecor->x, + .y = current_prodecor->y, + .z = current_prodecor->z + }, current_prodecor->w, current_grade); + + /* fleprintf("after call { %ld, %ld, %ld, %ld, %ld }\n", + * current_prodecor->x, current_prodecor->y, + * current_prodecor->z, current_prodecor->w, + * current_grade); */ + + continue_label: + + CHECK_ENV_ERROR(env, goto cleanup;); + + free(current_prodecor); + } + + cleanup: + bsr_print(env_bsrp(env), env_grammar(env), 1); + + destroy_env(env); + + destroy_grammar(g, 1); + + destroy_list(rules, 1); + + free(string); + + return 0; +} + +/* archives + + printf("\nprint first for 0:\n"); + env_print_first(env, 0); + + printf("\nprint first for 1:\n"); + env_print_first(env, 1); + + printf("\nprint follow for 0:\n"); + env_print_follow(env, 0); + + printf("\nprint follow for 1:\n"); + env_print_follow(env, 1); + + */ -- cgit v1.2.3-18-g5258