From eb007d554251456a2a508849edf91b15aab1333e Mon Sep 17 00:00:00 2001 From: JSDurand Date: Mon, 31 Jan 2022 15:59:11 +0800 Subject: cnp: save point Now we need to implement predicates, in order to have practical applications. --- src/cnp.c | 176 ++++++++++++++++++++++++++++++++++++++++++++++++ src/cnp.h | 3 +- src/dfa.c | 26 +++++++- src/dfa.h | 5 +- src/grammar.c | 13 +++- src/grammar.h | 20 ++---- src/json.bnf | 161 ++++++++++++++++++++++++++++++++++++++++++++ src/test/check_cnp.c | 185 ++------------------------------------------------- 8 files changed, 393 insertions(+), 196 deletions(-) create mode 100644 src/json.bnf (limited to 'src') diff --git a/src/cnp.c b/src/cnp.c index 8c7c519..91a8df7 100644 --- a/src/cnp.c +++ b/src/cnp.c @@ -522,3 +522,179 @@ destroy_env(Environment *env) free(env->gi); free(env); } + +H_ATTR +Environment * +cnp_parse(Grammar *g, str *string) +{ + Environment *env = new_env(g, (str *) string); + + if (crf_add_node(env_crfp(env), (pair2) { 0 })) goto cleanup; + + nt_add(env, 0, 0); + + CHECK_ENV_ERROR(env, goto cleanup;); + + NUM current_grade = 0; + + prodecor *current_prodecor = NULL; + + Rule_group *rg = NULL; + List *right = NULL, *tnts = NULL; + + NUM *num_string = NULL, num_string_len = 0; + BOOL errorp = 0; + + env_str(env, &num_string, &num_string_len); + + while ((current_prodecor = + pop_procr(env_r(env), env_u(env), ¤t_grade))) { + /* fleprintf("{ %ld, %ld, %ld, %ld, %ld }\n", + * current_prodecor->x, current_prodecor->y, + * current_prodecor->z, current_prodecor->w, + * current_grade); + * print_procr(env_r(env)); */ + + /* different cases */ + + rg = grammar_rule(env_grammar(env), current_prodecor->x); + right = rg_nth(rg, current_prodecor->y); + + /* separate the empty rules out */ + if (list_length(right) == 0) { + errorp = + bsr_add(env_grammar(env), env_bsrp(env), + current_prodecor->x, current_prodecor->y, 0, + current_grade, current_grade, current_grade); + + if (errorp) goto cleanup; + + if (env_follow_p(env, current_prodecor->x, + *(num_string+current_grade))) { + rtn(env, current_prodecor->x, + current_prodecor->w, current_grade); + } + + goto continue_label; + } + + /* fleprintf0("non empty rule\n"); */ + + /* if at the end of a rule, return if possible */ + if (current_prodecor->z == list_length(right)) { + if (env_follow_p(env, current_prodecor->x, + *(num_string+current_grade))) { + rtn(env, current_prodecor->x, + current_prodecor->w, current_grade); + } + + goto continue_label; + } + + /* fleprintf0("not at the end of the rule\n"); */ + + /* if at the start of a rule, no need for test_select */ + if (current_prodecor->z == 0) { + /* fleprintf0("at the start of the rule\n"); */ + current_prodecor->z = 1; + + /* otherwise test select */ + } else { + /* fleprintf0("start by test select\n"); */ + tnts = + array_to_list(list_array(right)+current_prodecor->z, + list_length(right)-current_prodecor->z); + if (test_select(env, *(num_string+current_grade), + current_prodecor->x, tnts)) { + /* fleprintf0("successful test\n"); */ + free(tnts); + tnts = NULL; + ++(current_prodecor->z); + } else { + /* fleprintf0("failed test\n"); */ + free(tnts); + tnts = NULL; + goto continue_label; + } + } + + CHECK_ENV_ERROR(env, goto cleanup;); + + /* while there are terminals */ + while (((TNT *) list_nth(right, current_prodecor->z-1))->type == + TERMINAL) { + /* fleprintf0("found terminal\n"); */ + /* add to BSR set */ + errorp = + bsr_add(env_grammar(env), env_bsrp(env), + current_prodecor->x, current_prodecor->y, + current_prodecor->z, current_prodecor->w, + current_grade, current_grade+1); + + if (errorp) goto cleanup; + + current_grade++; + + /* if at the end of the rule, return if possible */ + if (current_prodecor->z == list_length(right)) { + if (env_follow_p(env, current_prodecor->x, + *(num_string+current_grade))) { + /* fleprintf0("we choose to return\n"); */ + rtn(env, current_prodecor->x, + current_prodecor->w, current_grade); + /* fleprintf0("hi\n"); */ + } + + goto continue_label; + } + /* fleprintf0("terminal not at the end\n"); */ + /* else test select */ + tnts = + array_to_list(list_array(right)+current_prodecor->z, + list_length(right)-current_prodecor->z); + if (test_select(env, *(num_string+current_grade), + current_prodecor->x, tnts)) { + /* fleprintf0("Successful test\n"); */ + free(tnts); + tnts = NULL; + (current_prodecor->z)++; + } else { + /* fleprintf0("Failed test\n"); */ + /* printf("next thing is "); */ + /* print_tnt(list_nth(right, current_prodecor->z)); */ + /* printf("\n"); */ + free(tnts); + tnts = NULL; + goto continue_label; + } + } + + /* fleprintf0("encountered non-terminal\n"); */ + + /* when a nonterminal is encountered, we call it */ + cnp_call(env, (pair3) { + .x = current_prodecor->x, + .y = current_prodecor->y, + .z = current_prodecor->z + }, current_prodecor->w, current_grade); + + /* fleprintf("after call { %ld, %ld, %ld, %ld, %ld }\n", + * current_prodecor->x, current_prodecor->y, + * current_prodecor->z, current_prodecor->w, + * current_grade); */ + + continue_label: + + CHECK_ENV_ERROR(env, goto cleanup;); + + free(current_prodecor); + } + + return env; + + cleanup: + + destroy_env(env); + + return NULL; +} diff --git a/src/cnp.h b/src/cnp.h index 2b0c104..904e383 100644 --- a/src/cnp.h +++ b/src/cnp.h @@ -58,6 +58,7 @@ void rtn(Environment *env, NUM X, NUM k, NUM j); void cnp_call(Environment *env, pair3 label, NUM i, NUM j); -/* TODO: Main algorithm */ +/* On error return NULL */ +H_ATTR Environment *cnp_parse(Grammar *g, str *string); #endif diff --git a/src/dfa.c b/src/dfa.c index e3bcdf0..fb67047 100644 --- a/src/dfa.c +++ b/src/dfa.c @@ -59,12 +59,15 @@ void destroy_dfa(dfa *table) { /* function pointers need no destructors */ - if (table->type == DFA_TYPE_SPECIAL) return; + if (table->type == DFA_TYPE_SPECIAL) goto free_last; for (int i = 0; i < table->data.table.row_n;) free(*(table->data.table.table+i++)); free(table->data.table.table); + + free_last: + free(table); } @@ -690,3 +693,24 @@ run_dfa(const dfa * const restrict table, const NUM code) return 0; } + +P_ATTR +static BOOL +dfa_any_fun(const NUM UNUSED code) { return 1; } + +dfa * +dfa_from_func(special_dfa func) +{ + dfa *result = NULL; + SAFE_MALLOC(dfa, result, 1, return NULL;); + result->type = DFA_TYPE_SPECIAL; + result->data.fn = func; + + return result; +} + +dfa * +dfa_any(void) +{ + return dfa_from_func(dfa_any_fun); +} diff --git a/src/dfa.h b/src/dfa.h index 70c8bf6..9116999 100644 --- a/src/dfa.h +++ b/src/dfa.h @@ -57,7 +57,10 @@ dfa *dfa_from_bytes_both(int sequence_size, /* TODO: Construct some basic frequently used character classes. */ -inline BOOL dfa_any_fun(const NUM UNUSED code) { return 1; } +dfa *dfa_from_func(special_dfa func); + +/* return a new instance of the any class */ +dfa *dfa_any(void); BOOL run_dfa(CCR_MOD(dfa *) table, const NUM code); diff --git a/src/grammar.c b/src/grammar.c index 3b5a959..afb9353 100644 --- a/src/grammar.c +++ b/src/grammar.c @@ -44,6 +44,15 @@ rg_nth(CCR_MOD(Rule_group *) rg, NUM n) return (List *) list_nth(rg->rights, n); } +struct PT_DATA_s { + dfa *dfap; + /* NULL-terminated strings */ + char *user_name; + char *raw_name; +}; + +typedef struct PT_DATA_s PTD; + /* TODO: Add some statistic counts to assist the hash tables. For example, store the total number of terminals as an integer; then the hash table can be hinted at initialization to contain that many @@ -63,7 +72,8 @@ struct Grammar_s { To print them, first encode them into normal strings. */ List *names; - /* TODO: Add an array of predicates. */ + /* an array of predicates. */ + PTD *predicates; }; static void @@ -179,6 +189,7 @@ BOOL build_grammar(Grammar *g, const List *rules, CC_MOD(List *) names) { g->names = (List *) names; + g->predicates = NULL; NUM len = list_length(names); NUM rule_len = list_length(rules); diff --git a/src/grammar.h b/src/grammar.h index cbda887..d51c612 100644 --- a/src/grammar.h +++ b/src/grammar.h @@ -51,20 +51,12 @@ typedef long T; typedef unsigned long NT; typedef unsigned long PT; -enum PT_TYPE_e { - PT_DFA, - PT_SPECIAL -}; - -typedef enum PT_TYPE_e PT_TYPE; - -typedef struct PT_DATA_s PTD; - -struct PT_DATA_s { - PT_TYPE type; - union { special_dfa s; dfa *d; } data; - char *label; /* NULL-terminated string */ -}; +/* enum PT_TYPE_e { + * PT_DFA, + * PT_SPECIAL + * }; + * + * typedef enum PT_TYPE_e PT_TYPE; */ /* T or NT diff --git a/src/json.bnf b/src/json.bnf new file mode 100644 index 0000000..c1b5440 --- /dev/null +++ b/src/json.bnf @@ -0,0 +1,161 @@ +[unescaped]: \ -!\#-\[\]-%x10FFFF + +-- + +JSON-text: ws value ws + +begin-array: ws "[" ws + +begin-object: ws "{" ws + +end-array: ws "]" ws + +end-object: ws "}" ws + +name-separator: ws ":" ws + +value-separator: ws "," ws + +ws: whitespace ws +ws: + +whitespace: " " +whitespace: "\t" +whitespace: "\n" +whitespace: "\r" + +value: false +value: null +value: true +value: object +value: array +value: number +value: string + +false = "false" + +null = "null" + +true = "true" + +object: begin-object members end-object + +members: +members: member more-member + +more-member: value-separator member more-member +more-member: + +member: string name-separator value + +array: begin-array array-elements end-array + +array-elements: +array-elements: value more-elements + +more-elements: value-separator value more-elements +more-elements: + +number: minus-or-not int frac-or-not exp-or-not + +minus-or-not: minus +minus-or-not: + +frac-or-not: frac +frac-or-not: + +exp-or-not: exp +exp-or-not: + +decimal-point: "." + +digit1-9: "1" +digit1-9: "2" +digit1-9: "3" +digit1-9: "4" +digit1-9: "5" +digit1-9: "6" +digit1-9: "7" +digit1-9: "8" +digit1-9: "9" + +e: "e" +e: "E" + +exp: e minus-or-plus-or-none DIGITS + +minus-or-plus-or-none: minus +minus-or-plus-or-none: plus +minus-or-plus-or-none: + +frac: decimal-point DIGITS + +int: zero +int: digit1-9 +int: digit1-9 DIGITS + +minus: "-" +plus: "+" +zero: "0" + +DIGIT: "0" +DIGIT: "1" +DIGIT: "2" +DIGIT: "3" +DIGIT: "4" +DIGIT: "5" +DIGIT: "6" +DIGIT: "7" +DIGIT: "8" +DIGIT: "9" + +DIGITS: DIGIT DIGITS +DIGITS: DIGIT + +string: quotation-mark chars quotation-mark + +chars: char chars +chars: + +char: [unescaped] +char: escape "\"" +char: escape "\\" +char: escape "/" +char: escape "b" +char: escape "f" +char: escape "n" +char: escape "r" +char: escape "t" +char: escape "u" 4HEXDIG + +escape: "\\" + +quotation-mark: "\"" + +4HEXDIG: HEXDIG HEXDIG HEXDIG HEXDIG + +HEXDIG: DIGIT +HEXDIG: letter_a +HEXDIG: letter_b +HEXDIG: letter_c +HEXDIG: letter_d +HEXDIG: letter_e +HEXDIG: letter_F + +letter_a: "a" +letter_a: "A" + +letter_b: "b" +letter_b: "B" + +letter_c: "c" +letter_c: "C" + +letter_d: "d" +letter_d: "D" + +letter_e: "e" +letter_e: "E" + +letter_f: "f" +letter_f: "F" diff --git a/src/test/check_cnp.c b/src/test/check_cnp.c index 00c8a64..7a0a325 100644 --- a/src/test/check_cnp.c +++ b/src/test/check_cnp.c @@ -97,174 +97,19 @@ main(int UNUSED argc, char ** UNUSED argv) printf("\nPrinting the input...\n%s\n", get_data((str *) string)); - Environment *env = new_env(g, (str *) string); + Environment *env = cnp_parse(g, (str *) string); - if (crf_add_node(env_crfp(env), (pair2) { 0 })) goto cleanup; - - nt_add(env, 0, 0); - - CHECK_ENV_ERROR(env, goto cleanup;); - - NUM current_grade = 0; - - prodecor *current_prodecor = NULL; - - Rule_group *rg = NULL; - List *right = NULL, *tnts = NULL; - - NUM *num_string = NULL, num_string_len = 0; - BOOL errorp = 0; - - env_str(env, &num_string, &num_string_len); - - while ((current_prodecor = - pop_procr(env_r(env), env_u(env), ¤t_grade))) { - /* fleprintf("{ %ld, %ld, %ld, %ld, %ld }\n", - * current_prodecor->x, current_prodecor->y, - * current_prodecor->z, current_prodecor->w, - * current_grade); - * print_procr(env_r(env)); */ - - /* different cases */ - - rg = grammar_rule(env_grammar(env), current_prodecor->x); - right = rg_nth(rg, current_prodecor->y); - - /* separate the empty rules out */ - if (list_length(right) == 0) { - errorp = - bsr_add(env_grammar(env), env_bsrp(env), - current_prodecor->x, current_prodecor->y, 0, - current_grade, current_grade, current_grade); - - if (errorp) goto cleanup; - - if (env_follow_p(env, current_prodecor->x, - *(num_string+current_grade))) { - rtn(env, current_prodecor->x, - current_prodecor->w, current_grade); - } - - goto continue_label; - } - - /* fleprintf0("non empty rule\n"); */ - - /* if at the end of a rule, return if possible */ - if (current_prodecor->z == list_length(right)) { - if (env_follow_p(env, current_prodecor->x, - *(num_string+current_grade))) { - rtn(env, current_prodecor->x, - current_prodecor->w, current_grade); - } - - goto continue_label; - } - - /* fleprintf0("not at the end of the rule\n"); */ - - /* if at the start of a rule, no need for test_select */ - if (current_prodecor->z == 0) { - /* fleprintf0("at the start of the rule\n"); */ - current_prodecor->z = 1; - - /* otherwise test select */ + if (env) { + if (!(env_error_p(env))) { + printf("Successfully parsed the input:\n"); + bsr_print(env_bsrp(env), env_grammar(env), 1); } else { - /* fleprintf0("start by test select\n"); */ - tnts = - array_to_list(list_array(right)+current_prodecor->z, - list_length(right)-current_prodecor->z); - if (test_select(env, *(num_string+current_grade), - current_prodecor->x, tnts)) { - /* fleprintf0("successful test\n"); */ - free(tnts); - tnts = NULL; - ++(current_prodecor->z); - } else { - /* fleprintf0("failed test\n"); */ - free(tnts); - tnts = NULL; - goto continue_label; - } + printf("There are errors!\n"); } - CHECK_ENV_ERROR(env, goto cleanup;); - - /* while there are terminals */ - while (((TNT *) list_nth(right, current_prodecor->z-1))->type == - TERMINAL) { - /* fleprintf0("found terminal\n"); */ - /* add to BSR set */ - errorp = - bsr_add(env_grammar(env), env_bsrp(env), - current_prodecor->x, current_prodecor->y, - current_prodecor->z, current_prodecor->w, - current_grade, current_grade+1); - - if (errorp) goto cleanup; - - current_grade++; - - /* if at the end of the rule, return if possible */ - if (current_prodecor->z == list_length(right)) { - if (env_follow_p(env, current_prodecor->x, - *(num_string+current_grade))) { - /* fleprintf0("we choose to return\n"); */ - rtn(env, current_prodecor->x, - current_prodecor->w, current_grade); - /* fleprintf0("hi\n"); */ - } - - goto continue_label; - } - /* fleprintf0("terminal not at the end\n"); */ - /* else test select */ - tnts = - array_to_list(list_array(right)+current_prodecor->z, - list_length(right)-current_prodecor->z); - if (test_select(env, *(num_string+current_grade), - current_prodecor->x, tnts)) { - /* fleprintf0("Successful test\n"); */ - free(tnts); - tnts = NULL; - (current_prodecor->z)++; - } else { - /* fleprintf0("Failed test\n"); */ - /* printf("next thing is "); */ - /* print_tnt(list_nth(right, current_prodecor->z)); */ - /* printf("\n"); */ - free(tnts); - tnts = NULL; - goto continue_label; - } - } - - /* fleprintf0("encountered non-terminal\n"); */ - - /* when a nonterminal is encountered, we call it */ - cnp_call(env, (pair3) { - .x = current_prodecor->x, - .y = current_prodecor->y, - .z = current_prodecor->z - }, current_prodecor->w, current_grade); - - /* fleprintf("after call { %ld, %ld, %ld, %ld, %ld }\n", - * current_prodecor->x, current_prodecor->y, - * current_prodecor->z, current_prodecor->w, - * current_grade); */ - - continue_label: - - CHECK_ENV_ERROR(env, goto cleanup;); - - free(current_prodecor); + destroy_env(env); } - cleanup: - bsr_print(env_bsrp(env), env_grammar(env), 1); - - destroy_env(env); - destroy_grammar(g, 1); destroy_list(rules, 1); @@ -273,19 +118,3 @@ main(int UNUSED argc, char ** UNUSED argv) return 0; } - -/* archives - - printf("\nprint first for 0:\n"); - env_print_first(env, 0); - - printf("\nprint first for 1:\n"); - env_print_first(env, 1); - - printf("\nprint follow for 0:\n"); - env_print_follow(env, 0); - - printf("\nprint follow for 1:\n"); - env_print_follow(env, 1); - - */ -- cgit v1.2.3-18-g5258