summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2022-01-31 15:59:11 +0800
committerJSDurand <mmemmew@gmail.com>2022-01-31 15:59:11 +0800
commiteb007d554251456a2a508849edf91b15aab1333e (patch)
treebd88e78debdd646da87aa60f1bf2904eaa4370ca
parenta8bd5e9d85ac9928bd29add82e887f82642af893 (diff)
cnp: save point
Now we need to implement predicates, in order to have practical applications.
-rw-r--r--src/cnp.c176
-rw-r--r--src/cnp.h3
-rw-r--r--src/dfa.c26
-rw-r--r--src/dfa.h5
-rw-r--r--src/grammar.c13
-rw-r--r--src/grammar.h20
-rw-r--r--src/json.bnf161
-rw-r--r--src/test/check_cnp.c185
8 files changed, 393 insertions, 196 deletions
diff --git a/src/cnp.c b/src/cnp.c
index 8c7c519..91a8df7 100644
--- a/src/cnp.c
+++ b/src/cnp.c
@@ -522,3 +522,179 @@ destroy_env(Environment *env)
free(env->gi);
free(env);
}
+
+H_ATTR
+Environment *
+cnp_parse(Grammar *g, str *string)
+{
+ Environment *env = new_env(g, (str *) string);
+
+ if (crf_add_node(env_crfp(env), (pair2) { 0 })) goto cleanup;
+
+ nt_add(env, 0, 0);
+
+ CHECK_ENV_ERROR(env, goto cleanup;);
+
+ NUM current_grade = 0;
+
+ prodecor *current_prodecor = NULL;
+
+ Rule_group *rg = NULL;
+ List *right = NULL, *tnts = NULL;
+
+ NUM *num_string = NULL, num_string_len = 0;
+ BOOL errorp = 0;
+
+ env_str(env, &num_string, &num_string_len);
+
+ while ((current_prodecor =
+ pop_procr(env_r(env), env_u(env), &current_grade))) {
+ /* fleprintf("{ %ld, %ld, %ld, %ld, %ld }\n",
+ * current_prodecor->x, current_prodecor->y,
+ * current_prodecor->z, current_prodecor->w,
+ * current_grade);
+ * print_procr(env_r(env)); */
+
+ /* different cases */
+
+ rg = grammar_rule(env_grammar(env), current_prodecor->x);
+ right = rg_nth(rg, current_prodecor->y);
+
+ /* separate the empty rules out */
+ if (list_length(right) == 0) {
+ errorp =
+ bsr_add(env_grammar(env), env_bsrp(env),
+ current_prodecor->x, current_prodecor->y, 0,
+ current_grade, current_grade, current_grade);
+
+ if (errorp) goto cleanup;
+
+ if (env_follow_p(env, current_prodecor->x,
+ *(num_string+current_grade))) {
+ rtn(env, current_prodecor->x,
+ current_prodecor->w, current_grade);
+ }
+
+ goto continue_label;
+ }
+
+ /* fleprintf0("non empty rule\n"); */
+
+ /* if at the end of a rule, return if possible */
+ if (current_prodecor->z == list_length(right)) {
+ if (env_follow_p(env, current_prodecor->x,
+ *(num_string+current_grade))) {
+ rtn(env, current_prodecor->x,
+ current_prodecor->w, current_grade);
+ }
+
+ goto continue_label;
+ }
+
+ /* fleprintf0("not at the end of the rule\n"); */
+
+ /* if at the start of a rule, no need for test_select */
+ if (current_prodecor->z == 0) {
+ /* fleprintf0("at the start of the rule\n"); */
+ current_prodecor->z = 1;
+
+ /* otherwise test select */
+ } else {
+ /* fleprintf0("start by test select\n"); */
+ tnts =
+ array_to_list(list_array(right)+current_prodecor->z,
+ list_length(right)-current_prodecor->z);
+ if (test_select(env, *(num_string+current_grade),
+ current_prodecor->x, tnts)) {
+ /* fleprintf0("successful test\n"); */
+ free(tnts);
+ tnts = NULL;
+ ++(current_prodecor->z);
+ } else {
+ /* fleprintf0("failed test\n"); */
+ free(tnts);
+ tnts = NULL;
+ goto continue_label;
+ }
+ }
+
+ CHECK_ENV_ERROR(env, goto cleanup;);
+
+ /* while there are terminals */
+ while (((TNT *) list_nth(right, current_prodecor->z-1))->type ==
+ TERMINAL) {
+ /* fleprintf0("found terminal\n"); */
+ /* add to BSR set */
+ errorp =
+ bsr_add(env_grammar(env), env_bsrp(env),
+ current_prodecor->x, current_prodecor->y,
+ current_prodecor->z, current_prodecor->w,
+ current_grade, current_grade+1);
+
+ if (errorp) goto cleanup;
+
+ current_grade++;
+
+ /* if at the end of the rule, return if possible */
+ if (current_prodecor->z == list_length(right)) {
+ if (env_follow_p(env, current_prodecor->x,
+ *(num_string+current_grade))) {
+ /* fleprintf0("we choose to return\n"); */
+ rtn(env, current_prodecor->x,
+ current_prodecor->w, current_grade);
+ /* fleprintf0("hi\n"); */
+ }
+
+ goto continue_label;
+ }
+ /* fleprintf0("terminal not at the end\n"); */
+ /* else test select */
+ tnts =
+ array_to_list(list_array(right)+current_prodecor->z,
+ list_length(right)-current_prodecor->z);
+ if (test_select(env, *(num_string+current_grade),
+ current_prodecor->x, tnts)) {
+ /* fleprintf0("Successful test\n"); */
+ free(tnts);
+ tnts = NULL;
+ (current_prodecor->z)++;
+ } else {
+ /* fleprintf0("Failed test\n"); */
+ /* printf("next thing is "); */
+ /* print_tnt(list_nth(right, current_prodecor->z)); */
+ /* printf("\n"); */
+ free(tnts);
+ tnts = NULL;
+ goto continue_label;
+ }
+ }
+
+ /* fleprintf0("encountered non-terminal\n"); */
+
+ /* when a nonterminal is encountered, we call it */
+ cnp_call(env, (pair3) {
+ .x = current_prodecor->x,
+ .y = current_prodecor->y,
+ .z = current_prodecor->z
+ }, current_prodecor->w, current_grade);
+
+ /* fleprintf("after call { %ld, %ld, %ld, %ld, %ld }\n",
+ * current_prodecor->x, current_prodecor->y,
+ * current_prodecor->z, current_prodecor->w,
+ * current_grade); */
+
+ continue_label:
+
+ CHECK_ENV_ERROR(env, goto cleanup;);
+
+ free(current_prodecor);
+ }
+
+ return env;
+
+ cleanup:
+
+ destroy_env(env);
+
+ return NULL;
+}
diff --git a/src/cnp.h b/src/cnp.h
index 2b0c104..904e383 100644
--- a/src/cnp.h
+++ b/src/cnp.h
@@ -58,6 +58,7 @@ void rtn(Environment *env, NUM X, NUM k, NUM j);
void cnp_call(Environment *env, pair3 label, NUM i, NUM j);
-/* TODO: Main algorithm */
+/* On error return NULL */
+H_ATTR Environment *cnp_parse(Grammar *g, str *string);
#endif
diff --git a/src/dfa.c b/src/dfa.c
index e3bcdf0..fb67047 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -59,12 +59,15 @@ void
destroy_dfa(dfa *table)
{
/* function pointers need no destructors */
- if (table->type == DFA_TYPE_SPECIAL) return;
+ if (table->type == DFA_TYPE_SPECIAL) goto free_last;
for (int i = 0; i < table->data.table.row_n;)
free(*(table->data.table.table+i++));
free(table->data.table.table);
+
+ free_last:
+
free(table);
}
@@ -690,3 +693,24 @@ run_dfa(const dfa * const restrict table, const NUM code)
return 0;
}
+
+P_ATTR
+static BOOL
+dfa_any_fun(const NUM UNUSED code) { return 1; }
+
+dfa *
+dfa_from_func(special_dfa func)
+{
+ dfa *result = NULL;
+ SAFE_MALLOC(dfa, result, 1, return NULL;);
+ result->type = DFA_TYPE_SPECIAL;
+ result->data.fn = func;
+
+ return result;
+}
+
+dfa *
+dfa_any(void)
+{
+ return dfa_from_func(dfa_any_fun);
+}
diff --git a/src/dfa.h b/src/dfa.h
index 70c8bf6..9116999 100644
--- a/src/dfa.h
+++ b/src/dfa.h
@@ -57,7 +57,10 @@ dfa *dfa_from_bytes_both(int sequence_size,
/* TODO: Construct some basic frequently used character classes. */
-inline BOOL dfa_any_fun(const NUM UNUSED code) { return 1; }
+dfa *dfa_from_func(special_dfa func);
+
+/* return a new instance of the any class */
+dfa *dfa_any(void);
BOOL run_dfa(CCR_MOD(dfa *) table, const NUM code);
diff --git a/src/grammar.c b/src/grammar.c
index 3b5a959..afb9353 100644
--- a/src/grammar.c
+++ b/src/grammar.c
@@ -44,6 +44,15 @@ rg_nth(CCR_MOD(Rule_group *) rg, NUM n)
return (List *) list_nth(rg->rights, n);
}
+struct PT_DATA_s {
+ dfa *dfap;
+ /* NULL-terminated strings */
+ char *user_name;
+ char *raw_name;
+};
+
+typedef struct PT_DATA_s PTD;
+
/* TODO: Add some statistic counts to assist the hash tables. For
example, store the total number of terminals as an integer; then
the hash table can be hinted at initialization to contain that many
@@ -63,7 +72,8 @@ struct Grammar_s {
To print them, first encode them into normal strings. */
List *names;
- /* TODO: Add an array of predicates. */
+ /* an array of predicates. */
+ PTD *predicates;
};
static void
@@ -179,6 +189,7 @@ BOOL
build_grammar(Grammar *g, const List *rules, CC_MOD(List *) names)
{
g->names = (List *) names;
+ g->predicates = NULL;
NUM len = list_length(names);
NUM rule_len = list_length(rules);
diff --git a/src/grammar.h b/src/grammar.h
index cbda887..d51c612 100644
--- a/src/grammar.h
+++ b/src/grammar.h
@@ -51,20 +51,12 @@ typedef long T;
typedef unsigned long NT;
typedef unsigned long PT;
-enum PT_TYPE_e {
- PT_DFA,
- PT_SPECIAL
-};
-
-typedef enum PT_TYPE_e PT_TYPE;
-
-typedef struct PT_DATA_s PTD;
-
-struct PT_DATA_s {
- PT_TYPE type;
- union { special_dfa s; dfa *d; } data;
- char *label; /* NULL-terminated string */
-};
+/* enum PT_TYPE_e {
+ * PT_DFA,
+ * PT_SPECIAL
+ * };
+ *
+ * typedef enum PT_TYPE_e PT_TYPE; */
/* T or NT
diff --git a/src/json.bnf b/src/json.bnf
new file mode 100644
index 0000000..c1b5440
--- /dev/null
+++ b/src/json.bnf
@@ -0,0 +1,161 @@
+[unescaped]: \ -!\#-\[\]-%x10FFFF
+
+--
+
+JSON-text: ws value ws
+
+begin-array: ws "[" ws
+
+begin-object: ws "{" ws
+
+end-array: ws "]" ws
+
+end-object: ws "}" ws
+
+name-separator: ws ":" ws
+
+value-separator: ws "," ws
+
+ws: whitespace ws
+ws:
+
+whitespace: " "
+whitespace: "\t"
+whitespace: "\n"
+whitespace: "\r"
+
+value: false
+value: null
+value: true
+value: object
+value: array
+value: number
+value: string
+
+false = "false"
+
+null = "null"
+
+true = "true"
+
+object: begin-object members end-object
+
+members:
+members: member more-member
+
+more-member: value-separator member more-member
+more-member:
+
+member: string name-separator value
+
+array: begin-array array-elements end-array
+
+array-elements:
+array-elements: value more-elements
+
+more-elements: value-separator value more-elements
+more-elements:
+
+number: minus-or-not int frac-or-not exp-or-not
+
+minus-or-not: minus
+minus-or-not:
+
+frac-or-not: frac
+frac-or-not:
+
+exp-or-not: exp
+exp-or-not:
+
+decimal-point: "."
+
+digit1-9: "1"
+digit1-9: "2"
+digit1-9: "3"
+digit1-9: "4"
+digit1-9: "5"
+digit1-9: "6"
+digit1-9: "7"
+digit1-9: "8"
+digit1-9: "9"
+
+e: "e"
+e: "E"
+
+exp: e minus-or-plus-or-none DIGITS
+
+minus-or-plus-or-none: minus
+minus-or-plus-or-none: plus
+minus-or-plus-or-none:
+
+frac: decimal-point DIGITS
+
+int: zero
+int: digit1-9
+int: digit1-9 DIGITS
+
+minus: "-"
+plus: "+"
+zero: "0"
+
+DIGIT: "0"
+DIGIT: "1"
+DIGIT: "2"
+DIGIT: "3"
+DIGIT: "4"
+DIGIT: "5"
+DIGIT: "6"
+DIGIT: "7"
+DIGIT: "8"
+DIGIT: "9"
+
+DIGITS: DIGIT DIGITS
+DIGITS: DIGIT
+
+string: quotation-mark chars quotation-mark
+
+chars: char chars
+chars:
+
+char: [unescaped]
+char: escape "\""
+char: escape "\\"
+char: escape "/"
+char: escape "b"
+char: escape "f"
+char: escape "n"
+char: escape "r"
+char: escape "t"
+char: escape "u" 4HEXDIG
+
+escape: "\\"
+
+quotation-mark: "\""
+
+4HEXDIG: HEXDIG HEXDIG HEXDIG HEXDIG
+
+HEXDIG: DIGIT
+HEXDIG: letter_a
+HEXDIG: letter_b
+HEXDIG: letter_c
+HEXDIG: letter_d
+HEXDIG: letter_e
+HEXDIG: letter_F
+
+letter_a: "a"
+letter_a: "A"
+
+letter_b: "b"
+letter_b: "B"
+
+letter_c: "c"
+letter_c: "C"
+
+letter_d: "d"
+letter_d: "D"
+
+letter_e: "e"
+letter_e: "E"
+
+letter_f: "f"
+letter_f: "F"
diff --git a/src/test/check_cnp.c b/src/test/check_cnp.c
index 00c8a64..7a0a325 100644
--- a/src/test/check_cnp.c
+++ b/src/test/check_cnp.c
@@ -97,174 +97,19 @@ main(int UNUSED argc, char ** UNUSED argv)
printf("\nPrinting the input...\n%s\n", get_data((str *) string));
- Environment *env = new_env(g, (str *) string);
+ Environment *env = cnp_parse(g, (str *) string);
- if (crf_add_node(env_crfp(env), (pair2) { 0 })) goto cleanup;
-
- nt_add(env, 0, 0);
-
- CHECK_ENV_ERROR(env, goto cleanup;);
-
- NUM current_grade = 0;
-
- prodecor *current_prodecor = NULL;
-
- Rule_group *rg = NULL;
- List *right = NULL, *tnts = NULL;
-
- NUM *num_string = NULL, num_string_len = 0;
- BOOL errorp = 0;
-
- env_str(env, &num_string, &num_string_len);
-
- while ((current_prodecor =
- pop_procr(env_r(env), env_u(env), &current_grade))) {
- /* fleprintf("{ %ld, %ld, %ld, %ld, %ld }\n",
- * current_prodecor->x, current_prodecor->y,
- * current_prodecor->z, current_prodecor->w,
- * current_grade);
- * print_procr(env_r(env)); */
-
- /* different cases */
-
- rg = grammar_rule(env_grammar(env), current_prodecor->x);
- right = rg_nth(rg, current_prodecor->y);
-
- /* separate the empty rules out */
- if (list_length(right) == 0) {
- errorp =
- bsr_add(env_grammar(env), env_bsrp(env),
- current_prodecor->x, current_prodecor->y, 0,
- current_grade, current_grade, current_grade);
-
- if (errorp) goto cleanup;
-
- if (env_follow_p(env, current_prodecor->x,
- *(num_string+current_grade))) {
- rtn(env, current_prodecor->x,
- current_prodecor->w, current_grade);
- }
-
- goto continue_label;
- }
-
- /* fleprintf0("non empty rule\n"); */
-
- /* if at the end of a rule, return if possible */
- if (current_prodecor->z == list_length(right)) {
- if (env_follow_p(env, current_prodecor->x,
- *(num_string+current_grade))) {
- rtn(env, current_prodecor->x,
- current_prodecor->w, current_grade);
- }
-
- goto continue_label;
- }
-
- /* fleprintf0("not at the end of the rule\n"); */
-
- /* if at the start of a rule, no need for test_select */
- if (current_prodecor->z == 0) {
- /* fleprintf0("at the start of the rule\n"); */
- current_prodecor->z = 1;
-
- /* otherwise test select */
+ if (env) {
+ if (!(env_error_p(env))) {
+ printf("Successfully parsed the input:\n");
+ bsr_print(env_bsrp(env), env_grammar(env), 1);
} else {
- /* fleprintf0("start by test select\n"); */
- tnts =
- array_to_list(list_array(right)+current_prodecor->z,
- list_length(right)-current_prodecor->z);
- if (test_select(env, *(num_string+current_grade),
- current_prodecor->x, tnts)) {
- /* fleprintf0("successful test\n"); */
- free(tnts);
- tnts = NULL;
- ++(current_prodecor->z);
- } else {
- /* fleprintf0("failed test\n"); */
- free(tnts);
- tnts = NULL;
- goto continue_label;
- }
+ printf("There are errors!\n");
}
- CHECK_ENV_ERROR(env, goto cleanup;);
-
- /* while there are terminals */
- while (((TNT *) list_nth(right, current_prodecor->z-1))->type ==
- TERMINAL) {
- /* fleprintf0("found terminal\n"); */
- /* add to BSR set */
- errorp =
- bsr_add(env_grammar(env), env_bsrp(env),
- current_prodecor->x, current_prodecor->y,
- current_prodecor->z, current_prodecor->w,
- current_grade, current_grade+1);
-
- if (errorp) goto cleanup;
-
- current_grade++;
-
- /* if at the end of the rule, return if possible */
- if (current_prodecor->z == list_length(right)) {
- if (env_follow_p(env, current_prodecor->x,
- *(num_string+current_grade))) {
- /* fleprintf0("we choose to return\n"); */
- rtn(env, current_prodecor->x,
- current_prodecor->w, current_grade);
- /* fleprintf0("hi\n"); */
- }
-
- goto continue_label;
- }
- /* fleprintf0("terminal not at the end\n"); */
- /* else test select */
- tnts =
- array_to_list(list_array(right)+current_prodecor->z,
- list_length(right)-current_prodecor->z);
- if (test_select(env, *(num_string+current_grade),
- current_prodecor->x, tnts)) {
- /* fleprintf0("Successful test\n"); */
- free(tnts);
- tnts = NULL;
- (current_prodecor->z)++;
- } else {
- /* fleprintf0("Failed test\n"); */
- /* printf("next thing is "); */
- /* print_tnt(list_nth(right, current_prodecor->z)); */
- /* printf("\n"); */
- free(tnts);
- tnts = NULL;
- goto continue_label;
- }
- }
-
- /* fleprintf0("encountered non-terminal\n"); */
-
- /* when a nonterminal is encountered, we call it */
- cnp_call(env, (pair3) {
- .x = current_prodecor->x,
- .y = current_prodecor->y,
- .z = current_prodecor->z
- }, current_prodecor->w, current_grade);
-
- /* fleprintf("after call { %ld, %ld, %ld, %ld, %ld }\n",
- * current_prodecor->x, current_prodecor->y,
- * current_prodecor->z, current_prodecor->w,
- * current_grade); */
-
- continue_label:
-
- CHECK_ENV_ERROR(env, goto cleanup;);
-
- free(current_prodecor);
+ destroy_env(env);
}
- cleanup:
- bsr_print(env_bsrp(env), env_grammar(env), 1);
-
- destroy_env(env);
-
destroy_grammar(g, 1);
destroy_list(rules, 1);
@@ -273,19 +118,3 @@ main(int UNUSED argc, char ** UNUSED argv)
return 0;
}
-
-/* archives
-
- printf("\nprint first for 0:\n");
- env_print_first(env, 0);
-
- printf("\nprint first for 1:\n");
- env_print_first(env, 1);
-
- printf("\nprint follow for 0:\n");
- env_print_follow(env, 0);
-
- printf("\nprint follow for 1:\n");
- env_print_follow(env, 1);
-
- */