summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2022-02-05 23:23:03 +0800
committerJSDurand <mmemmew@gmail.com>2022-02-05 23:23:03 +0800
commit1c8cbfd09ff9dd02f6d8d938c45e3f29a35f6f32 (patch)
treee3d6bb3b5246bcfa38662615ad59663d5b0377d1
parent510b10b96b546fcc6c6b6be85050305ddd192a41 (diff)
predicates start working now
Now we have a working implementation of predicates. It now only remains to write the parser of grammars. Of course we shall generate this parser by this parser generator itself, because why not. ;-P
-rw-r--r--src/Makefile.am4
-rw-r--r--src/cnp.c68
-rw-r--r--src/dfa.c25
-rw-r--r--src/grammar.c31
-rw-r--r--src/grammar.h5
-rw-r--r--src/test/check_cnp.c9
-rw-r--r--src/test/check_pred.c165
7 files changed, 275 insertions, 32 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index 2b2d7d8..d097f1f 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -48,8 +48,8 @@ ht.c utf8.c str.c dfa.c tuple.c
check_cnp_SOURCES = test/check_cnp.c crf.c cnp.c grammar.c list.c \
util.c ht.c utf8.c str.c dfa.c bsr.c tuple.c
-check_pred_SOURCES = test/check_pred.c dfa.c list.c str.c utf8.c \
-grammar.c util.c ht.c
+check_pred_SOURCES = test/check_pred.c crf.c cnp.c grammar.c list.c \
+util.c ht.c utf8.c str.c dfa.c bsr.c tuple.c
check_tuple_SOURCES = test/check_tuple.c tuple.c util.c
diff --git a/src/cnp.c b/src/cnp.c
index fe5a4dc..ce5a527 100644
--- a/src/cnp.c
+++ b/src/cnp.c
@@ -32,6 +32,7 @@ nt_add(Environment *env, NUM X, NUM j)
for (NUM i = 0; i < rg_len(RG); i++) {
BOOL result =
test_select(env, *(env->string+j), X, rg_nth(RG, i));
+ /* fleprintf("i = %ld, after test select\n", i); */
for (NUM k = 0; k < grammar_left_len(GI->g); k++) {
ht_reset(env->result_ts+k, DELETE_KEY);
@@ -83,10 +84,15 @@ test_select(Environment *env, NUM b, NUM X, CCR_MOD(List *) tnts)
goto success;
}
- /* TODO: predicates haven't been implemented yet */
- /* for (NUM i = 0; i < ht_size(result_ps); i++) {
- *
- * } */
+ for (NUM i = 0; i < ht_size(result_ps); i++) {
+ PTD *ptdp = grammar_ptd(gi->g, **((PT **) ht_keys(result_ps+i)));
+
+ if (ptd_run(ptdp, b)) {
+ /* fleprintf("i = %ld\n", i); */
+ result = 1;
+ goto success;
+ }
+ }
for (NUM i = 0; i < list_length(tnts); i++) {
@@ -106,9 +112,20 @@ test_select(Environment *env, NUM b, NUM X, CCR_MOD(List *) tnts)
}
- if (ht_find(gi->sts+X, &b) != NULL) result = 1;
+ if (ht_find(gi->sts+X, &b) != NULL) {
+ result = 1;
+ goto success;
+ }
- /* TODO: predicates haven't been implemented yet */
+ for (NUM i = 0; i < ht_size(gi->sps+X); i++) {
+ PTD *ptdp = grammar_ptd(gi->g, **((PT **) ht_keys(gi->sps+X)));
+ /* fleprintf("i = %ld\n", i); */
+ if (ptd_run(ptdp, b)) {
+ /* fleprintf("i = %ld\n", i); */
+ result = 1;
+ goto success;
+ }
+ }
goto success;
@@ -244,9 +261,13 @@ cnp_call(Environment *env, pair3 label, NUM i, NUM j)
TNT *xtnt = (TNT *) list_nth(tnts, label.z - 1);
- if (xtnt->type != NONTERMINAL) goto cleanup;
+ if (xtnt->type != NONTERMINAL) {
+ fleprintf("Wrong type: %d\n", xtnt->type);
+ goto cleanup;
+ }
NUM X = (NUM) xtnt->data.nt;
+ /* fleprintf("X = %ld, j = %ld\n", X, j); */
pair2 node = (pair2) { .x = X, .y = j };
pair4 u = (pair4)
@@ -258,20 +279,29 @@ cnp_call(Environment *env, pair3 label, NUM i, NUM j)
};
if (!(crf_find_node(env->crfp, node))) {
- if (crf_add_node(env->crfp, node)) goto cleanup;
+ if (crf_add_node(env->crfp, node)) {
+ fleprintf0("fail to add node to crf\n");
+ goto cleanup;
+ }
- if (crf_add_edge(env->crfp, node, u)) goto cleanup;
+ if (crf_add_edge(env->crfp, node, u)) {
+ fleprintf0("fail to add edge to crf\n");
+ goto cleanup;
+ }
/* errors will be stored in ENV, so we simply return */
nt_add(env, X, j);
- /* printf("X = %ld, j = %ld\n", X, j); */
+ /* fleprintf("X = %ld, j = %ld\n", X, j); */
return;
}
if (!(crf_find_edge(env->crfp, node, u))) {
- if (crf_add_edge(env->crfp, node, u)) goto cleanup;
+ if (crf_add_edge(env->crfp, node, u)) {
+ fleprintf0("fail to add edge to crf\n");
+ goto cleanup;
+ }
call_internal_env = env;
call_internal_label5 =
@@ -344,7 +374,17 @@ H_ATTR
BOOL
env_follow_p(CCR_MOD(Environment *) env, NUM X, NUM t)
{
- return ht_find(((env->gi->sts)+X), &t) != NULL;
+ if (ht_find(((env->gi->sts)+X), &t) != NULL) return 1;
+
+ for (NUM i = 0; i < ht_size(env->gi->sps+X); i++) {
+ PTD *ptdp = grammar_ptd(env->gi->g,
+ **((PT **) ht_keys(env->gi->sps+X)));
+ if (ptd_run(ptdp, t)) return 1;
+ }
+
+ /* fleprintf("X = %ld, t = %ld\n", X, t); */
+
+ return 0;
}
void
@@ -688,7 +728,9 @@ cnp_parse(Grammar *g, str *string)
/* while there are terminals */
while (((TNT *) list_nth(right, current_prodecor->z-1))->type ==
- TERMINAL) {
+ TERMINAL ||
+ ((TNT *) list_nth(right, current_prodecor->z-1))->type ==
+ PREDICATE) {
/* fleprintf0("found terminal\n"); */
/* add to BSR set */
errorp =
diff --git a/src/dfa.c b/src/dfa.c
index fb67047..9da201d 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -623,11 +623,14 @@ run_dfa(const dfa * const restrict table, const NUM code)
break;
}
- char *s = MYALLOC(char, 5);
+ /* negative codes have special purposes */
+ if (code < 0) return 0;
+
+ char s[5];
str *strp = new_str(s, 5);
if (encode(code, strp)) {
- destroy_str(strp, 1);
+ destroy_str(strp, 0);
fleprintf0("Fail to encode\n");
return 0;
}
@@ -636,33 +639,33 @@ run_dfa(const dfa * const restrict table, const NUM code)
for (NUM i = 0; i < str_length(strp); i++) {
unsigned char c = (unsigned char) *(s+i);
+
int lookup_value = 0;
for (int j = 0, len = 0; len <= c;) {
+
len += *(*(table->data.table.table+current_row)+(j<<1)+1);
lookup_value = *(*(table->data.table.table+current_row)
+(j++<<1));
}
- /* fleprintf("lookup_value = %d\n", lookup_value); */
-
if (lookup_value > 0) {
current_row = lookup_value;
} else {
switch (lookup_value) {
case DFA_STATE_REJECT:
- destroy_str(strp, 1);
+ destroy_str(strp, 0);
return 0;
break;
case DFA_STATE_UNKNOWN:
switch (table->type) {
case DFA_TYPE_BOTH:
case DFA_TYPE_POSITIVE:
- destroy_str(strp, 1);
+ destroy_str(strp, 0);
return 0;
break;
case DFA_TYPE_NEGATIVE:
- destroy_str(strp, 1);
+ destroy_str(strp, 0);
return 1;
break;
default:
@@ -670,26 +673,26 @@ run_dfa(const dfa * const restrict table, const NUM code)
the table is of the special type. But I am apparently
paranoia. */
fleprintf0("Special table in the wrong place\n");
- destroy_str(strp, 1);
+ destroy_str(strp, 0);
return 0;
break;
}
break;
case DFA_STATE_ACCEPT:
- destroy_str(strp, 1);
+ destroy_str(strp, 0);
return 1;
break;
default:
fleprintf("Unknown state number for char %d: %d\n",
c, lookup_value);
- destroy_str(strp, 1);
+ destroy_str(strp, 0);
return 0;
break;
}
}
}
- destroy_str(strp, 1);
+ destroy_str(strp, 0);
return 0;
}
diff --git a/src/grammar.c b/src/grammar.c
index 0b2be8d..1ea52b0 100644
--- a/src/grammar.c
+++ b/src/grammar.c
@@ -73,6 +73,12 @@ destroy_ptd(PTD *p, int flag)
free(p);
}
+BOOL
+ptd_run(PTD *p, NUM code)
+{
+ return run_dfa(p->dfap, code);
+}
+
static void
destroy_ptd_free_all(void *e)
{
@@ -85,7 +91,7 @@ destroy_ptd_no_free(void *e)
destroy_ptd((PTD *)e, 0);
}
-/* TODO: Add some statistic counts to assist the hash tables. For
+/* REVIEW: Add some statistic counts to assist the hash tables. For
example, store the total number of terminals as an integer; then
the hash table can be hinted at initialization to contain that many
elements, which can reduce the number of time a hash table needs to
@@ -411,9 +417,21 @@ void
print_grammar(CC_MOD(Grammar *) g)
{
printf("Printing a grammar:\n");
+ printf("Non-terminals...\n");
map_list_between(g->names, print_name, print_sep);
+
printf("\n");
+ printf("Predicates...\n");
+
+ for (int i = 0; i < list_length(g->predicates); i++) {
+ PTD *ptdp = (PTD *) list_nth(g->predicates, i);
+
+ printf("%s: %s\n",
+ get_data(ptdp->user_name), get_data(ptdp->raw_name));
+ }
+
+ printf("Rules...\n");
for (int i = 0; i < list_length(g->rule_grps); i++) {
print_rule_group(list_nth(g->rule_grps, i));
printf("\n");
@@ -539,6 +557,17 @@ grammar_rule(CCR_MOD(Grammar *) g, NT nt)
return (Rule_group *) list_nth(g->rule_grps, nt);
}
+PTD *
+grammar_ptd(CCR_MOD(Grammar *) g, PT pt)
+{
+ if ((NUM) pt >= list_length(g->predicates)) {
+ fleprintf("Invalid predicate terminal: %lu\n", pt);
+ return NULL;
+ }
+
+ return (PTD *) list_nth(g->predicates, pt);
+}
+
NUM
grammar_left_len(CCR_MOD(Grammar *)g)
{
diff --git a/src/grammar.h b/src/grammar.h
index e18ad0e..89a66ba 100644
--- a/src/grammar.h
+++ b/src/grammar.h
@@ -78,6 +78,8 @@ PTD *new_ptd(str *user_name, str *raw_name, dfa *dfap);
/* FLAG is used to destroy the strings contained within */
void destroy_ptd(PTD *p, int flag);
+BOOL ptd_run(PTD *p, NUM code);
+
/* If a TNT of type TERMINAL has value END_OF_INPUT, then it means,
surprisingly, the end of input. */
enum { END_OF_INPUT = -1 };
@@ -172,6 +174,9 @@ List *grammar_names(CCR_MOD(Grammar *)g);
/* look up a rule */
Rule_group *grammar_rule(CCR_MOD(Grammar *) g, NT nt);
+/* lookup a predicate */
+PTD *grammar_ptd(CCR_MOD(Grammar *) g, PT pt);
+
/* NTS should be already allocated to be an array of BOOLs. The size
of this array should be the same as the length of the array of
non-terminals of G. If any of the above two conditions is not
diff --git a/src/test/check_cnp.c b/src/test/check_cnp.c
index a21d74e..f01a68c 100644
--- a/src/test/check_cnp.c
+++ b/src/test/check_cnp.c
@@ -9,8 +9,9 @@ print_label6(pair6 label)
label.x, label.y, label.z, label.u, label.v, label.w);
}
-#define TIC struct timespec tic, toc; \
- do { clock_gettime(CLOCK_MONOTONIC_RAW, &tic); } while (0)
+#define TITO struct timespec tic, toc
+
+#define TIC do { clock_gettime(CLOCK_MONOTONIC_RAW, &tic); } while (0)
#define TOC do { \
clock_gettime(CLOCK_MONOTONIC_RAW, &toc); \
@@ -53,7 +54,7 @@ print_label6(pair6 label)
add_to_list(rules, rule); \
} while (0)
-#define GRAMMAR 2
+#define GRAMMAR 4
int
main(int UNUSED argc, char ** UNUSED argv)
@@ -197,6 +198,8 @@ main(int UNUSED argc, char ** UNUSED argv)
printf("\nPrinting the input...\n%s\n", get_data((str *) string));
+ TITO;
+
TIC;
Environment *env = cnp_parse(g, (str *) string);
diff --git a/src/test/check_pred.c b/src/test/check_pred.c
index b0b95b3..2767c45 100644
--- a/src/test/check_pred.c
+++ b/src/test/check_pred.c
@@ -1,7 +1,168 @@
-#include "../grammar.h"
+#include "time.h"
+#include "../cnp.h"
+
+#define TITO struct timespec tic, toc
+
+#define TIC do { clock_gettime(CLOCK_MONOTONIC_RAW, &tic); } while (0)
+
+#define TOC do { \
+ clock_gettime(CLOCK_MONOTONIC_RAW, &toc); \
+ printf("\nTotal time = %f seconds\n", \
+ (toc.tv_nsec - tic.tv_nsec) / 1000000000.0 + \
+ toc.tv_sec - tic.tv_sec); \
+ } while (0)
+
+#define READ_INTO_CPA(N, U, L, I, VA, VI, CP) do { \
+ U = new_utf8(N, L); \
+ I = get_info((str *)U, 0); \
+ VA = MYALLOC(NUM, 2); \
+ VI = 0; \
+ for (NUM index = 0; \
+ I.value >= 0 && index < str_length((str *) U); \
+ index += I.step, VI++) { \
+ I = get_info((str *)U, index); \
+ *(VA+VI) = I.value; \
+ } \
+ CP = MYALLOC(cpa, 1); \
+ CP->array = VA; \
+ CP->size = VI; \
+ add_to_list(names, CP); \
+ destroy_str((str *)U, 1); \
+ } while (0)
+
+#define READ_TNT_STRING(LEFT, FORMAT, LEN, ...) do { \
+ tnt_string = new_tnt_string(FORMAT, LEN, __VA_ARGS__); \
+ if (!tnt_string) { \
+ fleprintf("left = %d, f = %s, l = %d, " \
+ "cannot create tnt string\n", \
+ LEFT, FORMAT, LEN); \
+ map_list(rules, destroy_rule_and_free_all); \
+ destroy_list(rules, 0); \
+ map_list(names, destroy_cpa_and_free_all); \
+ destroy_list(names, 0); \
+ return 1; \
+ } \
+ rule = new_rule(LEFT, tnt_string); \
+ add_to_list(rules, rule); \
+ } while (0)
int
main(int UNUSED argc, char ** UNUSED argv)
{
- return 77;
+ /* have a grammar for testing */
+ List *tnt_string = NULL;
+ Rule *rule = NULL;
+ List *rules = new_list();
+ List *names = new_list();
+ List *preds = new_list();
+
+ char *user_name = NULL;
+ char *raw_name = NULL;
+
+ str *user_name_s = NULL, *raw_name_s = NULL;
+ NUM *pred_bytes = NULL, pred_bytes_len = 0;
+
+ char *name = MYALLOC(char, 2);
+ utf8* uname = NULL;
+
+ str_info info = EMPTY_STR_INFO;
+
+ NUM *varray = NULL;
+ NUM vindex = 0;
+ cpa *cpap = NULL;
+
+ *(name+1) = 0;
+ *name = 'S';
+
+ READ_INTO_CPA(name, uname, 1, info, varray, vindex, cpap);
+
+ READ_TNT_STRING(0, "pn", 2, (PT) 0, (NT) 0);
+ rule = new_rule(0, new_list());
+ add_to_list(rules, rule);
+
+ SAFE_MALLOC(char, user_name, 6, return 1;);
+ *(user_name) = 'a';
+ *(user_name+1) = 's';
+ *(user_name+2) = 'c';
+ *(user_name+3) = 'i';
+ *(user_name+4) = 'i';
+ *(user_name+5) = 0;
+
+ user_name_s = new_utf8(user_name, 5);
+
+ SAFE_MALLOC(char, raw_name, 7, return 1;);
+
+ *(raw_name+0) = 'a';
+ *(raw_name+1) = '-';
+ *(raw_name+2) = 'z';
+ *(raw_name+3) = 'A';
+ *(raw_name+4) = '-';
+ *(raw_name+5) = 'Z';
+ *(raw_name+6) = 0;
+
+ raw_name_s = new_utf8(raw_name, 6);
+
+ SAFE_MALLOC(NUM, pred_bytes, 52, return 1;);
+
+ for (int i = 0; i < 26; i++)
+ *(pred_bytes+pred_bytes_len++) = 'a' + i;
+
+ for (int i = 0; i < 26; i++)
+ *(pred_bytes+pred_bytes_len++) = 'A' + i;
+
+ if (add_to_list(preds, new_ptd(user_name_s, raw_name_s,
+ dfa_from_bytes
+ (pred_bytes_len, pred_bytes)))) {
+ fleprintf0("Fail to add a predicate\n");
+ return 1;
+ }
+
+ free(pred_bytes);
+
+ Grammar *g = new_grammar();
+
+ build_grammar(g, rules, names, preds);
+
+ print_grammar(g);
+
+ utf8 *string = new_utf8("awdfsdjbfsjdhxy", 15);
+
+ printf("\nPrinting the input...\n%s\n", get_data((str *) string));
+
+ TITO;
+
+ TIC;
+
+ Environment *env = cnp_parse(g, (str *) string);
+
+ TOC;
+
+ if (env) {
+ if (!(env_error_p(env))) {
+ BOOL result = bsr_lookup
+ (env_bsrp(env), 0, 0, str_length((str *) string));
+
+ if (result) {
+ printf("\nSuccessfully parsed the input!\n");
+ } else {
+ printf("\nThe input does not parse!\n");
+ }
+
+ printf("\nAll BSRs follow:\n\n");
+ if (argc == 1)
+ bsr_print(env_bsrp(env), env_grammar(env), 1);
+ } else {
+ printf("There are errors!\n");
+ }
+
+ destroy_env(env);
+ }
+
+ destroy_grammar(g, 1);
+
+ destroy_list(rules, 1);
+
+ free(string);
+
+ return 0;
}