From 1c8cbfd09ff9dd02f6d8d938c45e3f29a35f6f32 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Sat, 5 Feb 2022 23:23:03 +0800 Subject: predicates start working now Now we have a working implementation of predicates. It now only remains to write the parser of grammars. Of course we shall generate this parser by this parser generator itself, because why not. ;-P --- src/Makefile.am | 4 +- src/cnp.c | 68 +++++++++++++++++---- src/dfa.c | 25 ++++---- src/grammar.c | 31 +++++++++- src/grammar.h | 5 ++ src/test/check_cnp.c | 9 ++- src/test/check_pred.c | 165 +++++++++++++++++++++++++++++++++++++++++++++++++- 7 files changed, 275 insertions(+), 32 deletions(-) diff --git a/src/Makefile.am b/src/Makefile.am index 2b2d7d8..d097f1f 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -48,8 +48,8 @@ ht.c utf8.c str.c dfa.c tuple.c check_cnp_SOURCES = test/check_cnp.c crf.c cnp.c grammar.c list.c \ util.c ht.c utf8.c str.c dfa.c bsr.c tuple.c -check_pred_SOURCES = test/check_pred.c dfa.c list.c str.c utf8.c \ -grammar.c util.c ht.c +check_pred_SOURCES = test/check_pred.c crf.c cnp.c grammar.c list.c \ +util.c ht.c utf8.c str.c dfa.c bsr.c tuple.c check_tuple_SOURCES = test/check_tuple.c tuple.c util.c diff --git a/src/cnp.c b/src/cnp.c index fe5a4dc..ce5a527 100644 --- a/src/cnp.c +++ b/src/cnp.c @@ -32,6 +32,7 @@ nt_add(Environment *env, NUM X, NUM j) for (NUM i = 0; i < rg_len(RG); i++) { BOOL result = test_select(env, *(env->string+j), X, rg_nth(RG, i)); + /* fleprintf("i = %ld, after test select\n", i); */ for (NUM k = 0; k < grammar_left_len(GI->g); k++) { ht_reset(env->result_ts+k, DELETE_KEY); @@ -83,10 +84,15 @@ test_select(Environment *env, NUM b, NUM X, CCR_MOD(List *) tnts) goto success; } - /* TODO: predicates haven't been implemented yet */ - /* for (NUM i = 0; i < ht_size(result_ps); i++) { - * - * } */ + for (NUM i = 0; i < ht_size(result_ps); i++) { + PTD *ptdp = grammar_ptd(gi->g, **((PT **) ht_keys(result_ps+i))); + + if (ptd_run(ptdp, b)) { + /* fleprintf("i = %ld\n", i); */ + result = 1; + goto success; + } + } for (NUM i = 0; i < list_length(tnts); i++) { @@ -106,9 +112,20 @@ test_select(Environment *env, NUM b, NUM X, CCR_MOD(List *) tnts) } - if (ht_find(gi->sts+X, &b) != NULL) result = 1; + if (ht_find(gi->sts+X, &b) != NULL) { + result = 1; + goto success; + } - /* TODO: predicates haven't been implemented yet */ + for (NUM i = 0; i < ht_size(gi->sps+X); i++) { + PTD *ptdp = grammar_ptd(gi->g, **((PT **) ht_keys(gi->sps+X))); + /* fleprintf("i = %ld\n", i); */ + if (ptd_run(ptdp, b)) { + /* fleprintf("i = %ld\n", i); */ + result = 1; + goto success; + } + } goto success; @@ -244,9 +261,13 @@ cnp_call(Environment *env, pair3 label, NUM i, NUM j) TNT *xtnt = (TNT *) list_nth(tnts, label.z - 1); - if (xtnt->type != NONTERMINAL) goto cleanup; + if (xtnt->type != NONTERMINAL) { + fleprintf("Wrong type: %d\n", xtnt->type); + goto cleanup; + } NUM X = (NUM) xtnt->data.nt; + /* fleprintf("X = %ld, j = %ld\n", X, j); */ pair2 node = (pair2) { .x = X, .y = j }; pair4 u = (pair4) @@ -258,20 +279,29 @@ cnp_call(Environment *env, pair3 label, NUM i, NUM j) }; if (!(crf_find_node(env->crfp, node))) { - if (crf_add_node(env->crfp, node)) goto cleanup; + if (crf_add_node(env->crfp, node)) { + fleprintf0("fail to add node to crf\n"); + goto cleanup; + } - if (crf_add_edge(env->crfp, node, u)) goto cleanup; + if (crf_add_edge(env->crfp, node, u)) { + fleprintf0("fail to add edge to crf\n"); + goto cleanup; + } /* errors will be stored in ENV, so we simply return */ nt_add(env, X, j); - /* printf("X = %ld, j = %ld\n", X, j); */ + /* fleprintf("X = %ld, j = %ld\n", X, j); */ return; } if (!(crf_find_edge(env->crfp, node, u))) { - if (crf_add_edge(env->crfp, node, u)) goto cleanup; + if (crf_add_edge(env->crfp, node, u)) { + fleprintf0("fail to add edge to crf\n"); + goto cleanup; + } call_internal_env = env; call_internal_label5 = @@ -344,7 +374,17 @@ H_ATTR BOOL env_follow_p(CCR_MOD(Environment *) env, NUM X, NUM t) { - return ht_find(((env->gi->sts)+X), &t) != NULL; + if (ht_find(((env->gi->sts)+X), &t) != NULL) return 1; + + for (NUM i = 0; i < ht_size(env->gi->sps+X); i++) { + PTD *ptdp = grammar_ptd(env->gi->g, + **((PT **) ht_keys(env->gi->sps+X))); + if (ptd_run(ptdp, t)) return 1; + } + + /* fleprintf("X = %ld, t = %ld\n", X, t); */ + + return 0; } void @@ -688,7 +728,9 @@ cnp_parse(Grammar *g, str *string) /* while there are terminals */ while (((TNT *) list_nth(right, current_prodecor->z-1))->type == - TERMINAL) { + TERMINAL || + ((TNT *) list_nth(right, current_prodecor->z-1))->type == + PREDICATE) { /* fleprintf0("found terminal\n"); */ /* add to BSR set */ errorp = diff --git a/src/dfa.c b/src/dfa.c index fb67047..9da201d 100644 --- a/src/dfa.c +++ b/src/dfa.c @@ -623,11 +623,14 @@ run_dfa(const dfa * const restrict table, const NUM code) break; } - char *s = MYALLOC(char, 5); + /* negative codes have special purposes */ + if (code < 0) return 0; + + char s[5]; str *strp = new_str(s, 5); if (encode(code, strp)) { - destroy_str(strp, 1); + destroy_str(strp, 0); fleprintf0("Fail to encode\n"); return 0; } @@ -636,33 +639,33 @@ run_dfa(const dfa * const restrict table, const NUM code) for (NUM i = 0; i < str_length(strp); i++) { unsigned char c = (unsigned char) *(s+i); + int lookup_value = 0; for (int j = 0, len = 0; len <= c;) { + len += *(*(table->data.table.table+current_row)+(j<<1)+1); lookup_value = *(*(table->data.table.table+current_row) +(j++<<1)); } - /* fleprintf("lookup_value = %d\n", lookup_value); */ - if (lookup_value > 0) { current_row = lookup_value; } else { switch (lookup_value) { case DFA_STATE_REJECT: - destroy_str(strp, 1); + destroy_str(strp, 0); return 0; break; case DFA_STATE_UNKNOWN: switch (table->type) { case DFA_TYPE_BOTH: case DFA_TYPE_POSITIVE: - destroy_str(strp, 1); + destroy_str(strp, 0); return 0; break; case DFA_TYPE_NEGATIVE: - destroy_str(strp, 1); + destroy_str(strp, 0); return 1; break; default: @@ -670,26 +673,26 @@ run_dfa(const dfa * const restrict table, const NUM code) the table is of the special type. But I am apparently paranoia. */ fleprintf0("Special table in the wrong place\n"); - destroy_str(strp, 1); + destroy_str(strp, 0); return 0; break; } break; case DFA_STATE_ACCEPT: - destroy_str(strp, 1); + destroy_str(strp, 0); return 1; break; default: fleprintf("Unknown state number for char %d: %d\n", c, lookup_value); - destroy_str(strp, 1); + destroy_str(strp, 0); return 0; break; } } } - destroy_str(strp, 1); + destroy_str(strp, 0); return 0; } diff --git a/src/grammar.c b/src/grammar.c index 0b2be8d..1ea52b0 100644 --- a/src/grammar.c +++ b/src/grammar.c @@ -73,6 +73,12 @@ destroy_ptd(PTD *p, int flag) free(p); } +BOOL +ptd_run(PTD *p, NUM code) +{ + return run_dfa(p->dfap, code); +} + static void destroy_ptd_free_all(void *e) { @@ -85,7 +91,7 @@ destroy_ptd_no_free(void *e) destroy_ptd((PTD *)e, 0); } -/* TODO: Add some statistic counts to assist the hash tables. For +/* REVIEW: Add some statistic counts to assist the hash tables. For example, store the total number of terminals as an integer; then the hash table can be hinted at initialization to contain that many elements, which can reduce the number of time a hash table needs to @@ -411,9 +417,21 @@ void print_grammar(CC_MOD(Grammar *) g) { printf("Printing a grammar:\n"); + printf("Non-terminals...\n"); map_list_between(g->names, print_name, print_sep); + printf("\n"); + printf("Predicates...\n"); + + for (int i = 0; i < list_length(g->predicates); i++) { + PTD *ptdp = (PTD *) list_nth(g->predicates, i); + + printf("%s: %s\n", + get_data(ptdp->user_name), get_data(ptdp->raw_name)); + } + + printf("Rules...\n"); for (int i = 0; i < list_length(g->rule_grps); i++) { print_rule_group(list_nth(g->rule_grps, i)); printf("\n"); @@ -539,6 +557,17 @@ grammar_rule(CCR_MOD(Grammar *) g, NT nt) return (Rule_group *) list_nth(g->rule_grps, nt); } +PTD * +grammar_ptd(CCR_MOD(Grammar *) g, PT pt) +{ + if ((NUM) pt >= list_length(g->predicates)) { + fleprintf("Invalid predicate terminal: %lu\n", pt); + return NULL; + } + + return (PTD *) list_nth(g->predicates, pt); +} + NUM grammar_left_len(CCR_MOD(Grammar *)g) { diff --git a/src/grammar.h b/src/grammar.h index e18ad0e..89a66ba 100644 --- a/src/grammar.h +++ b/src/grammar.h @@ -78,6 +78,8 @@ PTD *new_ptd(str *user_name, str *raw_name, dfa *dfap); /* FLAG is used to destroy the strings contained within */ void destroy_ptd(PTD *p, int flag); +BOOL ptd_run(PTD *p, NUM code); + /* If a TNT of type TERMINAL has value END_OF_INPUT, then it means, surprisingly, the end of input. */ enum { END_OF_INPUT = -1 }; @@ -172,6 +174,9 @@ List *grammar_names(CCR_MOD(Grammar *)g); /* look up a rule */ Rule_group *grammar_rule(CCR_MOD(Grammar *) g, NT nt); +/* lookup a predicate */ +PTD *grammar_ptd(CCR_MOD(Grammar *) g, PT pt); + /* NTS should be already allocated to be an array of BOOLs. The size of this array should be the same as the length of the array of non-terminals of G. If any of the above two conditions is not diff --git a/src/test/check_cnp.c b/src/test/check_cnp.c index a21d74e..f01a68c 100644 --- a/src/test/check_cnp.c +++ b/src/test/check_cnp.c @@ -9,8 +9,9 @@ print_label6(pair6 label) label.x, label.y, label.z, label.u, label.v, label.w); } -#define TIC struct timespec tic, toc; \ - do { clock_gettime(CLOCK_MONOTONIC_RAW, &tic); } while (0) +#define TITO struct timespec tic, toc + +#define TIC do { clock_gettime(CLOCK_MONOTONIC_RAW, &tic); } while (0) #define TOC do { \ clock_gettime(CLOCK_MONOTONIC_RAW, &toc); \ @@ -53,7 +54,7 @@ print_label6(pair6 label) add_to_list(rules, rule); \ } while (0) -#define GRAMMAR 2 +#define GRAMMAR 4 int main(int UNUSED argc, char ** UNUSED argv) @@ -197,6 +198,8 @@ main(int UNUSED argc, char ** UNUSED argv) printf("\nPrinting the input...\n%s\n", get_data((str *) string)); + TITO; + TIC; Environment *env = cnp_parse(g, (str *) string); diff --git a/src/test/check_pred.c b/src/test/check_pred.c index b0b95b3..2767c45 100644 --- a/src/test/check_pred.c +++ b/src/test/check_pred.c @@ -1,7 +1,168 @@ -#include "../grammar.h" +#include "time.h" +#include "../cnp.h" + +#define TITO struct timespec tic, toc + +#define TIC do { clock_gettime(CLOCK_MONOTONIC_RAW, &tic); } while (0) + +#define TOC do { \ + clock_gettime(CLOCK_MONOTONIC_RAW, &toc); \ + printf("\nTotal time = %f seconds\n", \ + (toc.tv_nsec - tic.tv_nsec) / 1000000000.0 + \ + toc.tv_sec - tic.tv_sec); \ + } while (0) + +#define READ_INTO_CPA(N, U, L, I, VA, VI, CP) do { \ + U = new_utf8(N, L); \ + I = get_info((str *)U, 0); \ + VA = MYALLOC(NUM, 2); \ + VI = 0; \ + for (NUM index = 0; \ + I.value >= 0 && index < str_length((str *) U); \ + index += I.step, VI++) { \ + I = get_info((str *)U, index); \ + *(VA+VI) = I.value; \ + } \ + CP = MYALLOC(cpa, 1); \ + CP->array = VA; \ + CP->size = VI; \ + add_to_list(names, CP); \ + destroy_str((str *)U, 1); \ + } while (0) + +#define READ_TNT_STRING(LEFT, FORMAT, LEN, ...) do { \ + tnt_string = new_tnt_string(FORMAT, LEN, __VA_ARGS__); \ + if (!tnt_string) { \ + fleprintf("left = %d, f = %s, l = %d, " \ + "cannot create tnt string\n", \ + LEFT, FORMAT, LEN); \ + map_list(rules, destroy_rule_and_free_all); \ + destroy_list(rules, 0); \ + map_list(names, destroy_cpa_and_free_all); \ + destroy_list(names, 0); \ + return 1; \ + } \ + rule = new_rule(LEFT, tnt_string); \ + add_to_list(rules, rule); \ + } while (0) int main(int UNUSED argc, char ** UNUSED argv) { - return 77; + /* have a grammar for testing */ + List *tnt_string = NULL; + Rule *rule = NULL; + List *rules = new_list(); + List *names = new_list(); + List *preds = new_list(); + + char *user_name = NULL; + char *raw_name = NULL; + + str *user_name_s = NULL, *raw_name_s = NULL; + NUM *pred_bytes = NULL, pred_bytes_len = 0; + + char *name = MYALLOC(char, 2); + utf8* uname = NULL; + + str_info info = EMPTY_STR_INFO; + + NUM *varray = NULL; + NUM vindex = 0; + cpa *cpap = NULL; + + *(name+1) = 0; + *name = 'S'; + + READ_INTO_CPA(name, uname, 1, info, varray, vindex, cpap); + + READ_TNT_STRING(0, "pn", 2, (PT) 0, (NT) 0); + rule = new_rule(0, new_list()); + add_to_list(rules, rule); + + SAFE_MALLOC(char, user_name, 6, return 1;); + *(user_name) = 'a'; + *(user_name+1) = 's'; + *(user_name+2) = 'c'; + *(user_name+3) = 'i'; + *(user_name+4) = 'i'; + *(user_name+5) = 0; + + user_name_s = new_utf8(user_name, 5); + + SAFE_MALLOC(char, raw_name, 7, return 1;); + + *(raw_name+0) = 'a'; + *(raw_name+1) = '-'; + *(raw_name+2) = 'z'; + *(raw_name+3) = 'A'; + *(raw_name+4) = '-'; + *(raw_name+5) = 'Z'; + *(raw_name+6) = 0; + + raw_name_s = new_utf8(raw_name, 6); + + SAFE_MALLOC(NUM, pred_bytes, 52, return 1;); + + for (int i = 0; i < 26; i++) + *(pred_bytes+pred_bytes_len++) = 'a' + i; + + for (int i = 0; i < 26; i++) + *(pred_bytes+pred_bytes_len++) = 'A' + i; + + if (add_to_list(preds, new_ptd(user_name_s, raw_name_s, + dfa_from_bytes + (pred_bytes_len, pred_bytes)))) { + fleprintf0("Fail to add a predicate\n"); + return 1; + } + + free(pred_bytes); + + Grammar *g = new_grammar(); + + build_grammar(g, rules, names, preds); + + print_grammar(g); + + utf8 *string = new_utf8("awdfsdjbfsjdhxy", 15); + + printf("\nPrinting the input...\n%s\n", get_data((str *) string)); + + TITO; + + TIC; + + Environment *env = cnp_parse(g, (str *) string); + + TOC; + + if (env) { + if (!(env_error_p(env))) { + BOOL result = bsr_lookup + (env_bsrp(env), 0, 0, str_length((str *) string)); + + if (result) { + printf("\nSuccessfully parsed the input!\n"); + } else { + printf("\nThe input does not parse!\n"); + } + + printf("\nAll BSRs follow:\n\n"); + if (argc == 1) + bsr_print(env_bsrp(env), env_grammar(env), 1); + } else { + printf("There are errors!\n"); + } + + destroy_env(env); + } + + destroy_grammar(g, 1); + + destroy_list(rules, 1); + + free(string); + + return 0; } -- cgit v1.2.3-18-g5258