diff options
-rw-r--r-- | README | 28 | ||||
-rw-r--r-- | src/Makefile.am | 9 | ||||
-rw-r--r-- | src/brainfuck.bnf | 10 | ||||
-rw-r--r-- | src/bsr.c | 16 | ||||
-rw-r--r-- | src/cnp.c | 523 | ||||
-rw-r--r-- | src/cnp.h | 61 | ||||
-rw-r--r-- | src/crf.c | 68 | ||||
-rw-r--r-- | src/crf.h | 10 | ||||
-rw-r--r-- | src/dfa.h | 12 | ||||
-rw-r--r-- | src/grammar.c | 9 | ||||
-rw-r--r-- | src/grammar.h | 23 | ||||
-rw-r--r-- | src/ht.c | 30 | ||||
-rw-r--r-- | src/ht.h | 7 | ||||
-rw-r--r-- | src/list.c | 2 | ||||
-rw-r--r-- | src/list.h | 2 | ||||
-rw-r--r-- | src/test/check_cnp.c | 291 | ||||
-rw-r--r-- | src/test/check_ht.c | 4 | ||||
-rw-r--r-- | src/test/check_reader.c | 2 |
18 files changed, 1068 insertions, 39 deletions
@@ -11,4 +11,32 @@ without warranty of any kind. About ===== + +Similar projects +================ + +Some similar projects are listed below, along with some comparisons +between this project and them. + +- Tree-sitter + + This is a famous project, and I think it is really cool. But my + goal is to provide the user with an "interactive tool" to manipulate + different context-free grammars, whereas Tree-sitter compiles the + grammars to static codes. + + In addition, Tree-sitter implements the algorithm "GLR", which is an + abbreviation of "Generalized Left-to-right (reversed) Right-most" + algorithm. It is in my humble opinion the most wide-spread + generalized parsing algorithm that has been implemented in other + places as well. On the other hand, my aim is to implement many + parsing algorithms. Right now I have only implemented the variant + of the "GLL" algorithm, called the "Clustered Non-terminal Parsing" + algorithm, but in the future I hope I will implement many more. + +- Emacs-Parser-Generator + <https://github.com/cjohansson/emacs-parser-generator> + + This package seems interesting, as its whole codes are written in + Emacs Lisp.
\ No newline at end of file diff --git a/src/Makefile.am b/src/Makefile.am index 3ec4f19..446961d 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -2,9 +2,9 @@ AM_CFLAGS = -Wall -Wextra noinst_LIBRARIES = libeps.a libeps_a_SOURCES = grammar.c list.c util.c reader.c str.c \ -utf8.c dfa.c ht.c bsr.c crf.c \ +utf8.c dfa.c ht.c bsr.c crf.c cnp.c \ grammar.h list.h util.h reader.h str_partial.h \ -str.h utf8.h dfa.h ht.h bsr.h crf.h +str.h utf8.h dfa.h ht.h bsr.h crf.h cnp.h libeps_a_CFLAGS = $(AM_CFLAGS) --pedantic @@ -22,7 +22,7 @@ CLEANFILES = TAGS # tests check_PROGRAMS = check_list check_grammar check_reader check_dfa \ -check_ht check_bsr check_crf +check_ht check_bsr check_crf check_cnp check_list_SOURCES = test/check_list.c list.c @@ -42,6 +42,9 @@ ht.c utf8.c str.c check_crf_SOURCES = test/check_crf.c crf.c grammar.c list.c util.c \ ht.c utf8.c str.c +check_cnp_SOURCES = test/check_cnp.c crf.c cnp.c grammar.c list.c \ +util.c ht.c utf8.c str.c dfa.c bsr.c + TESTS = $(check_PROGRAMS) AM_COLOR_TESTS = always diff --git a/src/brainfuck.bnf b/src/brainfuck.bnf new file mode 100644 index 0000000..d714a05 --- /dev/null +++ b/src/brainfuck.bnf @@ -0,0 +1,10 @@ +brainfuck: unit brainfuck +brainfuck: + +unit: '>' +unit: '<' +unit: '+' +unit: '-' +unit: ',' +unit: '.' +unit: '[' brainfuck ']' @@ -210,7 +210,6 @@ bsr_add(CCR_MOD(Grammar *) g, bsr * const restrict b, #define ARR (b->f.array+X) if (!(ARR->initialized)) { - ARR->initialized = 1; ARR->table = new_ht2(HT_INIT_CAP); if (ARR->table == NULL) { @@ -218,6 +217,8 @@ bsr_add(CCR_MOD(Grammar *) g, bsr * const restrict b, goto cleanup; } + ARR->initialized = 1; + value_table = new_ht2(HT_INIT_CAP); if (value_table == NULL) { @@ -358,6 +359,7 @@ bsr_add(CCR_MOD(Grammar *) g, bsr * const restrict b, #undef AR #undef ARR #undef ARRR + } goto success; @@ -472,6 +474,12 @@ bsr_find(CCR_MOD(bsr *) b, CCR_MOD(Grammar *) g, return result; } +static void +print_sep() +{ + printf(" "); +} + void bsr_print(CCR_MOD(bsr *) b, CCR_MOD(Grammar *) g, NUM line_len) { @@ -500,11 +508,11 @@ bsr_print(CCR_MOD(bsr *) b, CCR_MOD(Grammar *) g, NUM line_len) printf("("); print_name(list_nth(grammar_names(g), i)); printf(" := "); - map_list + map_list_between (rg_nth (grammar_rule(g, i), (*(VALUE_TABLE_KEYS+k))->x), - print_tnt); + print_tnt, print_sep); printf(", %ld, %ld, %ld)", (*(KEYS+j))->x, (*(VALUE_TABLE_KEYS+k))->y, @@ -589,7 +597,7 @@ bsr_print(CCR_MOD(bsr *) b, CCR_MOD(Grammar *) g, NUM line_len) } } } - + printf("\n"); #undef ELEMENT @@ -1 +1,524 @@ +#include <string.h> #include "cnp.h" + +struct Environment_s { + grammar_info *gi; + /* RESULT_TS and RESULT_PS are temporary arrays used in the + computations. */ + ht *result_ts; + ht *result_ps; + NUM *string; + UNUM string_len; + crf *crfp; + procu *pu; + procr *pr; + spa *spap; + bsr *bsrp; + /* If there are errors this will be non-zero. When this field is + non-zero, every function that involves this struct will do + nothing. */ + BOOL error; +}; + +void +nt_add(Environment *env, NUM X, NUM j) +{ + + if (env->error) return; + +#define GI env->gi +#define RG (grammar_rule(GI->g, X)) + + for (NUM i = 0; i < rg_len(RG); i++) { + BOOL result = + test_select(env, *(env->string+j), X, rg_nth(RG, i)); + + for (NUM k = 0; k < grammar_left_len(GI->g); k++) { + ht_reset(env->result_ts+k, DELETE_KEY); + ht_reset(env->result_ps+k, DELETE_KEY); + } + + if (env->error) break; + + if (result) { + /* fleprintf("i = %ld, j = %ld\n", i, j); */ + env->error = + desc_add(env->pr, env->pu, j, + (pair4) { .x = X, .y = i, .z = 0, .w = j }); + + /* fleprintf("env->error = %d\n", env->error); */ + /* fleprintf("pr len = %ld\n", env->pr->len); + * fleprintf("pr ini = %d\n", (env->pr->array)->initialized); */ + } + + if (env->error) break; + } + +#undef RG +#undef GI + +} + +BOOL +test_select(Environment *env, NUM b, NUM X, CCR_MOD(List *) tnts) +{ + if (env->error) return 0; + + BOOL result = 0; + + grammar_info *gi = env->gi; + + ht *result_ts = env->result_ts; + ht *result_ps = env->result_ps; + + if (tnt_first(gi->pts, gi->pps, gi->nts, grammar_left_len(gi->g), + tnts, result_ts, result_ps)) { + fleprintf0("Fail to find the first set of the TNT string.\n"); + goto cleanup; + } + + if (ht_find(result_ts, &b) != NULL) { + result = 1; + goto success; + } + + /* predicates haven't been implemented yet */ + /* for (NUM i = 0; i < ht_size(result_ps); i++) { + * + * } */ + + for (NUM i = 0; i < list_length(tnts); i++) { + +#define TOP ((TNT*)list_nth(tnts, i)) + + switch (TOP->type) { + case TERMINAL: + case PREDICATE: + goto success; + break; + default: + if (!(gi->nts+TOP->data.nt)) goto success; + break; + } + +#undef TOP + + } + + if (ht_find(gi->sts+X, &b) != NULL) result = 1; + + /* predicates haven't been implemented yet */ + + goto success; + + cleanup: + env->error = 1; + return result; + + success: + return result; +} + +void +rtn(Environment *env, NUM X, NUM k, NUM j) +{ + if (env->error) return; + + pair3 label = (pair3) { .x = X, .y = k, .z = j }; + pair2 label2 = (pair2) { .x = X, .y = k }; + + if (spa_belong(env->spap, label)) return; + + /* fleprintf0("new spa\n"); */ + + if ((env->error = spa_insert(env->spap, label))) return; + + ht *edge_ht = NULL; + + if ((edge_ht = crf_find_node(env->crfp, label2)) == NULL) { + fleprintf0("this should not happen\n"); + env->error = 1; + return; + } + +#define EDGE_LABELS ((pair4 **) ht_keys(edge_ht)) + + for (NUM i = 0; i < ht_size(edge_ht); i++) { + +#define EDGE (*(EDGE_LABELS+i)) + + if ((env->error = desc_add(env->pr, env->pu, j, *EDGE))) return; + /* fleprintf("added descriptor (%ld, %ld, %ld, %ld, %ld)\n", + * EDGE->x, EDGE->y, EDGE->z, EDGE->w, j); */ + if ((env->error = + bsr_add(env->gi->g, env->bsrp, + EDGE->x, EDGE->y, EDGE->z, EDGE->w, k, j))) + return; + } + +#undef EDGE_LABELS +#undef EDGE + +} + +void +cnp_call(Environment *env, pair3 label, NUM i, NUM j) +{ + if (env->error) return; + + if (label.z < 1) goto cleanup; + + NUM Y = label.x; + List *tnts = rg_nth(grammar_rule(env->gi->g, Y), label.y); + + TNT *xtnt = (TNT *) list_nth(tnts, label.z - 1); + + if (xtnt->type != NONTERMINAL) goto cleanup; + + NUM X = (NUM) xtnt->data.nt, h = 0; + + pair2 node = (pair2) { .x = X, .y = j }; + pair4 u = (pair4) + { + .x = label.x, + .y = label.y, + .z = label.z, + .w = i + }; + + ht *edge_ht = NULL, *spa_ht = NULL; + + if ((edge_ht = crf_find_node(env->crfp, node)) == NULL) { + if (crf_add_node(env->crfp, node)) goto cleanup; + + if (crf_add_edge(env->crfp, node, u)) goto cleanup; + + /* errors will be stored in ENV, so we simply return */ + nt_add(env, X, j); + + return; + } + + if (ht_find(edge_ht, &u) == NULL) { + if (crf_add_edge(env->crfp, node, u)) goto cleanup; + + spa_ht = spa_find(env->spap, node); + + if (spa_ht == NULL) return; + + for (NUM k = 0; k < ht_size(spa_ht); k++) { + h = **((NUM **) ht_keys(spa_ht) + k); + + if (desc_add(env->pr, env->pu, h, u)) goto cleanup; + + if (bsr_add(env->gi->g, env->bsrp, + label.x, label.y, label.z, + i, j, h)) + goto cleanup; + } + } + + return; + + cleanup: + fleprintf0("error\n"); + env->error = 1; + return; +} + +HP_ATTR +BOOL +env_error_p(CCR_MOD(Environment *) env) +{ + return env->error; +} + +P_ATTR bsr * +env_bsrp(CCR_MOD(Environment *) env) +{ + return env->bsrp; +} + +P_ATTR +Grammar * +env_grammar(CCR_MOD(Environment *) env) +{ + return env->gi->g; +} + +P_ATTR +crf * +env_crfp(CCR_MOD(Environment *) env) +{ + return env->crfp; +} + +P_ATTR +procr * +env_r(CCR_MOD(Environment *) env) +{ + return env->pr; +} + +P_ATTR +procu * +env_u(CCR_MOD(Environment *) env) +{ + return env->pu; +} + +H_ATTR +void +env_str(CCR_MOD(Environment *) env, NUM **str, NUM *str_len) +{ + *str = env->string; + *str_len = env->string_len; +} + +H_ATTR +BOOL +env_follow_p(CCR_MOD(Environment *) env, NUM X, NUM t) +{ + return ht_find(((env->gi->sts)+X), &t) != NULL; +} + +void +env_print_follow(CCR_MOD(Environment *) env, NUM X) +{ + for (NUM i = 0; i < ht_size((env->gi->sts+X)); i++) { + if (**(((NUM **) ht_keys(env->gi->sts+X)) + i) != + END_OF_INPUT) + printf("key %ld = %ld\n", + i, **(((NUM **) ht_keys(env->gi->sts+X)) + i)); + else + printf("key %ld = END OF INPUT\n", i); + } +} + +void +env_print_first(CCR_MOD(Environment *) env, NUM X) +{ + for (NUM i = 0; i < ht_size((env->gi->pts+X)); i++) { + printf("key %ld = %ld\n", + i, **(((NUM **) ht_keys(env->gi->pts+X)) + i)); + } +} + +Environment * +new_env(Grammar *g, str *string) +{ + Environment *env = NULL; + ht *htp = NULL; + NUM current_index = 0, follow_current_index = 0; + NUM result_current_index = 0; + + SAFE_MALLOC(Environment, env, 1, goto cleanup;); + + env->gi = NULL; + + SAFE_MALLOC(grammar_info, env->gi, 1, goto cleanup;); + + memset(env->gi, 0, sizeof(grammar_info)); + + env->gi->g = g; + + NUM left_len = grammar_left_len(g); + + SAFE_MALLOC(BOOL, env->gi->nts, sizeof(BOOL) * left_len, + goto cleanup;); + + epsilon_nts(g, env->gi->nts); + + SAFE_MALLOC(ht, env->gi->pts, sizeof(ht) * left_len, + goto cleanup;); + SAFE_MALLOC(ht, env->gi->pps, sizeof(ht) * left_len, + goto cleanup;); + + for (current_index = 0; current_index < left_len; current_index++) { + if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) goto cleanup; + *(env->gi->pts+current_index) = *htp; + free(htp); + + if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) { + destroy_ht(env->gi->pts+current_index, DESTROY_NONE_NO_SELF); + goto cleanup; + } + *(env->gi->pps+current_index) = *htp; + free(htp); + } + + if (nt_first(g, env->gi->nts, env->gi->pts, env->gi->pps)) + goto cleanup; + + SAFE_MALLOC(ht, env->gi->sts, sizeof(ht) * left_len, + goto cleanup;); + SAFE_MALLOC(ht, env->gi->sps, sizeof(ht) * left_len, + goto cleanup;); + + for (follow_current_index = 0; + follow_current_index < left_len; + follow_current_index++) { + if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) goto cleanup; + + *(env->gi->sts+follow_current_index) = *htp; + + free(htp); + + if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) { + destroy_ht(env->gi->sts+follow_current_index, + DESTROY_NONE_NO_SELF); + goto cleanup; + } + + *(env->gi->sps+follow_current_index) = *htp; + + free(htp); + } + + if (nt_follow(g, env->gi->nts, + env->gi->pts, env->gi->pps, + env->gi->sts, env->gi->sps)) + goto cleanup; + + SAFE_MALLOC(ht, env->result_ts, sizeof(ht) * left_len, + goto cleanup;); + SAFE_MALLOC(ht, env->result_ps, sizeof(ht) * left_len, + goto cleanup;); + + for (result_current_index = 0; + result_current_index < left_len; + result_current_index++) { + if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) goto cleanup; + + *(env->result_ts+result_current_index) = *htp; + + free(htp); + + if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) { + destroy_ht(env->result_ts+result_current_index, + DESTROY_NONE_NO_SELF); + goto cleanup; + } + + *(env->result_ps+result_current_index) = *htp; + + free(htp); + + } + + SAFE_MALLOC(NUM, env->string, str_length(string)+1, goto cleanup;); + + str_info info = EMPTY_STR_INFO; + + env->string_len = 0; + + for (NUM i = 0; i < str_length(string); i += info.step) { + info = get_info(string, i); + *(env->string+(env->string_len)++) = info.value; + } + + *(env->string+(env->string_len)++) = END_OF_INPUT; + + if ((env->crfp = new_crf()) == NULL) goto cleanup; + + BOOL error = 0; + + env->pu = new_procu(env->string_len, &error); + + if (error) goto cleanup; + + env->pr = new_procr(env->string_len, &error); + + if (error) goto cleanup; + + if ((env->spap = new_spa()) == NULL) goto cleanup; + + env->bsrp = new_bsr(&error, g); + + if (error) goto cleanup; + + return env; + + cleanup: + + if (env) { + if (env->gi) { + if (env->gi->nts) free(env->gi->nts); + + for (NUM i = 0; i < current_index; i++) { + destroy_ht(env->gi->pts+i, DESTROY_NONE_NO_SELF); + destroy_ht(env->gi->pps+i, DESTROY_NONE_NO_SELF); + } + + if (env->gi->pts) free(env->gi->pts); + if (env->gi->pps) free(env->gi->pps); + + for (NUM i = 0; i < follow_current_index; i++) { + destroy_ht(env->gi->sts+i, DESTROY_NONE_NO_SELF); + destroy_ht(env->gi->sps+i, DESTROY_NONE_NO_SELF); + } + + if (env->gi->sts) free(env->gi->sts); + if (env->gi->sps) free(env->gi->sps); + + free(env->gi); + } + + for (NUM i = 0; i < result_current_index; i++) { + destroy_ht(env->result_ts+i, DESTROY_NONE_NO_SELF); + destroy_ht(env->result_ps+i, DESTROY_NONE_NO_SELF); + } + + if (env->result_ts) free(env->result_ts); + if (env->result_ps) free(env->result_ps); + + if (env->string) free(env->string); + + if (env->crfp) destroy_crf(env->crfp); + + if (env->pu) destroy_procu(env->pu); + if (env->pr) destroy_procr(env->pr); + if (env->spap) destroy_spa(env->spap); + if (env->bsrp) destroy_bsr(env->bsrp); + + free(env); + } + + return NULL; +} + +void +destroy_env(Environment *env) +{ + if (env == NULL) return; + + free(env->gi->nts); + + NUM left_len = grammar_left_len(env->gi->g); + + for (NUM i = 0; i < left_len; i++) { + destroy_ht(env->gi->pts+i, DESTROY_KEY_NO_SELF); + destroy_ht(env->gi->pps+i, DESTROY_KEY_NO_SELF); + destroy_ht(env->gi->sts+i, DESTROY_KEY_NO_SELF); + destroy_ht(env->gi->sps+i, DESTROY_KEY_NO_SELF); + destroy_ht(env->result_ts+i, DESTROY_KEY_NO_SELF); + destroy_ht(env->result_ps+i, DESTROY_KEY_NO_SELF); + } + + free(env->gi->pts); + free(env->gi->pps); + free(env->gi->sts); + free(env->gi->sps); + free(env->result_ts); + free(env->result_ps); + free(env->string); + + destroy_crf(env->crfp); + destroy_procu(env->pu); + destroy_procr(env->pr); + destroy_spa(env->spap); + destroy_bsr(env->bsrp); + + free(env->gi); + free(env); +} @@ -1,4 +1,63 @@ #ifndef CNP_H #define CNP_H -/* TODO */ +#include "str.h" +#include "utf8.h" +#include "bsr.h" +#include "crf.h" + +/* We start with implementing some support functions. These support + functions are not supposed to be put in the crf.h header file, as + they are not related to the underlying (static) data structures + really. */ + +typedef struct Environment_s Environment; + +/* On error return NULL */ +Environment *new_env(Grammar *g, str *string); + +HP_ATTR BOOL env_error_p(CCR_MOD(Environment *) env); + +P_ATTR bsr *env_bsrp(CCR_MOD(Environment *) env); + +P_ATTR Grammar *env_grammar(CCR_MOD(Environment *) env); + +HP_ATTR crf *env_crfp(CCR_MOD(Environment *) env); + +HP_ATTR procr *env_r(CCR_MOD(Environment *) env); +HP_ATTR procu *env_u(CCR_MOD(Environment *) env); +H_ATTR void env_str(CCR_MOD(Environment *) env, + NUM **str, NUM *str_len); + +H_ATTR BOOL env_follow_p(CCR_MOD(Environment *) env, NUM X, NUM t); + +void env_print_follow(CCR_MOD(Environment *) env, NUM X); + +void env_print_first(CCR_MOD(Environment *) env, NUM X); + +/* Convenient macro */ + +#define CHECK_ENV_ERROR(ENV, CLEANUP) do { \ + if (env_error_p(ENV)) { \ + fleprintf0("Error occurs!\n"); \ + { CLEANUP }; \ + } \ + } while (0) + +/* NOTE: Since the grammar pointer just points to the existing grammar + when we construct it, we shall not destroy the grammar when we + destroy the environment, so that we can later reuse that grammar, + should the need arise. */ +void destroy_env(Environment *env); + +void nt_add(Environment *env, NUM X, NUM j); + +BOOL test_select(Environment *env, NUM b, NUM X, + CCR_MOD(List *) tnts); + +void rtn(Environment *env, NUM X, NUM k, NUM j); + +void cnp_call(Environment *env, pair3 label, NUM i, NUM j); + +/* TODO: Main algorithm */ + #endif @@ -134,6 +134,13 @@ destroy_spa(spa * const restrict spap) free(spap); } +__attribute__((__pure__, __cold__)) +ht * +spa_ht(CCR_MOD(spa *) spap) +{ + return spap->table; +} + BOOL spa_insert(spa * const restrict spap, pair3 label) { @@ -270,6 +277,7 @@ new_procr(NUM size, BOOL * const restrict errorp) pr->len = size; pr->array = NULL; + pr->mg = 0; SAFE_MALLOC(procr_array_element, pr->array, sizeof(procr_array_element) * size, @@ -287,6 +295,8 @@ new_procr(NUM size, BOOL * const restrict errorp) if (pr) free(pr); + pr = NULL; + success: return pr; @@ -320,12 +330,37 @@ new_procu(NUM size, BOOL * const restrict errorp) if (pu) free(pu); + pu = NULL; + success: return pu; } void +print_procr(CCR_MOD(procr *) pr) +{ + eprintf("\n"); + fleprintf0("Printing procr...\n"); + fleprintf("LEN = %ld, MG = %ld\n", pr->len, pr->mg); + for (NUM i = 0; i < pr->len; i++) { + if ((pr->array+i)->initialized) { + if (list_length((pr->array+i)->ls)) eprintf("%ld: ", i); + for (NUM j = 0; j < list_length((pr->array+i)->ls); j++) { + fleprintf("(%ld, %ld, %ld, %ld)", + ((pair4 *) list_nth((pr->array+i)->ls, j))->x, + ((pair4 *) list_nth((pr->array+i)->ls, j))->y, + ((pair4 *) list_nth((pr->array+i)->ls, j))->z, + ((pair4 *) list_nth((pr->array+i)->ls, j))->w); + if (j+1==list_length((pr->array+i)->ls)) eprintf("\n"); + else eprintf(", "); + } + } + } + eprintf("\n"); +} + +void destroy_procr(procr * const restrict pr) { for (NUM i = 0; i < pr->len; i++) @@ -353,13 +388,13 @@ BOOL desc_add(procr * const restrict pr, procu * const restrict pu, NUM grade, prodecor dec) { + pair4 *p4 = NULL; + if (grade < 0 || grade >= pu->len) { fleprintf("Invalid grade: %ld\n", grade); goto cleanup; } - pair4 *p4 = NULL; - #define HELE (pu->array+grade) #define LELE (pr->array+grade) @@ -441,6 +476,8 @@ desc_add(procr * const restrict pr, procu * const restrict pu, cleanup: + if (p4) free(p4); + return 1; success: @@ -457,13 +494,10 @@ pop_procr(procr * pr, procu * pu, NUM *gradep) #define ELEMENT (pr->array+pr->mg) - if (!(ELEMENT->initialized)) goto reset_mg; - - *gradep = pr->mg; - - result = pop_from_list(ELEMENT->ls); + if (ELEMENT->initialized && list_length(ELEMENT->ls)) + goto no_reset_mg; - if (list_length(ELEMENT->ls) == 0) { + if (ELEMENT->initialized && list_length(ELEMENT->ls) == 0) { destroy_ht((pu->array+pr->mg)->table, DESTROY_KEY_SELF); (pu->array+pr->mg)->initialized = 0; (pu->array+pr->mg)->table = NULL; @@ -471,17 +505,23 @@ pop_procr(procr * pr, procu * pu, NUM *gradep) destroy_list(ELEMENT->ls, 1); ELEMENT->initialized = 0; ELEMENT->ls = NULL; + } - /* REVIEW: Maybe we can just increment the minimal grade, as we - won't jump over two steps? */ - reset_mg: - while (++(pr->mg) < pr->len) { - if (ELEMENT->initialized) break; - } + while (++(pr->mg) < pr->len) { + if (ELEMENT->initialized) break; } + if (pr->mg >= pr->len) goto success; + + no_reset_mg: + + result = pop_from_list(ELEMENT->ls); + + *gradep = pr->mg; + #undef ELEMENT success: return result; + } @@ -1,5 +1,5 @@ -#ifndef CNP_H -#define CNP_H +#ifndef CRF_H +#define CRF_H #include "ht.h" /* This file implements some support functions for the Clustered @@ -83,6 +83,8 @@ spa *new_spa(); void destroy_spa(spa * const restrict spap); +__attribute__((__pure__, __cold__)) ht *spa_ht(CCR_MOD(spa *) spap); + /* On error return non-zero */ BOOL spa_insert(spa * const restrict spap, pair3 label); @@ -156,8 +158,12 @@ BOOL desc_add(procr * const restrict pr, procu * const restrict pu, /* if no more elements are left, return NULL */ prodecor *pop_procr(procr * pr, procu * pu, NUM *gradep); +void print_procr(CCR_MOD(procr *) pr); + /* destructor */ void destroy_procr(procr * const restrict pr); void destroy_procu(procu * const restrict pu); +#define TEST haha + #endif @@ -32,18 +32,18 @@ dfa *new_dfa(); void destroy_dfa(dfa *table); -void print_dfa(const dfa * const restrict table); +void print_dfa(CCR_MOD(dfa *) table); dfa *dfa_from_bytes(int sequence_size, - const NUM * const restrict data); + CCR_MOD(NUM *) data); dfa *dfa_from_bytes_neg(int sequence_size, - const NUM * const restrict data); + CCR_MOD(NUM *) data); dfa *dfa_from_bytes_both(int sequence_size, - const NUM * const restrict data, + CCR_MOD(NUM *) data, int neg_sequence_size, - const NUM * const restrict negdata); + CCR_MOD(NUM *) negdata); /* TODO: Reject character bytes from a given DFA. */ @@ -59,6 +59,6 @@ dfa *dfa_from_bytes_both(int sequence_size, inline BOOL dfa_any_fun(const NUM UNUSED code) { return 1; } -BOOL run_dfa(const dfa * const restrict table, const NUM code); +BOOL run_dfa(CCR_MOD(dfa *) table, const NUM code); #endif diff --git a/src/grammar.c b/src/grammar.c index c12ddec..3b5a959 100644 --- a/src/grammar.c +++ b/src/grammar.c @@ -676,7 +676,7 @@ nt_first(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts, BOOL tnt_first(CC_MOD(ht *) terminal_hts, CC_MOD(ht *) predicate_hts, - CCR_MOD(BOOL *) nts, NUM len, List *tnts, + CCR_MOD(BOOL *) nts, NUM len, CCR_MOD(List *) tnts, ht * const restrict result_terminals, ht * const restrict result_predicates) { @@ -787,8 +787,8 @@ nt_follow(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts, Rule_group *rg = grammar_rule(g, (NT) i); NUM rg_length = rg_len(rg); - for (NUM j = 0; j < rg_length;) { - List *string = rg_nth(rg, j++); + for (NUM j = 0; j < rg_length; j++) { + List *string = rg_nth(rg, j); NUM string_len = list_length(string); TNT *top = NULL; @@ -842,13 +842,12 @@ nt_follow(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts, if (k+1 == string_len || (rest_produce_epsilon >= 0 && - rest_produce_epsilon <= k)) + rest_produce_epsilon <= k+1)) goto add_to_follow; break; add_to_follow: - ht_len = ht_size(result_terminals+i); keys = (NUM **) ht_keys(result_terminals+i); diff --git a/src/grammar.h b/src/grammar.h index 8d6cb06..cbda887 100644 --- a/src/grammar.h +++ b/src/grammar.h @@ -213,7 +213,7 @@ BOOL nt_first(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts, On error return non-zero. */ BOOL tnt_first(CC_MOD(ht *) terminal_hts, CC_MOD(ht *) predicate_hts, - CCR_MOD(BOOL *) nts, NUM len, List *tnts, + CCR_MOD(BOOL *) nts, NUM len, CCR_MOD(List *) tnts, ht * const restrict result_terminals, ht * const restrict result_predicates); @@ -231,6 +231,27 @@ BOOL nt_follow(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts, ht * const result_terminals, ht * const result_predicates); +/* struct that holds information about grammars */ + +typedef struct grammar_info_s grammar_info; + +struct grammar_info_s { + /* the grammar itself */ + Grammar *g; + /* the array about epsilon non-terminals */ + BOOL *nts; + /* the array of hash tables of first (premier) terminals */ + ht *pts; + /* the array of hash tables of first (premier) predicate + terminals */ + ht *pps; + /* the array of hash tables of follow (suivant) terminals */ + ht *sts; + /* the array of hash tables of follow (suivant) predicate + terminals */ + ht *sps; +}; + /* convenient macro */ #define NEW_CPA(X, ARRAY, SIZE) do { \ @@ -323,6 +323,36 @@ ht_delete(ht * const restrict htp, void *key, HT_DELETE_FLAG flag) return 0; } +void +ht_reset(ht * const restrict htp, HT_DELETE_FLAG flag) +{ + for (int i = 0; i < htp->size; i++) { + switch (flag) { + case DELETE_KEY: + case DELETE_EVERY: + free(*(htp->keys+i)); + break; + default: + break; + } + + switch (flag) { + case DELETE_VALUE: + case DELETE_EVERY: + free(*(htp->values+i)); + break; + default: + break; + } + } + + htp->size = 0; + + memset(htp->values, 0, sizeof(void*) * htp->capability); + memset(htp->keys, 0, sizeof(void*) * htp->capability); + memset(htp->indices, 0xff, sizeof (NUM) * htp->capability); +} + P_ATTR void * ht_find(CCR_MOD(ht *) htp, void *key) @@ -27,6 +27,11 @@ typedef BOOL (*compare_func_t) (CCR_MOD(void *)keya, CCR_MOD(void *)keyb); /* FIXME: Hide this struct from the header file. */ +/* WARNING: Don't use these fields directly. These fields are exposed + in this header file simply because I want to use pointers to ht and + do arithmetic on those pointers. But these fields might change + anytime in the future, if I think of some new ideas, or something + like that. */ struct ht_s { void **keys; void **values; @@ -106,6 +111,8 @@ typedef enum HT_DELETE_FLAG_e HT_DELETE_FLAG; BOOL ht_delete(ht * const restrict htp, void *key, HT_DELETE_FLAG flag); +void ht_reset(ht * const restrict htp, HT_DELETE_FLAG flag); + P_ATTR void *ht_find(CCR_MOD(ht *) htp, void *key); /* Pairing hash tables */ @@ -148,7 +148,7 @@ copy_list(List *dest, List *source, copyer copyf) H_ATTR void * -list_nth(const List * const ls, NUM n) +list_nth(const List * const restrict ls, NUM n) { return *(ls->array+n); } @@ -45,7 +45,7 @@ void *copy_num(void *); /* upon failure return 1 */ BOOL copy_list(List *dest, List *source, copyer copyf); -void *list_nth(const List * const ls, NUM n); +void *list_nth(CCR_MOD(List *) ls, NUM n); NUM list_length(const List * const restrict ls); diff --git a/src/test/check_cnp.c b/src/test/check_cnp.c new file mode 100644 index 0000000..00c8a64 --- /dev/null +++ b/src/test/check_cnp.c @@ -0,0 +1,291 @@ +#include "../cnp.h" + +#define READ_INTO_CPA(N, U, L, I, VA, VI, CP) do { \ + U = new_utf8(N, L); \ + I = get_info((str *)U, 0); \ + VA = MYALLOC(NUM, 2); \ + VI = 0; \ + for (NUM index = 0; \ + I.value >= 0 && index < str_length((str *) U); \ + index += I.step, VI++) { \ + I = get_info((str *)U, index); \ + *(VA+VI) = I.value; \ + } \ + CP = MYALLOC(cpa, 1); \ + CP->array = VA; \ + CP->size = VI; \ + add_to_list(names, CP); \ + destroy_str((str *)U, 1); \ + } while (0) + +#define READ_TNT_STRING(LEFT, FORMAT, LEN, ...) do { \ + tnt_string = new_tnt_string(FORMAT, LEN, __VA_ARGS__); \ + if (!tnt_string) { \ + fleprintf("left = %d, f = %s, l = %d, " \ + "cannot create tnt string\n", \ + LEFT, FORMAT, LEN); \ + map_list(rules, destroy_rule_and_free_all); \ + destroy_list(rules, 0); \ + map_list(names, destroy_cpa_and_free_all); \ + destroy_list(names, 0); \ + return 1; \ + } \ + rule = new_rule(LEFT, tnt_string); \ + add_to_list(rules, rule); \ + } while (0) + +int +main(int UNUSED argc, char ** UNUSED argv) +{ + /* have a grammar for testing */ + List *tnt_string = NULL; + Rule *rule = NULL; + List *rules = new_list(); + List *names = new_list(); + + char *name = MYALLOC(char, 2); + *(name+1) = 0; + *name = 'B'; + + utf8* uname = NULL; + + str_info info = EMPTY_STR_INFO; + + NUM *varray = NULL; + NUM vindex = 0; + cpa *cpap = NULL; + + READ_INTO_CPA(name, uname, 1, info, varray, vindex, cpap); + + name = MYALLOC(char, 3); + *(name+1) = 0; + *name = 'U'; + + READ_INTO_CPA(name, uname, 1, info, varray, vindex, cpap); + + /* name = MYALLOC(char, 2); + * *(name+1) = 0; + * *name = 'F'; + * + * READ_INTO_CPA(name, uname, 1, info, varray, vindex, cpap); + * + * name = MYALLOC(char, 2); + * *(name+1) = 0; + * *name = 'D'; + * + * READ_INTO_CPA(name, uname, 1, info, varray, vindex, cpap); */ + + READ_TNT_STRING(0, "nn", 2, (NT) 1, (NT) 0); + rule = new_rule(0, new_list()); + add_to_list(rules, rule); + + READ_TNT_STRING(1, "t", 1, (T) '>'); + READ_TNT_STRING(1, "t", 1, (T) '<'); + READ_TNT_STRING(1, "t", 1, (T) '+'); + READ_TNT_STRING(1, "t", 1, (T) '-'); + READ_TNT_STRING(1, "t", 1, (T) ','); + READ_TNT_STRING(1, "t", 1, (T) '.'); + READ_TNT_STRING(1, "tnt", 3, (T) '[', (NT) 0, (T) ']'); + + Grammar *g = new_grammar(); + + build_grammar(g, rules, names); + + print_grammar(g); + + utf8 *string = new_utf8("[,[.[+]]-]", 10); + + printf("\nPrinting the input...\n%s\n", get_data((str *) string)); + + Environment *env = new_env(g, (str *) string); + + if (crf_add_node(env_crfp(env), (pair2) { 0 })) goto cleanup; + + nt_add(env, 0, 0); + + CHECK_ENV_ERROR(env, goto cleanup;); + + NUM current_grade = 0; + + prodecor *current_prodecor = NULL; + + Rule_group *rg = NULL; + List *right = NULL, *tnts = NULL; + + NUM *num_string = NULL, num_string_len = 0; + BOOL errorp = 0; + + env_str(env, &num_string, &num_string_len); + + while ((current_prodecor = + pop_procr(env_r(env), env_u(env), ¤t_grade))) { + /* fleprintf("{ %ld, %ld, %ld, %ld, %ld }\n", + * current_prodecor->x, current_prodecor->y, + * current_prodecor->z, current_prodecor->w, + * current_grade); + * print_procr(env_r(env)); */ + + /* different cases */ + + rg = grammar_rule(env_grammar(env), current_prodecor->x); + right = rg_nth(rg, current_prodecor->y); + + /* separate the empty rules out */ + if (list_length(right) == 0) { + errorp = + bsr_add(env_grammar(env), env_bsrp(env), + current_prodecor->x, current_prodecor->y, 0, + current_grade, current_grade, current_grade); + + if (errorp) goto cleanup; + + if (env_follow_p(env, current_prodecor->x, + *(num_string+current_grade))) { + rtn(env, current_prodecor->x, + current_prodecor->w, current_grade); + } + + goto continue_label; + } + + /* fleprintf0("non empty rule\n"); */ + + /* if at the end of a rule, return if possible */ + if (current_prodecor->z == list_length(right)) { + if (env_follow_p(env, current_prodecor->x, + *(num_string+current_grade))) { + rtn(env, current_prodecor->x, + current_prodecor->w, current_grade); + } + + goto continue_label; + } + + /* fleprintf0("not at the end of the rule\n"); */ + + /* if at the start of a rule, no need for test_select */ + if (current_prodecor->z == 0) { + /* fleprintf0("at the start of the rule\n"); */ + current_prodecor->z = 1; + + /* otherwise test select */ + } else { + /* fleprintf0("start by test select\n"); */ + tnts = + array_to_list(list_array(right)+current_prodecor->z, + list_length(right)-current_prodecor->z); + if (test_select(env, *(num_string+current_grade), + current_prodecor->x, tnts)) { + /* fleprintf0("successful test\n"); */ + free(tnts); + tnts = NULL; + ++(current_prodecor->z); + } else { + /* fleprintf0("failed test\n"); */ + free(tnts); + tnts = NULL; + goto continue_label; + } + } + + CHECK_ENV_ERROR(env, goto cleanup;); + + /* while there are terminals */ + while (((TNT *) list_nth(right, current_prodecor->z-1))->type == + TERMINAL) { + /* fleprintf0("found terminal\n"); */ + /* add to BSR set */ + errorp = + bsr_add(env_grammar(env), env_bsrp(env), + current_prodecor->x, current_prodecor->y, + current_prodecor->z, current_prodecor->w, + current_grade, current_grade+1); + + if (errorp) goto cleanup; + + current_grade++; + + /* if at the end of the rule, return if possible */ + if (current_prodecor->z == list_length(right)) { + if (env_follow_p(env, current_prodecor->x, + *(num_string+current_grade))) { + /* fleprintf0("we choose to return\n"); */ + rtn(env, current_prodecor->x, + current_prodecor->w, current_grade); + /* fleprintf0("hi\n"); */ + } + + goto continue_label; + } + /* fleprintf0("terminal not at the end\n"); */ + /* else test select */ + tnts = + array_to_list(list_array(right)+current_prodecor->z, + list_length(right)-current_prodecor->z); + if (test_select(env, *(num_string+current_grade), + current_prodecor->x, tnts)) { + /* fleprintf0("Successful test\n"); */ + free(tnts); + tnts = NULL; + (current_prodecor->z)++; + } else { + /* fleprintf0("Failed test\n"); */ + /* printf("next thing is "); */ + /* print_tnt(list_nth(right, current_prodecor->z)); */ + /* printf("\n"); */ + free(tnts); + tnts = NULL; + goto continue_label; + } + } + + /* fleprintf0("encountered non-terminal\n"); */ + + /* when a nonterminal is encountered, we call it */ + cnp_call(env, (pair3) { + .x = current_prodecor->x, + .y = current_prodecor->y, + .z = current_prodecor->z + }, current_prodecor->w, current_grade); + + /* fleprintf("after call { %ld, %ld, %ld, %ld, %ld }\n", + * current_prodecor->x, current_prodecor->y, + * current_prodecor->z, current_prodecor->w, + * current_grade); */ + + continue_label: + + CHECK_ENV_ERROR(env, goto cleanup;); + + free(current_prodecor); + } + + cleanup: + bsr_print(env_bsrp(env), env_grammar(env), 1); + + destroy_env(env); + + destroy_grammar(g, 1); + + destroy_list(rules, 1); + + free(string); + + return 0; +} + +/* archives + + printf("\nprint first for 0:\n"); + env_print_first(env, 0); + + printf("\nprint first for 1:\n"); + env_print_first(env, 1); + + printf("\nprint follow for 0:\n"); + env_print_follow(env, 0); + + printf("\nprint follow for 1:\n"); + env_print_follow(env, 1); + + */ diff --git a/src/test/check_ht.c b/src/test/check_ht.c index b572549..62a7bee 100644 --- a/src/test/check_ht.c +++ b/src/test/check_ht.c @@ -231,6 +231,10 @@ int main(int UNUSED argc, char ** UNUSED argv) ht_find(htp, &testk4)); } + ht_reset(htp, DELETE_KEY); + + printf("After reset the size is %ld\n", ht_size(htp)); + destroy_ht(htp, DESTROY_KEY_SELF); return 0; diff --git a/src/test/check_reader.c b/src/test/check_reader.c index 3184e41..a4e184f 100644 --- a/src/test/check_reader.c +++ b/src/test/check_reader.c @@ -11,7 +11,7 @@ main(U_ATTR int argc, U_ATTR char **argv) { /* return 77; */ - char *file_name = "test.txt"; + char *file_name = "brainfuck.bnf"; char *buffer = MYALLOC(char, 512); NUM buffer_size = 0; |