summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2022-01-31 09:23:20 +0800
committerJSDurand <mmemmew@gmail.com>2022-01-31 09:23:20 +0800
commita8bd5e9d85ac9928bd29add82e887f82642af893 (patch)
tree74e377f9fccffc2779ff97fa0bd8ad180b9c865c
parente555c88b8107caf886da229444c2bed1aaef6c2c (diff)
test/check_cnp: working algorithm
I now have a working algorithm in test/check_cnp. It can correctly parse the grammar for an esoteric language called "Brainfuck". This language does not matter. What matters is that it contains parentheses. So this shows that at least for grammars as complex as parentheses, this parser works well. Haha.
-rw-r--r--README28
-rw-r--r--src/Makefile.am9
-rw-r--r--src/brainfuck.bnf10
-rw-r--r--src/bsr.c16
-rw-r--r--src/cnp.c523
-rw-r--r--src/cnp.h61
-rw-r--r--src/crf.c68
-rw-r--r--src/crf.h10
-rw-r--r--src/dfa.h12
-rw-r--r--src/grammar.c9
-rw-r--r--src/grammar.h23
-rw-r--r--src/ht.c30
-rw-r--r--src/ht.h7
-rw-r--r--src/list.c2
-rw-r--r--src/list.h2
-rw-r--r--src/test/check_cnp.c291
-rw-r--r--src/test/check_ht.c4
-rw-r--r--src/test/check_reader.c2
18 files changed, 1068 insertions, 39 deletions
diff --git a/README b/README
index 5753ba3..d50ff3c 100644
--- a/README
+++ b/README
@@ -11,4 +11,32 @@ without warranty of any kind.
About
=====
+
+Similar projects
+================
+
+Some similar projects are listed below, along with some comparisons
+between this project and them.
+
+- Tree-sitter
+
+ This is a famous project, and I think it is really cool. But my
+ goal is to provide the user with an "interactive tool" to manipulate
+ different context-free grammars, whereas Tree-sitter compiles the
+ grammars to static codes.
+
+ In addition, Tree-sitter implements the algorithm "GLR", which is an
+ abbreviation of "Generalized Left-to-right (reversed) Right-most"
+ algorithm. It is in my humble opinion the most wide-spread
+ generalized parsing algorithm that has been implemented in other
+ places as well. On the other hand, my aim is to implement many
+ parsing algorithms. Right now I have only implemented the variant
+ of the "GLL" algorithm, called the "Clustered Non-terminal Parsing"
+ algorithm, but in the future I hope I will implement many more.
+
+- Emacs-Parser-Generator
+ <https://github.com/cjohansson/emacs-parser-generator>
+
+ This package seems interesting, as its whole codes are written in
+ Emacs Lisp. \ No newline at end of file
diff --git a/src/Makefile.am b/src/Makefile.am
index 3ec4f19..446961d 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -2,9 +2,9 @@ AM_CFLAGS = -Wall -Wextra
noinst_LIBRARIES = libeps.a
libeps_a_SOURCES = grammar.c list.c util.c reader.c str.c \
-utf8.c dfa.c ht.c bsr.c crf.c \
+utf8.c dfa.c ht.c bsr.c crf.c cnp.c \
grammar.h list.h util.h reader.h str_partial.h \
-str.h utf8.h dfa.h ht.h bsr.h crf.h
+str.h utf8.h dfa.h ht.h bsr.h crf.h cnp.h
libeps_a_CFLAGS = $(AM_CFLAGS) --pedantic
@@ -22,7 +22,7 @@ CLEANFILES = TAGS
# tests
check_PROGRAMS = check_list check_grammar check_reader check_dfa \
-check_ht check_bsr check_crf
+check_ht check_bsr check_crf check_cnp
check_list_SOURCES = test/check_list.c list.c
@@ -42,6 +42,9 @@ ht.c utf8.c str.c
check_crf_SOURCES = test/check_crf.c crf.c grammar.c list.c util.c \
ht.c utf8.c str.c
+check_cnp_SOURCES = test/check_cnp.c crf.c cnp.c grammar.c list.c \
+util.c ht.c utf8.c str.c dfa.c bsr.c
+
TESTS = $(check_PROGRAMS)
AM_COLOR_TESTS = always
diff --git a/src/brainfuck.bnf b/src/brainfuck.bnf
new file mode 100644
index 0000000..d714a05
--- /dev/null
+++ b/src/brainfuck.bnf
@@ -0,0 +1,10 @@
+brainfuck: unit brainfuck
+brainfuck:
+
+unit: '>'
+unit: '<'
+unit: '+'
+unit: '-'
+unit: ','
+unit: '.'
+unit: '[' brainfuck ']'
diff --git a/src/bsr.c b/src/bsr.c
index e737d7a..bd0a84c 100644
--- a/src/bsr.c
+++ b/src/bsr.c
@@ -210,7 +210,6 @@ bsr_add(CCR_MOD(Grammar *) g, bsr * const restrict b,
#define ARR (b->f.array+X)
if (!(ARR->initialized)) {
- ARR->initialized = 1;
ARR->table = new_ht2(HT_INIT_CAP);
if (ARR->table == NULL) {
@@ -218,6 +217,8 @@ bsr_add(CCR_MOD(Grammar *) g, bsr * const restrict b,
goto cleanup;
}
+ ARR->initialized = 1;
+
value_table = new_ht2(HT_INIT_CAP);
if (value_table == NULL) {
@@ -358,6 +359,7 @@ bsr_add(CCR_MOD(Grammar *) g, bsr * const restrict b,
#undef AR
#undef ARR
#undef ARRR
+
}
goto success;
@@ -472,6 +474,12 @@ bsr_find(CCR_MOD(bsr *) b, CCR_MOD(Grammar *) g,
return result;
}
+static void
+print_sep()
+{
+ printf(" ");
+}
+
void
bsr_print(CCR_MOD(bsr *) b, CCR_MOD(Grammar *) g, NUM line_len)
{
@@ -500,11 +508,11 @@ bsr_print(CCR_MOD(bsr *) b, CCR_MOD(Grammar *) g, NUM line_len)
printf("(");
print_name(list_nth(grammar_names(g), i));
printf(" := ");
- map_list
+ map_list_between
(rg_nth
(grammar_rule(g, i),
(*(VALUE_TABLE_KEYS+k))->x),
- print_tnt);
+ print_tnt, print_sep);
printf(", %ld, %ld, %ld)",
(*(KEYS+j))->x,
(*(VALUE_TABLE_KEYS+k))->y,
@@ -589,7 +597,7 @@ bsr_print(CCR_MOD(bsr *) b, CCR_MOD(Grammar *) g, NUM line_len)
}
}
}
-
+
printf("\n");
#undef ELEMENT
diff --git a/src/cnp.c b/src/cnp.c
index d9a4978..8c7c519 100644
--- a/src/cnp.c
+++ b/src/cnp.c
@@ -1 +1,524 @@
+#include <string.h>
#include "cnp.h"
+
+struct Environment_s {
+ grammar_info *gi;
+ /* RESULT_TS and RESULT_PS are temporary arrays used in the
+ computations. */
+ ht *result_ts;
+ ht *result_ps;
+ NUM *string;
+ UNUM string_len;
+ crf *crfp;
+ procu *pu;
+ procr *pr;
+ spa *spap;
+ bsr *bsrp;
+ /* If there are errors this will be non-zero. When this field is
+ non-zero, every function that involves this struct will do
+ nothing. */
+ BOOL error;
+};
+
+void
+nt_add(Environment *env, NUM X, NUM j)
+{
+
+ if (env->error) return;
+
+#define GI env->gi
+#define RG (grammar_rule(GI->g, X))
+
+ for (NUM i = 0; i < rg_len(RG); i++) {
+ BOOL result =
+ test_select(env, *(env->string+j), X, rg_nth(RG, i));
+
+ for (NUM k = 0; k < grammar_left_len(GI->g); k++) {
+ ht_reset(env->result_ts+k, DELETE_KEY);
+ ht_reset(env->result_ps+k, DELETE_KEY);
+ }
+
+ if (env->error) break;
+
+ if (result) {
+ /* fleprintf("i = %ld, j = %ld\n", i, j); */
+ env->error =
+ desc_add(env->pr, env->pu, j,
+ (pair4) { .x = X, .y = i, .z = 0, .w = j });
+
+ /* fleprintf("env->error = %d\n", env->error); */
+ /* fleprintf("pr len = %ld\n", env->pr->len);
+ * fleprintf("pr ini = %d\n", (env->pr->array)->initialized); */
+ }
+
+ if (env->error) break;
+ }
+
+#undef RG
+#undef GI
+
+}
+
+BOOL
+test_select(Environment *env, NUM b, NUM X, CCR_MOD(List *) tnts)
+{
+ if (env->error) return 0;
+
+ BOOL result = 0;
+
+ grammar_info *gi = env->gi;
+
+ ht *result_ts = env->result_ts;
+ ht *result_ps = env->result_ps;
+
+ if (tnt_first(gi->pts, gi->pps, gi->nts, grammar_left_len(gi->g),
+ tnts, result_ts, result_ps)) {
+ fleprintf0("Fail to find the first set of the TNT string.\n");
+ goto cleanup;
+ }
+
+ if (ht_find(result_ts, &b) != NULL) {
+ result = 1;
+ goto success;
+ }
+
+ /* predicates haven't been implemented yet */
+ /* for (NUM i = 0; i < ht_size(result_ps); i++) {
+ *
+ * } */
+
+ for (NUM i = 0; i < list_length(tnts); i++) {
+
+#define TOP ((TNT*)list_nth(tnts, i))
+
+ switch (TOP->type) {
+ case TERMINAL:
+ case PREDICATE:
+ goto success;
+ break;
+ default:
+ if (!(gi->nts+TOP->data.nt)) goto success;
+ break;
+ }
+
+#undef TOP
+
+ }
+
+ if (ht_find(gi->sts+X, &b) != NULL) result = 1;
+
+ /* predicates haven't been implemented yet */
+
+ goto success;
+
+ cleanup:
+ env->error = 1;
+ return result;
+
+ success:
+ return result;
+}
+
+void
+rtn(Environment *env, NUM X, NUM k, NUM j)
+{
+ if (env->error) return;
+
+ pair3 label = (pair3) { .x = X, .y = k, .z = j };
+ pair2 label2 = (pair2) { .x = X, .y = k };
+
+ if (spa_belong(env->spap, label)) return;
+
+ /* fleprintf0("new spa\n"); */
+
+ if ((env->error = spa_insert(env->spap, label))) return;
+
+ ht *edge_ht = NULL;
+
+ if ((edge_ht = crf_find_node(env->crfp, label2)) == NULL) {
+ fleprintf0("this should not happen\n");
+ env->error = 1;
+ return;
+ }
+
+#define EDGE_LABELS ((pair4 **) ht_keys(edge_ht))
+
+ for (NUM i = 0; i < ht_size(edge_ht); i++) {
+
+#define EDGE (*(EDGE_LABELS+i))
+
+ if ((env->error = desc_add(env->pr, env->pu, j, *EDGE))) return;
+ /* fleprintf("added descriptor (%ld, %ld, %ld, %ld, %ld)\n",
+ * EDGE->x, EDGE->y, EDGE->z, EDGE->w, j); */
+ if ((env->error =
+ bsr_add(env->gi->g, env->bsrp,
+ EDGE->x, EDGE->y, EDGE->z, EDGE->w, k, j)))
+ return;
+ }
+
+#undef EDGE_LABELS
+#undef EDGE
+
+}
+
+void
+cnp_call(Environment *env, pair3 label, NUM i, NUM j)
+{
+ if (env->error) return;
+
+ if (label.z < 1) goto cleanup;
+
+ NUM Y = label.x;
+ List *tnts = rg_nth(grammar_rule(env->gi->g, Y), label.y);
+
+ TNT *xtnt = (TNT *) list_nth(tnts, label.z - 1);
+
+ if (xtnt->type != NONTERMINAL) goto cleanup;
+
+ NUM X = (NUM) xtnt->data.nt, h = 0;
+
+ pair2 node = (pair2) { .x = X, .y = j };
+ pair4 u = (pair4)
+ {
+ .x = label.x,
+ .y = label.y,
+ .z = label.z,
+ .w = i
+ };
+
+ ht *edge_ht = NULL, *spa_ht = NULL;
+
+ if ((edge_ht = crf_find_node(env->crfp, node)) == NULL) {
+ if (crf_add_node(env->crfp, node)) goto cleanup;
+
+ if (crf_add_edge(env->crfp, node, u)) goto cleanup;
+
+ /* errors will be stored in ENV, so we simply return */
+ nt_add(env, X, j);
+
+ return;
+ }
+
+ if (ht_find(edge_ht, &u) == NULL) {
+ if (crf_add_edge(env->crfp, node, u)) goto cleanup;
+
+ spa_ht = spa_find(env->spap, node);
+
+ if (spa_ht == NULL) return;
+
+ for (NUM k = 0; k < ht_size(spa_ht); k++) {
+ h = **((NUM **) ht_keys(spa_ht) + k);
+
+ if (desc_add(env->pr, env->pu, h, u)) goto cleanup;
+
+ if (bsr_add(env->gi->g, env->bsrp,
+ label.x, label.y, label.z,
+ i, j, h))
+ goto cleanup;
+ }
+ }
+
+ return;
+
+ cleanup:
+ fleprintf0("error\n");
+ env->error = 1;
+ return;
+}
+
+HP_ATTR
+BOOL
+env_error_p(CCR_MOD(Environment *) env)
+{
+ return env->error;
+}
+
+P_ATTR bsr *
+env_bsrp(CCR_MOD(Environment *) env)
+{
+ return env->bsrp;
+}
+
+P_ATTR
+Grammar *
+env_grammar(CCR_MOD(Environment *) env)
+{
+ return env->gi->g;
+}
+
+P_ATTR
+crf *
+env_crfp(CCR_MOD(Environment *) env)
+{
+ return env->crfp;
+}
+
+P_ATTR
+procr *
+env_r(CCR_MOD(Environment *) env)
+{
+ return env->pr;
+}
+
+P_ATTR
+procu *
+env_u(CCR_MOD(Environment *) env)
+{
+ return env->pu;
+}
+
+H_ATTR
+void
+env_str(CCR_MOD(Environment *) env, NUM **str, NUM *str_len)
+{
+ *str = env->string;
+ *str_len = env->string_len;
+}
+
+H_ATTR
+BOOL
+env_follow_p(CCR_MOD(Environment *) env, NUM X, NUM t)
+{
+ return ht_find(((env->gi->sts)+X), &t) != NULL;
+}
+
+void
+env_print_follow(CCR_MOD(Environment *) env, NUM X)
+{
+ for (NUM i = 0; i < ht_size((env->gi->sts+X)); i++) {
+ if (**(((NUM **) ht_keys(env->gi->sts+X)) + i) !=
+ END_OF_INPUT)
+ printf("key %ld = %ld\n",
+ i, **(((NUM **) ht_keys(env->gi->sts+X)) + i));
+ else
+ printf("key %ld = END OF INPUT\n", i);
+ }
+}
+
+void
+env_print_first(CCR_MOD(Environment *) env, NUM X)
+{
+ for (NUM i = 0; i < ht_size((env->gi->pts+X)); i++) {
+ printf("key %ld = %ld\n",
+ i, **(((NUM **) ht_keys(env->gi->pts+X)) + i));
+ }
+}
+
+Environment *
+new_env(Grammar *g, str *string)
+{
+ Environment *env = NULL;
+ ht *htp = NULL;
+ NUM current_index = 0, follow_current_index = 0;
+ NUM result_current_index = 0;
+
+ SAFE_MALLOC(Environment, env, 1, goto cleanup;);
+
+ env->gi = NULL;
+
+ SAFE_MALLOC(grammar_info, env->gi, 1, goto cleanup;);
+
+ memset(env->gi, 0, sizeof(grammar_info));
+
+ env->gi->g = g;
+
+ NUM left_len = grammar_left_len(g);
+
+ SAFE_MALLOC(BOOL, env->gi->nts, sizeof(BOOL) * left_len,
+ goto cleanup;);
+
+ epsilon_nts(g, env->gi->nts);
+
+ SAFE_MALLOC(ht, env->gi->pts, sizeof(ht) * left_len,
+ goto cleanup;);
+ SAFE_MALLOC(ht, env->gi->pps, sizeof(ht) * left_len,
+ goto cleanup;);
+
+ for (current_index = 0; current_index < left_len; current_index++) {
+ if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) goto cleanup;
+ *(env->gi->pts+current_index) = *htp;
+ free(htp);
+
+ if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) {
+ destroy_ht(env->gi->pts+current_index, DESTROY_NONE_NO_SELF);
+ goto cleanup;
+ }
+ *(env->gi->pps+current_index) = *htp;
+ free(htp);
+ }
+
+ if (nt_first(g, env->gi->nts, env->gi->pts, env->gi->pps))
+ goto cleanup;
+
+ SAFE_MALLOC(ht, env->gi->sts, sizeof(ht) * left_len,
+ goto cleanup;);
+ SAFE_MALLOC(ht, env->gi->sps, sizeof(ht) * left_len,
+ goto cleanup;);
+
+ for (follow_current_index = 0;
+ follow_current_index < left_len;
+ follow_current_index++) {
+ if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) goto cleanup;
+
+ *(env->gi->sts+follow_current_index) = *htp;
+
+ free(htp);
+
+ if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) {
+ destroy_ht(env->gi->sts+follow_current_index,
+ DESTROY_NONE_NO_SELF);
+ goto cleanup;
+ }
+
+ *(env->gi->sps+follow_current_index) = *htp;
+
+ free(htp);
+ }
+
+ if (nt_follow(g, env->gi->nts,
+ env->gi->pts, env->gi->pps,
+ env->gi->sts, env->gi->sps))
+ goto cleanup;
+
+ SAFE_MALLOC(ht, env->result_ts, sizeof(ht) * left_len,
+ goto cleanup;);
+ SAFE_MALLOC(ht, env->result_ps, sizeof(ht) * left_len,
+ goto cleanup;);
+
+ for (result_current_index = 0;
+ result_current_index < left_len;
+ result_current_index++) {
+ if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) goto cleanup;
+
+ *(env->result_ts+result_current_index) = *htp;
+
+ free(htp);
+
+ if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) {
+ destroy_ht(env->result_ts+result_current_index,
+ DESTROY_NONE_NO_SELF);
+ goto cleanup;
+ }
+
+ *(env->result_ps+result_current_index) = *htp;
+
+ free(htp);
+
+ }
+
+ SAFE_MALLOC(NUM, env->string, str_length(string)+1, goto cleanup;);
+
+ str_info info = EMPTY_STR_INFO;
+
+ env->string_len = 0;
+
+ for (NUM i = 0; i < str_length(string); i += info.step) {
+ info = get_info(string, i);
+ *(env->string+(env->string_len)++) = info.value;
+ }
+
+ *(env->string+(env->string_len)++) = END_OF_INPUT;
+
+ if ((env->crfp = new_crf()) == NULL) goto cleanup;
+
+ BOOL error = 0;
+
+ env->pu = new_procu(env->string_len, &error);
+
+ if (error) goto cleanup;
+
+ env->pr = new_procr(env->string_len, &error);
+
+ if (error) goto cleanup;
+
+ if ((env->spap = new_spa()) == NULL) goto cleanup;
+
+ env->bsrp = new_bsr(&error, g);
+
+ if (error) goto cleanup;
+
+ return env;
+
+ cleanup:
+
+ if (env) {
+ if (env->gi) {
+ if (env->gi->nts) free(env->gi->nts);
+
+ for (NUM i = 0; i < current_index; i++) {
+ destroy_ht(env->gi->pts+i, DESTROY_NONE_NO_SELF);
+ destroy_ht(env->gi->pps+i, DESTROY_NONE_NO_SELF);
+ }
+
+ if (env->gi->pts) free(env->gi->pts);
+ if (env->gi->pps) free(env->gi->pps);
+
+ for (NUM i = 0; i < follow_current_index; i++) {
+ destroy_ht(env->gi->sts+i, DESTROY_NONE_NO_SELF);
+ destroy_ht(env->gi->sps+i, DESTROY_NONE_NO_SELF);
+ }
+
+ if (env->gi->sts) free(env->gi->sts);
+ if (env->gi->sps) free(env->gi->sps);
+
+ free(env->gi);
+ }
+
+ for (NUM i = 0; i < result_current_index; i++) {
+ destroy_ht(env->result_ts+i, DESTROY_NONE_NO_SELF);
+ destroy_ht(env->result_ps+i, DESTROY_NONE_NO_SELF);
+ }
+
+ if (env->result_ts) free(env->result_ts);
+ if (env->result_ps) free(env->result_ps);
+
+ if (env->string) free(env->string);
+
+ if (env->crfp) destroy_crf(env->crfp);
+
+ if (env->pu) destroy_procu(env->pu);
+ if (env->pr) destroy_procr(env->pr);
+ if (env->spap) destroy_spa(env->spap);
+ if (env->bsrp) destroy_bsr(env->bsrp);
+
+ free(env);
+ }
+
+ return NULL;
+}
+
+void
+destroy_env(Environment *env)
+{
+ if (env == NULL) return;
+
+ free(env->gi->nts);
+
+ NUM left_len = grammar_left_len(env->gi->g);
+
+ for (NUM i = 0; i < left_len; i++) {
+ destroy_ht(env->gi->pts+i, DESTROY_KEY_NO_SELF);
+ destroy_ht(env->gi->pps+i, DESTROY_KEY_NO_SELF);
+ destroy_ht(env->gi->sts+i, DESTROY_KEY_NO_SELF);
+ destroy_ht(env->gi->sps+i, DESTROY_KEY_NO_SELF);
+ destroy_ht(env->result_ts+i, DESTROY_KEY_NO_SELF);
+ destroy_ht(env->result_ps+i, DESTROY_KEY_NO_SELF);
+ }
+
+ free(env->gi->pts);
+ free(env->gi->pps);
+ free(env->gi->sts);
+ free(env->gi->sps);
+ free(env->result_ts);
+ free(env->result_ps);
+ free(env->string);
+
+ destroy_crf(env->crfp);
+ destroy_procu(env->pu);
+ destroy_procr(env->pr);
+ destroy_spa(env->spap);
+ destroy_bsr(env->bsrp);
+
+ free(env->gi);
+ free(env);
+}
diff --git a/src/cnp.h b/src/cnp.h
index 5944ca3..2b0c104 100644
--- a/src/cnp.h
+++ b/src/cnp.h
@@ -1,4 +1,63 @@
#ifndef CNP_H
#define CNP_H
-/* TODO */
+#include "str.h"
+#include "utf8.h"
+#include "bsr.h"
+#include "crf.h"
+
+/* We start with implementing some support functions. These support
+ functions are not supposed to be put in the crf.h header file, as
+ they are not related to the underlying (static) data structures
+ really. */
+
+typedef struct Environment_s Environment;
+
+/* On error return NULL */
+Environment *new_env(Grammar *g, str *string);
+
+HP_ATTR BOOL env_error_p(CCR_MOD(Environment *) env);
+
+P_ATTR bsr *env_bsrp(CCR_MOD(Environment *) env);
+
+P_ATTR Grammar *env_grammar(CCR_MOD(Environment *) env);
+
+HP_ATTR crf *env_crfp(CCR_MOD(Environment *) env);
+
+HP_ATTR procr *env_r(CCR_MOD(Environment *) env);
+HP_ATTR procu *env_u(CCR_MOD(Environment *) env);
+H_ATTR void env_str(CCR_MOD(Environment *) env,
+ NUM **str, NUM *str_len);
+
+H_ATTR BOOL env_follow_p(CCR_MOD(Environment *) env, NUM X, NUM t);
+
+void env_print_follow(CCR_MOD(Environment *) env, NUM X);
+
+void env_print_first(CCR_MOD(Environment *) env, NUM X);
+
+/* Convenient macro */
+
+#define CHECK_ENV_ERROR(ENV, CLEANUP) do { \
+ if (env_error_p(ENV)) { \
+ fleprintf0("Error occurs!\n"); \
+ { CLEANUP }; \
+ } \
+ } while (0)
+
+/* NOTE: Since the grammar pointer just points to the existing grammar
+ when we construct it, we shall not destroy the grammar when we
+ destroy the environment, so that we can later reuse that grammar,
+ should the need arise. */
+void destroy_env(Environment *env);
+
+void nt_add(Environment *env, NUM X, NUM j);
+
+BOOL test_select(Environment *env, NUM b, NUM X,
+ CCR_MOD(List *) tnts);
+
+void rtn(Environment *env, NUM X, NUM k, NUM j);
+
+void cnp_call(Environment *env, pair3 label, NUM i, NUM j);
+
+/* TODO: Main algorithm */
+
#endif
diff --git a/src/crf.c b/src/crf.c
index dbcb050..1cff96f 100644
--- a/src/crf.c
+++ b/src/crf.c
@@ -134,6 +134,13 @@ destroy_spa(spa * const restrict spap)
free(spap);
}
+__attribute__((__pure__, __cold__))
+ht *
+spa_ht(CCR_MOD(spa *) spap)
+{
+ return spap->table;
+}
+
BOOL
spa_insert(spa * const restrict spap, pair3 label)
{
@@ -270,6 +277,7 @@ new_procr(NUM size, BOOL * const restrict errorp)
pr->len = size;
pr->array = NULL;
+ pr->mg = 0;
SAFE_MALLOC(procr_array_element, pr->array,
sizeof(procr_array_element) * size,
@@ -287,6 +295,8 @@ new_procr(NUM size, BOOL * const restrict errorp)
if (pr) free(pr);
+ pr = NULL;
+
success:
return pr;
@@ -320,12 +330,37 @@ new_procu(NUM size, BOOL * const restrict errorp)
if (pu) free(pu);
+ pu = NULL;
+
success:
return pu;
}
void
+print_procr(CCR_MOD(procr *) pr)
+{
+ eprintf("\n");
+ fleprintf0("Printing procr...\n");
+ fleprintf("LEN = %ld, MG = %ld\n", pr->len, pr->mg);
+ for (NUM i = 0; i < pr->len; i++) {
+ if ((pr->array+i)->initialized) {
+ if (list_length((pr->array+i)->ls)) eprintf("%ld: ", i);
+ for (NUM j = 0; j < list_length((pr->array+i)->ls); j++) {
+ fleprintf("(%ld, %ld, %ld, %ld)",
+ ((pair4 *) list_nth((pr->array+i)->ls, j))->x,
+ ((pair4 *) list_nth((pr->array+i)->ls, j))->y,
+ ((pair4 *) list_nth((pr->array+i)->ls, j))->z,
+ ((pair4 *) list_nth((pr->array+i)->ls, j))->w);
+ if (j+1==list_length((pr->array+i)->ls)) eprintf("\n");
+ else eprintf(", ");
+ }
+ }
+ }
+ eprintf("\n");
+}
+
+void
destroy_procr(procr * const restrict pr)
{
for (NUM i = 0; i < pr->len; i++)
@@ -353,13 +388,13 @@ BOOL
desc_add(procr * const restrict pr, procu * const restrict pu,
NUM grade, prodecor dec)
{
+ pair4 *p4 = NULL;
+
if (grade < 0 || grade >= pu->len) {
fleprintf("Invalid grade: %ld\n", grade);
goto cleanup;
}
- pair4 *p4 = NULL;
-
#define HELE (pu->array+grade)
#define LELE (pr->array+grade)
@@ -441,6 +476,8 @@ desc_add(procr * const restrict pr, procu * const restrict pu,
cleanup:
+ if (p4) free(p4);
+
return 1;
success:
@@ -457,13 +494,10 @@ pop_procr(procr * pr, procu * pu, NUM *gradep)
#define ELEMENT (pr->array+pr->mg)
- if (!(ELEMENT->initialized)) goto reset_mg;
-
- *gradep = pr->mg;
-
- result = pop_from_list(ELEMENT->ls);
+ if (ELEMENT->initialized && list_length(ELEMENT->ls))
+ goto no_reset_mg;
- if (list_length(ELEMENT->ls) == 0) {
+ if (ELEMENT->initialized && list_length(ELEMENT->ls) == 0) {
destroy_ht((pu->array+pr->mg)->table, DESTROY_KEY_SELF);
(pu->array+pr->mg)->initialized = 0;
(pu->array+pr->mg)->table = NULL;
@@ -471,17 +505,23 @@ pop_procr(procr * pr, procu * pu, NUM *gradep)
destroy_list(ELEMENT->ls, 1);
ELEMENT->initialized = 0;
ELEMENT->ls = NULL;
+ }
- /* REVIEW: Maybe we can just increment the minimal grade, as we
- won't jump over two steps? */
- reset_mg:
- while (++(pr->mg) < pr->len) {
- if (ELEMENT->initialized) break;
- }
+ while (++(pr->mg) < pr->len) {
+ if (ELEMENT->initialized) break;
}
+ if (pr->mg >= pr->len) goto success;
+
+ no_reset_mg:
+
+ result = pop_from_list(ELEMENT->ls);
+
+ *gradep = pr->mg;
+
#undef ELEMENT
success:
return result;
+
}
diff --git a/src/crf.h b/src/crf.h
index 7ff5a3f..9d1cc78 100644
--- a/src/crf.h
+++ b/src/crf.h
@@ -1,5 +1,5 @@
-#ifndef CNP_H
-#define CNP_H
+#ifndef CRF_H
+#define CRF_H
#include "ht.h"
/* This file implements some support functions for the Clustered
@@ -83,6 +83,8 @@ spa *new_spa();
void destroy_spa(spa * const restrict spap);
+__attribute__((__pure__, __cold__)) ht *spa_ht(CCR_MOD(spa *) spap);
+
/* On error return non-zero */
BOOL spa_insert(spa * const restrict spap, pair3 label);
@@ -156,8 +158,12 @@ BOOL desc_add(procr * const restrict pr, procu * const restrict pu,
/* if no more elements are left, return NULL */
prodecor *pop_procr(procr * pr, procu * pu, NUM *gradep);
+void print_procr(CCR_MOD(procr *) pr);
+
/* destructor */
void destroy_procr(procr * const restrict pr);
void destroy_procu(procu * const restrict pu);
+#define TEST haha
+
#endif
diff --git a/src/dfa.h b/src/dfa.h
index a22409f..70c8bf6 100644
--- a/src/dfa.h
+++ b/src/dfa.h
@@ -32,18 +32,18 @@ dfa *new_dfa();
void destroy_dfa(dfa *table);
-void print_dfa(const dfa * const restrict table);
+void print_dfa(CCR_MOD(dfa *) table);
dfa *dfa_from_bytes(int sequence_size,
- const NUM * const restrict data);
+ CCR_MOD(NUM *) data);
dfa *dfa_from_bytes_neg(int sequence_size,
- const NUM * const restrict data);
+ CCR_MOD(NUM *) data);
dfa *dfa_from_bytes_both(int sequence_size,
- const NUM * const restrict data,
+ CCR_MOD(NUM *) data,
int neg_sequence_size,
- const NUM * const restrict negdata);
+ CCR_MOD(NUM *) negdata);
/* TODO: Reject character bytes from a given DFA. */
@@ -59,6 +59,6 @@ dfa *dfa_from_bytes_both(int sequence_size,
inline BOOL dfa_any_fun(const NUM UNUSED code) { return 1; }
-BOOL run_dfa(const dfa * const restrict table, const NUM code);
+BOOL run_dfa(CCR_MOD(dfa *) table, const NUM code);
#endif
diff --git a/src/grammar.c b/src/grammar.c
index c12ddec..3b5a959 100644
--- a/src/grammar.c
+++ b/src/grammar.c
@@ -676,7 +676,7 @@ nt_first(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
BOOL
tnt_first(CC_MOD(ht *) terminal_hts, CC_MOD(ht *) predicate_hts,
- CCR_MOD(BOOL *) nts, NUM len, List *tnts,
+ CCR_MOD(BOOL *) nts, NUM len, CCR_MOD(List *) tnts,
ht * const restrict result_terminals,
ht * const restrict result_predicates)
{
@@ -787,8 +787,8 @@ nt_follow(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
Rule_group *rg = grammar_rule(g, (NT) i);
NUM rg_length = rg_len(rg);
- for (NUM j = 0; j < rg_length;) {
- List *string = rg_nth(rg, j++);
+ for (NUM j = 0; j < rg_length; j++) {
+ List *string = rg_nth(rg, j);
NUM string_len = list_length(string);
TNT *top = NULL;
@@ -842,13 +842,12 @@ nt_follow(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
if (k+1 == string_len ||
(rest_produce_epsilon >= 0 &&
- rest_produce_epsilon <= k))
+ rest_produce_epsilon <= k+1))
goto add_to_follow;
break;
add_to_follow:
-
ht_len = ht_size(result_terminals+i);
keys = (NUM **) ht_keys(result_terminals+i);
diff --git a/src/grammar.h b/src/grammar.h
index 8d6cb06..cbda887 100644
--- a/src/grammar.h
+++ b/src/grammar.h
@@ -213,7 +213,7 @@ BOOL nt_first(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
On error return non-zero. */
BOOL tnt_first(CC_MOD(ht *) terminal_hts, CC_MOD(ht *) predicate_hts,
- CCR_MOD(BOOL *) nts, NUM len, List *tnts,
+ CCR_MOD(BOOL *) nts, NUM len, CCR_MOD(List *) tnts,
ht * const restrict result_terminals,
ht * const restrict result_predicates);
@@ -231,6 +231,27 @@ BOOL nt_follow(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
ht * const result_terminals,
ht * const result_predicates);
+/* struct that holds information about grammars */
+
+typedef struct grammar_info_s grammar_info;
+
+struct grammar_info_s {
+ /* the grammar itself */
+ Grammar *g;
+ /* the array about epsilon non-terminals */
+ BOOL *nts;
+ /* the array of hash tables of first (premier) terminals */
+ ht *pts;
+ /* the array of hash tables of first (premier) predicate
+ terminals */
+ ht *pps;
+ /* the array of hash tables of follow (suivant) terminals */
+ ht *sts;
+ /* the array of hash tables of follow (suivant) predicate
+ terminals */
+ ht *sps;
+};
+
/* convenient macro */
#define NEW_CPA(X, ARRAY, SIZE) do { \
diff --git a/src/ht.c b/src/ht.c
index 7965941..45953f4 100644
--- a/src/ht.c
+++ b/src/ht.c
@@ -323,6 +323,36 @@ ht_delete(ht * const restrict htp, void *key, HT_DELETE_FLAG flag)
return 0;
}
+void
+ht_reset(ht * const restrict htp, HT_DELETE_FLAG flag)
+{
+ for (int i = 0; i < htp->size; i++) {
+ switch (flag) {
+ case DELETE_KEY:
+ case DELETE_EVERY:
+ free(*(htp->keys+i));
+ break;
+ default:
+ break;
+ }
+
+ switch (flag) {
+ case DELETE_VALUE:
+ case DELETE_EVERY:
+ free(*(htp->values+i));
+ break;
+ default:
+ break;
+ }
+ }
+
+ htp->size = 0;
+
+ memset(htp->values, 0, sizeof(void*) * htp->capability);
+ memset(htp->keys, 0, sizeof(void*) * htp->capability);
+ memset(htp->indices, 0xff, sizeof (NUM) * htp->capability);
+}
+
P_ATTR
void *
ht_find(CCR_MOD(ht *) htp, void *key)
diff --git a/src/ht.h b/src/ht.h
index 4ac0c5d..65ae747 100644
--- a/src/ht.h
+++ b/src/ht.h
@@ -27,6 +27,11 @@ typedef BOOL (*compare_func_t)
(CCR_MOD(void *)keya, CCR_MOD(void *)keyb);
/* FIXME: Hide this struct from the header file. */
+/* WARNING: Don't use these fields directly. These fields are exposed
+ in this header file simply because I want to use pointers to ht and
+ do arithmetic on those pointers. But these fields might change
+ anytime in the future, if I think of some new ideas, or something
+ like that. */
struct ht_s {
void **keys;
void **values;
@@ -106,6 +111,8 @@ typedef enum HT_DELETE_FLAG_e HT_DELETE_FLAG;
BOOL ht_delete(ht * const restrict htp, void *key,
HT_DELETE_FLAG flag);
+void ht_reset(ht * const restrict htp, HT_DELETE_FLAG flag);
+
P_ATTR void *ht_find(CCR_MOD(ht *) htp, void *key);
/* Pairing hash tables */
diff --git a/src/list.c b/src/list.c
index 62933a0..1b9f4d4 100644
--- a/src/list.c
+++ b/src/list.c
@@ -148,7 +148,7 @@ copy_list(List *dest, List *source, copyer copyf)
H_ATTR
void *
-list_nth(const List * const ls, NUM n)
+list_nth(const List * const restrict ls, NUM n)
{
return *(ls->array+n);
}
diff --git a/src/list.h b/src/list.h
index 76dde0a..33bdf3e 100644
--- a/src/list.h
+++ b/src/list.h
@@ -45,7 +45,7 @@ void *copy_num(void *);
/* upon failure return 1 */
BOOL copy_list(List *dest, List *source, copyer copyf);
-void *list_nth(const List * const ls, NUM n);
+void *list_nth(CCR_MOD(List *) ls, NUM n);
NUM list_length(const List * const restrict ls);
diff --git a/src/test/check_cnp.c b/src/test/check_cnp.c
new file mode 100644
index 0000000..00c8a64
--- /dev/null
+++ b/src/test/check_cnp.c
@@ -0,0 +1,291 @@
+#include "../cnp.h"
+
+#define READ_INTO_CPA(N, U, L, I, VA, VI, CP) do { \
+ U = new_utf8(N, L); \
+ I = get_info((str *)U, 0); \
+ VA = MYALLOC(NUM, 2); \
+ VI = 0; \
+ for (NUM index = 0; \
+ I.value >= 0 && index < str_length((str *) U); \
+ index += I.step, VI++) { \
+ I = get_info((str *)U, index); \
+ *(VA+VI) = I.value; \
+ } \
+ CP = MYALLOC(cpa, 1); \
+ CP->array = VA; \
+ CP->size = VI; \
+ add_to_list(names, CP); \
+ destroy_str((str *)U, 1); \
+ } while (0)
+
+#define READ_TNT_STRING(LEFT, FORMAT, LEN, ...) do { \
+ tnt_string = new_tnt_string(FORMAT, LEN, __VA_ARGS__); \
+ if (!tnt_string) { \
+ fleprintf("left = %d, f = %s, l = %d, " \
+ "cannot create tnt string\n", \
+ LEFT, FORMAT, LEN); \
+ map_list(rules, destroy_rule_and_free_all); \
+ destroy_list(rules, 0); \
+ map_list(names, destroy_cpa_and_free_all); \
+ destroy_list(names, 0); \
+ return 1; \
+ } \
+ rule = new_rule(LEFT, tnt_string); \
+ add_to_list(rules, rule); \
+ } while (0)
+
+int
+main(int UNUSED argc, char ** UNUSED argv)
+{
+ /* have a grammar for testing */
+ List *tnt_string = NULL;
+ Rule *rule = NULL;
+ List *rules = new_list();
+ List *names = new_list();
+
+ char *name = MYALLOC(char, 2);
+ *(name+1) = 0;
+ *name = 'B';
+
+ utf8* uname = NULL;
+
+ str_info info = EMPTY_STR_INFO;
+
+ NUM *varray = NULL;
+ NUM vindex = 0;
+ cpa *cpap = NULL;
+
+ READ_INTO_CPA(name, uname, 1, info, varray, vindex, cpap);
+
+ name = MYALLOC(char, 3);
+ *(name+1) = 0;
+ *name = 'U';
+
+ READ_INTO_CPA(name, uname, 1, info, varray, vindex, cpap);
+
+ /* name = MYALLOC(char, 2);
+ * *(name+1) = 0;
+ * *name = 'F';
+ *
+ * READ_INTO_CPA(name, uname, 1, info, varray, vindex, cpap);
+ *
+ * name = MYALLOC(char, 2);
+ * *(name+1) = 0;
+ * *name = 'D';
+ *
+ * READ_INTO_CPA(name, uname, 1, info, varray, vindex, cpap); */
+
+ READ_TNT_STRING(0, "nn", 2, (NT) 1, (NT) 0);
+ rule = new_rule(0, new_list());
+ add_to_list(rules, rule);
+
+ READ_TNT_STRING(1, "t", 1, (T) '>');
+ READ_TNT_STRING(1, "t", 1, (T) '<');
+ READ_TNT_STRING(1, "t", 1, (T) '+');
+ READ_TNT_STRING(1, "t", 1, (T) '-');
+ READ_TNT_STRING(1, "t", 1, (T) ',');
+ READ_TNT_STRING(1, "t", 1, (T) '.');
+ READ_TNT_STRING(1, "tnt", 3, (T) '[', (NT) 0, (T) ']');
+
+ Grammar *g = new_grammar();
+
+ build_grammar(g, rules, names);
+
+ print_grammar(g);
+
+ utf8 *string = new_utf8("[,[.[+]]-]", 10);
+
+ printf("\nPrinting the input...\n%s\n", get_data((str *) string));
+
+ Environment *env = new_env(g, (str *) string);
+
+ if (crf_add_node(env_crfp(env), (pair2) { 0 })) goto cleanup;
+
+ nt_add(env, 0, 0);
+
+ CHECK_ENV_ERROR(env, goto cleanup;);
+
+ NUM current_grade = 0;
+
+ prodecor *current_prodecor = NULL;
+
+ Rule_group *rg = NULL;
+ List *right = NULL, *tnts = NULL;
+
+ NUM *num_string = NULL, num_string_len = 0;
+ BOOL errorp = 0;
+
+ env_str(env, &num_string, &num_string_len);
+
+ while ((current_prodecor =
+ pop_procr(env_r(env), env_u(env), &current_grade))) {
+ /* fleprintf("{ %ld, %ld, %ld, %ld, %ld }\n",
+ * current_prodecor->x, current_prodecor->y,
+ * current_prodecor->z, current_prodecor->w,
+ * current_grade);
+ * print_procr(env_r(env)); */
+
+ /* different cases */
+
+ rg = grammar_rule(env_grammar(env), current_prodecor->x);
+ right = rg_nth(rg, current_prodecor->y);
+
+ /* separate the empty rules out */
+ if (list_length(right) == 0) {
+ errorp =
+ bsr_add(env_grammar(env), env_bsrp(env),
+ current_prodecor->x, current_prodecor->y, 0,
+ current_grade, current_grade, current_grade);
+
+ if (errorp) goto cleanup;
+
+ if (env_follow_p(env, current_prodecor->x,
+ *(num_string+current_grade))) {
+ rtn(env, current_prodecor->x,
+ current_prodecor->w, current_grade);
+ }
+
+ goto continue_label;
+ }
+
+ /* fleprintf0("non empty rule\n"); */
+
+ /* if at the end of a rule, return if possible */
+ if (current_prodecor->z == list_length(right)) {
+ if (env_follow_p(env, current_prodecor->x,
+ *(num_string+current_grade))) {
+ rtn(env, current_prodecor->x,
+ current_prodecor->w, current_grade);
+ }
+
+ goto continue_label;
+ }
+
+ /* fleprintf0("not at the end of the rule\n"); */
+
+ /* if at the start of a rule, no need for test_select */
+ if (current_prodecor->z == 0) {
+ /* fleprintf0("at the start of the rule\n"); */
+ current_prodecor->z = 1;
+
+ /* otherwise test select */
+ } else {
+ /* fleprintf0("start by test select\n"); */
+ tnts =
+ array_to_list(list_array(right)+current_prodecor->z,
+ list_length(right)-current_prodecor->z);
+ if (test_select(env, *(num_string+current_grade),
+ current_prodecor->x, tnts)) {
+ /* fleprintf0("successful test\n"); */
+ free(tnts);
+ tnts = NULL;
+ ++(current_prodecor->z);
+ } else {
+ /* fleprintf0("failed test\n"); */
+ free(tnts);
+ tnts = NULL;
+ goto continue_label;
+ }
+ }
+
+ CHECK_ENV_ERROR(env, goto cleanup;);
+
+ /* while there are terminals */
+ while (((TNT *) list_nth(right, current_prodecor->z-1))->type ==
+ TERMINAL) {
+ /* fleprintf0("found terminal\n"); */
+ /* add to BSR set */
+ errorp =
+ bsr_add(env_grammar(env), env_bsrp(env),
+ current_prodecor->x, current_prodecor->y,
+ current_prodecor->z, current_prodecor->w,
+ current_grade, current_grade+1);
+
+ if (errorp) goto cleanup;
+
+ current_grade++;
+
+ /* if at the end of the rule, return if possible */
+ if (current_prodecor->z == list_length(right)) {
+ if (env_follow_p(env, current_prodecor->x,
+ *(num_string+current_grade))) {
+ /* fleprintf0("we choose to return\n"); */
+ rtn(env, current_prodecor->x,
+ current_prodecor->w, current_grade);
+ /* fleprintf0("hi\n"); */
+ }
+
+ goto continue_label;
+ }
+ /* fleprintf0("terminal not at the end\n"); */
+ /* else test select */
+ tnts =
+ array_to_list(list_array(right)+current_prodecor->z,
+ list_length(right)-current_prodecor->z);
+ if (test_select(env, *(num_string+current_grade),
+ current_prodecor->x, tnts)) {
+ /* fleprintf0("Successful test\n"); */
+ free(tnts);
+ tnts = NULL;
+ (current_prodecor->z)++;
+ } else {
+ /* fleprintf0("Failed test\n"); */
+ /* printf("next thing is "); */
+ /* print_tnt(list_nth(right, current_prodecor->z)); */
+ /* printf("\n"); */
+ free(tnts);
+ tnts = NULL;
+ goto continue_label;
+ }
+ }
+
+ /* fleprintf0("encountered non-terminal\n"); */
+
+ /* when a nonterminal is encountered, we call it */
+ cnp_call(env, (pair3) {
+ .x = current_prodecor->x,
+ .y = current_prodecor->y,
+ .z = current_prodecor->z
+ }, current_prodecor->w, current_grade);
+
+ /* fleprintf("after call { %ld, %ld, %ld, %ld, %ld }\n",
+ * current_prodecor->x, current_prodecor->y,
+ * current_prodecor->z, current_prodecor->w,
+ * current_grade); */
+
+ continue_label:
+
+ CHECK_ENV_ERROR(env, goto cleanup;);
+
+ free(current_prodecor);
+ }
+
+ cleanup:
+ bsr_print(env_bsrp(env), env_grammar(env), 1);
+
+ destroy_env(env);
+
+ destroy_grammar(g, 1);
+
+ destroy_list(rules, 1);
+
+ free(string);
+
+ return 0;
+}
+
+/* archives
+
+ printf("\nprint first for 0:\n");
+ env_print_first(env, 0);
+
+ printf("\nprint first for 1:\n");
+ env_print_first(env, 1);
+
+ printf("\nprint follow for 0:\n");
+ env_print_follow(env, 0);
+
+ printf("\nprint follow for 1:\n");
+ env_print_follow(env, 1);
+
+ */
diff --git a/src/test/check_ht.c b/src/test/check_ht.c
index b572549..62a7bee 100644
--- a/src/test/check_ht.c
+++ b/src/test/check_ht.c
@@ -231,6 +231,10 @@ int main(int UNUSED argc, char ** UNUSED argv)
ht_find(htp, &testk4));
}
+ ht_reset(htp, DELETE_KEY);
+
+ printf("After reset the size is %ld\n", ht_size(htp));
+
destroy_ht(htp, DESTROY_KEY_SELF);
return 0;
diff --git a/src/test/check_reader.c b/src/test/check_reader.c
index 3184e41..a4e184f 100644
--- a/src/test/check_reader.c
+++ b/src/test/check_reader.c
@@ -11,7 +11,7 @@ main(U_ATTR int argc, U_ATTR char **argv)
{
/* return 77; */
- char *file_name = "test.txt";
+ char *file_name = "brainfuck.bnf";
char *buffer = MYALLOC(char, 512);
NUM buffer_size = 0;