summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--README28
-rw-r--r--src/Makefile.am9
-rw-r--r--src/brainfuck.bnf10
-rw-r--r--src/bsr.c16
-rw-r--r--src/cnp.c523
-rw-r--r--src/cnp.h61
-rw-r--r--src/crf.c68
-rw-r--r--src/crf.h10
-rw-r--r--src/dfa.h12
-rw-r--r--src/grammar.c9
-rw-r--r--src/grammar.h23
-rw-r--r--src/ht.c30
-rw-r--r--src/ht.h7
-rw-r--r--src/list.c2
-rw-r--r--src/list.h2
-rw-r--r--src/test/check_cnp.c291
-rw-r--r--src/test/check_ht.c4
-rw-r--r--src/test/check_reader.c2
18 files changed, 1068 insertions, 39 deletions
diff --git a/README b/README
index 5753ba3..d50ff3c 100644
--- a/README
+++ b/README
@@ -11,4 +11,32 @@ without warranty of any kind.
About
=====
+
+Similar projects
+================
+
+Some similar projects are listed below, along with some comparisons
+between this project and them.
+
+- Tree-sitter
+
+ This is a famous project, and I think it is really cool. But my
+ goal is to provide the user with an "interactive tool" to manipulate
+ different context-free grammars, whereas Tree-sitter compiles the
+ grammars to static codes.
+
+ In addition, Tree-sitter implements the algorithm "GLR", which is an
+ abbreviation of "Generalized Left-to-right (reversed) Right-most"
+ algorithm. It is in my humble opinion the most wide-spread
+ generalized parsing algorithm that has been implemented in other
+ places as well. On the other hand, my aim is to implement many
+ parsing algorithms. Right now I have only implemented the variant
+ of the "GLL" algorithm, called the "Clustered Non-terminal Parsing"
+ algorithm, but in the future I hope I will implement many more.
+
+- Emacs-Parser-Generator
+ <https://github.com/cjohansson/emacs-parser-generator>
+
+ This package seems interesting, as its whole codes are written in
+ Emacs Lisp. \ No newline at end of file
diff --git a/src/Makefile.am b/src/Makefile.am
index 3ec4f19..446961d 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -2,9 +2,9 @@ AM_CFLAGS = -Wall -Wextra
noinst_LIBRARIES = libeps.a
libeps_a_SOURCES = grammar.c list.c util.c reader.c str.c \
-utf8.c dfa.c ht.c bsr.c crf.c \
+utf8.c dfa.c ht.c bsr.c crf.c cnp.c \
grammar.h list.h util.h reader.h str_partial.h \
-str.h utf8.h dfa.h ht.h bsr.h crf.h
+str.h utf8.h dfa.h ht.h bsr.h crf.h cnp.h
libeps_a_CFLAGS = $(AM_CFLAGS) --pedantic
@@ -22,7 +22,7 @@ CLEANFILES = TAGS
# tests
check_PROGRAMS = check_list check_grammar check_reader check_dfa \
-check_ht check_bsr check_crf
+check_ht check_bsr check_crf check_cnp
check_list_SOURCES = test/check_list.c list.c
@@ -42,6 +42,9 @@ ht.c utf8.c str.c
check_crf_SOURCES = test/check_crf.c crf.c grammar.c list.c util.c \
ht.c utf8.c str.c
+check_cnp_SOURCES = test/check_cnp.c crf.c cnp.c grammar.c list.c \
+util.c ht.c utf8.c str.c dfa.c bsr.c
+
TESTS = $(check_PROGRAMS)
AM_COLOR_TESTS = always
diff --git a/src/brainfuck.bnf b/src/brainfuck.bnf
new file mode 100644
index 0000000..d714a05
--- /dev/null
+++ b/src/brainfuck.bnf
@@ -0,0 +1,10 @@
+brainfuck: unit brainfuck
+brainfuck:
+
+unit: '>'
+unit: '<'
+unit: '+'
+unit: '-'
+unit: ','
+unit: '.'
+unit: '[' brainfuck ']'
diff --git a/src/bsr.c b/src/bsr.c
index e737d7a..bd0a84c 100644
--- a/src/bsr.c
+++ b/src/bsr.c
@@ -210,7 +210,6 @@ bsr_add(CCR_MOD(Grammar *) g, bsr * const restrict b,
#define ARR (b->f.array+X)
if (!(ARR->initialized)) {
- ARR->initialized = 1;
ARR->table = new_ht2(HT_INIT_CAP);
if (ARR->table == NULL) {
@@ -218,6 +217,8 @@ bsr_add(CCR_MOD(Grammar *) g, bsr * const restrict b,
goto cleanup;
}
+ ARR->initialized = 1;
+
value_table = new_ht2(HT_INIT_CAP);
if (value_table == NULL) {
@@ -358,6 +359,7 @@ bsr_add(CCR_MOD(Grammar *) g, bsr * const restrict b,
#undef AR
#undef ARR
#undef ARRR
+
}
goto success;
@@ -472,6 +474,12 @@ bsr_find(CCR_MOD(bsr *) b, CCR_MOD(Grammar *) g,
return result;
}
+static void
+print_sep()
+{
+ printf(" ");
+}
+
void
bsr_print(CCR_MOD(bsr *) b, CCR_MOD(Grammar *) g, NUM line_len)
{
@@ -500,11 +508,11 @@ bsr_print(CCR_MOD(bsr *) b, CCR_MOD(Grammar *) g, NUM line_len)
printf("(");
print_name(list_nth(grammar_names(g), i));
printf(" := ");
- map_list
+ map_list_between
(rg_nth
(grammar_rule(g, i),
(*(VALUE_TABLE_KEYS+k))->x),
- print_tnt);
+ print_tnt, print_sep);
printf(", %ld, %ld, %ld)",
(*(KEYS+j))->x,
(*(VALUE_TABLE_KEYS+k))->y,
@@ -589,7 +597,7 @@ bsr_print(CCR_MOD(bsr *) b, CCR_MOD(Grammar *) g, NUM line_len)
}
}
}
-
+
printf("\n");
#undef ELEMENT
diff --git a/src/cnp.c b/src/cnp.c
index d9a4978..8c7c519 100644
--- a/src/cnp.c
+++ b/src/cnp.c
@@ -1 +1,524 @@
+#include <string.h>
#include "cnp.h"
+
+struct Environment_s {
+ grammar_info *gi;
+ /* RESULT_TS and RESULT_PS are temporary arrays used in the
+ computations. */
+ ht *result_ts;
+ ht *result_ps;
+ NUM *string;
+ UNUM string_len;
+ crf *crfp;
+ procu *pu;
+ procr *pr;
+ spa *spap;
+ bsr *bsrp;
+ /* If there are errors this will be non-zero. When this field is
+ non-zero, every function that involves this struct will do
+ nothing. */
+ BOOL error;
+};
+
+void
+nt_add(Environment *env, NUM X, NUM j)
+{
+
+ if (env->error) return;
+
+#define GI env->gi
+#define RG (grammar_rule(GI->g, X))
+
+ for (NUM i = 0; i < rg_len(RG); i++) {
+ BOOL result =
+ test_select(env, *(env->string+j), X, rg_nth(RG, i));
+
+ for (NUM k = 0; k < grammar_left_len(GI->g); k++) {
+ ht_reset(env->result_ts+k, DELETE_KEY);
+ ht_reset(env->result_ps+k, DELETE_KEY);
+ }
+
+ if (env->error) break;
+
+ if (result) {
+ /* fleprintf("i = %ld, j = %ld\n", i, j); */
+ env->error =
+ desc_add(env->pr, env->pu, j,
+ (pair4) { .x = X, .y = i, .z = 0, .w = j });
+
+ /* fleprintf("env->error = %d\n", env->error); */
+ /* fleprintf("pr len = %ld\n", env->pr->len);
+ * fleprintf("pr ini = %d\n", (env->pr->array)->initialized); */
+ }
+
+ if (env->error) break;
+ }
+
+#undef RG
+#undef GI
+
+}
+
+BOOL
+test_select(Environment *env, NUM b, NUM X, CCR_MOD(List *) tnts)
+{
+ if (env->error) return 0;
+
+ BOOL result = 0;
+
+ grammar_info *gi = env->gi;
+
+ ht *result_ts = env->result_ts;
+ ht *result_ps = env->result_ps;
+
+ if (tnt_first(gi->pts, gi->pps, gi->nts, grammar_left_len(gi->g),
+ tnts, result_ts, result_ps)) {
+ fleprintf0("Fail to find the first set of the TNT string.\n");
+ goto cleanup;
+ }
+
+ if (ht_find(result_ts, &b) != NULL) {
+ result = 1;
+ goto success;
+ }
+
+ /* predicates haven't been implemented yet */
+ /* for (NUM i = 0; i < ht_size(result_ps); i++) {
+ *
+ * } */
+
+ for (NUM i = 0; i < list_length(tnts); i++) {
+
+#define TOP ((TNT*)list_nth(tnts, i))
+
+ switch (TOP->type) {
+ case TERMINAL:
+ case PREDICATE:
+ goto success;
+ break;
+ default:
+ if (!(gi->nts+TOP->data.nt)) goto success;
+ break;
+ }
+
+#undef TOP
+
+ }
+
+ if (ht_find(gi->sts+X, &b) != NULL) result = 1;
+
+ /* predicates haven't been implemented yet */
+
+ goto success;
+
+ cleanup:
+ env->error = 1;
+ return result;
+
+ success:
+ return result;
+}
+
+void
+rtn(Environment *env, NUM X, NUM k, NUM j)
+{
+ if (env->error) return;
+
+ pair3 label = (pair3) { .x = X, .y = k, .z = j };
+ pair2 label2 = (pair2) { .x = X, .y = k };
+
+ if (spa_belong(env->spap, label)) return;
+
+ /* fleprintf0("new spa\n"); */
+
+ if ((env->error = spa_insert(env->spap, label))) return;
+
+ ht *edge_ht = NULL;
+
+ if ((edge_ht = crf_find_node(env->crfp, label2)) == NULL) {
+ fleprintf0("this should not happen\n");
+ env->error = 1;
+ return;
+ }
+
+#define EDGE_LABELS ((pair4 **) ht_keys(edge_ht))
+
+ for (NUM i = 0; i < ht_size(edge_ht); i++) {
+
+#define EDGE (*(EDGE_LABELS+i))
+
+ if ((env->error = desc_add(env->pr, env->pu, j, *EDGE))) return;
+ /* fleprintf("added descriptor (%ld, %ld, %ld, %ld, %ld)\n",
+ * EDGE->x, EDGE->y, EDGE->z, EDGE->w, j); */
+ if ((env->error =
+ bsr_add(env->gi->g, env->bsrp,
+ EDGE->x, EDGE->y, EDGE->z, EDGE->w, k, j)))
+ return;
+ }
+
+#undef EDGE_LABELS
+#undef EDGE
+
+}
+
+void
+cnp_call(Environment *env, pair3 label, NUM i, NUM j)
+{
+ if (env->error) return;
+
+ if (label.z < 1) goto cleanup;
+
+ NUM Y = label.x;
+ List *tnts = rg_nth(grammar_rule(env->gi->g, Y), label.y);
+
+ TNT *xtnt = (TNT *) list_nth(tnts, label.z - 1);
+
+ if (xtnt->type != NONTERMINAL) goto cleanup;
+
+ NUM X = (NUM) xtnt->data.nt, h = 0;
+
+ pair2 node = (pair2) { .x = X, .y = j };
+ pair4 u = (pair4)
+ {
+ .x = label.x,
+ .y = label.y,
+ .z = label.z,
+ .w = i
+ };
+
+ ht *edge_ht = NULL, *spa_ht = NULL;
+
+ if ((edge_ht = crf_find_node(env->crfp, node)) == NULL) {
+ if (crf_add_node(env->crfp, node)) goto cleanup;
+
+ if (crf_add_edge(env->crfp, node, u)) goto cleanup;
+
+ /* errors will be stored in ENV, so we simply return */
+ nt_add(env, X, j);
+
+ return;
+ }
+
+ if (ht_find(edge_ht, &u) == NULL) {
+ if (crf_add_edge(env->crfp, node, u)) goto cleanup;
+
+ spa_ht = spa_find(env->spap, node);
+
+ if (spa_ht == NULL) return;
+
+ for (NUM k = 0; k < ht_size(spa_ht); k++) {
+ h = **((NUM **) ht_keys(spa_ht) + k);
+
+ if (desc_add(env->pr, env->pu, h, u)) goto cleanup;
+
+ if (bsr_add(env->gi->g, env->bsrp,
+ label.x, label.y, label.z,
+ i, j, h))
+ goto cleanup;
+ }
+ }
+
+ return;
+
+ cleanup:
+ fleprintf0("error\n");
+ env->error = 1;
+ return;
+}
+
+HP_ATTR
+BOOL
+env_error_p(CCR_MOD(Environment *) env)
+{
+ return env->error;
+}
+
+P_ATTR bsr *
+env_bsrp(CCR_MOD(Environment *) env)
+{
+ return env->bsrp;
+}
+
+P_ATTR
+Grammar *
+env_grammar(CCR_MOD(Environment *) env)
+{
+ return env->gi->g;
+}
+
+P_ATTR
+crf *
+env_crfp(CCR_MOD(Environment *) env)
+{
+ return env->crfp;
+}
+
+P_ATTR
+procr *
+env_r(CCR_MOD(Environment *) env)
+{
+ return env->pr;
+}
+
+P_ATTR
+procu *
+env_u(CCR_MOD(Environment *) env)
+{
+ return env->pu;
+}
+
+H_ATTR
+void
+env_str(CCR_MOD(Environment *) env, NUM **str, NUM *str_len)
+{
+ *str = env->string;
+ *str_len = env->string_len;
+}
+
+H_ATTR
+BOOL
+env_follow_p(CCR_MOD(Environment *) env, NUM X, NUM t)
+{
+ return ht_find(((env->gi->sts)+X), &t) != NULL;
+}
+
+void
+env_print_follow(CCR_MOD(Environment *) env, NUM X)
+{
+ for (NUM i = 0; i < ht_size((env->gi->sts+X)); i++) {
+ if (**(((NUM **) ht_keys(env->gi->sts+X)) + i) !=
+ END_OF_INPUT)
+ printf("key %ld = %ld\n",
+ i, **(((NUM **) ht_keys(env->gi->sts+X)) + i));
+ else
+ printf("key %ld = END OF INPUT\n", i);
+ }
+}
+
+void
+env_print_first(CCR_MOD(Environment *) env, NUM X)
+{
+ for (NUM i = 0; i < ht_size((env->gi->pts+X)); i++) {
+ printf("key %ld = %ld\n",
+ i, **(((NUM **) ht_keys(env->gi->pts+X)) + i));
+ }
+}
+
+Environment *
+new_env(Grammar *g, str *string)
+{
+ Environment *env = NULL;
+ ht *htp = NULL;
+ NUM current_index = 0, follow_current_index = 0;
+ NUM result_current_index = 0;
+
+ SAFE_MALLOC(Environment, env, 1, goto cleanup;);
+
+ env->gi = NULL;
+
+ SAFE_MALLOC(grammar_info, env->gi, 1, goto cleanup;);
+
+ memset(env->gi, 0, sizeof(grammar_info));
+
+ env->gi->g = g;
+
+ NUM left_len = grammar_left_len(g);
+
+ SAFE_MALLOC(BOOL, env->gi->nts, sizeof(BOOL) * left_len,
+ goto cleanup;);
+
+ epsilon_nts(g, env->gi->nts);
+
+ SAFE_MALLOC(ht, env->gi->pts, sizeof(ht) * left_len,
+ goto cleanup;);
+ SAFE_MALLOC(ht, env->gi->pps, sizeof(ht) * left_len,
+ goto cleanup;);
+
+ for (current_index = 0; current_index < left_len; current_index++) {
+ if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) goto cleanup;
+ *(env->gi->pts+current_index) = *htp;
+ free(htp);
+
+ if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) {
+ destroy_ht(env->gi->pts+current_index, DESTROY_NONE_NO_SELF);
+ goto cleanup;
+ }
+ *(env->gi->pps+current_index) = *htp;
+ free(htp);
+ }
+
+ if (nt_first(g, env->gi->nts, env->gi->pts, env->gi->pps))
+ goto cleanup;
+
+ SAFE_MALLOC(ht, env->gi->sts, sizeof(ht) * left_len,
+ goto cleanup;);
+ SAFE_MALLOC(ht, env->gi->sps, sizeof(ht) * left_len,
+ goto cleanup;);
+
+ for (follow_current_index = 0;
+ follow_current_index < left_len;
+ follow_current_index++) {
+ if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) goto cleanup;
+
+ *(env->gi->sts+follow_current_index) = *htp;
+
+ free(htp);
+
+ if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) {
+ destroy_ht(env->gi->sts+follow_current_index,
+ DESTROY_NONE_NO_SELF);
+ goto cleanup;
+ }
+
+ *(env->gi->sps+follow_current_index) = *htp;
+
+ free(htp);
+ }
+
+ if (nt_follow(g, env->gi->nts,
+ env->gi->pts, env->gi->pps,
+ env->gi->sts, env->gi->sps))
+ goto cleanup;
+
+ SAFE_MALLOC(ht, env->result_ts, sizeof(ht) * left_len,
+ goto cleanup;);
+ SAFE_MALLOC(ht, env->result_ps, sizeof(ht) * left_len,
+ goto cleanup;);
+
+ for (result_current_index = 0;
+ result_current_index < left_len;
+ result_current_index++) {
+ if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) goto cleanup;
+
+ *(env->result_ts+result_current_index) = *htp;
+
+ free(htp);
+
+ if ((htp = new_ht(HT_INIT_CAP, 0)) == NULL) {
+ destroy_ht(env->result_ts+result_current_index,
+ DESTROY_NONE_NO_SELF);
+ goto cleanup;
+ }
+
+ *(env->result_ps+result_current_index) = *htp;
+
+ free(htp);
+
+ }
+
+ SAFE_MALLOC(NUM, env->string, str_length(string)+1, goto cleanup;);
+
+ str_info info = EMPTY_STR_INFO;
+
+ env->string_len = 0;
+
+ for (NUM i = 0; i < str_length(string); i += info.step) {
+ info = get_info(string, i);
+ *(env->string+(env->string_len)++) = info.value;
+ }
+
+ *(env->string+(env->string_len)++) = END_OF_INPUT;
+
+ if ((env->crfp = new_crf()) == NULL) goto cleanup;
+
+ BOOL error = 0;
+
+ env->pu = new_procu(env->string_len, &error);
+
+ if (error) goto cleanup;
+
+ env->pr = new_procr(env->string_len, &error);
+
+ if (error) goto cleanup;
+
+ if ((env->spap = new_spa()) == NULL) goto cleanup;
+
+ env->bsrp = new_bsr(&error, g);
+
+ if (error) goto cleanup;
+
+ return env;
+
+ cleanup:
+
+ if (env) {
+ if (env->gi) {
+ if (env->gi->nts) free(env->gi->nts);
+
+ for (NUM i = 0; i < current_index; i++) {
+ destroy_ht(env->gi->pts+i, DESTROY_NONE_NO_SELF);
+ destroy_ht(env->gi->pps+i, DESTROY_NONE_NO_SELF);
+ }
+
+ if (env->gi->pts) free(env->gi->pts);
+ if (env->gi->pps) free(env->gi->pps);
+
+ for (NUM i = 0; i < follow_current_index; i++) {
+ destroy_ht(env->gi->sts+i, DESTROY_NONE_NO_SELF);
+ destroy_ht(env->gi->sps+i, DESTROY_NONE_NO_SELF);
+ }
+
+ if (env->gi->sts) free(env->gi->sts);
+ if (env->gi->sps) free(env->gi->sps);
+
+ free(env->gi);
+ }
+
+ for (NUM i = 0; i < result_current_index; i++) {
+ destroy_ht(env->result_ts+i, DESTROY_NONE_NO_SELF);
+ destroy_ht(env->result_ps+i, DESTROY_NONE_NO_SELF);
+ }
+
+ if (env->result_ts) free(env->result_ts);
+ if (env->result_ps) free(env->result_ps);
+
+ if (env->string) free(env->string);
+
+ if (env->crfp) destroy_crf(env->crfp);
+
+ if (env->pu) destroy_procu(env->pu);
+ if (env->pr) destroy_procr(env->pr);
+ if (env->spap) destroy_spa(env->spap);
+ if (env->bsrp) destroy_bsr(env->bsrp);
+
+ free(env);
+ }
+
+ return NULL;
+}
+
+void
+destroy_env(Environment *env)
+{
+ if (env == NULL) return;
+
+ free(env->gi->nts);
+
+ NUM left_len = grammar_left_len(env->gi->g);
+
+ for (NUM i = 0; i < left_len; i++) {
+ destroy_ht(env->gi->pts+i, DESTROY_KEY_NO_SELF);
+ destroy_ht(env->gi->pps+i, DESTROY_KEY_NO_SELF);
+ destroy_ht(env->gi->sts+i, DESTROY_KEY_NO_SELF);
+ destroy_ht(env->gi->sps+i, DESTROY_KEY_NO_SELF);
+ destroy_ht(env->result_ts+i, DESTROY_KEY_NO_SELF);
+ destroy_ht(env->result_ps+i, DESTROY_KEY_NO_SELF);
+ }
+
+ free(env->gi->pts);
+ free(env->gi->pps);
+ free(env->gi->sts);
+ free(env->gi->sps);
+ free(env->result_ts);
+ free(env->result_ps);
+ free(env->string);
+
+ destroy_crf(env->crfp);
+ destroy_procu(env->pu);
+ destroy_procr(env->pr);
+ destroy_spa(env->spap);
+ destroy_bsr(env->bsrp);
+
+ free(env->gi);
+ free(env);
+}
diff --git a/src/cnp.h b/src/cnp.h
index 5944ca3..2b0c104 100644
--- a/src/cnp.h
+++ b/src/cnp.h
@@ -1,4 +1,63 @@
#ifndef CNP_H
#define CNP_H
-/* TODO */
+#include "str.h"
+#include "utf8.h"
+#include "bsr.h"
+#include "crf.h"
+
+/* We start with implementing some support functions. These support
+ functions are not supposed to be put in the crf.h header file, as
+ they are not related to the underlying (static) data structures
+ really. */
+
+typedef struct Environment_s Environment;
+
+/* On error return NULL */
+Environment *new_env(Grammar *g, str *string);
+
+HP_ATTR BOOL env_error_p(CCR_MOD(Environment *) env);
+
+P_ATTR bsr *env_bsrp(CCR_MOD(Environment *) env);
+
+P_ATTR Grammar *env_grammar(CCR_MOD(Environment *) env);
+
+HP_ATTR crf *env_crfp(CCR_MOD(Environment *) env);
+
+HP_ATTR procr *env_r(CCR_MOD(Environment *) env);
+HP_ATTR procu *env_u(CCR_MOD(Environment *) env);
+H_ATTR void env_str(CCR_MOD(Environment *) env,
+ NUM **str, NUM *str_len);
+
+H_ATTR BOOL env_follow_p(CCR_MOD(Environment *) env, NUM X, NUM t);
+
+void env_print_follow(CCR_MOD(Environment *) env, NUM X);
+
+void env_print_first(CCR_MOD(Environment *) env, NUM X);
+
+/* Convenient macro */
+
+#define CHECK_ENV_ERROR(ENV, CLEANUP) do { \
+ if (env_error_p(ENV)) { \
+ fleprintf0("Error occurs!\n"); \
+ { CLEANUP }; \
+ } \
+ } while (0)
+
+/* NOTE: Since the grammar pointer just points to the existing grammar
+ when we construct it, we shall not destroy the grammar when we
+ destroy the environment, so that we can later reuse that grammar,
+ should the need arise. */
+void destroy_env(Environment *env);
+
+void nt_add(Environment *env, NUM X, NUM j);
+
+BOOL test_select(Environment *env, NUM b, NUM X,
+ CCR_MOD(List *) tnts);
+
+void rtn(Environment *env, NUM X, NUM k, NUM j);
+
+void cnp_call(Environment *env, pair3 label, NUM i, NUM j);
+
+/* TODO: Main algorithm */
+
#endif
diff --git a/src/crf.c b/src/crf.c
index dbcb050..1cff96f 100644
--- a/src/crf.c
+++ b/src/crf.c
@@ -134,6 +134,13 @@ destroy_spa(spa * const restrict spap)
free(spap);
}
+__attribute__((__pure__, __cold__))
+ht *
+spa_ht(CCR_MOD(spa *) spap)
+{
+ return spap->table;
+}
+
BOOL
spa_insert(spa * const restrict spap, pair3 label)
{
@@ -270,6 +277,7 @@ new_procr(NUM size, BOOL * const restrict errorp)
pr->len = size;
pr->array = NULL;
+ pr->mg = 0;
SAFE_MALLOC(procr_array_element, pr->array,
sizeof(procr_array_element) * size,
@@ -287,6 +295,8 @@ new_procr(NUM size, BOOL * const restrict errorp)
if (pr) free(pr);
+ pr = NULL;
+
success:
return pr;
@@ -320,12 +330,37 @@ new_procu(NUM size, BOOL * const restrict errorp)
if (pu) free(pu);
+ pu = NULL;
+
success:
return pu;
}
void
+print_procr(CCR_MOD(procr *) pr)
+{
+ eprintf("\n");
+ fleprintf0("Printing procr...\n");
+ fleprintf("LEN = %ld, MG = %ld\n", pr->len, pr->mg);
+ for (NUM i = 0; i < pr->len; i++) {
+ if ((pr->array+i)->initialized) {
+ if (list_length((pr->array+i)->ls)) eprintf("%ld: ", i);
+ for (NUM j = 0; j < list_length((pr->array+i)->ls); j++) {
+ fleprintf("(%ld, %ld, %ld, %ld)",
+ ((pair4 *) list_nth((pr->array+i)->ls, j))->x,
+ ((pair4 *) list_nth((pr->array+i)->ls, j))->y,
+ ((pair4 *) list_nth((pr->array+i)->ls, j))->z,
+ ((pair4 *) list_nth((pr->array+i)->ls, j))->w);
+ if (j+1==list_length((pr->array+i)->ls)) eprintf("\n");
+ else eprintf(", ");
+ }
+ }
+ }
+ eprintf("\n");
+}
+
+void
destroy_procr(procr * const restrict pr)
{
for (NUM i = 0; i < pr->len; i++)
@@ -353,13 +388,13 @@ BOOL
desc_add(procr * const restrict pr, procu * const restrict pu,
NUM grade, prodecor dec)
{
+ pair4 *p4 = NULL;
+
if (grade < 0 || grade >= pu->len) {
fleprintf("Invalid grade: %ld\n", grade);
goto cleanup;
}
- pair4 *p4 = NULL;
-
#define HELE (pu->array+grade)
#define LELE (pr->array+grade)
@@ -441,6 +476,8 @@ desc_add(procr * const restrict pr, procu * const restrict pu,
cleanup:
+ if (p4) free(p4);
+
return 1;
success:
@@ -457,13 +494,10 @@ pop_procr(procr * pr, procu * pu, NUM *gradep)
#define ELEMENT (pr->array+pr->mg)
- if (!(ELEMENT->initialized)) goto reset_mg;
-
- *gradep = pr->mg;
-
- result = pop_from_list(ELEMENT->ls);
+ if (ELEMENT->initialized && list_length(ELEMENT->ls))
+ goto no_reset_mg;
- if (list_length(ELEMENT->ls) == 0) {
+ if (ELEMENT->initialized && list_length(ELEMENT->ls) == 0) {
destroy_ht((pu->array+pr->mg)->table, DESTROY_KEY_SELF);
(pu->array+pr->mg)->initialized = 0;
(pu->array+pr->mg)->table = NULL;
@@ -471,17 +505,23 @@ pop_procr(procr * pr, procu * pu, NUM *gradep)
destroy_list(ELEMENT->ls, 1);
ELEMENT->initialized = 0;
ELEMENT->ls = NULL;
+ }
- /* REVIEW: Maybe we can just increment the minimal grade, as we
- won't jump over two steps? */
- reset_mg:
- while (++(pr->mg) < pr->len) {
- if (ELEMENT->initialized) break;
- }
+ while (++(pr->mg) < pr->len) {
+ if (ELEMENT->initialized) break;
}
+ if (pr->mg >= pr->len) goto success;
+
+ no_reset_mg:
+
+ result = pop_from_list(ELEMENT->ls);
+
+ *gradep = pr->mg;
+
#undef ELEMENT
success:
return result;
+
}
diff --git a/src/crf.h b/src/crf.h
index 7ff5a3f..9d1cc78 100644
--- a/src/crf.h
+++ b/src/crf.h
@@ -1,5 +1,5 @@
-#ifndef CNP_H
-#define CNP_H
+#ifndef CRF_H
+#define CRF_H
#include "ht.h"
/* This file implements some support functions for the Clustered
@@ -83,6 +83,8 @@ spa *new_spa();
void destroy_spa(spa * const restrict spap);
+__attribute__((__pure__, __cold__)) ht *spa_ht(CCR_MOD(spa *) spap);
+
/* On error return non-zero */
BOOL spa_insert(spa * const restrict spap, pair3 label);
@@ -156,8 +158,12 @@ BOOL desc_add(procr * const restrict pr, procu * const restrict pu,
/* if no more elements are left, return NULL */
prodecor *pop_procr(procr * pr, procu * pu, NUM *gradep);
+void print_procr(CCR_MOD(procr *) pr);
+
/* destructor */
void destroy_procr(procr * const restrict pr);
void destroy_procu(procu * const restrict pu);
+#define TEST haha
+
#endif
diff --git a/src/dfa.h b/src/dfa.h
index a22409f..70c8bf6 100644
--- a/src/dfa.h
+++ b/src/dfa.h
@@ -32,18 +32,18 @@ dfa *new_dfa();
void destroy_dfa(dfa *table);
-void print_dfa(const dfa * const restrict table);
+void print_dfa(CCR_MOD(dfa *) table);
dfa *dfa_from_bytes(int sequence_size,
- const NUM * const restrict data);
+ CCR_MOD(NUM *) data);
dfa *dfa_from_bytes_neg(int sequence_size,
- const NUM * const restrict data);
+ CCR_MOD(NUM *) data);
dfa *dfa_from_bytes_both(int sequence_size,
- const NUM * const restrict data,
+ CCR_MOD(NUM *) data,
int neg_sequence_size,
- const NUM * const restrict negdata);
+ CCR_MOD(NUM *) negdata);
/* TODO: Reject character bytes from a given DFA. */
@@ -59,6 +59,6 @@ dfa *dfa_from_bytes_both(int sequence_size,
inline BOOL dfa_any_fun(const NUM UNUSED code) { return 1; }
-BOOL run_dfa(const dfa * const restrict table, const NUM code);
+BOOL run_dfa(CCR_MOD(dfa *) table, const NUM code);
#endif
diff --git a/src/grammar.c b/src/grammar.c
index c12ddec..3b5a959 100644
--- a/src/grammar.c
+++ b/src/grammar.c
@@ -676,7 +676,7 @@ nt_first(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
BOOL
tnt_first(CC_MOD(ht *) terminal_hts, CC_MOD(ht *) predicate_hts,
- CCR_MOD(BOOL *) nts, NUM len, List *tnts,
+ CCR_MOD(BOOL *) nts, NUM len, CCR_MOD(List *) tnts,
ht * const restrict result_terminals,
ht * const restrict result_predicates)
{
@@ -787,8 +787,8 @@ nt_follow(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
Rule_group *rg = grammar_rule(g, (NT) i);
NUM rg_length = rg_len(rg);
- for (NUM j = 0; j < rg_length;) {
- List *string = rg_nth(rg, j++);
+ for (NUM j = 0; j < rg_length; j++) {
+ List *string = rg_nth(rg, j);
NUM string_len = list_length(string);
TNT *top = NULL;
@@ -842,13 +842,12 @@ nt_follow(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
if (k+1 == string_len ||
(rest_produce_epsilon >= 0 &&
- rest_produce_epsilon <= k))
+ rest_produce_epsilon <= k+1))
goto add_to_follow;
break;
add_to_follow:
-
ht_len = ht_size(result_terminals+i);
keys = (NUM **) ht_keys(result_terminals+i);
diff --git a/src/grammar.h b/src/grammar.h
index 8d6cb06..cbda887 100644
--- a/src/grammar.h
+++ b/src/grammar.h
@@ -213,7 +213,7 @@ BOOL nt_first(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
On error return non-zero. */
BOOL tnt_first(CC_MOD(ht *) terminal_hts, CC_MOD(ht *) predicate_hts,
- CCR_MOD(BOOL *) nts, NUM len, List *tnts,
+ CCR_MOD(BOOL *) nts, NUM len, CCR_MOD(List *) tnts,
ht * const restrict result_terminals,
ht * const restrict result_predicates);
@@ -231,6 +231,27 @@ BOOL nt_follow(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
ht * const result_terminals,
ht * const result_predicates);
+/* struct that holds information about grammars */
+
+typedef struct grammar_info_s grammar_info;
+
+struct grammar_info_s {
+ /* the grammar itself */
+ Grammar *g;
+ /* the array about epsilon non-terminals */
+ BOOL *nts;
+ /* the array of hash tables of first (premier) terminals */
+ ht *pts;
+ /* the array of hash tables of first (premier) predicate
+ terminals */
+ ht *pps;
+ /* the array of hash tables of follow (suivant) terminals */
+ ht *sts;
+ /* the array of hash tables of follow (suivant) predicate
+ terminals */
+ ht *sps;
+};
+
/* convenient macro */
#define NEW_CPA(X, ARRAY, SIZE) do { \
diff --git a/src/ht.c b/src/ht.c
index 7965941..45953f4 100644
--- a/src/ht.c
+++ b/src/ht.c
@@ -323,6 +323,36 @@ ht_delete(ht * const restrict htp, void *key, HT_DELETE_FLAG flag)
return 0;
}
+void
+ht_reset(ht * const restrict htp, HT_DELETE_FLAG flag)
+{
+ for (int i = 0; i < htp->size; i++) {
+ switch (flag) {
+ case DELETE_KEY:
+ case DELETE_EVERY:
+ free(*(htp->keys+i));
+ break;
+ default:
+ break;
+ }
+
+ switch (flag) {
+ case DELETE_VALUE:
+ case DELETE_EVERY:
+ free(*(htp->values+i));
+ break;
+ default:
+ break;
+ }
+ }
+
+ htp->size = 0;
+
+ memset(htp->values, 0, sizeof(void*) * htp->capability);
+ memset(htp->keys, 0, sizeof(void*) * htp->capability);
+ memset(htp->indices, 0xff, sizeof (NUM) * htp->capability);
+}
+
P_ATTR
void *
ht_find(CCR_MOD(ht *) htp, void *key)
diff --git a/src/ht.h b/src/ht.h
index 4ac0c5d..65ae747 100644
--- a/src/ht.h
+++ b/src/ht.h
@@ -27,6 +27,11 @@ typedef BOOL (*compare_func_t)
(CCR_MOD(void *)keya, CCR_MOD(void *)keyb);
/* FIXME: Hide this struct from the header file. */
+/* WARNING: Don't use these fields directly. These fields are exposed
+ in this header file simply because I want to use pointers to ht and
+ do arithmetic on those pointers. But these fields might change
+ anytime in the future, if I think of some new ideas, or something
+ like that. */
struct ht_s {
void **keys;
void **values;
@@ -106,6 +111,8 @@ typedef enum HT_DELETE_FLAG_e HT_DELETE_FLAG;
BOOL ht_delete(ht * const restrict htp, void *key,
HT_DELETE_FLAG flag);
+void ht_reset(ht * const restrict htp, HT_DELETE_FLAG flag);
+
P_ATTR void *ht_find(CCR_MOD(ht *) htp, void *key);
/* Pairing hash tables */
diff --git a/src/list.c b/src/list.c
index 62933a0..1b9f4d4 100644
--- a/src/list.c
+++ b/src/list.c
@@ -148,7 +148,7 @@ copy_list(List *dest, List *source, copyer copyf)
H_ATTR
void *
-list_nth(const List * const ls, NUM n)
+list_nth(const List * const restrict ls, NUM n)
{
return *(ls->array+n);
}
diff --git a/src/list.h b/src/list.h
index 76dde0a..33bdf3e 100644
--- a/src/list.h
+++ b/src/list.h
@@ -45,7 +45,7 @@ void *copy_num(void *);
/* upon failure return 1 */
BOOL copy_list(List *dest, List *source, copyer copyf);
-void *list_nth(const List * const ls, NUM n);
+void *list_nth(CCR_MOD(List *) ls, NUM n);
NUM list_length(const List * const restrict ls);
diff --git a/src/test/check_cnp.c b/src/test/check_cnp.c
new file mode 100644
index 0000000..00c8a64
--- /dev/null
+++ b/src/test/check_cnp.c
@@ -0,0 +1,291 @@
+#include "../cnp.h"
+
+#define READ_INTO_CPA(N, U, L, I, VA, VI, CP) do { \
+ U = new_utf8(N, L); \
+ I = get_info((str *)U, 0); \
+ VA = MYALLOC(NUM, 2); \
+ VI = 0; \
+ for (NUM index = 0; \
+ I.value >= 0 && index < str_length((str *) U); \
+ index += I.step, VI++) { \
+ I = get_info((str *)U, index); \
+ *(VA+VI) = I.value; \
+ } \
+ CP = MYALLOC(cpa, 1); \
+ CP->array = VA; \
+ CP->size = VI; \
+ add_to_list(names, CP); \
+ destroy_str((str *)U, 1); \
+ } while (0)
+
+#define READ_TNT_STRING(LEFT, FORMAT, LEN, ...) do { \
+ tnt_string = new_tnt_string(FORMAT, LEN, __VA_ARGS__); \
+ if (!tnt_string) { \
+ fleprintf("left = %d, f = %s, l = %d, " \
+ "cannot create tnt string\n", \
+ LEFT, FORMAT, LEN); \
+ map_list(rules, destroy_rule_and_free_all); \
+ destroy_list(rules, 0); \
+ map_list(names, destroy_cpa_and_free_all); \
+ destroy_list(names, 0); \
+ return 1; \
+ } \
+ rule = new_rule(LEFT, tnt_string); \
+ add_to_list(rules, rule); \
+ } while (0)
+
+int
+main(int UNUSED argc, char ** UNUSED argv)
+{
+ /* have a grammar for testing */
+ List *tnt_string = NULL;
+ Rule *rule = NULL;
+ List *rules = new_list();
+ List *names = new_list();
+
+ char *name = MYALLOC(char, 2);
+ *(name+1) = 0;
+ *name = 'B';
+
+ utf8* uname = NULL;
+
+ str_info info = EMPTY_STR_INFO;
+
+ NUM *varray = NULL;
+ NUM vindex = 0;
+ cpa *cpap = NULL;
+
+ READ_INTO_CPA(name, uname, 1, info, varray, vindex, cpap);
+
+ name = MYALLOC(char, 3);
+ *(name+1) = 0;
+ *name = 'U';
+
+ READ_INTO_CPA(name, uname, 1, info, varray, vindex, cpap);
+
+ /* name = MYALLOC(char, 2);
+ * *(name+1) = 0;
+ * *name = 'F';
+ *
+ * READ_INTO_CPA(name, uname, 1, info, varray, vindex, cpap);
+ *
+ * name = MYALLOC(char, 2);
+ * *(name+1) = 0;
+ * *name = 'D';
+ *
+ * READ_INTO_CPA(name, uname, 1, info, varray, vindex, cpap); */
+
+ READ_TNT_STRING(0, "nn", 2, (NT) 1, (NT) 0);
+ rule = new_rule(0, new_list());
+ add_to_list(rules, rule);
+
+ READ_TNT_STRING(1, "t", 1, (T) '>');
+ READ_TNT_STRING(1, "t", 1, (T) '<');
+ READ_TNT_STRING(1, "t", 1, (T) '+');
+ READ_TNT_STRING(1, "t", 1, (T) '-');
+ READ_TNT_STRING(1, "t", 1, (T) ',');
+ READ_TNT_STRING(1, "t", 1, (T) '.');
+ READ_TNT_STRING(1, "tnt", 3, (T) '[', (NT) 0, (T) ']');
+
+ Grammar *g = new_grammar();
+
+ build_grammar(g, rules, names);
+
+ print_grammar(g);
+
+ utf8 *string = new_utf8("[,[.[+]]-]", 10);
+
+ printf("\nPrinting the input...\n%s\n", get_data((str *) string));
+
+ Environment *env = new_env(g, (str *) string);
+
+ if (crf_add_node(env_crfp(env), (pair2) { 0 })) goto cleanup;
+
+ nt_add(env, 0, 0);
+
+ CHECK_ENV_ERROR(env, goto cleanup;);
+
+ NUM current_grade = 0;
+
+ prodecor *current_prodecor = NULL;
+
+ Rule_group *rg = NULL;
+ List *right = NULL, *tnts = NULL;
+
+ NUM *num_string = NULL, num_string_len = 0;
+ BOOL errorp = 0;
+
+ env_str(env, &num_string, &num_string_len);
+
+ while ((current_prodecor =
+ pop_procr(env_r(env), env_u(env), &current_grade))) {
+ /* fleprintf("{ %ld, %ld, %ld, %ld, %ld }\n",
+ * current_prodecor->x, current_prodecor->y,
+ * current_prodecor->z, current_prodecor->w,
+ * current_grade);
+ * print_procr(env_r(env)); */
+
+ /* different cases */
+
+ rg = grammar_rule(env_grammar(env), current_prodecor->x);
+ right = rg_nth(rg, current_prodecor->y);
+
+ /* separate the empty rules out */
+ if (list_length(right) == 0) {
+ errorp =
+ bsr_add(env_grammar(env), env_bsrp(env),
+ current_prodecor->x, current_prodecor->y, 0,
+ current_grade, current_grade, current_grade);
+
+ if (errorp) goto cleanup;
+
+ if (env_follow_p(env, current_prodecor->x,
+ *(num_string+current_grade))) {
+ rtn(env, current_prodecor->x,
+ current_prodecor->w, current_grade);
+ }
+
+ goto continue_label;
+ }
+
+ /* fleprintf0("non empty rule\n"); */
+
+ /* if at the end of a rule, return if possible */
+ if (current_prodecor->z == list_length(right)) {
+ if (env_follow_p(env, current_prodecor->x,
+ *(num_string+current_grade))) {
+ rtn(env, current_prodecor->x,
+ current_prodecor->w, current_grade);
+ }
+
+ goto continue_label;
+ }
+
+ /* fleprintf0("not at the end of the rule\n"); */
+
+ /* if at the start of a rule, no need for test_select */
+ if (current_prodecor->z == 0) {
+ /* fleprintf0("at the start of the rule\n"); */
+ current_prodecor->z = 1;
+
+ /* otherwise test select */
+ } else {
+ /* fleprintf0("start by test select\n"); */
+ tnts =
+ array_to_list(list_array(right)+current_prodecor->z,
+ list_length(right)-current_prodecor->z);
+ if (test_select(env, *(num_string+current_grade),
+ current_prodecor->x, tnts)) {
+ /* fleprintf0("successful test\n"); */
+ free(tnts);
+ tnts = NULL;
+ ++(current_prodecor->z);
+ } else {
+ /* fleprintf0("failed test\n"); */
+ free(tnts);
+ tnts = NULL;
+ goto continue_label;
+ }
+ }
+
+ CHECK_ENV_ERROR(env, goto cleanup;);
+
+ /* while there are terminals */
+ while (((TNT *) list_nth(right, current_prodecor->z-1))->type ==
+ TERMINAL) {
+ /* fleprintf0("found terminal\n"); */
+ /* add to BSR set */
+ errorp =
+ bsr_add(env_grammar(env), env_bsrp(env),
+ current_prodecor->x, current_prodecor->y,
+ current_prodecor->z, current_prodecor->w,
+ current_grade, current_grade+1);
+
+ if (errorp) goto cleanup;
+
+ current_grade++;
+
+ /* if at the end of the rule, return if possible */
+ if (current_prodecor->z == list_length(right)) {
+ if (env_follow_p(env, current_prodecor->x,
+ *(num_string+current_grade))) {
+ /* fleprintf0("we choose to return\n"); */
+ rtn(env, current_prodecor->x,
+ current_prodecor->w, current_grade);
+ /* fleprintf0("hi\n"); */
+ }
+
+ goto continue_label;
+ }
+ /* fleprintf0("terminal not at the end\n"); */
+ /* else test select */
+ tnts =
+ array_to_list(list_array(right)+current_prodecor->z,
+ list_length(right)-current_prodecor->z);
+ if (test_select(env, *(num_string+current_grade),
+ current_prodecor->x, tnts)) {
+ /* fleprintf0("Successful test\n"); */
+ free(tnts);
+ tnts = NULL;
+ (current_prodecor->z)++;
+ } else {
+ /* fleprintf0("Failed test\n"); */
+ /* printf("next thing is "); */
+ /* print_tnt(list_nth(right, current_prodecor->z)); */
+ /* printf("\n"); */
+ free(tnts);
+ tnts = NULL;
+ goto continue_label;
+ }
+ }
+
+ /* fleprintf0("encountered non-terminal\n"); */
+
+ /* when a nonterminal is encountered, we call it */
+ cnp_call(env, (pair3) {
+ .x = current_prodecor->x,
+ .y = current_prodecor->y,
+ .z = current_prodecor->z
+ }, current_prodecor->w, current_grade);
+
+ /* fleprintf("after call { %ld, %ld, %ld, %ld, %ld }\n",
+ * current_prodecor->x, current_prodecor->y,
+ * current_prodecor->z, current_prodecor->w,
+ * current_grade); */
+
+ continue_label:
+
+ CHECK_ENV_ERROR(env, goto cleanup;);
+
+ free(current_prodecor);
+ }
+
+ cleanup:
+ bsr_print(env_bsrp(env), env_grammar(env), 1);
+
+ destroy_env(env);
+
+ destroy_grammar(g, 1);
+
+ destroy_list(rules, 1);
+
+ free(string);
+
+ return 0;
+}
+
+/* archives
+
+ printf("\nprint first for 0:\n");
+ env_print_first(env, 0);
+
+ printf("\nprint first for 1:\n");
+ env_print_first(env, 1);
+
+ printf("\nprint follow for 0:\n");
+ env_print_follow(env, 0);
+
+ printf("\nprint follow for 1:\n");
+ env_print_follow(env, 1);
+
+ */
diff --git a/src/test/check_ht.c b/src/test/check_ht.c
index b572549..62a7bee 100644
--- a/src/test/check_ht.c
+++ b/src/test/check_ht.c
@@ -231,6 +231,10 @@ int main(int UNUSED argc, char ** UNUSED argv)
ht_find(htp, &testk4));
}
+ ht_reset(htp, DELETE_KEY);
+
+ printf("After reset the size is %ld\n", ht_size(htp));
+
destroy_ht(htp, DESTROY_KEY_SELF);
return 0;
diff --git a/src/test/check_reader.c b/src/test/check_reader.c
index 3184e41..a4e184f 100644
--- a/src/test/check_reader.c
+++ b/src/test/check_reader.c
@@ -11,7 +11,7 @@ main(U_ATTR int argc, U_ATTR char **argv)
{
/* return 77; */
- char *file_name = "test.txt";
+ char *file_name = "brainfuck.bnf";
char *buffer = MYALLOC(char, 512);
NUM buffer_size = 0;