summaryrefslogtreecommitdiff
path: root/src/test
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2022-02-08 00:29:10 +0800
committerJSDurand <mmemmew@gmail.com>2022-02-08 12:33:05 +0800
commit5426d9e2a6b820e34809d639838b26643df9ab17 (patch)
tree111f2b478b671092e3f2e64a6171970b8a5cdf99 /src/test
parentaaa12504c6095b2cdfa213a3d4b269bbd5e7038a (diff)
fix errorsHEADmaster
There were multiple subtle errors in the previous version, both in the code and in the description of the BNF format. This version should fix some of those problems. It can now successfully parse the grammar of its own grammar format, which is quite nice. See test/check_reader.c for the code that parses this format.
Diffstat (limited to 'src/test')
-rw-r--r--src/test/check_reader.c594
1 files changed, 578 insertions, 16 deletions
diff --git a/src/test/check_reader.c b/src/test/check_reader.c
index a4e184f..45025ff 100644
--- a/src/test/check_reader.c
+++ b/src/test/check_reader.c
@@ -1,18 +1,517 @@
+#include <string.h>
#include <stdlib.h>
#include <stdio.h>
+#include <time.h>
+#include <limits.h>
-#include "../util.h"
-#include "../list.h"
-#include "../grammar.h"
-#include "../reader.h"
+#include "../cnp.h"
+
+/* Declare the two timestamps, tic and toc, that TIC and TOC fill in.
+   Expand this once, as a declaration, in the scope being timed. */
+#define TITO struct timespec tic, toc
+
+/* Record the start of a timed region: store the current
+   CLOCK_MONOTONIC_RAW reading in tic.  The variable tic must already
+   be in scope (see TITO).  The return value of clock_gettime is not
+   checked. */
+#define TIC do { \
+ clock_gettime(CLOCK_MONOTONIC_RAW, &tic); \
+ } while (0)
+
+/* Record the end of a timed region and report it: store the current
+   CLOCK_MONOTONIC_RAW reading in toc, then print the time elapsed
+   since tic, in seconds, to standard output.
+
+   The nanosecond difference is divided by a floating-point constant
+   first, so the whole sum is computed in floating point and a
+   negative nanosecond difference is simply compensated by the
+   difference of the seconds.  Both tic and toc must be in scope (see
+   TITO). */
+#define TOC do { \
+ clock_gettime(CLOCK_MONOTONIC_RAW, &toc); \
+ printf("\nTotal time = %f seconds\n", \
+ (toc.tv_nsec - tic.tv_nsec) / \
+ 1000000000.0 + \
+ toc.tv_sec - tic.tv_sec); \
+ } while (0)
+
+/* Convert the byte string N of length L into a freshly allocated cpa
+   and append that cpa to the list `names'.
+
+   Arguments (all but N and L are scratch variables that the macro
+   overwrites):
+     N  - the bytes of the name, handed to new_utf8.
+     U  - receives the utf8 object built from N.
+     L  - the length passed to new_utf8.
+     I  - a str_info that receives each result of get_info.
+     VA - receives the newly allocated array of NUM values.
+     VI - receives the number of elements stored in VA.
+     CP - receives the newly allocated cpa.
+
+   The string is walked twice with get_info, advancing by I.step each
+   time: the first pass only counts the elements so that VA can be
+   allocated with the right size, and the second pass stores every
+   I.value in VA.  Both passes stop as soon as I.value is negative or
+   the index reaches str_length.
+   NOTE(review): presumably I.value is the decoded code point and
+   I.step is its width in bytes -- confirm against get_info.
+
+   Caveats:
+   - A variable called `names' must be in scope at the expansion
+     site; it is not a parameter.
+   - If add_to_list fails, a message is printed and `return 1' is
+     executed in the enclosing function.  The SAFE_MALLOC calls are
+     given `return 1;' as their failure action as well.
+   - U is destroyed with destroy_str(U, 1) before the macro ends.
+     NOTE(review): the second argument presumably also frees N, which
+     would explain why the callers allocate a new buffer for every
+     name -- confirm against destroy_str.
+   - The arguments are not parenthesized and are evaluated many
+     times, so pass plain variables only. */
+#define READ_INTO_CPA(N, U, L, I, VA, VI, CP) do { \
+ U = new_utf8(N, L); \
+ I = get_info((str *)U, 0); \
+ VI = 0; \
+ for (NUM index = 0; \
+ I.value >= 0 && index < str_length((str*) U); \
+ index += I.step, VI++) { \
+ I = get_info((str*)U, index); \
+ } \
+ SAFE_MALLOC(NUM, VA, VI, return 1;); \
+ I = get_info((str *)U, 0); \
+ VI = 0; \
+ for (NUM index = 0; \
+ I.value >= 0 && index < str_length((str *) U); \
+ index += I.step, VI++) { \
+ I = get_info((str *)U, index); \
+ *(VA+VI) = I.value; \
+ } \
+ SAFE_MALLOC(cpa, CP, 1, return 1;); \
+ CP->array = VA; \
+ CP->size = VI; \
+ if (add_to_list(names, CP)) { \
+ fleprintf0("Fail to add to names\n"); \
+ return 1; \
+ } \
+ destroy_str((str *)U, 1); \
+ } while (0)
+
+/* Add the rule LEFT -> <right-hand side> to the list `rules'.
+
+   Arguments:
+     LEFT   - the left-hand side passed to new_rule.
+     FORMAT - a string with one letter per right-hand-side symbol,
+              passed to new_tnt_string.  In this file 't' is paired
+              with a (T) argument, 'n' with an (NT) argument and 'p'
+              with a (PT) argument.
+     LEN    - the number of symbols that follow.
+     ...    - the symbols themselves; at least one is required, since
+              __VA_ARGS__ follows a comma.  Use ADD_EMPTY_RULE for an
+              empty right-hand side.
+
+   If new_tnt_string returns NULL, an error is logged, the lists
+   `rules' and `names' are destroyed together with their elements,
+   and `return 1' is executed in the enclosing function.
+
+   Caveats:
+   - The variables tnt_string, rule, rules and names must be in scope
+     at the expansion site; they are not parameters.
+   - The results of new_rule and add_to_list are not checked. */
+#define READ_TNT_STRING(LEFT, FORMAT, LEN, ...) do { \
+ tnt_string = new_tnt_string(FORMAT, LEN, __VA_ARGS__); \
+ if (!tnt_string) { \
+ fleprintf("left = %d, f = %s, l = %d, " \
+ "cannot create tnt string\n", \
+ LEFT, FORMAT, LEN); \
+ map_list(rules, destroy_rule_and_free_all); \
+ destroy_list(rules, 0); \
+ map_list(names, destroy_cpa_and_free_all); \
+ destroy_list(names, 0); \
+ return 1; \
+ } \
+ rule = new_rule(LEFT, tnt_string); \
+ add_to_list(rules, rule); \
+ } while (0)
+
+/* Add a rule whose left-hand side is N and whose right-hand side is
+   a new, empty list to the list `rules'.  The variables rule and
+   rules must be in scope at the expansion site.  The results of
+   new_list, new_rule and add_to_list are not checked. */
+#define ADD_EMPTY_RULE(N) do { \
+ rule = new_rule(N, new_list()); \
+ add_to_list(rules, rule); \
+ } while (0)
int
-main(U_ATTR int argc, U_ATTR char **argv)
+main(int UNUSED argc, char UNUSED **argv)
{
- /* return 77; */
+ List *tnt_string = NULL;
+ Rule *rule = NULL;
+ List *rules = new_list();
+ List *names = new_list();
+ List *preds = new_list();
+
+ char *user_name = NULL;
+ char *raw_name = NULL;
+
+ str *user_name_s = NULL, *raw_name_s = NULL;
+
+ char *name = NULL;
+ utf8* uname = NULL;
+
+ int name_len = 0;
+
+ str_info info = EMPTY_STR_INFO;
+
+ NUM *varray = NULL;
+ NUM vindex = 0;
+ cpa *cpap = NULL;
+
+ SAFE_MALLOC(char, name, 3, return 1;);
+
+ *name = 'B';
+ *(name+1) = 'N';
+ *(name+2) = 'F';
+
+ READ_INTO_CPA(name, uname, 3, info, varray, vindex, cpap);
+
+ SAFE_MALLOC(char, name, 6, return 1;);
+
+ *name = 's';
+ *(name+1) = 'p';
+ *(name+2) = 'a';
+ *(name+3) = 'c';
+ *(name+4) = 'e';
+ *(name+5) = 's';
+
+ READ_INTO_CPA(name, uname, 6, info, varray, vindex, cpap);
+
+ SAFE_MALLOC(char, name, 15, return 1;);
+
+ *name = 'o';
+ *(name+1) = 'p';
+ *(name+2) = 't';
+ *(name+3) = 'i';
+ *(name+4) = 'o';
+ *(name+5) = 'n';
+ *(name+6) = 'a';
+ *(name+7) = 'l';
+ *(name+8) = '_';
+ *(name+9) = 's';
+ *(name+10) = 'p';
+ *(name+11) = 'a';
+ *(name+12) = 'c';
+ *(name+13) = 'e';
+ *(name+14) = 's';
+
+ READ_INTO_CPA(name, uname, 15, info, varray, vindex, cpap);
+
+ SAFE_MALLOC(char, name, 5, return 1;);
+
+ *name = 'e';
+ *(name+1) = 'm';
+ *(name+2) = 'p';
+ *(name+3) = 't';
+ *(name+4) = 'y';
+
+ READ_INTO_CPA(name, uname, 5, info, varray, vindex, cpap);
+
+ SAFE_MALLOC(char, name, 11, return 1;);
+
+ memcpy(name, "notnewlines", 11);
+
+ READ_INTO_CPA(name, uname, 11, info, varray, vindex, cpap);
+
+ SAFE_MALLOC(char, name, 17, return 1;);
+
+ memcpy(name, "predicate_section", 17);
+
+ READ_INTO_CPA(name, uname, 17, info, varray, vindex, cpap);
+
+ SAFE_MALLOC(char, name, 9, return 1;);
+
+ memcpy(name, "predicate", 9);
+
+ READ_INTO_CPA(name, uname, 9, info, varray, vindex, cpap);
+
+ name_len = 3;
+ SAFE_MALLOC(char, name, name_len, return 1;);
+
+ memcpy(name, "ids", name_len);
+
+ READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap);
+
+ name_len = 5;
+ SAFE_MALLOC(char, name, name_len, return 1;);
+
+ memcpy(name, "class", name_len);
+
+ READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap);
+
+ name_len = 14;
+ SAFE_MALLOC(char, name, name_len, return 1;);
+
+ memcpy(name, "positive_class", name_len);
+
+ READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap);
+
+ name_len = 22;
+ SAFE_MALLOC(char, name, name_len, return 1;);
+
+ memcpy(name, "positive_specification", name_len);
+
+ READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap);
+
+ name_len = 18;
+ SAFE_MALLOC(char, name, name_len, return 1;);
+
+ memcpy(name, "class_single_chars", name_len);
+
+ READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap);
+
+ name_len = 17;
+ SAFE_MALLOC(char, name, name_len, return 1;);
+
+ memcpy(name, "class_range_chars", name_len);
+
+ READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap);
+
+ name_len = 13;
+ SAFE_MALLOC(char, name, name_len, return 1;);
+
+ memcpy(name, "rules_section", name_len);
+
+ READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap);
+
+ name_len = 4;
+ SAFE_MALLOC(char, name, name_len, return 1;);
+
+ memcpy(name, "rule", name_len);
+
+ READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap);
+
+ name_len = 9;
+ SAFE_MALLOC(char, name, name_len, return 1;);
+
+ memcpy(name, "rule_name", name_len);
+
+ READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap);
+
+ name_len = 25;
+ SAFE_MALLOC(char, name, name_len, return 1;);
+
+ memcpy(name, "spaces-or-escaped-newline", name_len);
+
+ READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap);
+
+ name_len = 8;
+ SAFE_MALLOC(char, name, name_len, return 1;);
+
+ memcpy(name, "rule_rhs", name_len);
+
+ READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap);
+
+ name_len = 19;
+ SAFE_MALLOC(char, name, name_len, return 1;);
+
+ memcpy(name, "double_string_chars", name_len);
+
+ READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap);
+
+ name_len = 19;
+ SAFE_MALLOC(char, name, name_len, return 1;);
+
+ memcpy(name, "single_string_chars", name_len);
+
+ READ_INTO_CPA(name, uname, name_len, info, varray, vindex, cpap);
+
+ READ_TNT_STRING(0, "tnnn", 4, (T) '#', (NT) 4, (NT) 3, (NT) 0);
+ READ_TNT_STRING(0, "ntttnn", 6, (NT) 5,
+ (T) '-', (T) '-', (T) 0xa, (NT) 3, (NT) 13);
+ READ_TNT_STRING(0, "n", 1, (NT) 13);
+ ADD_EMPTY_RULE(0);
+
+ READ_TNT_STRING(1, "pn", 2, (PT) 0, (NT) 1);
+ READ_TNT_STRING(1, "p", 1, (PT) 0);
+
+ READ_TNT_STRING(2, "pn", 2, (PT) 0, (NT) 2);
+ ADD_EMPTY_RULE(2);
+
+ READ_TNT_STRING(3, "pn", 2, (PT) 0, (NT) 3);
+ READ_TNT_STRING(3, "tn", 2, (T) 0xa, (NT) 3);
+ READ_TNT_STRING(3, "tn", 2, (T) 0xd, (NT) 3);
+ READ_TNT_STRING(3, "ttnn", 4, (T) 0xa, (T) 0x23,
+ (NT) 4, (NT) 3);
+ ADD_EMPTY_RULE(3);
+
+ READ_TNT_STRING(4, "pn", 2, (PT) 3, (NT) 4);
+ ADD_EMPTY_RULE(4);
+
+ READ_TNT_STRING(5, "nntn", 4, (NT) 6, (NT) 3, (T) 0xa, (NT) 5);
+ ADD_EMPTY_RULE(5);
+
+ READ_TNT_STRING(6, "tnttnn", 6, (T) '[', (NT) 7,
+ (T) ']', (T) ':', (NT) 2, (NT) 8);
+
+ READ_TNT_STRING(7, "pn", 2, (PT) 1, (NT) 7);
+ READ_TNT_STRING(7, "p", 1, (PT) 1);
+
+ READ_TNT_STRING(8, "n", 1, (NT) 9);
+ READ_TNT_STRING(8, "tn", 2, (T) '^', (NT) 9);
+
+ READ_TNT_STRING(9, "nn", 2, (NT) 10, (NT) 9);
+ ADD_EMPTY_RULE(9);
+
+ READ_TNT_STRING(10, "n", 1, (NT) 11);
+ READ_TNT_STRING(10, "ntn", 3, (NT) 12, (T) '-', (NT) 12);
+
+ READ_TNT_STRING(11, "p", 1, (PT) 5);
+ READ_TNT_STRING(11, "tp", 2, (T) 0x5c, (PT) 6);
+
+ READ_TNT_STRING(12, "p", 1, (PT) 4);
+ READ_TNT_STRING(12, "tp", 2, (T) 0x5c, (PT) 6);
+
+ READ_TNT_STRING(13, "nntn", 4, (NT) 14, (NT) 3, (T) 0xa, (NT) 13);
+ ADD_EMPTY_RULE(13);
+
+ READ_TNT_STRING(14, "nntnn", 5,
+ (NT) 15, (NT) 2, (T) ':',
+ (NT) 2, (NT) 17);
+
+ READ_TNT_STRING(15, "pn", 2, (PT) 2, (NT) 15);
+ ADD_EMPTY_RULE(15);
+
+ READ_TNT_STRING(16, "n", 1, (NT) 2);
+ READ_TNT_STRING(16, "tt", 2, (T) 0x5c, (T) 0xa);
+
+ READ_TNT_STRING(17, "nnn", 3, (NT) 7, (NT) 16, (NT) 17);
+ READ_TNT_STRING(17, "tntnn", 5,
+ (T) '[', (NT) 7, (T) ']', (NT) 16, (NT) 17);
+ READ_TNT_STRING(17, "tntnn", 5,
+ (T) 0x22, (NT) 18, (T) 0x22, (NT) 16, (NT) 17);
+ READ_TNT_STRING(17, "tntnn", 5,
+ (T) 0x27, (NT) 19, (T) 0x27, (NT) 16, (NT) 17);
+ ADD_EMPTY_RULE(17);
+
+ READ_TNT_STRING(18, "p", 1, (PT) 7);
+ READ_TNT_STRING(18, "tp", 2, (T) '\\', (PT) 6);
+ READ_TNT_STRING(18, "pn", 2, (PT) 7, (NT) 18);
+ READ_TNT_STRING(18, "tpn", 3, (T) '\\', (PT) 6, (NT) 18);
+
+ READ_TNT_STRING(19, "p", 1, (PT) 8);
+ READ_TNT_STRING(19, "tp", 2, (T) '\\', (PT) 6);
+ READ_TNT_STRING(19, "pn", 2, (PT) 8, (NT) 19);
+ READ_TNT_STRING(19, "tpn", 3, (T) '\\', (PT) 6, (NT) 19);
+
+ SAFE_MALLOC(char, user_name, 6, return 1;);
+ memcpy(user_name, "space", 6);
+ user_name_s = (str *) new_utf8(user_name, 6);
+
+ SAFE_MALLOC(char, raw_name, 7, return 1;);
+ memcpy(raw_name, "%x20\\t", 7);
+ raw_name_s = (str *) new_utf8(raw_name, 7);
+
+ if (add_to_list(preds,
+ new_ptd(user_name_s, raw_name_s,
+ dfa_from_ranges
+ (2, (NUM[]) { ' ', ' ', 9, 9 })))) {
+ fleprintf0("Fail to add a predicate\n");
+ return 1;
+ }
+
+ SAFE_MALLOC(char, user_name, 3, return 1;);
+ memcpy(user_name, "id", 3);
+ user_name_s = (str *) new_utf8(user_name, 3);
+
+ SAFE_MALLOC(char, raw_name, 12, return 1;);
+ memcpy(raw_name, "a-zA-Z-_+*@", 12);
+ raw_name_s = (str *) new_utf8(raw_name, 12);
+
+ if (add_to_list(preds,
+ new_ptd(user_name_s, raw_name_s,
+ dfa_from_ranges
+ (6, (NUM[]) {
+ 'a', 'z', 'A', 'Z',
+ '-', '-', '_', '_',
+ '*', '+', '@', '@'
+ })))) {
+ fleprintf0("Fail to add a predicate\n");
+ return 1;
+ }
+
+ SAFE_MALLOC(char, user_name, 15, return 1;);
+ memcpy(user_name, "rule_name_char", 15);
+ user_name_s = (str *) new_utf8(user_name, 15);
+
+ SAFE_MALLOC(char, raw_name, 12, return 1;);
+ memcpy(raw_name, "^[]:%x20\\n#", 12);
+ raw_name_s = (str *) new_utf8(raw_name, 12);
+
+ if (add_to_list(preds,
+ new_ptd(user_name_s, raw_name_s,
+ dfa_from_ranges_neg
+ (6, (NUM[]) {
+ '[', '[', ']', ']',
+ ':', ':', ' ', ' ',
+ 0xa, 0xa, 0x23, 0x23
+ })))) {
+ fleprintf0("Fail to add a predicate\n");
+ return 1;
+ }
+
+ SAFE_MALLOC(char, user_name, 11, return 1;);
+ memcpy(user_name, "notnewline", 11);
+ user_name_s = (str *) new_utf8(user_name, 11);
+
+ SAFE_MALLOC(char, raw_name, 6, return 1;);
+ memcpy(raw_name, "^\\n\\r", 6);
+ raw_name_s = (str *) new_utf8(raw_name, 6);
+
+ if (add_to_list(preds,
+ new_ptd(user_name_s, raw_name_s,
+ dfa_from_ranges_neg
+ (2, (NUM[]) {
+ '\n', '\n', '\r', '\r'
+ })))) {
+ fleprintf0("Fail to add a predicate\n");
+ return 1;
+ }
+
+ SAFE_MALLOC(char, user_name, 17, return 1;);
+ memcpy(user_name, "class_range_char", 17);
+ user_name_s = (str *) new_utf8(user_name, 17);
+
+ SAFE_MALLOC(char, raw_name, 10, return 1;);
+ memcpy(raw_name, "^\\n\\r\\^\\\\", 10);
+ raw_name_s = (str *) new_utf8(raw_name, 10);
+
+ if (add_to_list(preds,
+ new_ptd(user_name_s, raw_name_s,
+ dfa_from_ranges_neg
+ (4, (NUM[]) {
+ '\n', '\n', '\r', '\r',
+ '^', '^', '\\', '\\'
+ })))) {
+ fleprintf0("Fail to add a predicate\n");
+ return 1;
+ }
+
+ SAFE_MALLOC(char, user_name, 18, return 1;);
+ memcpy(user_name, "class_single_char", 18);
+ user_name_s = (str *) new_utf8(user_name, 18);
+
+ SAFE_MALLOC(char, raw_name, 11, return 1;);
+ memcpy(raw_name, "^\\n\\r\\^\\\\-", 11);
+ raw_name_s = (str *) new_utf8(raw_name, 11);
+
+ if (add_to_list(preds,
+ new_ptd(user_name_s, raw_name_s,
+ dfa_from_ranges_neg
+ (5, (NUM[]) {
+ '\n', '\n', '\r', '\r',
+ '^', '^', '\\', '\\',
+ '-', '-'
+ })))) {
+ fleprintf0("Fail to add a predicate\n");
+ return 1;
+ }
+
+ SAFE_MALLOC(char, user_name, 4, return 1;);
+ memcpy(user_name, "any", 4);
+ user_name_s = (str *) new_utf8(user_name, 4);
+
+ SAFE_MALLOC(char, raw_name, 2, return 2;);
+ memcpy(raw_name, " ", 2);
+ raw_name_s = (str *) new_utf8(raw_name, 2);
+
+ if (add_to_list(preds,
+ new_ptd(user_name_s, raw_name_s,
+ dfa_from_ranges
+ (1, (NUM[]) {
+ 0, 0x10ffff
+ })))) {
+ fleprintf0("Fail to add a predicate\n");
+ return 1;
+ }
+
+ SAFE_MALLOC(char, user_name, 19, return 1;);
+ memcpy(user_name, "double_string_char", 19);
+ user_name_s = (str *) new_utf8(user_name, 19);
+
+ SAFE_MALLOC(char, raw_name, 5, return 1;);
+ memcpy(raw_name, "^\"\\n", 5);
+ raw_name_s = (str *) new_utf8(raw_name, 5);
+
+ if (add_to_list(preds,
+ new_ptd(user_name_s, raw_name_s,
+ dfa_from_ranges_neg
+ (2, (NUM[]) {
+ 0x22, 0x22, 0xa, 0xa
+ })))) {
+ fleprintf0("Fail to add a predicate\n");
+ return 1;
+ }
+
+ SAFE_MALLOC(char, user_name, 19, return 1;);
+ memcpy(user_name, "single_string_char", 19);
+ user_name_s = (str *) new_utf8(user_name, 19);
+
+ SAFE_MALLOC(char, raw_name, 5, return 1;);
+ memcpy(raw_name, "^'\\n", 5);
+ raw_name_s = (str *) new_utf8(raw_name, 5);
+
+ if (add_to_list(preds,
+ new_ptd(user_name_s, raw_name_s,
+ dfa_from_ranges_neg
+ (2, (NUM[]) {
+ 0x27, 0x27, 0xa, 0xa
+ })))) {
+ fleprintf0("Fail to add a predicate\n");
+ return 1;
+ }
+
+ Grammar *g = new_grammar();
+
+ build_grammar(g, rules, names, preds);
+
+ print_grammar(g);
+
+ /* utf8 *string = new_utf8("# comment\n\n[P]: a-z\n\n--\n\nS : AB\n"
+ * "# another comment\nA: \"S\"\nB : [P]\n",
+ * 65); */
+
+ char *file_name = "bnf.bnf";
+ char *buffer = NULL;
+
+ SAFE_MALLOC(char, buffer, 1<<9, return 1;);
- char *file_name = "brainfuck.bnf";
- char *buffer = MYALLOC(char, 512);
NUM buffer_size = 0;
if (read_entire_file(file_name, &buffer, &buffer_size)) {
@@ -21,17 +520,80 @@ main(U_ATTR int argc, U_ATTR char **argv)
return 1;
}
- utf8 *s = new_utf8(buffer, buffer_size);
+ /* The size includes the trailing null byte, so we shall exclude
+ it. */
+ buffer_size--;
+
+ str *string = (str *) new_utf8(buffer, buffer_size);
+
+ printf("\nPrinting the input...\n%s\n", get_data(string));
+
+ printf("Input size = %ld", buffer_size);
+
+ TITO;
+
+ TIC;
- Grammar *g = read_grammar_from_bnf((str *) s);
+ Environment *env = cnp_parse(g, string);
- if (g) {
- print_grammar(g);
- destroy_grammar(g, 2);
- destroy_str((str *)s, 1);
+ TOC;
- return 0;
+ if (env) {
+ if (!(env_error_p(env))) {
+ BOOL result = bsr_lookup
+ (env_bsrp(env), 0, 0, str_length((str *) string));
+
+ if (result) {
+ printf("\nSuccessfully parsed the input!\n");
+ } else {
+ printf("\nThe input does not parse!\n");
+ }
+
+ printf("\nAll BSRs follow:\n\n");
+ if (argc == 1)
+ bsr_print(env_bsrp(env), env_grammar(env), 1);
+ } else {
+ printf("There are errors!\n");
+ }
+
+ destroy_env(env);
}
- return 1;
+ destroy_grammar(g, 1);
+
+ destroy_list(rules, 1);
+
+ destroy_str(string, 1);
+
+ return 0;
}
+
+
+/* archives */
+
+/* char *file_name = "brainfuck.bnf";
+ * char *buffer = NULL;
+ *
+ * SAFE_MALLOC(char, buffer, 1<<9, return 1;);
+ *
+ * NUM buffer_size = 0;
+ *
+ * if (read_entire_file(file_name, &buffer, &buffer_size)) {
+ * fleprintf("Cannot read file %s", file_name);
+ * free(buffer);
+ * return 1;
+ * }
+ *
+ * utf8 *s = new_utf8(buffer, buffer_size);
+ *
+ * Grammar *g = read_grammar_from_bnf((str *) s);
+ *
+ * if (g) {
+ * print_grammar(g);
+ * destroy_grammar(g, 2);
+ * destroy_str((str *)s, 1);
+ *
+ * return 0;
+ * }
+ *
+ * return 1; */