From 91016bd3855375796fd1eabd501ebc12491f2655 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Tue, 11 Jan 2022 09:58:43 +0800 Subject: Add the framework for character classes. Now we have the potential to recognize character classes. But the most important task for us now is to experiment with ((B)RN)GLR algorithms, so we leave the character classes at the present state for a moment. --- src/test/check_dfa.c | 108 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 src/test/check_dfa.c (limited to 'src/test/check_dfa.c') diff --git a/src/test/check_dfa.c b/src/test/check_dfa.c new file mode 100644 index 0000000..a728fb8 --- /dev/null +++ b/src/test/check_dfa.c @@ -0,0 +1,108 @@ +#include "../utf8.h" +#include "../str.h" +#include +#include "../util.h" +#include "../dfa.h" + +#define ADD_RANGE(X, Y) do { \ + for (int i = 0; i + (X) <= (Y); i++) \ + *(v+vlen++) = (NUM) i + (X); \ + } while (0) + +int +main(int UNUSED argc, char ** UNUSED argv) +{ + UNUM vlen = 0; + NUM *v = MYALLOC(NUM, 1<<21); + + ADD_RANGE(0x0020, 0x007f); + ADD_RANGE(0x2580, 0x259f); + ADD_RANGE(0x00a0, 0x00ff); + ADD_RANGE(0x25a0, 0x25ff); + ADD_RANGE(0x0100, 0x017f); + ADD_RANGE(0x2600, 0x26ff); + ADD_RANGE(0x0180, 0x024f); + ADD_RANGE(0x2700, 0x27bf); + ADD_RANGE(0x0250, 0x02af); + ADD_RANGE(0x27c0, 0x27ef); + ADD_RANGE(0x02b0, 0x02ff); + ADD_RANGE(0x27f0, 0x27ff); + ADD_RANGE(0x0300, 0x036f); + ADD_RANGE(0x2800, 0x28ff); + ADD_RANGE(0x0370, 0x03ff); + ADD_RANGE(0x2900, 0x297f); + ADD_RANGE(0x0400, 0x04ff); + ADD_RANGE(0x2980, 0x29ff); + ADD_RANGE(0x0500, 0x052f); + ADD_RANGE(0x2a00, 0x2aff); + ADD_RANGE(0x0530, 0x058f); + ADD_RANGE(0x2b00, 0x2bff); + ADD_RANGE(0x0590, 0x05ff); + ADD_RANGE(0x2e80, 0x2eff); + ADD_RANGE(0x0600, 0x06ff); + ADD_RANGE(0x2f00, 0x2fdf); + ADD_RANGE(0x0700, 0x074f); + ADD_RANGE(0x2ff0, 0x2fff); + ADD_RANGE(0x0780, 0x07bf); + ADD_RANGE(0x3000, 0x303f); + ADD_RANGE(0x0900, 0x097f); + ADD_RANGE(0x3040, 0x309f); + ADD_RANGE(0x0980, 0x09ff); + ADD_RANGE(0x30a0, 0x30ff); + ADD_RANGE(0x0a00, 0x0a7f); + ADD_RANGE(0x3100, 0x312f); + ADD_RANGE(0x0a80, 0x0aff); + ADD_RANGE(0x3130, 0x318f); + ADD_RANGE(0x0b00, 0x0b7f); + ADD_RANGE(0x3190, 0x319f); + ADD_RANGE(0x0b80, 0x0bff); + ADD_RANGE(0x31a0, 0x31bf); + ADD_RANGE(0x0c00, 0x0c7f); + ADD_RANGE(0x31f0, 0x31ff); + ADD_RANGE(0x0c80, 0x0cff); + ADD_RANGE(0x3200, 0x32ff); + ADD_RANGE(0x0d00, 0x0d7f); + ADD_RANGE(0x3300, 0x33ff); + ADD_RANGE(0x0d80, 0x0dff); + ADD_RANGE(0x3400, 0x4dbf); + ADD_RANGE(0x0e00, 0x0e7f); + ADD_RANGE(0x4dc0, 0x4dff); + ADD_RANGE(0x0e80, 0x0eff); + ADD_RANGE(0x4e00, 0x9fff); + ADD_RANGE(0x0f00, 0x0fff); + + /* for (int i = 0; i < 190; i++) + * printf("v [%d] = %d\n", i, *(v+i)); */ + + dfa *dfap = dfa_from_bytes(vlen, v); + + free(v); + + if (dfap == NULL) + printf("returned null\n"); + else + print_dfa(dfap); + + /* char *s = MYALLOC(char, 5); + * str *strp = new_str(s, 5); + * + * for (int i = 0; i < 3; i++) { + * printf("\n\n"); + * + * encode(v[i], strp); + * + * for (int j = 0; j < str_length(strp)-1;) + * printf("%d, ", *(s+j++)+(1<<8)); + * + * printf("%d\n", *(s+str_length(strp)-1)+(1<<8)); + * } + * + * destroy_str(strp, 1); */ + + /* BOOL result = run_dfa(dfap, 32239); + * + * printf("the result = %d\n", result); */ + + if (dfap) destroy_dfa(dfap); + return 0; +} -- cgit v1.2.3-18-g5258