summaryrefslogtreecommitdiff
path: root/src/dfa.h
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2022-01-11 09:58:43 +0800
committerJSDurand <mmemmew@gmail.com>2022-01-11 09:58:43 +0800
commit91016bd3855375796fd1eabd501ebc12491f2655 (patch)
treec4a608966b686c9639a90b92c31c00632532fac8 /src/dfa.h
parent949888ad5d0cd0f9f9a87f9938a632b3e32df051 (diff)
Add the framework for character classes.
Now we have the potential to recognize character classes. But the most important task for us now is to experiment with ((B)RN)GLR algorithms, so we leave the character classes at the present state for a moment.
Diffstat (limited to 'src/dfa.h')
-rw-r--r--src/dfa.h64
1 files changed, 64 insertions, 0 deletions
diff --git a/src/dfa.h b/src/dfa.h
new file mode 100644
index 0000000..a22409f
--- /dev/null
+++ b/src/dfa.h
@@ -0,0 +1,64 @@
+#include "util.h"
+#ifndef DFA_H
+#define DFA_H
+
+/* See the comments at the beginning of grammar.h for some backgrounds
+ about this file. */
+
+/* See the following Wikipedia link for details on Run-Length
+ Encoding. <https://en.wikipedia.org/wiki/Run-length_encoding> */
+
+/* Maximal character bytes value */
+
+enum { MAX_CHAR_BYTES_NUM = 256 };
+
+/* Hard-coded state numbers */
+
+enum {
+ DFA_STATE_UNKNOWN = -1,
+ DFA_STATE_ACCEPT = -2,
+ DFA_STATE_REJECT = -3,
+};
+
+/* dfa type */
+
+typedef BOOL (* special_dfa) (const NUM code);
+
+typedef struct dfa_s dfa;
+
+typedef struct compressed_table_s compressed_table;
+
+dfa *new_dfa();
+
+void destroy_dfa(dfa *table);
+
+void print_dfa(const dfa * const restrict table);
+
+dfa *dfa_from_bytes(int sequence_size,
+ const NUM * const restrict data);
+
+dfa *dfa_from_bytes_neg(int sequence_size,
+ const NUM * const restrict data);
+
+dfa *dfa_from_bytes_both(int sequence_size,
+ const NUM * const restrict data,
+ int neg_sequence_size,
+ const NUM * const restrict negdata);
+
+/* TODO: Reject character bytes from a given DFA. */
+
+/* NOTE: Add all unicode valid points to a DFA, so that we can
+ represent the ANY class.
+
+ After having done so, this costs around 16K memory. This is not so
+ satisfactory, as all these memory are just to serve as a ANY
+ character class, which is too excessive. So I extend the DFA by a
+ special type. */
+
+/* TODO: Construct some basic frequently used character classes. */
+
+inline BOOL dfa_any_fun(const NUM UNUSED code) { return 1; }
+
+BOOL run_dfa(const dfa * const restrict table, const NUM code);
+
+#endif