summaryrefslogtreecommitdiff
path: root/src/dfa.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/dfa.h')
-rw-r--r--src/dfa.h64
1 files changed, 64 insertions, 0 deletions
diff --git a/src/dfa.h b/src/dfa.h
new file mode 100644
index 0000000..a22409f
--- /dev/null
+++ b/src/dfa.h
@@ -0,0 +1,64 @@
+#include "util.h"
+#ifndef DFA_H
+#define DFA_H
+
+/* See the comments at the beginning of grammar.h for some backgrounds
+ about this file. */
+
+/* See the following Wikipedia link for details on Run-Length
+ Encoding. <https://en.wikipedia.org/wiki/Run-length_encoding> */
+
+/* Maximal character bytes value */
+
+enum { MAX_CHAR_BYTES_NUM = 256 };
+
+/* Hard-coded state numbers */
+
+enum {
+ DFA_STATE_UNKNOWN = -1,
+ DFA_STATE_ACCEPT = -2,
+ DFA_STATE_REJECT = -3,
+};
+
+/* dfa type */
+
+typedef BOOL (* special_dfa) (const NUM code);
+
+typedef struct dfa_s dfa;
+
+typedef struct compressed_table_s compressed_table;
+
+dfa *new_dfa();
+
+void destroy_dfa(dfa *table);
+
+void print_dfa(const dfa * const restrict table);
+
+dfa *dfa_from_bytes(int sequence_size,
+ const NUM * const restrict data);
+
+dfa *dfa_from_bytes_neg(int sequence_size,
+ const NUM * const restrict data);
+
+dfa *dfa_from_bytes_both(int sequence_size,
+ const NUM * const restrict data,
+ int neg_sequence_size,
+ const NUM * const restrict negdata);
+
+/* TODO: Reject character bytes from a given DFA. */
+
+/* NOTE: Add all unicode valid points to a DFA, so that we can
+ represent the ANY class.
+
+ After having done so, this costs around 16K memory. This is not so
+ satisfactory, as all these memory are just to serve as a ANY
+ character class, which is too excessive. So I extend the DFA by a
+ special type. */
+
+/* TODO: Construct some basic frequently used character classes. */
+
+inline BOOL dfa_any_fun(const NUM UNUSED code) { return 1; }
+
+BOOL run_dfa(const dfa * const restrict table, const NUM code);
+
+#endif