diff options
Diffstat (limited to 'src/dfa.h')
-rw-r--r-- | src/dfa.h | 64 |
1 files changed, 64 insertions, 0 deletions
diff --git a/src/dfa.h b/src/dfa.h new file mode 100644 index 0000000..a22409f --- /dev/null +++ b/src/dfa.h @@ -0,0 +1,64 @@ +#include "util.h" +#ifndef DFA_H +#define DFA_H + +/* See the comments at the beginning of grammar.h for some backgrounds + about this file. */ + +/* See the following Wikipedia link for details on Run-Length + Encoding. <https://en.wikipedia.org/wiki/Run-length_encoding> */ + +/* Maximal character bytes value */ + +enum { MAX_CHAR_BYTES_NUM = 256 }; + +/* Hard-coded state numbers */ + +enum { + DFA_STATE_UNKNOWN = -1, + DFA_STATE_ACCEPT = -2, + DFA_STATE_REJECT = -3, +}; + +/* dfa type */ + +typedef BOOL (* special_dfa) (const NUM code); + +typedef struct dfa_s dfa; + +typedef struct compressed_table_s compressed_table; + +dfa *new_dfa(); + +void destroy_dfa(dfa *table); + +void print_dfa(const dfa * const restrict table); + +dfa *dfa_from_bytes(int sequence_size, + const NUM * const restrict data); + +dfa *dfa_from_bytes_neg(int sequence_size, + const NUM * const restrict data); + +dfa *dfa_from_bytes_both(int sequence_size, + const NUM * const restrict data, + int neg_sequence_size, + const NUM * const restrict negdata); + +/* TODO: Reject character bytes from a given DFA. */ + +/* NOTE: Add all unicode valid points to a DFA, so that we can + represent the ANY class. + + After having done so, this costs around 16K memory. This is not so + satisfactory, as all these memory are just to serve as a ANY + character class, which is too excessive. So I extend the DFA by a + special type. */ + +/* TODO: Construct some basic frequently used character classes. */ + +inline BOOL dfa_any_fun(const NUM UNUSED code) { return 1; } + +BOOL run_dfa(const dfa * const restrict table, const NUM code); + +#endif |