1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
|
#include "util.h"
#ifndef DFA_H
#define DFA_H
/* See the comments at the beginning of grammar.h for some background
on this file. */
/* See the following Wikipedia link for details on Run-Length
Encoding. <https://en.wikipedia.org/wiki/Run-length_encoding> */
/* Maximal character bytes value.  This is an enumeration constant
rather than a macro, so it is an integer constant expression with
the value 256.
NOTE(review): 256 is presumably the number of distinct values a
single byte can take; how the DFA code uses it is not visible in
this header -- confirm in dfa.c. */
enum { MAX_CHAR_BYTES_NUM = 256 };
/* Hard-coded state numbers.  All three are distinct negative
values.
NOTE(review): presumably ordinary states are numbered from zero
upwards, so these sentinels cannot collide with them; the names
suggest "state not known", "accepting" and "rejecting", but the
exact use is decided by the implementation -- confirm in dfa.c. */
enum {
DFA_STATE_UNKNOWN = -1,
DFA_STATE_ACCEPT = -2,
DFA_STATE_REJECT = -3,
};
/* dfa type */
/* A special_dfa is a pointer to a function that takes one NUM code
and returns a BOOL.  BOOL and NUM are not defined in this file
(presumably they come from util.h). */
typedef BOOL (* special_dfa) (const NUM code);
/* Only the tag struct dfa_s is declared here, not its members, so
code that sees just this header can hold and pass dfa pointers but
cannot access the fields. */
typedef struct dfa_s dfa;
/* Likewise only the tag is declared for the compressed table.  None
of the functions declared in this header takes or returns this
type. */
typedef struct compressed_table_s compressed_table;
/* Return a pointer to a new dfa.  The function takes no arguments.
The parameter list is spelled `(void)' so that this declaration is a
real prototype: with empty parentheses the compiler (before C23)
would accept calls with any number of arguments without a
diagnostic.
NOTE(review): presumably the result is to be released with
destroy_dfa, and the failure behavior (e.g. a NULL return) is
decided by the implementation -- confirm in dfa.c. */
dfa *new_dfa(void);
/* Destroy TABLE.  Nothing is returned.
NOTE(review): presumably this releases a dfa obtained from new_dfa
or from one of the dfa_from_* / dfa_any constructors; whether a NULL
TABLE is accepted cannot be told from this header -- confirm in
dfa.c. */
void destroy_dfa(dfa *table);
/* Print TABLE.  Nothing is returned.
CCR_MOD is a macro defined outside this file.
NOTE(review): presumably CCR_MOD adds const/restrict qualifiers, so
TABLE is not modified; the output destination and format are decided
by the implementation -- confirm in util.h and dfa.c. */
void print_dfa(CCR_MOD(dfa *) table);
/* Return a pointer to a dfa made from the NUM sequence DATA, where
SEQUENCE_SIZE describes the size of that sequence.
NOTE(review): presumably DATA holds SEQUENCE_SIZE elements that
describe the character bytes the resulting dfa accepts, and the
caller releases the result with destroy_dfa; the exact layout of
DATA and the behavior on failure are not visible here -- confirm in
dfa.c. */
dfa *dfa_from_bytes(int sequence_size,
CCR_MOD(NUM *) data);
/* Return a pointer to a dfa made from the NUM sequence DATA.  The
parameters are the same as those of dfa_from_bytes.
NOTE(review): the "_neg" suffix suggests that the resulting dfa
matches the codes NOT described by DATA (a negated class); this is
an inference from the name only -- confirm in dfa.c. */
dfa *dfa_from_bytes_neg(int sequence_size,
CCR_MOD(NUM *) data);
/* Return a pointer to a dfa made from two NUM sequences: DATA, whose
size is described by SEQUENCE_SIZE, and NEGDATA, whose size is
described by NEG_SEQUENCE_SIZE.
NOTE(review): presumably DATA plays the role it has in
dfa_from_bytes and NEGDATA the role it has in dfa_from_bytes_neg, so
that one dfa carries both a positive and a negative part; how the
two are combined is not visible here -- confirm in dfa.c. */
dfa *dfa_from_bytes_both(int sequence_size,
CCR_MOD(NUM *) data,
int neg_sequence_size,
CCR_MOD(NUM *) negdata);
/* TODO: Reject character bytes from a given DFA. */
/* NOTE: One could add all valid Unicode code points to a DFA, so that
we can represent the ANY class.
Doing so costs around 16K of memory.  This is not satisfactory, as
all of that memory just serves as an ANY character class, which is
excessive.  So I extend the DFA by a special type. */
/* TODO: Construct some basic frequently used character classes. */
/* Return a pointer to a dfa made from FUNC.  FUNC is a special_dfa,
that is, a pointer to a function taking one NUM code and returning a
BOOL.
NOTE(review): presumably the result is a dfa of the "special type"
mentioned in the NOTE above, which answers by calling FUNC instead
of storing a table -- confirm in dfa.c. */
dfa *dfa_from_func(special_dfa func);
/* Return a new instance of the ANY class.  The function takes no
arguments.
NOTE(review): presumably the caller owns the returned dfa and
releases it with destroy_dfa -- confirm in dfa.c. */
dfa *dfa_any(void);
/* Run TABLE on the single code CODE and return a BOOL.
NOTE(review): presumably the result is true exactly when TABLE
accepts CODE, and TABLE is not modified (it is passed through the
CCR_MOD qualifier macro, which is defined outside this file) --
confirm in dfa.c. */
BOOL run_dfa(CCR_MOD(dfa *) table, const NUM code);
#endif
|