src/dfa.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64

#include "util.h"
#ifndef DFA_H
#define DFA_H

/* See the comments at the beginning of grammar.h for some background
   on this file. */

/* See the following Wikipedia link for details on Run-Length
   Encoding.  <https://en.wikipedia.org/wiki/Run-length_encoding> */

/* Upper bound related to character byte values.  NOTE(review): 256
   is presumably the number of distinct values a single byte can
   take; confirm against the users of this constant. */

enum {
  MAX_CHAR_BYTES_NUM = 256
};

/* Reserved, hard-coded state numbers.  All of them are negative.
   NOTE(review): presumably this keeps them from colliding with the
   numbers of ordinary states; confirm in the implementation. */

enum {
  DFA_STATE_REJECT = -3,
  DFA_STATE_ACCEPT = -2,
  DFA_STATE_UNKNOWN = -1
};

/* DFA-related types.

   special_dfa is a pointer to a function that takes one code (NUM)
   and returns a BOOL.  dfa_any_fun in this header has exactly this
   signature.  NOTE(review): presumably this is the "special type" of
   DFA mentioned in the NOTE further down this file, i.e. a DFA given
   by a predicate instead of a table; confirm in the implementation. */

typedef BOOL (* special_dfa) (const NUM code);

/* The DFA type.  Only the struct tag is named at this point; the
   functions declared in this header all handle it through
   pointers. */
typedef struct dfa_s dfa;

/* Only the struct tag is named at this point, and no declaration
   visible in this header uses the type.  NOTE(review): presumably
   this is the run-length encoded table hinted at by the comment near
   the top of this file; confirm in the implementation. */
typedef struct compressed_table_s compressed_table;

/* Return a pointer to a new DFA.  NOTE(review): the initial contents
   of the result, the value returned on failure, and who has to
   release it are decided by the definition; presumably destroy_dfa
   is the matching release function -- confirm.

   The parameter list is spelled (void) on purpose: before C23 an
   empty list declares a function with unspecified parameters, so a
   call that wrongly passes arguments would not be diagnosed. */
dfa *new_dfa(void);

/* Destroy TABLE.  Returns nothing.  NOTE(review): presumably this
   releases all memory owned by a DFA obtained from new_dfa or the
   dfa_from_bytes* functions, after which TABLE must not be used;
   whether a NULL TABLE is accepted is not visible here -- confirm. */
void destroy_dfa(dfa *table);

/* Print TABLE.  Returns nothing.  NOTE(review): the output stream
   and the format are chosen by the definition and are not visible
   from this header. */
void print_dfa(CCR_MOD(dfa *) table);

/* Return a pointer to a DFA made from DATA.  NOTE(review):
   SEQUENCE_SIZE is presumably the number of elements in DATA; what
   each element means (single byte, range bound, ...) and what is
   returned on failure are not visible from this header -- confirm in
   the implementation. */
dfa *dfa_from_bytes(int sequence_size,
                    CCR_MOD(NUM *) data);

/* Takes the same parameters as dfa_from_bytes and also returns a
   pointer to a DFA.  NOTE(review): judging by the name, DATA
   presumably describes what the resulting DFA should NOT match;
   confirm in the implementation. */
dfa *dfa_from_bytes_neg(int sequence_size,
                        CCR_MOD(NUM *) data);

/* Return a pointer to a DFA made from two sequences: DATA, paired
   with SEQUENCE_SIZE, and NEGDATA, paired with NEG_SEQUENCE_SIZE.
   NOTE(review): presumably DATA plays the role it has in
   dfa_from_bytes and NEGDATA the role it has in dfa_from_bytes_neg,
   with each size giving the number of elements in its sequence;
   confirm in the implementation. */
dfa *dfa_from_bytes_both(int sequence_size,
                         CCR_MOD(NUM *) data,
                         int neg_sequence_size,
                         CCR_MOD(NUM *) negdata);

/* TODO: Reject character bytes from a given DFA. */

/* NOTE: One way to represent the ANY class is to add all valid
   Unicode code points to a DFA.

   Doing so costs around 16K of memory.  This is unsatisfactory, as
   all of that memory serves only to represent the ANY character
   class, which is excessive.  So I extended the DFA with a special
   type. */

/* TODO: Construct some basic frequently used character classes. */

/* A function with the special_dfa signature that accepts everything:
   CODE is not looked at and the result is always 1.

   NOTE(review): this is a plain `inline' definition in a header.
   Under the C99 rules such a definition does not provide an external
   definition, so some translation unit has to supply one (for
   example through an `extern inline' declaration) in case a call is
   not inlined or the address of the function is taken.  Confirm that
   the implementation file does so. */
inline BOOL
dfa_any_fun(const NUM UNUSED code)
{
  return 1;
}

/* Run TABLE on CODE and return a BOOL.  NOTE(review): presumably a
   non-zero result means that TABLE accepts CODE; confirm in the
   implementation. */
BOOL run_dfa(CCR_MOD(dfa *) table, const NUM code);

#endif