#ifndef DFA_H
#define DFA_H

/* Kept inside the include guard so that a repeated inclusion of this
   header does not process util.h again. */
#include "util.h"

/* See the comments at the beginning of grammar.h for some background
   about this file. */

/* See the following Wikipedia link for details on Run-Length
   Encoding.

   https://en.wikipedia.org/wiki/Run-length_encoding */

/* Maximal character bytes value.

   NOTE(review): 256 is presumably the number of distinct values a
   single byte can take -- confirm against the implementation. */
enum { MAX_CHAR_BYTES_NUM = 256 };

/* Hard-coded state numbers.  They are all negative.

   NOTE(review): presumably this keeps them distinct from ordinary
   (non-negative) state indices -- confirm against the
   implementation. */
enum {
  DFA_STATE_UNKNOWN = -1,
  DFA_STATE_ACCEPT  = -2,
  DFA_STATE_REJECT  = -3,
};

/* dfa type */

/* typedef BOOL (* special_dfa) (const NUM code); */

/* The following types are opaque here: only the struct tags are
   declared in this header, so users can only handle them through
   pointers. */
typedef struct special_dfa_s special_dfa;

typedef struct dfa_s dfa;

typedef struct compressed_table_s compressed_table;

/* Return a pointer to a new DFA.

   The parameter list is spelled (void) so that this declaration is a
   real prototype: before C23 an empty parameter list leaves the
   parameters unspecified and turns off argument checking.

   NOTE(review): the result is presumably to be released with
   destroy_dfa -- confirm against the implementation. */
dfa *new_dfa(void);

/* Destroy TABLE.

   NOTE(review): whether a NULL TABLE is accepted is not visible from
   this header. */
void destroy_dfa(dfa *table);

/* Print TABLE.

   NOTE(review): the output stream and format are determined by the
   implementation. */
void print_dfa(CCR_MOD(dfa *) table);

/* Build a DFA from DATA and SEQUENCE_SIZE.

   NOTE(review): SEQUENCE_SIZE is presumably the number of elements of
   DATA; the exact meaning of the elements is not visible from this
   header -- confirm against the implementation. */
dfa *dfa_from_bytes(int sequence_size, CCR_MOD(NUM *) data);

/* Same parameters as dfa_from_bytes.

   NOTE(review): the "neg" suffix presumably means that DATA describes
   what the DFA should NOT match -- confirm against the
   implementation. */
dfa *dfa_from_bytes_neg(int sequence_size, CCR_MOD(NUM *) data);

/* Takes both a (SEQUENCE_SIZE, DATA) pair and a (NEG_SEQUENCE_SIZE,
   NEGDATA) pair.

   NOTE(review): presumably combines dfa_from_bytes and
   dfa_from_bytes_neg -- confirm against the implementation. */
dfa *dfa_from_bytes_both(int sequence_size, CCR_MOD(NUM *) data,
                         int neg_sequence_size,
                         CCR_MOD(NUM *) negdata);

/* 2*LEN is the length of DATA.  The number at index 2*i is the start
   of the i-th range and the number at index 2*i+1 is the end of the
   i-th range.

   On error return NULL. */
dfa *dfa_from_ranges(int len, CCR_MOD(NUM *) data);

/* mutatis mutandis */
dfa *dfa_from_ranges_neg(int len, CCR_MOD(NUM *) data);

/* mutatis mutandis */
dfa *dfa_from_ranges_both(int plen, CCR_MOD(NUM *) pdata,
                          int nlen, CCR_MOD(NUM *) ndata);

/* TODO: Reject character bytes from a given DFA. */

/* NOTE: Adding all valid Unicode code points to a DFA, so that we can
   represent the ANY class, costs around 16K of memory.  This is not
   satisfactory, as all this memory would just serve as an ANY
   character class, which is too excessive.  So I extend the DFA by a
   special type. */

/* TODO: Construct some basic frequently used character classes. */

/* Run the DFA TABLE on CODE and return the result as a BOOL.

   NOTE(review): presumably the result is true exactly when CODE is
   accepted by TABLE -- confirm against the implementation. */
BOOL run_dfa(CCR_MOD(dfa *) table, const NUM code);

#endif