From aaa12504c6095b2cdfa213a3d4b269bbd5e7038a Mon Sep 17 00:00:00 2001 From: JSDurand Date: Sun, 6 Feb 2022 23:35:42 +0800 Subject: dfa: add the type of "ranged dfas" Strictly speaking, they are not DFAs at all. They contain ranges which can determine whether or not a character belongs to the specified predicate terminal. --- src/dfa.h | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'src/dfa.h') diff --git a/src/dfa.h b/src/dfa.h index 9116999..9d76a22 100644 --- a/src/dfa.h +++ b/src/dfa.h @@ -22,7 +22,9 @@ enum { /* dfa type */ -typedef BOOL (* special_dfa) (const NUM code); +/* typedef BOOL (* special_dfa) (const NUM code); */ + +typedef struct special_dfa_s special_dfa; typedef struct dfa_s dfa; @@ -45,6 +47,20 @@ dfa *dfa_from_bytes_both(int sequence_size, int neg_sequence_size, CCR_MOD(NUM *) negdata); +/* 2*LEN is the length of DATA. The number at index 2*i is the start + of the i-th range and the number at index 2*i+1 is the end of the + i-th range. + + On error return NULL. */ +dfa *dfa_from_ranges(int len, CCR_MOD(NUM *) data); + +/* mutatis mutandis */ +dfa *dfa_from_ranges_neg(int len, CCR_MOD(NUM *) data); + +/* mutatis mutandis */ +dfa *dfa_from_ranges_both(int plen, CCR_MOD(NUM *) pdata, + int nlen, CCR_MOD(NUM *) ndata); + /* TODO: Reject character bytes from a given DFA. */ /* NOTE: Add all unicode valid points to a DFA, so that we can @@ -57,11 +73,6 @@ dfa *dfa_from_bytes_both(int sequence_size, /* TODO: Construct some basic frequently used character classes. */ -dfa *dfa_from_func(special_dfa func); - -/* return a new instance of the any class */ -dfa *dfa_any(void); - BOOL run_dfa(CCR_MOD(dfa *) table, const NUM code); #endif -- cgit v1.2.3-18-g5258