From aaa12504c6095b2cdfa213a3d4b269bbd5e7038a Mon Sep 17 00:00:00 2001
From: JSDurand <mmemmew@gmail.com>
Date: Sun, 6 Feb 2022 23:35:42 +0800
Subject: dfa: add the type of "ranged dfas"

Strictly speaking, they are not DFAs at all.  They contain ranges
that determine whether or not a character belongs to the
specified predicate terminal.
---
 src/dfa.h | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

(limited to 'src/dfa.h')

diff --git a/src/dfa.h b/src/dfa.h
index 9116999..9d76a22 100644
--- a/src/dfa.h
+++ b/src/dfa.h
@@ -22,7 +22,9 @@ enum {
 
 /* dfa type */
 
-typedef BOOL (* special_dfa) (const NUM code);
+/* typedef BOOL (* special_dfa) (const NUM code); */
+
+typedef struct special_dfa_s special_dfa;
 
 typedef struct dfa_s dfa;
 
@@ -45,6 +47,20 @@ dfa *dfa_from_bytes_both(int sequence_size,
                          int neg_sequence_size,
                          CCR_MOD(NUM *) negdata);
 
+/* 2*LEN is the length of DATA.  The number at index 2*i is the start
+   of the i-th range and the number at index 2*i+1 is the end of the
+   i-th range.
+
+   On error, return NULL. */
+dfa *dfa_from_ranges(int len, CCR_MOD(NUM *) data);
+
+/* mutatis mutandis */
+dfa *dfa_from_ranges_neg(int len, CCR_MOD(NUM *) data);
+
+/* mutatis mutandis */
+dfa *dfa_from_ranges_both(int plen, CCR_MOD(NUM *) pdata,
+                          int nlen, CCR_MOD(NUM *) ndata);
+
 /* TODO: Reject character bytes from a given DFA. */
 
 /* NOTE: Add all unicode valid points to a DFA, so that we can
@@ -57,11 +73,6 @@ dfa *dfa_from_bytes_both(int sequence_size,
 
 /* TODO: Construct some basic frequently used character classes. */
 
-dfa *dfa_from_func(special_dfa func);
-
-/* return a new instance of the any class */
-dfa *dfa_any(void);
-
 BOOL run_dfa(CCR_MOD(dfa *) table, const NUM code);
 
 #endif
-- 
cgit v1.2.3-18-g5258