From aaa12504c6095b2cdfa213a3d4b269bbd5e7038a Mon Sep 17 00:00:00 2001 From: JSDurand Date: Sun, 6 Feb 2022 23:35:42 +0800 Subject: dfa: add the type of "ranged dfas" Strictly speaking, they are not DFA's at all. They contain ranges which can determine whether or not a character belongs to the specified predicate terminal. --- src/dfa.c | 197 +++++++++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 162 insertions(+), 35 deletions(-) (limited to 'src/dfa.c') diff --git a/src/dfa.c b/src/dfa.c index 9da201d..2edda77 100644 --- a/src/dfa.c +++ b/src/dfa.c @@ -11,6 +11,17 @@ *(X->children+macro_index++) = DFA_STATE_UNKNOWN; \ } while (0) +struct range_s { + NUM beg, end; +}; + +struct special_dfa_s { + NUM len; + struct range_s *ranges; + /* This is only used when this is a mixed special dfa */ + NUM boundary; +}; + /* The complete, uncompressed table is represented as an array of arrays, each array of which has size MAX_CHAR_BYTES_NUM. The compressed table is represented as run-lengths. The ROW_N is the @@ -32,25 +43,24 @@ enum DFA_TYPE { DFA_TYPE_NEGATIVE, DFA_TYPE_BOTH, DFA_TYPE_SPECIAL, + DFA_TYPE_SPECIAL_NEG, + DFA_TYPE_SPECIAL_BOTH, }; struct dfa_s { enum DFA_TYPE type; union { compressed_table table; - special_dfa fn; + special_dfa sp; } data; }; dfa * new_dfa() { - dfa *dfap = MYALLOC(dfa, 1); - - dfap->type = 0; + dfa *dfap = NULL; - dfap->data.table.row_n = 0; - dfap->data.table.table = NULL; + SAFE_CALLOC(dfa, dfap, 1, return NULL;); return dfap; } @@ -58,8 +68,12 @@ new_dfa() void destroy_dfa(dfa *table) { - /* function pointers need no destructors */ - if (table->type == DFA_TYPE_SPECIAL) goto free_last; + if (table->type == DFA_TYPE_SPECIAL || + table->type == DFA_TYPE_SPECIAL_NEG || + table->type == DFA_TYPE_SPECIAL_BOTH) { + free(table->data.sp.ranges); + goto free_last; + } for (int i = 0; i < table->data.table.row_n;) free(*(table->data.table.table+i++)); @@ -123,21 +137,40 @@ print_dfa(const dfa * const restrict table) switch (table->type) { case DFA_TYPE_POSITIVE: printf("Printing a positive table:\n"); + goto print_table; break; case DFA_TYPE_NEGATIVE: printf("Printing a negative table:\n"); break; case DFA_TYPE_BOTH: printf("Printing a mixed table:\n"); + goto print_table; + break; + case DFA_TYPE_SPECIAL: + printf("Printing a table containing positive ranges:\n"); + break; + case DFA_TYPE_SPECIAL_NEG: + printf("Printing a table containing negative ranges:\n"); + break; + case DFA_TYPE_SPECIAL_BOTH: + printf("Printing a table containing mixed ranges:\n"); break; default: - printf("A special table cannot be printed\n"); + printf("Table of unknown type\n"); return; break; } - printf("Printing a table:\n"); + for (int i = 0; i < table->data.sp.len; i++) { + printf("Range %d: %ld to %ld\n", i, + (table->data.sp.ranges+i)->beg, + (table->data.sp.ranges+i)->end); + } + printf("\n"); + + return; + print_table: for (int i = 0; i < table->data.table.row_n; i++) { printf("Row %d: ", i); int size = 0; @@ -232,7 +265,9 @@ dfa_from_bytes(int sequence_size, /* For debugging */ +#ifdef DEBUG /* print_tries(states); */ +#endif /* construct DFA from the trie */ @@ -610,22 +645,135 @@ dfa *dfa_from_bytes_both(int sequence_size, return dfap; } +dfa * +dfa_from_ranges(int len, CCR_MOD(NUM *) data) +{ + struct range_s *rp = NULL; + SAFE_MALLOC(struct range_s, rp, len, return NULL;); + + for (int i = 0; i < len; i++) { + (rp+i)->beg = *(data+(i<<1)); + (rp+i)->end = *(data+(i<<1)+1); + } + + dfa *dfap = NULL; + SAFE_MALLOC(dfa, dfap, 1, free(rp); return NULL;); + + dfap->type = DFA_TYPE_SPECIAL; + dfap->data.sp = (special_dfa) { len, rp, 0 }; + + return dfap; +} + +dfa * +dfa_from_ranges_neg(int len, CCR_MOD(NUM *) data) +{ + dfa *dfap = dfa_from_ranges(len, data); + if (dfap == NULL) return NULL; + dfap->type = DFA_TYPE_SPECIAL_NEG; + return dfap; +} + +dfa * +dfa_from_ranges_both(int plen, CCR_MOD(NUM *) pdata, + int nlen, CCR_MOD(NUM *) ndata) +{ + int len = plen + nlen; + struct range_s *rp = NULL; + SAFE_MALLOC(struct range_s, rp, len, return NULL;); + + for (int i = 0; i < plen; i++) { + (rp+i)->beg = *(pdata+(i<<1)); + (rp+i)->end = *(pdata+(i<<1)+1); + } + + for (int i = 0; i < nlen; i++) { + (rp+plen+i)->beg = *(ndata+(i<<1)); + (rp+plen+i)->end = *(ndata+(i<<1)+1); + } + + dfa *dfap = NULL; + SAFE_MALLOC(dfa, dfap, 1, free(rp); return NULL;); + + dfap->type = DFA_TYPE_SPECIAL_BOTH; + dfap->data.sp = (special_dfa) { len, rp, plen }; + + return dfap; +} + BOOL run_dfa(const dfa * const restrict table, const NUM code) { + /* negative codes have special purposes */ + if (code < 0) return 0; + switch (table->type) { case DFA_TYPE_POSITIVE: case DFA_TYPE_NEGATIVE: case DFA_TYPE_BOTH: break; + case DFA_TYPE_SPECIAL: + for (int i = 0; i < table->data.sp.len; i++) + if (code >= (table->data.sp.ranges+i)->beg && + code <= (table->data.sp.ranges+i)->end) { +#ifdef DEBUG + fleprintf("code = %ld, beg = %ld, end = %ld\n", + code, + (table->data.sp.ranges+i)->beg, + (table->data.sp.ranges+i)->end); +#endif + return 1; + } + return 0; + break; + case DFA_TYPE_SPECIAL_NEG: + for (int i = 0; i < table->data.sp.len; i++) + if (code >= (table->data.sp.ranges+i)->beg && + code <= (table->data.sp.ranges+i)->end) { +#ifdef DEBUG + fleprintf("code = %ld, beg = %ld, end = %ld\n", + code, + (table->data.sp.ranges+i)->beg, + (table->data.sp.ranges+i)->end); +#endif + return 0; + } + return 1; + break; + case DFA_TYPE_SPECIAL_BOTH: + for (int i = table->data.sp.boundary; + i < table->data.sp.len; + i++) + if (code >= (table->data.sp.ranges+i)->beg && + code <= (table->data.sp.ranges+i)->end) { +#ifdef DEBUG + fleprintf("code = %ld, beg = %ld, end = %ld\n", + code, + (table->data.sp.ranges+i)->beg, + (table->data.sp.ranges+i)->end); +#endif + return 0; + } + for (int i = 0; i < table->data.sp.boundary; i++) + if (code >= (table->data.sp.ranges+i)->beg && + code <= (table->data.sp.ranges+i)->end) { +#ifdef DEBUG + fleprintf("code = %ld, beg = %ld, end = %ld\n", + code, + (table->data.sp.ranges+i)->beg, + (table->data.sp.ranges+i)->end); +#endif + return 1; + } + return 0; + break; default: - return table->data.fn(code); + fleprintf("Unknown type: %d\n", table->type); + return 0; break; } - /* negative codes have special purposes */ - if (code < 0) return 0; - + /* below is for running the DFA */ char s[5]; str *strp = new_str(s, 5); @@ -696,24 +844,3 @@ run_dfa(const dfa * const restrict table, const NUM code) return 0; } - -P_ATTR -static BOOL -dfa_any_fun(const NUM UNUSED code) { return 1; } - -dfa * -dfa_from_func(special_dfa func) -{ - dfa *result = NULL; - SAFE_MALLOC(dfa, result, 1, return NULL;); - result->type = DFA_TYPE_SPECIAL; - result->data.fn = func; - - return result; -} - -dfa * -dfa_any(void) -{ - return dfa_from_func(dfa_any_fun); -} -- cgit v1.2.3-18-g5258