diff options
Diffstat (limited to 'src/grammar.h')
-rw-r--r-- | src/grammar.h | 125 |
1 files changed, 111 insertions, 14 deletions
diff --git a/src/grammar.h b/src/grammar.h index 28f8fd8..956fdcd 100644 --- a/src/grammar.h +++ b/src/grammar.h @@ -1,6 +1,8 @@ #ifndef GRAMMAR_H #define GRAMMAR_H +#include "dfa.h" +#include "ht.h" #include <stdarg.h> #include <stdio.h> #include "list.h" @@ -8,9 +10,6 @@ /* The class file for grammars */ -/* TODO: Terminals should be extended to incorporate the "predicate - terminals", such as character classes. */ - /* A grammar is a list of rules. A rule replaces a non-terminal with a finite string of terminals and non-terminals. @@ -48,20 +47,54 @@ to simply use the Run-Length-Encoding. See the file dfa.h for implementation details. */ -typedef unsigned long T; -typedef unsigned NT; +typedef long T; +typedef unsigned long NT; +typedef unsigned long PT; + +enum PT_TYPE_e { + PT_DFA, + PT_SPECIAL +}; + +typedef enum PT_TYPE_e PT_TYPE; + +typedef struct PT_DATA_s PTD; + +struct PT_DATA_s { + PT_TYPE type; + union { special_dfa s; dfa *d; } data; + char *label; /* NULL-terminated string */ +}; /* T or NT Don't worry, it is not explosive. */ +enum TNT_TYPE_e { + TERMINAL, + PREDICATE, + NONTERMINAL +}; + +typedef enum TNT_TYPE_e TNT_TYPE; + +/* If a TNT of type TERMINAL has value END_OF_INPUT, then it means, + surprisingly, the end of input. */ +enum { END_OF_INPUT = -1 }; + struct TNT_s { - int type; /* 0 => T and NT otherwise */ - union { T t; NT nt; } data; + TNT_TYPE type; + union { T t; NT nt; PT pt; } data; }; typedef struct TNT_s TNT; typedef struct Rule_s Rule; +typedef struct Rule_group_s Rule_group; + +NT rg_left(CCR_MOD(Rule_group *) rg); +NUM rg_len(CCR_MOD(Rule_group *) rg); +List *rg_right(CCR_MOD(Rule_group *) rg); +List *rg_nth(CCR_MOD(Rule_group *) rg, NUM n); /* A grammar is more than a list of rules: it should include a correspondance to associate the names of non-terminals with @@ -74,11 +107,11 @@ typedef struct Grammar_s Grammar; NAMES is a list of CPA pointers. */ BOOL build_grammar(Grammar *g, - const List *rules, const List * const names); + const List *rules, CC_MOD(List *) names); /* This accepts one and only one optional argument, which is of type - either T or NT, depending on TYPE being 0 or not. */ -TNT *new_tnt(int type, ...); + either T, NT, or PT, depending on TYPE. */ +TNT *new_tnt(TNT_TYPE type, ...); /* An array of code-points. */ @@ -90,8 +123,8 @@ struct cpa_s { typedef struct cpa_s cpa; NUM -find_in_cpa_list(const NUM * const restrict string, NUM size, - const List * const restrict list); +find_in_cpa_list(CCR_MOD(NUM *) string, NUM size, + CCR_MOD(List *) list); /* For debugging purposes, we need to print out rules to examine their values. */ @@ -104,7 +137,7 @@ void print_tnt(void *element); void print_rule(void *r); /* This will generate errors if G is not a list of rules. */ -void print_grammar(const Grammar * const g); +void print_grammar(CC_MOD(Grammar *) g); /* constructors */ @@ -120,7 +153,7 @@ TNT *new_tnt_pointer(size_t size); /* RIGHT should be created by new_tnt_string or something alike. If RIGHT is NULL, NULL is returned. */ -Rule *new_rule(NT left, const List * const right); +Rule *new_rule(NT left, CC_MOD(List *) right); Grammar *new_grammar(); @@ -132,6 +165,70 @@ void destroy_rule_no_free(void *rule); void destroy_cpa_and_free_all(void *element); void destroy_grammar(void *grammar, int flag); +NUM grammar_left_len(CCR_MOD(Grammar *)g); + +/* look up a rule */ +Rule_group *grammar_rule(CCR_MOD(Grammar *) g, NT nt); + +/* NTS should be already allocated to be an array of BOOLs. The size + of this array should be the same as the length of the array of + non-terminals of G. If any of the above two conditions is not + satisfied, the behaviour is undefined. + + After this function returns, the element corresponding to a + non-terminal X is non-zero if and only if X has epsilon rules; that + is to say, X can produce the empty string. */ +void epsilon_nts(CC_MOD(Grammar *) g, BOOL * const restrict nts); + +/* TERMINAL_HTS and PREDICATE_HTS are supposed to be already allocated + to have enough space to contain an array of hash tables of the same + length as the array of non-terminals. Each element in the array + should be hash tables that are already created. If any of the + above conditions is not satisfied, the behaviour is undefined. + + TERMINAL_HTS contain mappings from non-terminals to sets of + terminals. A terminal belongs to the hash table corresponding to + the non-terminal X in TERMINAL_HTS if and only if that terminal + belongs to the first set of X. Mutatis mutandis for PREDICATE_HTS. + The reason for two arrays is that terminals and predicates have two + different types in our grammar, and are stored in different arrays, + so we cannot simply refer to them as a single integer. + + The hash tables that result will contain invalid pointers as values + after the function returns. + + On error return non-zero. */ +BOOL nt_first(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts, + ht *terminal_hts, ht *predicate_hts); + +/* Having computed the first sets of non-terminals, we can compute the + first sets of strings of non-terminals and terminals. + RESULT_TERMINALS and RESULT_PREDICATES must satisfy similar + requirements as in nt_first. + + The values are meaningless invalid pointers. So one should not + free those pointers afterwards. + + On error return non-zero. */ +BOOL tnt_first(CC_MOD(ht *) terminal_hts, CC_MOD(ht *) predicate_hts, + CCR_MOD(BOOL *) nts, NUM len, List *tnts, + ht * const restrict result_terminals, + ht * const restrict result_predicates); + +/* Mutatis mutandis to NT_FIRST, but for the follow sets of + non-terminals. + + Note that the hash table RESULT_TERMINAL_HTS is special here. It + not only contains keys for the terminals in the follow set of the + corresponding nonterminal, but it also contains END_OF_INPUT for + the nonterminals whose follow set contains the end-of-input marker. + + On error return non-zero. */ +BOOL nt_follow(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts, + CC_MOD(ht *)terminal_hts, CC_MOD(ht *)predicate_hts, + ht * const result_terminals, + ht * const result_predicates); + /* convenient macro */ #define NEW_CPA(X, ARRAY, SIZE) do { \ |