summaryrefslogtreecommitdiff
path: root/src/grammar.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/grammar.h')
-rw-r--r--src/grammar.h125
1 files changed, 111 insertions, 14 deletions
diff --git a/src/grammar.h b/src/grammar.h
index 28f8fd8..956fdcd 100644
--- a/src/grammar.h
+++ b/src/grammar.h
@@ -1,6 +1,8 @@
#ifndef GRAMMAR_H
#define GRAMMAR_H
+#include "dfa.h"
+#include "ht.h"
#include <stdarg.h>
#include <stdio.h>
#include "list.h"
@@ -8,9 +10,6 @@
/* The class file for grammars */
-/* TODO: Terminals should be extended to incorporate the "predicate
- terminals", such as character classes. */
-
/* A grammar is a list of rules. A rule replaces a non-terminal with
a finite string of terminals and non-terminals.
@@ -48,20 +47,54 @@
to simply use the Run-Length-Encoding. See the file dfa.h for
implementation details. */
-typedef unsigned long T;
-typedef unsigned NT;
+typedef long T;
+typedef unsigned long NT;
+typedef unsigned long PT;
+
+enum PT_TYPE_e {
+ PT_DFA,
+ PT_SPECIAL
+};
+
+typedef enum PT_TYPE_e PT_TYPE;
+
+typedef struct PT_DATA_s PTD;
+
+struct PT_DATA_s {
+ PT_TYPE type;
+ union { special_dfa s; dfa *d; } data;
+ char *label; /* NULL-terminated string */
+};
/* T or NT
Don't worry, it is not explosive. */
+enum TNT_TYPE_e {
+ TERMINAL,
+ PREDICATE,
+ NONTERMINAL
+};
+
+typedef enum TNT_TYPE_e TNT_TYPE;
+
+/* If a TNT of type TERMINAL has value END_OF_INPUT, then it means,
+ surprisingly, the end of input. */
+enum { END_OF_INPUT = -1 };
+
struct TNT_s {
- int type; /* 0 => T and NT otherwise */
- union { T t; NT nt; } data;
+ TNT_TYPE type;
+ union { T t; NT nt; PT pt; } data;
};
typedef struct TNT_s TNT;
typedef struct Rule_s Rule;
+typedef struct Rule_group_s Rule_group;
+
+NT rg_left(CCR_MOD(Rule_group *) rg);
+NUM rg_len(CCR_MOD(Rule_group *) rg);
+List *rg_right(CCR_MOD(Rule_group *) rg);
+List *rg_nth(CCR_MOD(Rule_group *) rg, NUM n);
/* A grammar is more than a list of rules: it should include a
correspondance to associate the names of non-terminals with
@@ -74,11 +107,11 @@ typedef struct Grammar_s Grammar;
NAMES is a list of CPA pointers. */
BOOL
build_grammar(Grammar *g,
- const List *rules, const List * const names);
+ const List *rules, CC_MOD(List *) names);
/* This accepts one and only one optional argument, which is of type
- either T or NT, depending on TYPE being 0 or not. */
-TNT *new_tnt(int type, ...);
+ either T, NT, or PT, depending on TYPE. */
+TNT *new_tnt(TNT_TYPE type, ...);
/* An array of code-points. */
@@ -90,8 +123,8 @@ struct cpa_s {
typedef struct cpa_s cpa;
NUM
-find_in_cpa_list(const NUM * const restrict string, NUM size,
- const List * const restrict list);
+find_in_cpa_list(CCR_MOD(NUM *) string, NUM size,
+ CCR_MOD(List *) list);
/* For debugging purposes, we need to print out rules to examine their
values. */
@@ -104,7 +137,7 @@ void print_tnt(void *element);
void print_rule(void *r);
/* This will generate errors if G is not a list of rules. */
-void print_grammar(const Grammar * const g);
+void print_grammar(CC_MOD(Grammar *) g);
/* constructors */
@@ -120,7 +153,7 @@ TNT *new_tnt_pointer(size_t size);
/* RIGHT should be created by new_tnt_string or something alike.
If RIGHT is NULL, NULL is returned. */
-Rule *new_rule(NT left, const List * const right);
+Rule *new_rule(NT left, CC_MOD(List *) right);
Grammar *new_grammar();
@@ -132,6 +165,70 @@ void destroy_rule_no_free(void *rule);
void destroy_cpa_and_free_all(void *element);
void destroy_grammar(void *grammar, int flag);
+NUM grammar_left_len(CCR_MOD(Grammar *)g);
+
+/* look up a rule */
+Rule_group *grammar_rule(CCR_MOD(Grammar *) g, NT nt);
+
+/* NTS should be already allocated to be an array of BOOLs. The size
+ of this array should be the same as the length of the array of
+ non-terminals of G. If any of the above two conditions is not
+ satisfied, the behaviour is undefined.
+
+ After this function returns, the element corresponding to a
+ non-terminal X is non-zero if and only if X has epsilon rules; that
+ is to say, X can produce the empty string. */
+void epsilon_nts(CC_MOD(Grammar *) g, BOOL * const restrict nts);
+
+/* TERMINAL_HTS and PREDICATE_HTS are supposed to be already allocated
+ to have enough space to contain an array of hash tables of the same
+ length as the array of non-terminals. Each element in the array
+ should be hash tables that are already created. If any of the
+ above conditions is not satisfied, the behaviour is undefined.
+
+ TERMINAL_HTS contain mappings from non-terminals to sets of
+ terminals. A terminal belongs to the hash table corresponding to
+ the non-terminal X in TERMINAL_HTS if and only if that terminal
+ belongs to the first set of X. Mutatis mutandis for PREDICATE_HTS.
+ The reason for two arrays is that terminals and predicates have two
+ different types in our grammar, and are stored in different arrays,
+ so we cannot simply refer to them as a single integer.
+
+ The hash tables that result will contain invalid pointers as values
+ after the function returns.
+
+ On error return non-zero. */
+BOOL nt_first(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
+ ht *terminal_hts, ht *predicate_hts);
+
+/* Having computed the first sets of non-terminals, we can compute the
+ first sets of strings of non-terminals and terminals.
+ RESULT_TERMINALS and RESULT_PREDICATES must satisfy similar
+ requirements as in nt_first.
+
+ The values are meaningless invalid pointers. So one should not
+ free those pointers afterwards.
+
+ On error return non-zero. */
+BOOL tnt_first(CC_MOD(ht *) terminal_hts, CC_MOD(ht *) predicate_hts,
+ CCR_MOD(BOOL *) nts, NUM len, List *tnts,
+ ht * const restrict result_terminals,
+ ht * const restrict result_predicates);
+
+/* Mutatis mutandis to NT_FIRST, but for the follow sets of
+ non-terminals.
+
+ Note that the hash table RESULT_TERMINAL_HTS is special here. It
+ not only contains keys for the terminals in the follow set of the
+ corresponding nonterminal, but it also contains END_OF_INPUT for
+ the nonterminals whose follow set contains the end-of-input marker.
+
+ On error return non-zero. */
+BOOL nt_follow(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
+ CC_MOD(ht *)terminal_hts, CC_MOD(ht *)predicate_hts,
+ ht * const result_terminals,
+ ht * const result_predicates);
+
/* convenient macro */
#define NEW_CPA(X, ARRAY, SIZE) do { \