diff options
author | JSDurand <mmemmew@gmail.com> | 2023-01-11 23:47:26 +0800 |
---|---|---|
committer | JSDurand <mmemmew@gmail.com> | 2023-01-11 23:47:26 +0800 |
commit | 1a3d346f413325ed37848a6b2526e8e729269833 (patch) | |
tree | ab8812f8094d096c68aee53cf516e986cc9a273a /grammar/src/lib.rs | |
parent | f27d604d93ce583d4404e1874664e08382ea2f00 (diff) |
Record left-linear expansion and forest format
Now the grammar will record the left-linear expansions when generating
the nondeterministic finite automaton frmo its rules, and will record
whether an edge in the nondeterministic finite automaton comes from a
left-linear expansion. The latter is needed because while performing
a chain-rule derivation, we do not need the left-linear expanded
derivations in the "first layer". This might well have been the root
cause of the bad performance of the previous version of this package.
Also I have figured out how to properly generate and handle parse
forests while manipulating the "chain-rule machine".
Diffstat (limited to 'grammar/src/lib.rs')
-rw-r--r-- | grammar/src/lib.rs | 906 |
1 files changed, 176 insertions, 730 deletions
diff --git a/grammar/src/lib.rs b/grammar/src/lib.rs index 4e544c9..627ae6f 100644 --- a/grammar/src/lib.rs +++ b/grammar/src/lib.rs @@ -15,12 +15,15 @@ use nfa::{ nfa::DefaultNFA, regex::{DefaultRegex, ParseError, RegexType}, }, - DOption, Nfa, Regex, SoC, + LabelType, Nfa, NfaLabel, Regex, SoC, TwoEdges, }; use graph::{adlist::ALGBuilder, builder::Builder, Graph}; -use std::{collections::HashSet, fmt::Display}; +use std::{ + collections::{HashMap, HashSet}, + fmt::Display, +}; /// The type of a terminal. /// @@ -88,6 +91,9 @@ impl Display for TNT { #[derive(Debug, Copy, Clone)] #[non_exhaustive] pub enum Error { + /// The operation requires the grammar to be after a certain + /// state, but the grammar is not after that state yet. + WrongState(GrammarState, GrammarState), /// The first component is the index, and the second the bound. IndexOutOfBounds(usize, usize), /// Fail to build the N-th regular expression, due to the @@ -112,6 +118,9 @@ impl Display for Error { "Failed to build the {n}-th regular expression due to error: {pe}" ), Error::NFAFail(nfae) => write!(f, "failed to build NFA because of {nfae}"), + Error::WrongState(current, threshold) => { + write!(f, "require state {threshold}, but in state {current}") + } } } } @@ -146,6 +155,36 @@ impl Rule { } } +/// The state of Grammar. +/// +/// This is used to ensure that the grammar preparation methods are +/// called in the correct order. +#[derive(Debug, Copy, Clone, Default)] +pub enum GrammarState { + /// Just initialized + #[default] + Initial, + /// compute_firsts has been called + AfterComputeFirst, + /// left_closure has been called. + AfterLeftClosure, + /// left_closure_to_nfa has been called. + AfterNFA, +} + +impl Display for GrammarState { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use GrammarState::*; + + match self { + Initial => write!(f, "initial"), + AfterComputeFirst => write!(f, "after computation of first set"), + AfterLeftClosure => write!(f, "after computation of closure"), + AfterNFA => write!(f, "after computation of NFA"), + } + } +} + /// The type of a grammar. #[derive(Debug, Clone, Default)] pub struct Grammar { @@ -158,6 +197,8 @@ pub struct Grammar { /// The length of the list must match that of the list of /// non-terminals. rules: Vec<Rule>, + /// The list of successive sums of lengths of rules. + accumulators: Vec<usize>, // The following two attributes are empty until we call // `compute_firsts` on the grammar. /// The list of sets of "first terminals". @@ -169,9 +210,18 @@ pub struct Grammar { /// /// The length must match that of the list of non-terminals. first_nodes: Vec<Vec<usize>>, - // The following attribute is empty until we call `left_closure` - // on the grammar. - left_closure_branches: HashSet<usize>, + // The following attribute is empty until we call `closure` on the + // NFA with `transform_label_null_epsilon` as the transformer. + /// A hash map that maps a tuple `(pos1, pos2)` of positions + /// `pos1` and `pos2` in the rules to a vector of rule positions. + /// This vector means that in order to expand from `pos1` to + /// `pos`, it is necessary to expand according to the positions in + /// the vector, so we need to add all these expansions into the + /// parse forest later. + expansion_map: HashMap<(usize, usize), Vec<usize>>, + /// The state of the grammar, which tells us what information has + /// been computed for this grammar. + state: GrammarState, } /// A private type to aid the recursive looping of rergular @@ -216,7 +266,14 @@ impl Grammar { .map(|rule| Vec::with_capacity(rule.len())) .collect(); - let left_closure_branches = HashSet::default(); + let state = Default::default(); + + let expansion_map = Default::default(); + + // NOTE: We cannot calculate accumulators here, as we want the + // accumulators of the regular expression of the left-closure, + // not of the original one. + let accumulators = Vec::new(); Self { ter, @@ -224,7 +281,9 @@ impl Grammar { rules, firsts, first_nodes, - left_closure_branches, + state, + expansion_map, + accumulators, } } @@ -258,7 +317,29 @@ impl Grammar { /// Return the total length of all rules. #[inline] pub fn total(&self) -> usize { - self.rules.iter().map(Rule::len).sum() + self.accumulators.last().copied().unwrap_or(0) + } + + /// Query if a position is the starting position of a + /// non-terminal. If it is, return the non-terminal, else return + /// `None` . + #[inline] + pub fn is_nt_start_in_nfa_p(&self, pos: usize) -> Option<usize> { + for (index, accumulator) in self.accumulators.iter().copied().enumerate() { + let shifted_accumulator = accumulator << 1; + + // NOTE: Clippy suggests to call `cmp`, but it seems + // compiler might not yet be smart enough to inline that + // call, so I just silence clippy here. + #[allow(clippy::comparison_chain)] + if pos == shifted_accumulator { + return Some(index); + } else if pos < shifted_accumulator { + break; + } + } + + None } /// Return the number of terminals. @@ -353,754 +434,119 @@ impl Grammar { .contains(&None)) } - /// For a NON_TERMINAL, return an iterator that goes over the - /// nodes that are reachable from the non-terminal through an - /// empty transition of the nondeterministic finite automaton. + /// Query the expansion information from the position `pos1` to + /// the position `pos2` . #[inline] - pub fn first_nodes_of(&self, non_terminal: usize) -> Result<std::slice::Iter<usize>, Error> { - Ok(self - .first_nodes - .get(non_terminal) - .ok_or(Error::IndexOutOfBounds(non_terminal, self.non.len()))? - .iter()) - } - - /// Return a hash set that contains all nodes in the set of - /// left-closure regular languages that are added because of the - /// left-linear expansion. - pub fn left_closure_branches(&self) -> &HashSet<usize> { - &self.left_closure_branches - } - - /// Compute the set of terminals that can appear as the first - /// terminal in some left-linear derivation of a non-terminal, for - /// every non-terminal. - /// - /// This is an algorithm that computes the transitive closure, - /// which is a common approach for this task. But perhaps there - /// are more efficient approaches? - /// - /// Also the function computes the set of "reachable nodes" in the - /// process, and records the information in the `first_nodes` - /// attribute. - pub fn compute_firsts(&mut self) -> Result<(), Error> { - let mut updated = true; - - let non_len = self.non_num(); - - use StackElement::{Seen, Unseen}; - - while updated { - updated = false; - - for (n, regex) in self.rules.iter().map(|rule| &rule.regex).enumerate() { - let root = if let Some(root) = regex.root() { - root - } else { - if !self.is_nullable(n)? { - updated = true; - - self.firsts.get_mut(n).unwrap().insert(None); - - // The default construction of a grammar - // reserves some space for each vector, so - // explicitly setting this can reduce some - // minor memory overhead. - let pointer = self.first_nodes.get_mut(n).unwrap(); - - pointer.clear(); - pointer.shrink_to_fit(); - } - - continue; - }; - - let regex_len = regex.len(); - - let mut stack: Vec<StackElement> = Vec::with_capacity(regex_len); - - stack.push(Unseen(root)); - - let mut children_sets_stack: Vec<HashSet<Option<usize>>> = - Vec::with_capacity(regex_len); - - let mut children_nodes_stack: Vec<HashSet<usize>> = Vec::with_capacity(regex_len); - - while let Some(top) = stack.pop() { - let top_index = top.index(); - let is_seen = top.is_seen(); - - match regex - .vertex_label(top_index) - .map_err(|_| Error::IndexOutOfBounds(top_index, regex_len))? - { - RegexType::Kleene => { - if !is_seen { - stack.push(Seen(top_index)); - - for child in regex.children_of(top_index).unwrap() { - stack.push(Unseen(child)); - } - } else { - let degree = regex.degree(top_index).unwrap(); - let children_stack_len = children_sets_stack.len(); - let children_nodes_len = children_nodes_stack.len(); - - assert!( - children_stack_len >= degree, - "not enough stack elements for {top_index}" - ); - - assert!( - children_nodes_len >= degree, - "not enough stack elements for {top_index}" - ); - - let mut this_set = HashSet::new(); - - this_set.insert(None); - - let mut this_nodes = HashSet::new(); - - this_nodes.insert(top_index); - - if degree == 0 { - children_sets_stack.push(this_set); - children_nodes_stack.push(this_nodes); - - continue; - } - - let mut stop = false; - - for (child_set, child_nodes) in children_sets_stack - .drain((children_stack_len - degree)..) - .zip( - children_nodes_stack.drain((children_nodes_len - degree)..), - ) - { - if stop { - break; - } - - if !child_set.contains(&None) { - stop = true; - } - - this_set.extend(child_set); - this_nodes.extend(child_nodes); - } - - children_sets_stack.push(this_set); - children_nodes_stack.push(this_nodes); - } - } - RegexType::Plus => { - if !is_seen { - stack.push(Seen(top_index)); - - for child in regex.children_of(top_index).unwrap() { - stack.push(Unseen(child)); - } - } else { - let degree = regex.degree(top_index).unwrap(); - let children_stack_len = children_sets_stack.len(); - let children_nodes_len = children_nodes_stack.len(); - - assert!( - children_stack_len >= degree, - "not enough stack elements for {top_index}" - ); - - assert!( - children_nodes_len >= degree, - "not enough stack elements for {top_index}" - ); - - let mut this_set = HashSet::new(); - - let mut this_nodes = HashSet::new(); - - this_nodes.insert(top_index); - - if degree == 0 { - this_set.insert(None); - - children_sets_stack.push(this_set); - children_nodes_stack.push(this_nodes); - - continue; - } - - let mut stop = false; - - for (child_set, child_nodes) in children_sets_stack - .drain((children_stack_len - degree)..) - .zip( - children_nodes_stack.drain((children_nodes_len - degree)..), - ) - { - if stop { - break; - } - - if !child_set.contains(&None) { - stop = true; - } - - this_set.extend(child_set); - this_nodes.extend(child_nodes); - } - - if stop { - this_set.remove(&None); - } - - children_sets_stack.push(this_set); - children_nodes_stack.push(this_nodes); - } - } - RegexType::Optional => { - if !is_seen { - stack.push(Seen(top_index)); - - for child in regex.children_of(top_index).unwrap() { - stack.push(Unseen(child)); - } - } else { - let degree = regex.degree(top_index).unwrap(); - let children_stack_len = children_sets_stack.len(); - let children_nodes_len = children_nodes_stack.len(); - - assert!( - children_stack_len >= degree, - "not enough stack elements for {top_index}" - ); - - assert!( - children_nodes_len >= degree, - "not enough stack elements for {top_index}" - ); - - let mut this_set = HashSet::new(); - - this_set.insert(None); - - let mut this_nodes = HashSet::new(); - - this_nodes.insert(top_index); - - if degree == 0 { - children_sets_stack.push(this_set); - children_nodes_stack.push(this_nodes); - - continue; - } - - let mut stop = false; - - for (child_set, child_nodes) in children_sets_stack - .drain((children_stack_len - degree)..) - .zip( - children_nodes_stack.drain((children_nodes_len - degree)..), - ) - { - if stop { - break; - } - - if !child_set.contains(&None) { - stop = true; - } - - this_set.extend(child_set.iter().copied()); - this_nodes.extend(child_nodes.iter().copied()); - } - - children_sets_stack.push(this_set); - children_nodes_stack.push(this_nodes); - } - } - RegexType::Or => { - if !is_seen { - stack.push(Seen(top_index)); - - for child in regex.children_of(top_index).unwrap() { - stack.push(Unseen(child)); - } - } else { - let degree = regex.degree(top_index).unwrap(); - let children_stack_len = children_sets_stack.len(); - let children_nodes_len = children_nodes_stack.len(); - - assert!( - children_stack_len >= degree, - "not enough stack elements for {top_index}" - ); - - assert!( - children_nodes_len >= degree, - "not enough stack elements for {top_index}" - ); - - let mut this_set = HashSet::new(); - - let mut this_nodes = HashSet::new(); - - this_nodes.insert(top_index); - - if degree == 0 { - this_set.insert(None); - - children_sets_stack.push(this_set); - children_nodes_stack.push(this_nodes); - - continue; - } - - for (child_set, child_nodes) in children_sets_stack - .drain((children_stack_len - degree)..) - .zip( - children_nodes_stack.drain((children_nodes_len - degree)..), - ) - { - this_set.extend(child_set.iter().copied()); - this_nodes.extend(child_nodes.iter().copied()); - } - - children_sets_stack.push(this_set); - children_nodes_stack.push(this_nodes); - } - } - RegexType::Paren => { - // Only for printing purposes - let mut this_set = HashSet::new(); - - this_set.insert(None); - - children_sets_stack.push(this_set); - - let mut this_nodes = HashSet::new(); - - this_nodes.insert(top_index); - - children_nodes_stack.push(this_nodes); - } - RegexType::Empty => { - if !is_seen { - stack.push(Seen(top_index)); - - for child in regex.children_of(top_index).unwrap().rev() { - stack.push(Unseen(child)); - } - } else { - let degree = regex.degree(top_index).unwrap(); - let children_stack_len = children_sets_stack.len(); - let children_nodes_len = children_nodes_stack.len(); - - assert!( - children_stack_len >= degree, - "not enough stack elements for {top_index}" - ); - - assert!( - children_nodes_len >= degree, - "not enough stack elements for {top_index}" - ); - - let mut this_set = HashSet::new(); - - let mut this_nodes = HashSet::new(); - - this_nodes.insert(top_index); - - if degree == 0 { - this_set.insert(None); - - children_sets_stack.push(this_set); - children_nodes_stack.push(this_nodes); - - continue; - } - - let mut stop = false; - - for (child_set, child_nodes) in children_sets_stack - .drain((children_stack_len - degree)..) - .zip( - children_nodes_stack.drain((children_nodes_len - degree)..), - ) - { - if stop { - break; - } - - if !child_set.contains(&None) { - stop = true; - } - - this_set.extend(child_set.iter().copied()); - this_nodes.extend(child_nodes.iter().copied()); - } - - if stop { - this_set.remove(&None); - } - - children_sets_stack.push(this_set); - children_nodes_stack.push(this_nodes); - } - } - RegexType::Lit(tnt) => { - match tnt { - TNT::Ter(t) => { - let mut this_set = HashSet::with_capacity(1); - - this_set.insert(Some(t)); - - children_sets_stack.push(this_set); - } - TNT::Non(non) => { - let this_set = self - .firsts - .get(non) - .ok_or(Error::IndexOutOfBounds(non, non_len))? - .clone(); - - children_sets_stack.push(this_set); - } - } - - let mut this_nodes = HashSet::with_capacity(1); - this_nodes.insert(top_index); - - children_nodes_stack.push(this_nodes); - } - } - } - - assert_eq!( - children_sets_stack.len(), - 1, - "Too many elements left at the end" - ); - - assert_eq!( - children_nodes_stack.len(), - 1, - "Too many elements left at the end" - ); - - for first in children_sets_stack.pop().unwrap().into_iter() { - if !self.firsts.get(n).unwrap().contains(&first) { - updated = true; + pub fn query_expansion(&self, pos1: usize, pos2: usize) -> Result<Option<&[usize]>, Error> { + if pos1 >= self.total() { + return Err(Error::IndexOutOfBounds(pos1, self.total())); + } - self.firsts.get_mut(n).unwrap().insert(first); - } - } + if pos2 >= self.total() { + return Err(Error::IndexOutOfBounds(pos2, self.total())); + } - *self.first_nodes.get_mut(n).unwrap() = - children_nodes_stack.pop().unwrap().into_iter().collect(); + match self.state { + GrammarState::AfterLeftClosure => {} + _ => { + return Err(Error::WrongState( + self.state, + GrammarState::AfterLeftClosure, + )); } } - Ok(()) + Ok(self.expansion_map.get(&(pos1, pos2)).map(|v| &v[..])) } - /// Return the regular language of the left-linear closures of - /// non-terminals in the grammar. - /// - /// The resulting vector is guaranteed to be of the same length as - /// the number of non-terminals. - /// - /// The resulting regular language is not "self-contained". That - /// is to say, its terminals indices are packed indices and are - /// meaningless without the interpretation of the grammar. They - /// should be converted to a nondeterministic finite automaton and - /// then to its null closure later on. - pub fn left_closure(&self) -> Result<Vec<DefaultRegex<TNT>>, Error> { - let non_len = self.non_num(); - - let mut result = Vec::with_capacity(non_len); - - for (n, rule) in self.rules.iter().enumerate() { - let regex = &rule.regex; - - let regex_root = if let Some(root) = regex.root() { - root - } else { - result.push(Default::default()); - - continue; - }; - - let regex_len = regex.len(); - - /// A convenient macro to retrieve the children from the - /// original regular expression with error propagation. - macro_rules! children { - ($n:expr) => { - regex - .children_of($n) - .map_err(|_| Error::IndexOutOfBounds($n, regex_len))? - }; - } - - /// A convenient macro to retrieve the label from the - /// original regular expression with error propagation. - macro_rules! label { - ($n:expr) => { - regex - .vertex_label($n) - .map_err(|_| Error::IndexOutOfBounds($n, regex_len))? - }; - } - - let parents = regex.parents_array().map_err(|e| match e { - nfa::error::Error::UnknownNode(n) => Error::IndexOutOfBounds(n, regex_len), - nfa::error::Error::Cycle => Error::BuildFail(n, ParseError::Cycle), - _ => unreachable!(), - })?; - - use RegexType::*; - use TNT::*; - - let mut local_result: Vec<RegexType<TNT>> = Vec::with_capacity(regex_len * 2); - let mut builder = ALGBuilder::default(); - - /// Perform a depth-first copy - macro_rules! df_copy { - ($parent:expr, $index:expr) => { - match label!($index) { - Kleene | Plus | Optional | Or | Paren | Empty => { - let mut stack = vec![($parent, $index)]; - - while let Some((top_parent, top_index)) = stack.pop() { - let label = label!(top_index); - let label = match label { - Lit(top_tnt) => Lit(Ter(self.pack_tnt(top_tnt).unwrap())), - _ => label, - }; - - local_result.push(label); - - let new = builder.add_vertex(); - - builder.add_edge(top_parent, new, ()).unwrap(); - - stack.extend(children!(top_index).map(|child| (new, child))); - } - } - Lit(remain_tnt) => { - local_result.push(Lit(Ter(self.pack_tnt(remain_tnt).unwrap()))); - let new = builder.add_vertex(); - builder.add_edge($parent, new, ()).unwrap(); - } - } - }; - } - - local_result.push(Or); - builder.add_vertex(); - - local_result.push(Lit(Ter(self.pack_tnt(Non(n)).unwrap()))); - let non_lit_index = builder.add_vertex(); - - builder.add_edge(0, non_lit_index, ()).unwrap(); - - // If this non-terminal is nullable, add an empty variant. - if self.is_nullable(n)? { - local_result.push(Empty); - let empty_index = builder.add_vertex(); - builder.add_edge(0, empty_index, ()).unwrap(); + // REVIEW: Do we have a better way to record expansion information + // than to compute the transitive closure? + + /// A transformer of labels to be fed into + /// [`closure`][nfa::default::nfa::DefaultNFA::closure], with the + /// predicate that returns true if and only if the label of the + /// first edge is either empty or a nullable non-terminal. + pub fn transform_label_null_epsilon( + &mut self, + two_edges: TwoEdges<LabelType<TNT>>, + ) -> LabelType<TNT> { + let (first_source, first_target, first_label) = two_edges.first_edge(); + let (second_source, second_target, second_label) = two_edges.second_edge(); + + #[cfg(debug_assertions)] + { + assert_eq!(first_target, second_source); + + if let Some(tnt) = *first_label.get_value() { + assert!(matches!(tnt, TNT::Non(n) if matches!(self.is_nullable(n), Ok(true)))); } + } - for first_node in self.first_nodes_of(n)?.copied() { - assert!(first_node < parents.len()); - - let tnt = match label!(first_node) { - Lit(tnt) => Lit(tnt), - _ => continue, - }; - - let mut parents_chain = { - let mut result = Vec::new(); - let mut stack = Vec::with_capacity(parents.len()); - - stack.push(first_node); - - while let Some(top) = stack.pop() { - assert!(top < parents.len()); - if let Some(parent) = parents.get(top).copied().unwrap() { - result.push(parent); - stack.push(parent.0); - } - } + // Compute if this is from left-linear expansion: it is so if + // and only if one if either the edges comes from left-linear + // expansion or we are moving across a non-terminal expansion, + // that is to say, the source of the second edge is the + // starting edge of a non-terminal. - result.reverse(); + let mut left_p = first_label.get_left_p() || second_label.get_left_p(); - result - }; + // Record left-linear expansion information. - if parents_chain.is_empty() { - local_result.push(tnt); - let lit_index = builder.add_vertex(); - builder.add_edge(0, lit_index, ()).unwrap(); + if let Some(second_nt) = self.is_nt_start_in_nfa_p(second_source) { + left_p = true; - continue; - } + if !self + .expansion_map + .contains_key(&(first_source, second_target)) + { + let original_expansion = self.expansion_map.get(&(second_source, second_target)); - assert_eq!(parents_chain.first().unwrap().0, regex_root); - - // A different, "more local", root. - let mut local_root: usize; - - // Handle the direct parent - let (parent_node, parent_edge_index) = parents_chain.pop().unwrap(); - - match label!(parent_node) { - Kleene | Plus => { - // TODO: If parent_edge_index is 0, make a - // Plus node instead. - local_result.extend([Empty, tnt]); - - local_root = builder.add_vertex(); - let lit_index = builder.add_vertex(); - builder.add_edge(local_root, lit_index, ()).unwrap(); - - let iterator = children!(parent_node); - - for index in iterator.clone().skip(parent_edge_index + 1) { - df_copy!(local_root, index); - } - - local_result.push(Kleene); - let new_parent = builder.add_vertex(); - builder.add_edge(local_root, new_parent, ()).unwrap(); - - for index in iterator { - df_copy!(new_parent, index); - } - } - - Or => { - local_result.push(tnt); - local_root = builder.add_vertex(); - } - Optional | Empty => { - // If this path is taken, it should not be - // optional. - local_result.extend([Empty, tnt]); - local_root = builder.add_vertex(); - let lit_index = builder.add_vertex(); - builder.add_edge(local_root, lit_index, ()).unwrap(); - - for index in children!(parent_node).skip(parent_edge_index + 1) { - df_copy!(local_root, index); - } - } - Paren | Lit(_) => unreachable!(), - } + self.expansion_map.insert( + (first_source, second_target), + if let Some(original_expansion) = original_expansion { + let mut result = original_expansion.clone(); + result.push(second_nt); - // Handle successive parents - - for (node, edge_index) in parents_chain.into_iter() { - let node_type = label!(node); - - match node_type { - Kleene | Plus => { - // TODO: If edge_index is 0, then just - // make this a Plus node. - - local_result.push(Empty); - let new_index = builder.add_vertex(); - builder.add_edge(new_index, local_root, ()).unwrap(); - - local_root = new_index; - - let iterator = children!(node); - - for index in iterator.clone().skip(edge_index + 1) { - df_copy!(local_root, index); - } - - local_result.push(Kleene); - let new_parent = builder.add_vertex(); - builder.add_edge(local_root, new_parent, ()).unwrap(); - - for index in iterator { - df_copy!(new_parent, index); - } - } - RegexType::Or => {} - RegexType::Optional | RegexType::Empty => { - local_result.push(Empty); - let new_index = builder.add_vertex(); - builder.add_edge(new_index, local_root, ()).unwrap(); - local_root = new_index; - - for index in children!(node).skip(edge_index + 1) { - df_copy!(local_root, index); - } - } - RegexType::Paren | RegexType::Lit(_) => unreachable!(), - } - } - - builder.add_edge(0, local_root, ()).unwrap(); + result + } else { + vec![second_nt] + }, + ); } - - local_result.shrink_to_fit(); - - let graph = builder.build(); - - assert_eq!(graph.nodes_len(), local_result.len()); - - result.push( - DefaultRegex::new(graph, local_result) - .map_err(|_| Error::BuildFail(n, ParseError::Cycle))?, - ); } - assert_eq!(result.len(), non_len); - - Ok(result) + NfaLabel::new(second_label.get_value(), second_label.get_moved(), left_p) } - /// Convert the regular language of left-linear closures to its - /// equivalent nondeterministic finite automaton. - /// - /// In the generation of the left-linear closure, the terminals - /// and non-terminals are packed into an unsigned integer. We - /// unpack them in converting to nondeterministic finite - /// automaton. - /// - /// The resulting nondeterministic finite automaton should be - /// transformed to its null-closure for use in our algorithm. - pub fn left_closure_to_nfa( - &self, - closure: &[DefaultRegex<TNT>], - ) -> Result<DefaultNFA<DOption<TNT>>, Error> { - let label_transform = |tnt| match tnt { - TNT::Ter(t) => { - let new_tnt = self.unpack_tnt(t).map_err(|e| match e { - Error::IndexOutOfBounds(index, bound) => { - graph::error::Error::IndexOutOfBounds(index, bound) - } - _ => unreachable!(), - })?; - - Ok(SoC::Carry(new_tnt)) + /// For a NON_TERMINAL, return an iterator that goes over the + /// nodes that are reachable from the non-terminal through an + /// empty transition of the nondeterministic finite automaton. + #[inline] + pub fn first_nodes_of(&self, non_terminal: usize) -> Result<std::slice::Iter<usize>, Error> { + match self.state { + GrammarState::Initial => { + return Err(Error::WrongState( + self.state, + GrammarState::AfterComputeFirst, + )); } - TNT::Non(n) => Ok(SoC::Sub(n)), - }; - - let nfa = DefaultNFA::to_nfa(closure, label_transform, Some(TNT::Non(0)))?; + GrammarState::AfterComputeFirst + | GrammarState::AfterLeftClosure + | GrammarState::AfterNFA => {} + } - Ok(nfa) + Ok(self + .first_nodes + .get(non_terminal) + .ok_or(Error::IndexOutOfBounds(non_terminal, self.non.len()))? + .iter()) } } +pub mod first_set; + +pub mod left_closure; + impl Display for Grammar { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { assert_eq!(self.non.len(), self.rules.len()); |