diff options
Diffstat (limited to 'grammar/src/left_closure.rs')
-rw-r--r-- | grammar/src/left_closure.rs | 324 |
1 files changed, 324 insertions, 0 deletions
diff --git a/grammar/src/left_closure.rs b/grammar/src/left_closure.rs new file mode 100644 index 0000000..1630881 --- /dev/null +++ b/grammar/src/left_closure.rs @@ -0,0 +1,324 @@ +//! This file implements some functions to compute the regular +//! language of left-linear closure of a grammar. + +use super::*; + +use nfa::LabelType; + +impl Grammar { + /// Return the regular language of the left-linear closures of + /// non-terminals in the grammar. + /// + /// The resulting vector is guaranteed to be of the same length as + /// the number of non-terminals. + /// + /// The resulting regular language is not "self-contained". That + /// is to say, its terminals indices are packed indices and are + /// meaningless without the interpretation of the grammar. They + /// should be converted to a nondeterministic finite automaton and + /// then to its null closure later on. + pub fn left_closure(&mut self) -> Result<Vec<DefaultRegex<TNT>>, Error> { + match self.state { + GrammarState::Initial => { + return Err(Error::WrongState( + self.state, + GrammarState::AfterComputeFirst, + )) + } + GrammarState::AfterLeftClosure + | GrammarState::AfterNFA + | GrammarState::AfterComputeFirst => {} + } + + let non_len = self.non_num(); + + let mut result = Vec::with_capacity(non_len); + + for (n, rule) in self.rules.iter().enumerate() { + let regex = &rule.regex; + + let regex_root = if let Some(root) = regex.root() { + root + } else { + result.push(Default::default()); + + continue; + }; + + let regex_len = regex.len(); + + /// A convenient macro to retrieve the children from the + /// original regular expression with error propagation. + macro_rules! children { + ($n:expr) => { + regex + .children_of($n) + .map_err(|_| Error::IndexOutOfBounds($n, regex_len))? + }; + } + + /// A convenient macro to retrieve the label from the + /// original regular expression with error propagation. + macro_rules! label { + ($n:expr) => { + regex + .vertex_label($n) + .map_err(|_| Error::IndexOutOfBounds($n, regex_len))? + }; + } + + let parents = regex.parents_array().map_err(|e| match e { + nfa::error::Error::UnknownNode(n) => Error::IndexOutOfBounds(n, regex_len), + nfa::error::Error::Cycle => Error::BuildFail(n, ParseError::Cycle), + _ => unreachable!(), + })?; + + use RegexType::*; + use TNT::*; + + let mut local_result: Vec<RegexType<TNT>> = Vec::with_capacity(regex_len * 2); + let mut builder = ALGBuilder::default(); + + /// Perform a depth-first copy + macro_rules! df_copy { + ($parent:expr, $index:expr) => { + match label!($index) { + Kleene | Plus | Optional | Or | Paren | Empty => { + let mut stack = vec![($parent, $index)]; + + while let Some((top_parent, top_index)) = stack.pop() { + let label = label!(top_index); + let label = match label { + Lit(top_tnt) => Lit(Ter(self.pack_tnt(top_tnt).unwrap())), + _ => label, + }; + + local_result.push(label); + + let new = builder.add_vertex(); + + builder.add_edge(top_parent, new, ()).unwrap(); + + stack.extend(children!(top_index).map(|child| (new, child))); + } + } + Lit(remain_tnt) => { + local_result.push(Lit(Ter(self.pack_tnt(remain_tnt).unwrap()))); + let new = builder.add_vertex(); + builder.add_edge($parent, new, ()).unwrap(); + } + } + }; + } + + local_result.push(Or); + builder.add_vertex(); + + local_result.push(Lit(Ter(self.pack_tnt(Non(n)).unwrap()))); + let non_lit_index = builder.add_vertex(); + + builder.add_edge(0, non_lit_index, ()).unwrap(); + + // If this non-terminal is nullable, add an empty variant. + if self.is_nullable(n)? { + local_result.push(Empty); + let empty_index = builder.add_vertex(); + builder.add_edge(0, empty_index, ()).unwrap(); + } + + for first_node in self.first_nodes_of(n)?.copied() { + assert!(first_node < parents.len()); + + let tnt = match label!(first_node) { + Lit(tnt) => Lit(tnt), + _ => continue, + }; + + let mut parents_chain = { + let mut result = Vec::new(); + let mut stack = Vec::with_capacity(parents.len()); + + stack.push(first_node); + + while let Some(top) = stack.pop() { + assert!(top < parents.len()); + if let Some(parent) = parents.get(top).copied().unwrap() { + result.push(parent); + stack.push(parent.0); + } + } + + result.reverse(); + + result + }; + + if let Some((first, _)) = parents_chain.first() { + assert_eq!(*first, regex_root); + } else { + local_result.push(tnt); + let lit_index = builder.add_vertex(); + builder.add_edge(0, lit_index, ()).unwrap(); + + continue; + } + + // A different, "more local", root. + let mut local_root: usize; + + // Handle the direct parent + let (parent_node, parent_edge_index) = parents_chain.pop().unwrap(); + + match label!(parent_node) { + Kleene | Plus => { + // TODO: If parent_edge_index is 0, make a + // Plus node instead. + local_result.extend([Empty, tnt]); + + local_root = builder.add_vertex(); + let lit_index = builder.add_vertex(); + builder.add_edge(local_root, lit_index, ()).unwrap(); + + let iterator = children!(parent_node); + + for index in iterator.clone().skip(parent_edge_index + 1) { + df_copy!(local_root, index); + } + + local_result.push(Kleene); + let new_parent = builder.add_vertex(); + builder.add_edge(local_root, new_parent, ()).unwrap(); + + for index in iterator { + df_copy!(new_parent, index); + } + } + + Or => { + local_result.push(tnt); + local_root = builder.add_vertex(); + } + Optional | Empty => { + // If this path is taken, it should not be + // optional. + local_result.extend([Empty, tnt]); + local_root = builder.add_vertex(); + let lit_index = builder.add_vertex(); + builder.add_edge(local_root, lit_index, ()).unwrap(); + + for index in children!(parent_node).skip(parent_edge_index + 1) { + df_copy!(local_root, index); + } + } + Paren | Lit(_) => unreachable!(), + } + + // Handle successive parents + + for (node, edge_index) in parents_chain.into_iter() { + let node_type = label!(node); + + match node_type { + Kleene | Plus => { + // TODO: If edge_index is 0, then just + // make this a Plus node. + + local_result.push(Empty); + let new_index = builder.add_vertex(); + builder.add_edge(new_index, local_root, ()).unwrap(); + + local_root = new_index; + + let iterator = children!(node); + + for index in iterator.clone().skip(edge_index + 1) { + df_copy!(local_root, index); + } + + local_result.push(Kleene); + let new_parent = builder.add_vertex(); + builder.add_edge(local_root, new_parent, ()).unwrap(); + + for index in iterator { + df_copy!(new_parent, index); + } + } + RegexType::Or => {} + RegexType::Optional | RegexType::Empty => { + local_result.push(Empty); + let new_index = builder.add_vertex(); + builder.add_edge(new_index, local_root, ()).unwrap(); + local_root = new_index; + + for index in children!(node).skip(edge_index + 1) { + df_copy!(local_root, index); + } + } + RegexType::Paren | RegexType::Lit(_) => unreachable!(), + } + } + + builder.add_edge(0, local_root, ()).unwrap(); + } + + local_result.shrink_to_fit(); + + let graph = builder.build(); + + assert_eq!(graph.nodes_len(), local_result.len()); + + result.push( + DefaultRegex::new(graph, local_result) + .map_err(|_| Error::BuildFail(n, ParseError::Cycle))?, + ); + } + + assert_eq!(result.len(), non_len); + + self.accumulators = { + let mut acc_result = Vec::with_capacity(result.len() + 1); + acc_result.push(0); + + for rule in result.iter() { + acc_result.push(rule.len() + *acc_result.last().unwrap()); + } + + acc_result + }; + + Ok(result) + } + + /// Convert the regular language of left-linear closures to its + /// equivalent nondeterministic finite automaton. + /// + /// In the generation of the left-linear closure, the terminals + /// and non-terminals are packed into an unsigned integer. We + /// unpack them in converting to nondeterministic finite + /// automaton. + /// + /// The resulting nondeterministic finite automaton should be + /// transformed to its null-closure for use in our algorithm. + pub fn left_closure_to_nfa( + &self, + closure: &[DefaultRegex<TNT>], + ) -> Result<DefaultNFA<LabelType<TNT>>, Error> { + let label_transform = |tnt| match tnt { + TNT::Ter(t) => { + let new_tnt = self.unpack_tnt(t).map_err(|e| match e { + Error::IndexOutOfBounds(index, bound) => { + graph::error::Error::IndexOutOfBounds(index, bound) + } + _ => unreachable!(), + })?; + + Ok(SoC::Carry(new_tnt)) + } + TNT::Non(n) => Ok(SoC::Sub(n)), + }; + + let nfa = DefaultNFA::to_nfa(closure, label_transform, Some(TNT::Non(0)))?; + + Ok(nfa) + } +} |