//! This file implements some functions to compute the regular //! language of left-linear closure of a grammar. use super::*; use nfa::LabelType; impl Grammar { /// Return the regular language of the left-linear closures of /// non-terminals in the grammar. /// /// The resulting vector is guaranteed to be of the same length as /// the number of non-terminals. /// /// The resulting regular language is not "self-contained". That /// is to say, its terminals indices are packed indices and are /// meaningless without the interpretation of the grammar. They /// should be converted to a nondeterministic finite automaton and /// then to its null closure later on. pub fn left_closure(&mut self) -> Result>, Error> { match self.state { GrammarState::Initial => { return Err(Error::WrongState( self.state, GrammarState::AfterComputeFirst, )) } GrammarState::AfterLeftClosure | GrammarState::AfterNFA | GrammarState::AfterComputeFirst => {} } let non_len = self.non_num(); let mut result = Vec::with_capacity(non_len); for (n, rule) in self.rules.iter().enumerate() { let regex = &rule.regex; let regex_root = if let Some(root) = regex.root() { root } else { result.push(Default::default()); continue; }; let regex_len = regex.len(); /// A convenient macro to retrieve the children from the /// original regular expression with error propagation. macro_rules! children { ($n:expr) => { regex .children_of($n) .map_err(|_| Error::IndexOutOfBounds($n, regex_len))? }; } /// A convenient macro to retrieve the label from the /// original regular expression with error propagation. macro_rules! label { ($n:expr) => { regex .vertex_label($n) .map_err(|_| Error::IndexOutOfBounds($n, regex_len))? }; } let parents = regex.parents_array().map_err(|e| match e { nfa::error::Error::UnknownNode(n) => Error::IndexOutOfBounds(n, regex_len), nfa::error::Error::Cycle => Error::BuildFail(n, ParseError::Cycle), _ => unreachable!(), })?; use RegexType::*; use TNT::*; let mut local_result: Vec> = Vec::with_capacity(regex_len * 2); let mut builder = ALGBuilder::default(); /// Perform a depth-first copy macro_rules! df_copy { ($parent:expr, $index:expr) => { match label!($index) { Kleene | Plus | Optional | Or | Paren | Empty => { let mut stack = vec![($parent, $index)]; while let Some((top_parent, top_index)) = stack.pop() { let label = label!(top_index); let label = match label { Lit(top_tnt) => Lit(Ter(self.pack_tnt(top_tnt).unwrap())), _ => label, }; local_result.push(label); let new = builder.add_vertex(); builder.add_edge(top_parent, new, ()).unwrap(); stack.extend(children!(top_index).map(|child| (new, child))); } } Lit(remain_tnt) => { local_result.push(Lit(Ter(self.pack_tnt(remain_tnt).unwrap()))); let new = builder.add_vertex(); builder.add_edge($parent, new, ()).unwrap(); } } }; } local_result.push(Or); builder.add_vertex(); local_result.push(Lit(Ter(self.pack_tnt(Non(n)).unwrap()))); let non_lit_index = builder.add_vertex(); builder.add_edge(0, non_lit_index, ()).unwrap(); // If this non-terminal is nullable, add an empty variant. if self.is_nullable(n)? { local_result.push(Empty); let empty_index = builder.add_vertex(); builder.add_edge(0, empty_index, ()).unwrap(); } for first_node in self.first_nodes_of(n)?.copied() { assert!(first_node < parents.len()); let tnt = match label!(first_node) { Lit(tnt) => Lit(tnt), _ => continue, }; let mut parents_chain = { let mut result = Vec::new(); let mut stack = Vec::with_capacity(parents.len()); stack.push(first_node); while let Some(top) = stack.pop() { assert!(top < parents.len()); if let Some(parent) = parents.get(top).copied().unwrap() { result.push(parent); stack.push(parent.0); } } result.reverse(); result }; if let Some((first, _)) = parents_chain.first() { assert_eq!(*first, regex_root); } else { local_result.push(tnt); let lit_index = builder.add_vertex(); builder.add_edge(0, lit_index, ()).unwrap(); continue; } // A different, "more local", root. let mut local_root: usize; // Handle the direct parent let (parent_node, parent_edge_index) = parents_chain.pop().unwrap(); match label!(parent_node) { Kleene | Plus => { // TODO: If parent_edge_index is 0, make a // Plus node instead. local_result.extend([Empty, tnt]); local_root = builder.add_vertex(); let lit_index = builder.add_vertex(); builder.add_edge(local_root, lit_index, ()).unwrap(); let iterator = children!(parent_node); for index in iterator.clone().skip(parent_edge_index + 1) { df_copy!(local_root, index); } local_result.push(Kleene); let new_parent = builder.add_vertex(); builder.add_edge(local_root, new_parent, ()).unwrap(); for index in iterator { df_copy!(new_parent, index); } } Or => { local_result.push(tnt); local_root = builder.add_vertex(); } Optional | Empty => { // If this path is taken, it should not be // optional. local_result.extend([Empty, tnt]); local_root = builder.add_vertex(); let lit_index = builder.add_vertex(); builder.add_edge(local_root, lit_index, ()).unwrap(); for index in children!(parent_node).skip(parent_edge_index + 1) { df_copy!(local_root, index); } } Paren | Lit(_) => unreachable!(), } // Handle successive parents for (node, edge_index) in parents_chain.into_iter() { let node_type = label!(node); match node_type { Kleene | Plus => { // TODO: If edge_index is 0, then just // make this a Plus node. local_result.push(Empty); let new_index = builder.add_vertex(); builder.add_edge(new_index, local_root, ()).unwrap(); local_root = new_index; let iterator = children!(node); for index in iterator.clone().skip(edge_index + 1) { df_copy!(local_root, index); } local_result.push(Kleene); let new_parent = builder.add_vertex(); builder.add_edge(local_root, new_parent, ()).unwrap(); for index in iterator { df_copy!(new_parent, index); } } RegexType::Or => {} RegexType::Optional | RegexType::Empty => { local_result.push(Empty); let new_index = builder.add_vertex(); builder.add_edge(new_index, local_root, ()).unwrap(); local_root = new_index; for index in children!(node).skip(edge_index + 1) { df_copy!(local_root, index); } } RegexType::Paren | RegexType::Lit(_) => unreachable!(), } } builder.add_edge(0, local_root, ()).unwrap(); } local_result.shrink_to_fit(); let graph = builder.build(); assert_eq!(graph.nodes_len(), local_result.len()); result.push( DefaultRegex::new(graph, local_result) .map_err(|_| Error::BuildFail(n, ParseError::Cycle))?, ); } assert_eq!(result.len(), non_len); self.accumulators = { let mut acc_result = Vec::with_capacity(result.len() + 1); acc_result.push(0); for rule in result.iter() { acc_result.push(rule.len() + *acc_result.last().unwrap()); } acc_result }; Ok(result) } /// Convert the regular language of left-linear closures to its /// equivalent nondeterministic finite automaton. /// /// In the generation of the left-linear closure, the terminals /// and non-terminals are packed into an unsigned integer. We /// unpack them in converting to nondeterministic finite /// automaton. /// /// The resulting nondeterministic finite automaton should be /// transformed to its null-closure for use in our algorithm. pub fn left_closure_to_nfa( &self, closure: &[DefaultRegex], ) -> Result>, Error> { let label_transform = |tnt| match tnt { TNT::Ter(t) => { let new_tnt = self.unpack_tnt(t).map_err(|e| match e { Error::IndexOutOfBounds(index, bound) => { graph::error::Error::IndexOutOfBounds(index, bound) } _ => unreachable!(), })?; Ok(SoC::Carry(new_tnt)) } TNT::Non(n) => Ok(SoC::Sub(n)), }; let nfa = DefaultNFA::to_nfa(closure, label_transform, Some(TNT::Non(0)))?; Ok(nfa) } }