//! This file implements a function for computing the set of first //! terminals for a grammar. use super::*; impl Grammar { /// Compute the set of terminals that can appear as the first /// terminal in some left-linear derivation of a non-terminal, for /// every non-terminal. /// /// This is an algorithm that computes the transitive closure, /// which is a common approach for this task. But perhaps there /// are more efficient approaches? /// /// Also the function computes the set of "reachable nodes" in the /// process, and records the information in the `first_nodes` /// attribute. pub fn compute_firsts(&mut self) -> Result<(), Error> { match self.state { GrammarState::Initial => { self.state = GrammarState::AfterComputeFirst; } _ => { // This has been called already. return Ok(()); } } let mut updated = true; let non_len = self.non_num(); use StackElement::{Seen, Unseen}; while updated { updated = false; for (n, regex) in self.rules.iter().map(|rule| &rule.regex).enumerate() { let root = if let Some(root) = regex.root() { root } else { if !self.is_nullable(n)? { updated = true; self.firsts.get_mut(n).unwrap().insert(None); // The default construction of a grammar // reserves some space for each vector, so // explicitly setting this can reduce some // minor memory overhead. let pointer = self.first_nodes.get_mut(n).unwrap(); pointer.clear(); pointer.shrink_to_fit(); } continue; }; let regex_len = regex.len(); let mut stack: Vec = Vec::with_capacity(regex_len); stack.push(Unseen(root)); let mut children_sets_stack: Vec>> = Vec::with_capacity(regex_len); let mut children_nodes_stack: Vec> = Vec::with_capacity(regex_len); while let Some(top) = stack.pop() { let top_index = top.index(); let is_seen = top.is_seen(); match regex .vertex_label(top_index) .map_err(|_| Error::IndexOutOfBounds(top_index, regex_len))? { RegexType::Kleene => { if !is_seen { stack.push(Seen(top_index)); for child in regex.children_of(top_index).unwrap() { stack.push(Unseen(child)); } } else { let degree = regex.degree(top_index).unwrap(); let children_stack_len = children_sets_stack.len(); let children_nodes_len = children_nodes_stack.len(); assert!( children_stack_len >= degree, "not enough stack elements for {top_index}" ); assert!( children_nodes_len >= degree, "not enough stack elements for {top_index}" ); let mut this_set = HashSet::new(); this_set.insert(None); let mut this_nodes = HashSet::new(); this_nodes.insert(top_index); if degree == 0 { children_sets_stack.push(this_set); children_nodes_stack.push(this_nodes); continue; } let mut stop = false; for (child_set, child_nodes) in children_sets_stack .drain((children_stack_len - degree)..) .zip( children_nodes_stack.drain((children_nodes_len - degree)..), ) { if stop { break; } if !child_set.contains(&None) { stop = true; } this_set.extend(child_set); this_nodes.extend(child_nodes); } children_sets_stack.push(this_set); children_nodes_stack.push(this_nodes); } } RegexType::Plus => { if !is_seen { stack.push(Seen(top_index)); for child in regex.children_of(top_index).unwrap() { stack.push(Unseen(child)); } } else { let degree = regex.degree(top_index).unwrap(); let children_stack_len = children_sets_stack.len(); let children_nodes_len = children_nodes_stack.len(); assert!( children_stack_len >= degree, "not enough stack elements for {top_index}" ); assert!( children_nodes_len >= degree, "not enough stack elements for {top_index}" ); let mut this_set = HashSet::new(); let mut this_nodes = HashSet::new(); this_nodes.insert(top_index); if degree == 0 { this_set.insert(None); children_sets_stack.push(this_set); children_nodes_stack.push(this_nodes); continue; } let mut stop = false; for (child_set, child_nodes) in children_sets_stack .drain((children_stack_len - degree)..) .zip( children_nodes_stack.drain((children_nodes_len - degree)..), ) { if stop { break; } if !child_set.contains(&None) { stop = true; } this_set.extend(child_set); this_nodes.extend(child_nodes); } if stop { this_set.remove(&None); } children_sets_stack.push(this_set); children_nodes_stack.push(this_nodes); } } RegexType::Optional => { if !is_seen { stack.push(Seen(top_index)); for child in regex.children_of(top_index).unwrap() { stack.push(Unseen(child)); } } else { let degree = regex.degree(top_index).unwrap(); let children_stack_len = children_sets_stack.len(); let children_nodes_len = children_nodes_stack.len(); assert!( children_stack_len >= degree, "not enough stack elements for {top_index}" ); assert!( children_nodes_len >= degree, "not enough stack elements for {top_index}" ); let mut this_set = HashSet::new(); this_set.insert(None); let mut this_nodes = HashSet::new(); this_nodes.insert(top_index); if degree == 0 { children_sets_stack.push(this_set); children_nodes_stack.push(this_nodes); continue; } let mut stop = false; for (child_set, child_nodes) in children_sets_stack .drain((children_stack_len - degree)..) .zip( children_nodes_stack.drain((children_nodes_len - degree)..), ) { if stop { break; } if !child_set.contains(&None) { stop = true; } this_set.extend(child_set.iter().copied()); this_nodes.extend(child_nodes.iter().copied()); } children_sets_stack.push(this_set); children_nodes_stack.push(this_nodes); } } RegexType::Or => { if !is_seen { stack.push(Seen(top_index)); for child in regex.children_of(top_index).unwrap() { stack.push(Unseen(child)); } } else { let degree = regex.degree(top_index).unwrap(); let children_stack_len = children_sets_stack.len(); let children_nodes_len = children_nodes_stack.len(); assert!( children_stack_len >= degree, "not enough stack elements for {top_index}" ); assert!( children_nodes_len >= degree, "not enough stack elements for {top_index}" ); let mut this_set = HashSet::new(); let mut this_nodes = HashSet::new(); this_nodes.insert(top_index); if degree == 0 { this_set.insert(None); children_sets_stack.push(this_set); children_nodes_stack.push(this_nodes); continue; } for (child_set, child_nodes) in children_sets_stack .drain((children_stack_len - degree)..) .zip( children_nodes_stack.drain((children_nodes_len - degree)..), ) { this_set.extend(child_set.iter().copied()); this_nodes.extend(child_nodes.iter().copied()); } children_sets_stack.push(this_set); children_nodes_stack.push(this_nodes); } } RegexType::Paren => { // Only for printing purposes let mut this_set = HashSet::new(); this_set.insert(None); children_sets_stack.push(this_set); let mut this_nodes = HashSet::new(); this_nodes.insert(top_index); children_nodes_stack.push(this_nodes); } RegexType::Empty => { if !is_seen { stack.push(Seen(top_index)); for child in regex.children_of(top_index).unwrap().rev() { stack.push(Unseen(child)); } } else { let degree = regex.degree(top_index).unwrap(); let children_stack_len = children_sets_stack.len(); let children_nodes_len = children_nodes_stack.len(); assert!( children_stack_len >= degree, "not enough stack elements for {top_index}" ); assert!( children_nodes_len >= degree, "not enough stack elements for {top_index}" ); let mut this_set = HashSet::new(); let mut this_nodes = HashSet::new(); this_nodes.insert(top_index); if degree == 0 { this_set.insert(None); children_sets_stack.push(this_set); children_nodes_stack.push(this_nodes); continue; } let mut stop = false; for (child_set, child_nodes) in children_sets_stack .drain((children_stack_len - degree)..) .zip( children_nodes_stack.drain((children_nodes_len - degree)..), ) { if stop { break; } if !child_set.contains(&None) { stop = true; } this_set.extend(child_set.iter().copied()); this_nodes.extend(child_nodes.iter().copied()); } if stop { this_set.remove(&None); } children_sets_stack.push(this_set); children_nodes_stack.push(this_nodes); } } RegexType::Lit(tnt) => { match tnt { TNT::Ter(t) => { let mut this_set = HashSet::with_capacity(1); this_set.insert(Some(t)); children_sets_stack.push(this_set); } TNT::Non(non) => { let this_set = self .firsts .get(non) .ok_or(Error::IndexOutOfBounds(non, non_len))? .clone(); children_sets_stack.push(this_set); } } let mut this_nodes = HashSet::with_capacity(1); this_nodes.insert(top_index); children_nodes_stack.push(this_nodes); } } } assert_eq!( children_sets_stack.len(), 1, "Too many elements left at the end" ); assert_eq!( children_nodes_stack.len(), 1, "Too many elements left at the end" ); for first in children_sets_stack.pop().unwrap().into_iter() { if !self.firsts.get(n).unwrap().contains(&first) { updated = true; self.firsts.get_mut(n).unwrap().insert(first); } } *self.first_nodes.get_mut(n).unwrap() = children_nodes_stack.pop().unwrap().into_iter().collect(); } } Ok(()) } }