From e8ea01319b3a9032a3f4f69f65e9ca96562b87b9 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Sun, 22 Jan 2023 11:49:47 +0800 Subject: forest: clone correctly Now the forest can detect if a node is packed or cloned, and correctly clones a node in those circumstances. But it still needs to be tested. --- grammar/src/label.rs | 22 ++++++++++--- grammar/src/lib.rs | 91 +++++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 97 insertions(+), 16 deletions(-) (limited to 'grammar') diff --git a/grammar/src/label.rs b/grammar/src/label.rs index 58eaddc..05b0b1e 100644 --- a/grammar/src/label.rs +++ b/grammar/src/label.rs @@ -93,10 +93,24 @@ impl GrammarLabel { /// Return a string description with the help of the associated /// grammar. pub fn to_string(&self, grammar: &Grammar) -> Result { - // REVIEW: It needs at least 34 bytes, so we just double it. - // Of course we can also calculate the length exactly, but - // this can be postponed till later. - let mut s = String::with_capacity(68); + // First calculate the length of the resulting string. + + let mut num = 2 + 7 + 14 + 6; + + num += self.label().name(grammar)?.len(); + + num += format!("{} ", self.start()).len(); + + if let Some(end) = self.end() { + num += format!("to {end}").len(); + } else { + num += 7; + } + + let num = num; + + let mut s = String::with_capacity(num); + s.push_str("a "); if self.is_packed() { diff --git a/grammar/src/lib.rs b/grammar/src/lib.rs index 50a2b9b..b29fc69 100644 --- a/grammar/src/lib.rs +++ b/grammar/src/lib.rs @@ -213,12 +213,22 @@ pub struct Grammar { // The following attribute is empty until we call `closure` on the // NFA with `transform_label_null_epsilon` as the transformer. /// A hash map that maps a tuple `(pos1, pos2)` of positions - /// `pos1` and `pos2` in the rules to a vector of rule positions. + /// `pos1` and `pos2` in the rules to a vector of non-terminals + /// and rule positions. + /// /// This vector means that in order to expand from `pos1` to - /// `pos`, it is necessary to expand according to the positions in - /// the vector, so we need to add all these expansions into the - /// parse forest later. - expansion_map: HashMap<(usize, usize), Vec>, + /// `pos`, it is necessary to expand according to the + /// non-terminals and positions in the vector, so we need to add + /// all these expansions into the parse forest later. + expansion_map: HashMap<(usize, usize), Vec<(usize, usize)>>, + /// A hash map that maps a tuple `(pos1, pos2)` of positions + /// `pos1` and `pos2` in the rules to a vector of non-terminals. + /// + /// This vector means that in order to expand from `pos1` to + /// `pos`, it is necessary to expand according to the + /// non-terminals, so we can use this information to find out + /// where to join a new node in the parse forest later. + reduction_map: HashMap<(usize, usize), Vec>, /// The state of the grammar, which tells us what information has /// been computed for this grammar. state: GrammarState, @@ -269,6 +279,7 @@ impl Grammar { let state = Default::default(); let expansion_map = Default::default(); + let reduction_map = Default::default(); // NOTE: We cannot calculate accumulators here, as we want the // accumulators of the regular expression of the left-closure, @@ -283,6 +294,7 @@ impl Grammar { first_nodes, state, expansion_map, + reduction_map, accumulators, } } @@ -350,7 +362,7 @@ impl Grammar { /// Query if a position is the starting position of a /// non-terminal. If it is, return the non-terminal, else return - /// `None` . + /// `None`. #[inline] pub fn get_nt_start_in_nfa(&self, pos: usize) -> Option { for (index, accumulator) in self.accumulators.iter().copied().enumerate() { @@ -370,6 +382,18 @@ impl Grammar { None } + /// Query if a position is the ending position of a + /// non-terminal. If it is, return the non-terminal, else return + /// `None`. + #[inline] + pub fn get_nt_end_in_nfa(&self, pos: usize) -> Option { + if pos >= 1 { + self.get_nt_start_in_nfa(pos - 1) + } else { + None + } + } + /// Return the number of terminals. #[inline] pub fn ter_num(&self) -> usize { @@ -465,7 +489,11 @@ impl Grammar { /// Query the expansion information from the position `pos1` to /// the position `pos2` . #[inline] - pub fn query_expansion(&self, pos1: usize, pos2: usize) -> Result, Error> { + pub fn query_expansion( + &self, + pos1: usize, + pos2: usize, + ) -> Result, Error> { if pos1 >= self.total() { return Err(Error::IndexOutOfBounds(pos1, self.total())); } @@ -484,11 +512,36 @@ impl Grammar { } } - Ok(self.expansion_map.get(&(pos1, pos2)).map(|v| v.as_ref())) + Ok(self.expansion_map.get(&(pos1, pos2)).map(AsRef::as_ref)) } - // REVIEW: Do we have a better way to record expansion information - // than to compute the transitive closure? + /// Query the reduction information from the position `pos1` to + /// the position `pos2` . + #[inline] + pub fn query_reduction(&self, pos1: usize, pos2: usize) -> Result, Error> { + if pos1 >= self.total() { + return Err(Error::IndexOutOfBounds(pos1, self.total())); + } + + if pos2 >= self.total() { + return Err(Error::IndexOutOfBounds(pos2, self.total())); + } + + match self.state { + GrammarState::AfterLeftClosure => {} + _ => { + return Err(Error::WrongState( + self.state, + GrammarState::AfterLeftClosure, + )); + } + } + + Ok(self.reduction_map.get(&(pos1, pos2)).map(AsRef::as_ref)) + } + + // REVIEW: Do we have a better way to record expansion and + // reduction information than to compute the transitive closure? // REVIEW: We need a way to eliminate those left-linearly expanded // edges whose labels had already been considered, and we need to @@ -550,14 +603,28 @@ impl Grammar { (first_source, second_target), if let Some(original_expansion) = original_expansion { let mut result = original_expansion.clone(); - result.push(second_nt); + result.push((second_nt, second_label.get_moved())); result } else { - vec![second_nt] + vec![(second_nt, second_label.get_moved())] }, ); } + } else if let Some(second_nt) = self.get_nt_end_in_nfa(second_source) { + let original_reduction = self.reduction_map.get(&(second_source, second_target)); + + self.reduction_map.insert( + (first_source, second_target), + if let Some(original_reduction) = original_reduction { + let mut result = original_reduction.clone(); + result.push(second_nt); + + result + } else { + vec![second_nt] + }, + ); } NfaLabel::new(second_label.get_value(), second_label.get_moved(), left_p) -- cgit v1.2.3-18-g5258