diff options
author | JSDurand <mmemmew@gmail.com> | 2023-02-03 10:52:35 +0800 |
---|---|---|
committer | JSDurand <mmemmew@gmail.com> | 2023-02-03 10:52:35 +0800 |
commit | 265ff8f87dc7392fdf701f811eb2bf54d7bc6678 (patch) | |
tree | 35538c8ac7524e0d9f2acff8be21b72994728bc4 /grammar/src/lib.rs | |
parent | f28155105134b90fd86049c65478d307e0d8dbbc (diff) |
Finally produced the first correct forest
Finally the prototype parser has produced the first correct forest.
It is my first time to generate a correct forest, in fact, ever since
the beginning of this project.
Diffstat (limited to 'grammar/src/lib.rs')
-rw-r--r-- | grammar/src/lib.rs | 160 |
1 files changed, 119 insertions, 41 deletions
diff --git a/grammar/src/lib.rs b/grammar/src/lib.rs index 2c17a5f..a8e0fd7 100644 --- a/grammar/src/lib.rs +++ b/grammar/src/lib.rs @@ -338,7 +338,7 @@ impl Grammar { self.accumulators .get(n) .copied() - .ok_or_else(|| Error::IndexOutOfBounds(n, self.total())) + .ok_or_else(|| Error::IndexOutOfBounds(n, self.non_num())) } /// Return the index of the rules a rule position belongs to. @@ -476,17 +476,28 @@ impl Grammar { } } + /// Return true if and only if the terminal can appear as the + /// first terminal in a string expanded from the non-terminal. + #[inline] + pub fn is_first_of(&self, non_terminal: usize, terminal: usize) -> Result<bool, Error> { + Ok(self + .firsts + .get(non_terminal) + .ok_or(Error::IndexOutOfBounds(non_terminal, self.firsts.len()))? + .contains(&Some(terminal))) + } + /// Return true if and only if the non-terminal is nullable. #[inline] pub fn is_nullable(&self, non_terminal: usize) -> Result<bool, Error> { Ok(self .firsts .get(non_terminal) - .ok_or(Error::IndexOutOfBounds(non_terminal, self.non.len()))? + .ok_or(Error::IndexOutOfBounds(non_terminal, self.firsts.len()))? .contains(&None)) } - // FIXME: We shall use a label to query this information as well, + // REVIEW: We shall use a label to query this information as well, // probably. /// Query the expansion information from the position `pos1` to @@ -497,14 +508,6 @@ impl Grammar { pos1: usize, pos2: usize, ) -> Result<Option<&[(usize, usize)]>, Error> { - if pos1 >= self.total() { - return Err(Error::IndexOutOfBounds(pos1, self.total())); - } - - if pos2 >= self.total() { - return Err(Error::IndexOutOfBounds(pos2, self.total())); - } - match self.state { GrammarState::AfterLeftClosure => {} _ => { @@ -522,14 +525,6 @@ impl Grammar { /// the position `pos2` . #[inline] pub fn query_reduction(&self, pos1: usize, pos2: usize) -> Result<Option<&[usize]>, Error> { - if pos1 >= self.total() { - return Err(Error::IndexOutOfBounds(pos1, self.total())); - } - - if pos2 >= self.total() { - return Err(Error::IndexOutOfBounds(pos2, self.total())); - } - match self.state { GrammarState::AfterLeftClosure => {} _ => { @@ -591,43 +586,122 @@ impl Grammar { let mut left_p = first_label.is_left_p() || second_label.is_left_p(); + // if first_source == 0 && second_label.get_moved() == 15 { + // dbg!(second_source, second_target, first_label, second_label); + // dbg!(self.expansion_map.get(&(second_source, second_target))); + // dbg!(self.expansion_map.get(&(first_source, second_target))); + // } + // Record left-linear expansion information. - if let Some(second_nt) = self.get_nt_start_in_nfa(second_source) { + let original_expansion = self + .expansion_map + .get(&(second_source, second_target)) + .cloned(); + + let second_nt_start = self.get_nt_start_in_nfa(second_source).is_some(); + + if !second_nt_start + && !matches!(self.expansion_map.get(&(first_source, second_target)), + Some(expansion) + if expansion.len() >= + original_expansion + .as_ref() + .map(|vec| vec.len()) + .unwrap_or(1)) + { + if let Some(original_expansion) = &original_expansion { + self.expansion_map + .insert((first_source, second_target), original_expansion.clone()); + } else { + let this_nt = self + .get_rule_num(second_source.div_euclid(2)) + .unwrap_or_else(|_| self.non_num()); + + self.expansion_map.insert( + (first_source, second_target), + vec![(this_nt, second_label.get_moved())], + ); + } + } else if second_nt_start { left_p = true; - if !self - .expansion_map - .contains_key(&(first_source, second_target)) + let original_moved = match self.expansion_map.get(&(first_source, second_source)) { + Some(old_expansion) if !old_expansion.is_empty() => old_expansion.last().unwrap().1, + _ => first_label.get_moved(), + }; + + let original_nt = self + .get_rule_num(first_source.div_euclid(2)) + .unwrap_or_else(|_| self.non_num()); + + if !matches!(self.expansion_map.get(&(first_source, second_target)), + Some(expansion) + if expansion.len() >= + original_expansion + .as_ref() + .map(|vec| vec.len() + 1) + .unwrap_or(1)) { - let original_expansion = self.expansion_map.get(&(second_source, second_target)); - self.expansion_map.insert( (first_source, second_target), if let Some(original_expansion) = original_expansion { - let mut result = original_expansion.clone(); - result.push((second_nt, second_label.get_moved())); + let mut result = original_expansion; + result.push((original_nt, original_moved)); result } else { - vec![(second_nt, second_label.get_moved())] + vec![(original_nt, original_moved)] }, ); } - } else if let Some(second_nt) = self.get_nt_end_in_nfa(second_source) { - let original_reduction = self.reduction_map.get(&(second_source, second_target)); + } - self.reduction_map.insert( - (first_source, second_target), - if let Some(original_reduction) = original_reduction { - let mut result = original_reduction.clone(); - result.push(second_nt); + // Record reduction information. - result - } else { - vec![second_nt] - }, - ); + let original_reduction = self + .reduction_map + .get(&(second_source, second_target)) + .cloned(); + + let second_nt_end = self.get_nt_end_in_nfa(second_source); + + if second_nt_end.is_none() + && !matches!(self.reduction_map.get(&(first_source, second_target)), + Some(reduction) + if reduction.len() >= + original_reduction + .as_ref() + .map(|vec| vec.len()) + .unwrap_or(0)) + { + if let Some(original_reduction) = &original_reduction { + self.reduction_map + .insert((first_source, second_target), original_reduction.clone()); + } + } + + if let Some(second_nt) = second_nt_end { + if !matches!(self.reduction_map.get(&(first_source, second_target)), + Some(reduction) + if reduction.len() >= + original_reduction + .as_ref() + .map(|vec| vec.len() + 1) + .unwrap_or(1)) + { + self.reduction_map.insert( + (first_source, second_target), + if let Some(original_reduction) = original_reduction { + let mut result = original_reduction; + result.push(second_nt); + + result + } else { + vec![second_nt] + }, + ); + } } NfaLabel::new(second_label.get_value(), second_label.get_moved(), left_p) @@ -659,6 +733,10 @@ impl Grammar { /// Return a string describing a rule position. pub fn rule_pos_to_string(&self, pos: usize) -> Result<String, Error> { + if pos == self.total() { + return Ok("End of rules".to_owned()); + } + let rule_num = { let mut result = None; @@ -690,7 +768,7 @@ impl Grammar { if rule_num == 0 { pos } else { - pos - self.accumulators.get(rule_num - 1).copied().unwrap() + pos - self.accumulators.get(rule_num).copied().unwrap() }, ) .unwrap()) |