From 265ff8f87dc7392fdf701f811eb2bf54d7bc6678 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Fri, 3 Feb 2023 10:52:35 +0800 Subject: Finally produced the first correct forest Finally the prototype parser has produced the first correct forest. It is my first time to generate a correct forest, in fact, ever since the beginning of this project. --- grammar/src/left_closure.rs | 8 +- grammar/src/lib.rs | 160 +++++++++++++++++++++++++++---------- grammar/src/test_grammar_helper.rs | 5 -- 3 files changed, 124 insertions(+), 49 deletions(-) (limited to 'grammar') diff --git a/grammar/src/left_closure.rs b/grammar/src/left_closure.rs index 39461f0..ddee28d 100644 --- a/grammar/src/left_closure.rs +++ b/grammar/src/left_closure.rs @@ -25,9 +25,11 @@ impl Grammar { GrammarState::AfterComputeFirst, )) } - GrammarState::AfterLeftClosure - | GrammarState::AfterNFA - | GrammarState::AfterComputeFirst => {} + GrammarState::AfterComputeFirst => { + self.state = GrammarState::AfterLeftClosure; + } + + _ => {} } let non_len = self.non_num(); diff --git a/grammar/src/lib.rs b/grammar/src/lib.rs index 2c17a5f..a8e0fd7 100644 --- a/grammar/src/lib.rs +++ b/grammar/src/lib.rs @@ -338,7 +338,7 @@ impl Grammar { self.accumulators .get(n) .copied() - .ok_or_else(|| Error::IndexOutOfBounds(n, self.total())) + .ok_or_else(|| Error::IndexOutOfBounds(n, self.non_num())) } /// Return the index of the rules a rule position belongs to. @@ -476,17 +476,28 @@ impl Grammar { } } + /// Return true if and only if the terminal can appear as the + /// first terminal in a string expanded from the non-terminal. + #[inline] + pub fn is_first_of(&self, non_terminal: usize, terminal: usize) -> Result { + Ok(self + .firsts + .get(non_terminal) + .ok_or(Error::IndexOutOfBounds(non_terminal, self.firsts.len()))? + .contains(&Some(terminal))) + } + /// Return true if and only if the non-terminal is nullable. #[inline] pub fn is_nullable(&self, non_terminal: usize) -> Result { Ok(self .firsts .get(non_terminal) - .ok_or(Error::IndexOutOfBounds(non_terminal, self.non.len()))? + .ok_or(Error::IndexOutOfBounds(non_terminal, self.firsts.len()))? .contains(&None)) } - // FIXME: We shall use a label to query this information as well, + // REVIEW: We shall use a label to query this information as well, // probably. /// Query the expansion information from the position `pos1` to @@ -497,14 +508,6 @@ impl Grammar { pos1: usize, pos2: usize, ) -> Result, Error> { - if pos1 >= self.total() { - return Err(Error::IndexOutOfBounds(pos1, self.total())); - } - - if pos2 >= self.total() { - return Err(Error::IndexOutOfBounds(pos2, self.total())); - } - match self.state { GrammarState::AfterLeftClosure => {} _ => { @@ -522,14 +525,6 @@ impl Grammar { /// the position `pos2` . #[inline] pub fn query_reduction(&self, pos1: usize, pos2: usize) -> Result, Error> { - if pos1 >= self.total() { - return Err(Error::IndexOutOfBounds(pos1, self.total())); - } - - if pos2 >= self.total() { - return Err(Error::IndexOutOfBounds(pos2, self.total())); - } - match self.state { GrammarState::AfterLeftClosure => {} _ => { @@ -591,43 +586,122 @@ impl Grammar { let mut left_p = first_label.is_left_p() || second_label.is_left_p(); + // if first_source == 0 && second_label.get_moved() == 15 { + // dbg!(second_source, second_target, first_label, second_label); + // dbg!(self.expansion_map.get(&(second_source, second_target))); + // dbg!(self.expansion_map.get(&(first_source, second_target))); + // } + // Record left-linear expansion information. - if let Some(second_nt) = self.get_nt_start_in_nfa(second_source) { + let original_expansion = self + .expansion_map + .get(&(second_source, second_target)) + .cloned(); + + let second_nt_start = self.get_nt_start_in_nfa(second_source).is_some(); + + if !second_nt_start + && !matches!(self.expansion_map.get(&(first_source, second_target)), + Some(expansion) + if expansion.len() >= + original_expansion + .as_ref() + .map(|vec| vec.len()) + .unwrap_or(1)) + { + if let Some(original_expansion) = &original_expansion { + self.expansion_map + .insert((first_source, second_target), original_expansion.clone()); + } else { + let this_nt = self + .get_rule_num(second_source.div_euclid(2)) + .unwrap_or_else(|_| self.non_num()); + + self.expansion_map.insert( + (first_source, second_target), + vec![(this_nt, second_label.get_moved())], + ); + } + } else if second_nt_start { left_p = true; - if !self - .expansion_map - .contains_key(&(first_source, second_target)) + let original_moved = match self.expansion_map.get(&(first_source, second_source)) { + Some(old_expansion) if !old_expansion.is_empty() => old_expansion.last().unwrap().1, + _ => first_label.get_moved(), + }; + + let original_nt = self + .get_rule_num(first_source.div_euclid(2)) + .unwrap_or_else(|_| self.non_num()); + + if !matches!(self.expansion_map.get(&(first_source, second_target)), + Some(expansion) + if expansion.len() >= + original_expansion + .as_ref() + .map(|vec| vec.len() + 1) + .unwrap_or(1)) { - let original_expansion = self.expansion_map.get(&(second_source, second_target)); - self.expansion_map.insert( (first_source, second_target), if let Some(original_expansion) = original_expansion { - let mut result = original_expansion.clone(); - result.push((second_nt, second_label.get_moved())); + let mut result = original_expansion; + result.push((original_nt, original_moved)); result } else { - vec![(second_nt, second_label.get_moved())] + vec![(original_nt, original_moved)] }, ); } - } else if let Some(second_nt) = self.get_nt_end_in_nfa(second_source) { - let original_reduction = self.reduction_map.get(&(second_source, second_target)); + } - self.reduction_map.insert( - (first_source, second_target), - if let Some(original_reduction) = original_reduction { - let mut result = original_reduction.clone(); - result.push(second_nt); + // Record reduction information. - result - } else { - vec![second_nt] - }, - ); + let original_reduction = self + .reduction_map + .get(&(second_source, second_target)) + .cloned(); + + let second_nt_end = self.get_nt_end_in_nfa(second_source); + + if second_nt_end.is_none() + && !matches!(self.reduction_map.get(&(first_source, second_target)), + Some(reduction) + if reduction.len() >= + original_reduction + .as_ref() + .map(|vec| vec.len()) + .unwrap_or(0)) + { + if let Some(original_reduction) = &original_reduction { + self.reduction_map + .insert((first_source, second_target), original_reduction.clone()); + } + } + + if let Some(second_nt) = second_nt_end { + if !matches!(self.reduction_map.get(&(first_source, second_target)), + Some(reduction) + if reduction.len() >= + original_reduction + .as_ref() + .map(|vec| vec.len() + 1) + .unwrap_or(1)) + { + self.reduction_map.insert( + (first_source, second_target), + if let Some(original_reduction) = original_reduction { + let mut result = original_reduction; + result.push(second_nt); + + result + } else { + vec![second_nt] + }, + ); + } } NfaLabel::new(second_label.get_value(), second_label.get_moved(), left_p) @@ -659,6 +733,10 @@ impl Grammar { /// Return a string describing a rule position. pub fn rule_pos_to_string(&self, pos: usize) -> Result { + if pos == self.total() { + return Ok("End of rules".to_owned()); + } + let rule_num = { let mut result = None; @@ -690,7 +768,7 @@ impl Grammar { if rule_num == 0 { pos } else { - pos - self.accumulators.get(rule_num - 1).copied().unwrap() + pos - self.accumulators.get(rule_num).copied().unwrap() }, ) .unwrap()) diff --git a/grammar/src/test_grammar_helper.rs b/grammar/src/test_grammar_helper.rs index 984eb50..9d86865 100644 --- a/grammar/src/test_grammar_helper.rs +++ b/grammar/src/test_grammar_helper.rs @@ -88,7 +88,6 @@ fn scan_tnt( } /// Return a simple testing grammar. -#[allow(dead_code)] pub fn new_grammar() -> Result> { let ter = vec![Terminal::new("a".to_owned()), Terminal::new("b".to_owned())]; let non = vec![ @@ -126,7 +125,6 @@ pub fn new_grammar() -> Result> { /// Return a grammar that might serve as the grammar for my notes, /// somehow, without regular expressions. -#[allow(dead_code)] pub fn new_notes_grammar_no_regexp() -> Result> { let ter = vec![ Terminal::new("NL".to_owned()), @@ -250,7 +248,6 @@ pub fn new_notes_grammar_no_regexp() -> Result Result> { let ter = vec![ Terminal::new("NL".to_owned()), @@ -353,7 +350,6 @@ pub fn new_notes_grammar() -> Result> { } /// Return a grammar that can express parentheses. -#[allow(dead_code)] pub fn new_paren_grammar() -> Result> { let ter = vec![ Terminal::new("LEFT".to_owned()), @@ -444,7 +440,6 @@ pub fn new_left_recursive_grammar() -> Result Result> { let ter = vec![Terminal::new("B".to_owned()), Terminal::new("C".to_owned())]; let non = vec![ -- cgit v1.2.3-18-g5258