summaryrefslogtreecommitdiff
path: root/grammar
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2023-02-03 10:52:35 +0800
committerJSDurand <mmemmew@gmail.com>2023-02-03 10:52:35 +0800
commit265ff8f87dc7392fdf701f811eb2bf54d7bc6678 (patch)
tree35538c8ac7524e0d9f2acff8be21b72994728bc4 /grammar
parentf28155105134b90fd86049c65478d307e0d8dbbc (diff)
Finally produced the first correct forest
Finally the prototype parser has produced the first correct forest. It is my first time to generate a correct forest, in fact, ever since the beginning of this project.
Diffstat (limited to 'grammar')
-rw-r--r--grammar/src/left_closure.rs8
-rw-r--r--grammar/src/lib.rs160
-rw-r--r--grammar/src/test_grammar_helper.rs5
3 files changed, 124 insertions, 49 deletions
diff --git a/grammar/src/left_closure.rs b/grammar/src/left_closure.rs
index 39461f0..ddee28d 100644
--- a/grammar/src/left_closure.rs
+++ b/grammar/src/left_closure.rs
@@ -25,9 +25,11 @@ impl Grammar {
GrammarState::AfterComputeFirst,
))
}
- GrammarState::AfterLeftClosure
- | GrammarState::AfterNFA
- | GrammarState::AfterComputeFirst => {}
+ GrammarState::AfterComputeFirst => {
+ self.state = GrammarState::AfterLeftClosure;
+ }
+
+ _ => {}
}
let non_len = self.non_num();
diff --git a/grammar/src/lib.rs b/grammar/src/lib.rs
index 2c17a5f..a8e0fd7 100644
--- a/grammar/src/lib.rs
+++ b/grammar/src/lib.rs
@@ -338,7 +338,7 @@ impl Grammar {
self.accumulators
.get(n)
.copied()
- .ok_or_else(|| Error::IndexOutOfBounds(n, self.total()))
+ .ok_or_else(|| Error::IndexOutOfBounds(n, self.non_num()))
}
/// Return the index of the rules a rule position belongs to.
@@ -476,17 +476,28 @@ impl Grammar {
}
}
+ /// Return true if and only if the terminal can appear as the
+ /// first terminal in a string expanded from the non-terminal.
+ #[inline]
+ pub fn is_first_of(&self, non_terminal: usize, terminal: usize) -> Result<bool, Error> {
+ Ok(self
+ .firsts
+ .get(non_terminal)
+ .ok_or(Error::IndexOutOfBounds(non_terminal, self.firsts.len()))?
+ .contains(&Some(terminal)))
+ }
+
/// Return true if and only if the non-terminal is nullable.
#[inline]
pub fn is_nullable(&self, non_terminal: usize) -> Result<bool, Error> {
Ok(self
.firsts
.get(non_terminal)
- .ok_or(Error::IndexOutOfBounds(non_terminal, self.non.len()))?
+ .ok_or(Error::IndexOutOfBounds(non_terminal, self.firsts.len()))?
.contains(&None))
}
- // FIXME: We shall use a label to query this information as well,
+ // REVIEW: We shall use a label to query this information as well,
// probably.
/// Query the expansion information from the position `pos1` to
@@ -497,14 +508,6 @@ impl Grammar {
pos1: usize,
pos2: usize,
) -> Result<Option<&[(usize, usize)]>, Error> {
- if pos1 >= self.total() {
- return Err(Error::IndexOutOfBounds(pos1, self.total()));
- }
-
- if pos2 >= self.total() {
- return Err(Error::IndexOutOfBounds(pos2, self.total()));
- }
-
match self.state {
GrammarState::AfterLeftClosure => {}
_ => {
@@ -522,14 +525,6 @@ impl Grammar {
/// the position `pos2` .
#[inline]
pub fn query_reduction(&self, pos1: usize, pos2: usize) -> Result<Option<&[usize]>, Error> {
- if pos1 >= self.total() {
- return Err(Error::IndexOutOfBounds(pos1, self.total()));
- }
-
- if pos2 >= self.total() {
- return Err(Error::IndexOutOfBounds(pos2, self.total()));
- }
-
match self.state {
GrammarState::AfterLeftClosure => {}
_ => {
@@ -591,43 +586,122 @@ impl Grammar {
let mut left_p = first_label.is_left_p() || second_label.is_left_p();
+ // if first_source == 0 && second_label.get_moved() == 15 {
+ // dbg!(second_source, second_target, first_label, second_label);
+ // dbg!(self.expansion_map.get(&(second_source, second_target)));
+ // dbg!(self.expansion_map.get(&(first_source, second_target)));
+ // }
+
// Record left-linear expansion information.
- if let Some(second_nt) = self.get_nt_start_in_nfa(second_source) {
+ let original_expansion = self
+ .expansion_map
+ .get(&(second_source, second_target))
+ .cloned();
+
+ let second_nt_start = self.get_nt_start_in_nfa(second_source).is_some();
+
+ if !second_nt_start
+ && !matches!(self.expansion_map.get(&(first_source, second_target)),
+ Some(expansion)
+ if expansion.len() >=
+ original_expansion
+ .as_ref()
+ .map(|vec| vec.len())
+ .unwrap_or(1))
+ {
+ if let Some(original_expansion) = &original_expansion {
+ self.expansion_map
+ .insert((first_source, second_target), original_expansion.clone());
+ } else {
+ let this_nt = self
+ .get_rule_num(second_source.div_euclid(2))
+ .unwrap_or_else(|_| self.non_num());
+
+ self.expansion_map.insert(
+ (first_source, second_target),
+ vec![(this_nt, second_label.get_moved())],
+ );
+ }
+ } else if second_nt_start {
left_p = true;
- if !self
- .expansion_map
- .contains_key(&(first_source, second_target))
+ let original_moved = match self.expansion_map.get(&(first_source, second_source)) {
+ Some(old_expansion) if !old_expansion.is_empty() => old_expansion.last().unwrap().1,
+ _ => first_label.get_moved(),
+ };
+
+ let original_nt = self
+ .get_rule_num(first_source.div_euclid(2))
+ .unwrap_or_else(|_| self.non_num());
+
+ if !matches!(self.expansion_map.get(&(first_source, second_target)),
+ Some(expansion)
+ if expansion.len() >=
+ original_expansion
+ .as_ref()
+ .map(|vec| vec.len() + 1)
+ .unwrap_or(1))
{
- let original_expansion = self.expansion_map.get(&(second_source, second_target));
-
self.expansion_map.insert(
(first_source, second_target),
if let Some(original_expansion) = original_expansion {
- let mut result = original_expansion.clone();
- result.push((second_nt, second_label.get_moved()));
+ let mut result = original_expansion;
+ result.push((original_nt, original_moved));
result
} else {
- vec![(second_nt, second_label.get_moved())]
+ vec![(original_nt, original_moved)]
},
);
}
- } else if let Some(second_nt) = self.get_nt_end_in_nfa(second_source) {
- let original_reduction = self.reduction_map.get(&(second_source, second_target));
+ }
- self.reduction_map.insert(
- (first_source, second_target),
- if let Some(original_reduction) = original_reduction {
- let mut result = original_reduction.clone();
- result.push(second_nt);
+ // Record reduction information.
- result
- } else {
- vec![second_nt]
- },
- );
+ let original_reduction = self
+ .reduction_map
+ .get(&(second_source, second_target))
+ .cloned();
+
+ let second_nt_end = self.get_nt_end_in_nfa(second_source);
+
+ if second_nt_end.is_none()
+ && !matches!(self.reduction_map.get(&(first_source, second_target)),
+ Some(reduction)
+ if reduction.len() >=
+ original_reduction
+ .as_ref()
+ .map(|vec| vec.len())
+ .unwrap_or(0))
+ {
+ if let Some(original_reduction) = &original_reduction {
+ self.reduction_map
+ .insert((first_source, second_target), original_reduction.clone());
+ }
+ }
+
+ if let Some(second_nt) = second_nt_end {
+ if !matches!(self.reduction_map.get(&(first_source, second_target)),
+ Some(reduction)
+ if reduction.len() >=
+ original_reduction
+ .as_ref()
+ .map(|vec| vec.len() + 1)
+ .unwrap_or(1))
+ {
+ self.reduction_map.insert(
+ (first_source, second_target),
+ if let Some(original_reduction) = original_reduction {
+ let mut result = original_reduction;
+ result.push(second_nt);
+
+ result
+ } else {
+ vec![second_nt]
+ },
+ );
+ }
}
NfaLabel::new(second_label.get_value(), second_label.get_moved(), left_p)
@@ -659,6 +733,10 @@ impl Grammar {
/// Return a string describing a rule position.
pub fn rule_pos_to_string(&self, pos: usize) -> Result<String, Error> {
+ if pos == self.total() {
+ return Ok("End of rules".to_owned());
+ }
+
let rule_num = {
let mut result = None;
@@ -690,7 +768,7 @@ impl Grammar {
if rule_num == 0 {
pos
} else {
- pos - self.accumulators.get(rule_num - 1).copied().unwrap()
+ pos - self.accumulators.get(rule_num).copied().unwrap()
},
)
.unwrap())
diff --git a/grammar/src/test_grammar_helper.rs b/grammar/src/test_grammar_helper.rs
index 984eb50..9d86865 100644
--- a/grammar/src/test_grammar_helper.rs
+++ b/grammar/src/test_grammar_helper.rs
@@ -88,7 +88,6 @@ fn scan_tnt(
}
/// Return a simple testing grammar.
-#[allow(dead_code)]
pub fn new_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
let ter = vec![Terminal::new("a".to_owned()), Terminal::new("b".to_owned())];
let non = vec![
@@ -126,7 +125,6 @@ pub fn new_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
/// Return a grammar that might serve as the grammar for my notes,
/// somehow, without regular expressions.
-#[allow(dead_code)]
pub fn new_notes_grammar_no_regexp() -> Result<Grammar, Box<dyn std::error::Error>> {
let ter = vec![
Terminal::new("NL".to_owned()),
@@ -250,7 +248,6 @@ pub fn new_notes_grammar_no_regexp() -> Result<Grammar, Box<dyn std::error::Erro
/// Return a grammar that might serve as the grammar for my notes,
/// somehow.
-#[allow(dead_code)]
pub fn new_notes_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
let ter = vec![
Terminal::new("NL".to_owned()),
@@ -353,7 +350,6 @@ pub fn new_notes_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
}
/// Return a grammar that can express parentheses.
-#[allow(dead_code)]
pub fn new_paren_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
let ter = vec![
Terminal::new("LEFT".to_owned()),
@@ -444,7 +440,6 @@ pub fn new_left_recursive_grammar() -> Result<Grammar, Box<dyn std::error::Error
}
/// Return a right recursive grammar.
-#[allow(dead_code)]
pub fn new_right_recursive_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
let ter = vec![Terminal::new("B".to_owned()), Terminal::new("C".to_owned())];
let non = vec![