summaryrefslogtreecommitdiff
path: root/grammar
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2023-01-22 11:49:47 +0800
committerJSDurand <mmemmew@gmail.com>2023-01-22 11:49:47 +0800
commite8ea01319b3a9032a3f4f69f65e9ca96562b87b9 (patch)
tree674e7337dce0b9433b9ddfe745b0cf82f528d3ec /grammar
parent973c789dae479dd8383b0f7f9cfa5f167fdf3d38 (diff)
forest: clone correctly
Now the forest can detect if a node is packed or cloned, and correctly clones a node in those circumstances. But it still needs to be tested.
Diffstat (limited to 'grammar')
-rw-r--r--grammar/src/label.rs22
-rw-r--r--grammar/src/lib.rs91
2 files changed, 97 insertions, 16 deletions
diff --git a/grammar/src/label.rs b/grammar/src/label.rs
index 58eaddc..05b0b1e 100644
--- a/grammar/src/label.rs
+++ b/grammar/src/label.rs
@@ -93,10 +93,24 @@ impl GrammarLabel {
/// Return a string description with the help of the associated
/// grammar.
pub fn to_string(&self, grammar: &Grammar) -> Result<String, Error> {
- // REVIEW: It needs at least 34 bytes, so we just double it.
- // Of course we can also calculate the length exactly, but
- // this can be postponed till later.
- let mut s = String::with_capacity(68);
+ // First calculate the length of the resulting string.
+
+ let mut num = 2 + 7 + 14 + 6;
+
+ num += self.label().name(grammar)?.len();
+
+ num += format!("{} ", self.start()).len();
+
+ if let Some(end) = self.end() {
+ num += format!("to {end}").len();
+ } else {
+ num += 7;
+ }
+
+ let num = num;
+
+ let mut s = String::with_capacity(num);
+
s.push_str("a ");
if self.is_packed() {
diff --git a/grammar/src/lib.rs b/grammar/src/lib.rs
index 50a2b9b..b29fc69 100644
--- a/grammar/src/lib.rs
+++ b/grammar/src/lib.rs
@@ -213,12 +213,22 @@ pub struct Grammar {
// The following attribute is empty until we call `closure` on the
// NFA with `transform_label_null_epsilon` as the transformer.
/// A hash map that maps a tuple `(pos1, pos2)` of positions
- /// `pos1` and `pos2` in the rules to a vector of rule positions.
+ /// `pos1` and `pos2` in the rules to a vector of non-terminals
+ /// and rule positions.
+ ///
/// This vector means that in order to expand from `pos1` to
- /// `pos`, it is necessary to expand according to the positions in
- /// the vector, so we need to add all these expansions into the
- /// parse forest later.
- expansion_map: HashMap<(usize, usize), Vec<usize>>,
+ /// `pos`, it is necessary to expand according to the
+ /// non-terminals and positions in the vector, so we need to add
+ /// all these expansions into the parse forest later.
+ expansion_map: HashMap<(usize, usize), Vec<(usize, usize)>>,
+ /// A hash map that maps a tuple `(pos1, pos2)` of positions
+ /// `pos1` and `pos2` in the rules to a vector of non-terminals.
+ ///
+ /// This vector means that in order to expand from `pos1` to
+ /// `pos`, it is necessary to expand according to the
+ /// non-terminals, so we can use this information to find out
+ /// where to join a new node in the parse forest later.
+ reduction_map: HashMap<(usize, usize), Vec<usize>>,
/// The state of the grammar, which tells us what information has
/// been computed for this grammar.
state: GrammarState,
@@ -269,6 +279,7 @@ impl Grammar {
let state = Default::default();
let expansion_map = Default::default();
+ let reduction_map = Default::default();
// NOTE: We cannot calculate accumulators here, as we want the
// accumulators of the regular expression of the left-closure,
@@ -283,6 +294,7 @@ impl Grammar {
first_nodes,
state,
expansion_map,
+ reduction_map,
accumulators,
}
}
@@ -350,7 +362,7 @@ impl Grammar {
/// Query if a position is the starting position of a
/// non-terminal. If it is, return the non-terminal, else return
- /// `None` .
+ /// `None`.
#[inline]
pub fn get_nt_start_in_nfa(&self, pos: usize) -> Option<usize> {
for (index, accumulator) in self.accumulators.iter().copied().enumerate() {
@@ -370,6 +382,18 @@ impl Grammar {
None
}
+ /// Query if a position is the ending position of a
+ /// non-terminal. If it is, return the non-terminal, else return
+ /// `None`.
+ #[inline]
+ pub fn get_nt_end_in_nfa(&self, pos: usize) -> Option<usize> {
+ if pos >= 1 {
+ self.get_nt_start_in_nfa(pos - 1)
+ } else {
+ None
+ }
+ }
+
/// Return the number of terminals.
#[inline]
pub fn ter_num(&self) -> usize {
@@ -465,7 +489,11 @@ impl Grammar {
/// Query the expansion information from the position `pos1` to
/// the position `pos2` .
#[inline]
- pub fn query_expansion(&self, pos1: usize, pos2: usize) -> Result<Option<&[usize]>, Error> {
+ pub fn query_expansion(
+ &self,
+ pos1: usize,
+ pos2: usize,
+ ) -> Result<Option<&[(usize, usize)]>, Error> {
if pos1 >= self.total() {
return Err(Error::IndexOutOfBounds(pos1, self.total()));
}
@@ -484,11 +512,36 @@ impl Grammar {
}
}
- Ok(self.expansion_map.get(&(pos1, pos2)).map(|v| v.as_ref()))
+ Ok(self.expansion_map.get(&(pos1, pos2)).map(AsRef::as_ref))
}
- // REVIEW: Do we have a better way to record expansion information
- // than to compute the transitive closure?
+ /// Query the reduction information from the position `pos1` to
+ /// the position `pos2` .
+ #[inline]
+ pub fn query_reduction(&self, pos1: usize, pos2: usize) -> Result<Option<&[usize]>, Error> {
+ if pos1 >= self.total() {
+ return Err(Error::IndexOutOfBounds(pos1, self.total()));
+ }
+
+ if pos2 >= self.total() {
+ return Err(Error::IndexOutOfBounds(pos2, self.total()));
+ }
+
+ match self.state {
+ GrammarState::AfterLeftClosure => {}
+ _ => {
+ return Err(Error::WrongState(
+ self.state,
+ GrammarState::AfterLeftClosure,
+ ));
+ }
+ }
+
+ Ok(self.reduction_map.get(&(pos1, pos2)).map(AsRef::as_ref))
+ }
+
+ // REVIEW: Do we have a better way to record expansion and
+ // reduction information than to compute the transitive closure?
// REVIEW: We need a way to eliminate those left-linearly expanded
// edges whose labels had already been considered, and we need to
@@ -550,14 +603,28 @@ impl Grammar {
(first_source, second_target),
if let Some(original_expansion) = original_expansion {
let mut result = original_expansion.clone();
- result.push(second_nt);
+ result.push((second_nt, second_label.get_moved()));
result
} else {
- vec![second_nt]
+ vec![(second_nt, second_label.get_moved())]
},
);
}
+ } else if let Some(second_nt) = self.get_nt_end_in_nfa(second_source) {
+ let original_reduction = self.reduction_map.get(&(second_source, second_target));
+
+ self.reduction_map.insert(
+ (first_source, second_target),
+ if let Some(original_reduction) = original_reduction {
+ let mut result = original_reduction.clone();
+ result.push(second_nt);
+
+ result
+ } else {
+ vec![second_nt]
+ },
+ );
}
NfaLabel::new(second_label.get_value(), second_label.get_moved(), left_p)