From e8ea01319b3a9032a3f4f69f65e9ca96562b87b9 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Sun, 22 Jan 2023 11:49:47 +0800 Subject: forest: clone correctly Now the forest can detect if a node is packed or cloned, and correctly clones a node in those circumstances. But it still needs to be tested. --- ChangeLog | 7 +++ chain/src/atom/default.rs | 35 +---------- chain/src/default.rs | 23 +++---- chain/src/lib.rs | 8 +-- forest/src/default.rs | 70 ++++++++++++++------- forest/src/lib.rs | 147 ++++++++++++++++++++++++++++++++++++++++--- grammar/src/label.rs | 22 +++++-- grammar/src/lib.rs | 91 +++++++++++++++++++++++---- graph/src/builder.rs | 3 + graph/src/labelled/binary.rs | 16 +++++ 10 files changed, 323 insertions(+), 99 deletions(-) diff --git a/ChangeLog b/ChangeLog index ac1dfdd..44e241a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2023-01-22 Jean Sévère Durand + + * forest: Correctly clone nodes. + Now the forest can detect if a node is packed or cloned, and + correctly clones a node in those circumstances. But it still + needs to be tested. + 2023-01-20 Jean Sévère Durand * chain: A prototype is added, and passes some tests. But I am diff --git a/chain/src/atom/default.rs b/chain/src/atom/default.rs index 14b7a9f..e88cfc9 100644 --- a/chain/src/atom/default.rs +++ b/chain/src/atom/default.rs @@ -429,12 +429,6 @@ impl DefaultAtom { GrammarError::IndexOutOfBounds(child, accepting_vec.len()), )?; - if nt == 3 - && nfa.degree(child).map_err(index_out_of_bounds_conversion)? == 0 - { - println!("accepting = {accepting}"); - } - if let Some((_, old_accepting)) = terminals_map.get_mut(&t) { *old_accepting = *old_accepting || accepting; } else { @@ -455,40 +449,13 @@ impl DefaultAtom { } } - if nt == 3 { - println!("map = {terminals_map:?}"); - } - for (t, (set, accepting)) in terminals_map.into_iter() { - let new_index = nfa - .extend(set.into_iter()) - .map_err(index_out_of_bounds_conversion)?; + let new_index = nfa.extend(set).map_err(index_out_of_bounds_conversion)?; if accepting_vec.get(new_index).is_none() { #[cfg(debug_assertions)] assert_eq!(new_index, accepting_vec.len()); - // let mut updated = false; - // let nfa_len = nfa.nodes_len(); - - // 'label_loop: for (label, target_iter) in nfa - // .labels_of(new_index) - // .map_err(|_| GrammarError::IndexOutOfBounds(new_index, nfa_len))? - // { - // if label_is_nullable(*label) { - // for target in target_iter { - // if *accepting_vec - // .get(target) - // .ok_or(GrammarError::IndexOutOfBounds(target, nfa_len))? - // { - // updated = true; - - // break 'label_loop; - // } - // } - // } - // } - accepting_vec.push(accepting); } diff --git a/chain/src/default.rs b/chain/src/default.rs index b9d7fe6..22befff 100644 --- a/chain/src/default.rs +++ b/chain/src/default.rs @@ -10,10 +10,7 @@ use crate::atom::{Atom, DefaultAtom}; use core::fmt::Display; use forest::{default::DefaultForest, Forest}; use grammar::{Error as GrammarError, GrammarLabel, GrammarLabelType, TNT}; -#[allow(unused_imports)] -use graph::{ - labelled::DLGBuilder, Builder, DLGraph, Graph, LabelExtGraph, LabelGraph, ParentsGraph, -}; +use graph::{labelled::DLGBuilder, Builder, DLGraph, Graph, LabelExtGraph, LabelGraph}; use std::collections::{HashMap as Map, TryReserveError}; @@ -156,11 +153,13 @@ impl Iterator for DerIter { None } } else { - self.index = DerIterIndex::Single(0); + // If the zero-th element is present, we will + // advance the index to one; if it is not present, + // we will stop iteration. In each case we can + // safely set the index to one. + self.index = DerIterIndex::Single(1); if let Some((edge, to)) = self.singles.first() { - self.index = DerIterIndex::Single(1); - Some(TwoLayers::One(*edge, *to)) } else { None @@ -776,15 +775,7 @@ mod test_chain { println!("repeating {repeat_times} times"); - let input = { - let mut result = Vec::with_capacity(input_template.len() * repeat_times); - - for _ in 0..repeat_times { - result.extend(input_template.iter().copied()); - } - - result - }; + let input = input_template.repeat(repeat_times); let start = std::time::Instant::now(); diff --git a/chain/src/lib.rs b/chain/src/lib.rs index 91c37f7..a3d420b 100644 --- a/chain/src/lib.rs +++ b/chain/src/lib.rs @@ -131,6 +131,7 @@ where Ok(new) => new, Err(error) => { // Prevent further iterations. + self.stop = true; return Some(Err(error.into())); } @@ -157,8 +158,7 @@ pub trait Chain: LabelExtGraph { /// Represents the language that is present after we parse the /// empty string, that is the initial configuration of the - /// language. This may or may not be different from what - /// `Default::default` gives. + /// language. fn unit(atom: Self::Atom) -> Result; /// Return true if and only if the language contains the empty @@ -171,7 +171,7 @@ pub trait Chain: LabelExtGraph { /// An iterator that iterates all layers that need to be merged. type DerIter: Iterator; - /// Take the derivative by a terminal symbol at position `POS`. + /// Take the derivative by a terminal `t` at position `pos`. fn derive(&mut self, t: usize, pos: usize) -> Result; /// Take the union of all derivatives. @@ -187,7 +187,7 @@ pub trait Chain: LabelExtGraph { let edges = self.union(der_iter)?; - let new_index = self.extend(edges.into_iter())?; + let new_index = self.extend(edges)?; self.update_history(new_index); diff --git a/forest/src/default.rs b/forest/src/default.rs index 9295f1a..b96439f 100644 --- a/forest/src/default.rs +++ b/forest/src/default.rs @@ -24,6 +24,8 @@ pub enum Error { /// Encounter an invalid error in converting from an error of /// graphs. InvalidGraphError(GError), + /// Encounter an error when converting forest labels. + LabelConversion(ForestLabelError), } impl Display for Error { @@ -35,6 +37,7 @@ impl Display for Error { Error::DuplicatedNode(n) => write!(f, "node {n} is duplicated"), Error::InvalidGraphError(ge) => write!(f, "invalid error: {ge}"), Error::NodeNoLabel(n) => write!(f, "node {n} has no labels, but it should have one"), + Error::LabelConversion(le) => write!(f, "fail to convert labels: {le}"), } } } @@ -51,6 +54,12 @@ impl From for Error { } } +impl From for Error { + fn from(le: ForestLabelError) -> Self { + Self::LabelConversion(le) + } +} + /// A default implementation of forest. #[derive(Debug, Clone)] pub struct DefaultForest { @@ -193,7 +202,7 @@ impl LabelGraph for DefaultForest { } } -impl Forest for DefaultForest { +impl Forest for DefaultForest> { type Error = Error; fn root(&self) -> Option { @@ -205,7 +214,7 @@ impl Forest for DefaultForest { let mut builder = PLGBuilderMut::from_graph_mut(&mut graph); - let root = Some(builder.add_vertex(label)); + let root = Some(builder.add_vertex(ForestLabel::from(label))); Self { graph, root } } @@ -299,12 +308,12 @@ impl Forest for DefaultForest { let nodes_len = fragment.nodes_len(); - // If the fragment root has a duplicate label, the forest will - // not grow, so we use the label to find the adjoined node - // index. - - // the nodes hava already been added to the forest, so it is - // safe to call unwrap. + /// If the fragment root has a duplicate label, the forest + /// will not grow, so we use the label to find the adjoined + /// node index. + /// + /// The nodes hava already been added to the forest, so it is + /// safe to call unwrap. macro_rules! conversion ( ($node:expr) => { { @@ -360,20 +369,37 @@ impl Forest for DefaultForest { Ok(()) } - fn clone_node(&mut self, node_id: usize, clone_transform: F) -> Result - where - F: Fn(T) -> T, - { - let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + fn clone_node(&mut self, node_id: usize) -> Result { + let builder = PLGBuilderMut::from_graph_mut(&mut self.graph); let old_label = builder .vertex_label(node_id)? .ok_or(Error::NodeNoLabel(node_id))?; - let new_label = clone_transform(old_label); + if old_label.is_packed() { + return Err(ForestLabelError::ClonePack.into()); + } + + if old_label.is_cloned().is_some() { + let mut parents = self.parents_of(node_id)?; + + // Make sure it only has one parent, which is the + // representative node. + assert_eq!(parents.len(), 1); + + // We checked its length is 1, so it is safe to unwrap + // here. + return Ok(parents.next().unwrap().node()); + } + + let rep_label = old_label.pack()?; + + let new_label = old_label.clone(self)?; + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); // Make a new node - let new_index = builder.add_vertex(new_label); + let new_index = builder.add_vertex(rep_label); // Re-direct parents to the new node. // @@ -393,6 +419,10 @@ impl Forest for DefaultForest { builder.add_edge(new_index, node_id, old_label)?; + // Modify the label of the old node. + + builder.set_label(node_id, new_label)?; + Ok(new_index) } } @@ -403,10 +433,10 @@ mod forest_test { macro_rules! leaf ( ($label:expr, $type:tt) =>{ - DefaultForest::<$type>::new_leaf($label) + DefaultForest::>::new_leaf($label) }; ($label:expr) => { - DefaultForest::::new_leaf($label) + DefaultForest::>::new_leaf($label) } ); @@ -479,11 +509,9 @@ mod forest_test { // add a duplicate label forest.plant(3, leaf!(5), false)?; - let len = forest.nodes_len(); - - let clone_transform = |label| label + len; + let _len = forest.nodes_len(); - forest.clone_node(5, clone_transform)?; + forest.clone_node(5)?; assert_eq!(forest.nodes_len(), 7); diff --git a/forest/src/lib.rs b/forest/src/lib.rs index 8c9b918..02c5fcd 100644 --- a/forest/src/lib.rs +++ b/forest/src/lib.rs @@ -13,13 +13,144 @@ use graph::{error::Error as GError, GraphLabel, LabelGraph, ParentsGraph}; +/// An internal type that indicates the status of a node as either a +/// packed, cloned, or plain node. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Default)] +enum ForestLabelType { + Packed, + #[default] + Plain, + Cloned(usize), +} + +impl core::fmt::Display for ForestLabelType { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + Self::Packed => write!(f, "packed"), + Self::Plain => write!(f, "plain"), + Self::Cloned(index) => write!(f, "the {index}-th clone"), + } + } +} + +/// A type that encodes the properties demanded by a forest. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct ForestLabel { + label: T, + status: ForestLabelType, +} + +impl core::fmt::Display for ForestLabel { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + writeln!(f, "label: {}, status: {}", self.label, self.status) + } +} + +/// The type of erros for converting forest labels. +#[non_exhaustive] +#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] +pub enum ForestLabelError { + /// Try to pack a cloned node. + PackClone, + /// Try to clone a packed node. + ClonePack, +} + +impl core::fmt::Display for ForestLabelError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + Self::PackClone => write!(f, "cannot pack a cloned node"), + Self::ClonePack => write!(f, "cannot clone a packed node"), + } + } +} + +impl std::error::Error for ForestLabelError {} + +impl ForestLabel { + /// Retrieve the label. + pub fn label(&self) -> T { + self.label + } + + /// Return true if and only if this node is packed. + pub fn is_packed(&self) -> bool { + matches!(self.status, ForestLabelType::Packed) + } + + /// Retrieve the optional clone index. + pub fn is_cloned(&self) -> Option { + match self.status { + ForestLabelType::Cloned(index) => Some(index), + _ => None, + } + } + + /// Try to clone a node. + pub fn clone(self, forest: &F) -> Result + where + F: Forest, + { + if self.is_packed() { + Err(ForestLabelError::ClonePack) + } else { + let clone_index = if let Some(old_index) = self.is_cloned() { + let mut new_index: usize = old_index + 1; + + let mut new_label = Self { + status: ForestLabelType::Cloned(new_index), + ..self + }; + + while forest.query_label(new_label).is_some() { + new_index += 1; + new_label = Self { + status: ForestLabelType::Cloned(new_index), + ..self + }; + } + + new_index + } else { + 0 + }; + + Ok(Self { + status: ForestLabelType::Cloned(clone_index), + ..self + }) + } + } + + /// Try to pack a node. + pub fn pack(self) -> Result { + if self.is_cloned().is_some() { + Err(ForestLabelError::PackClone) + } else { + let new_label = Self { + status: ForestLabelType::Packed, + ..self + }; + + Ok(new_label) + } + } +} + +impl From for ForestLabel { + fn from(label: T) -> Self { + let status = ForestLabelType::default(); + Self { label, status } + } +} + /// The expected behaviours of a forest. /// /// Note that it requires only a subset of the functionalities of /// labelled graphs. -pub trait Forest: ParentsGraph + LabelGraph { +pub trait Forest: ParentsGraph + LabelGraph> { /// The type of errors for operations on the forest. - type Error: std::error::Error + From; + type Error: std::error::Error + From + From; /// Return the root of the forest. /// @@ -44,13 +175,13 @@ pub trait Forest: ParentsGraph + LabelGraph { /// Clone a node by making a new node and making all the nodes /// that previously pointed to the old node now point to the new /// node, and the new node points to the old node. Return the - /// index of the new node. + /// index of the new node. In addition, if the old node had + /// already been cloned, just return the index of its + /// representitave. /// - /// The label of the new node will be given by the label - /// transformer. - fn clone_node(&mut self, node_id: usize, clone_transform: F) -> Result - where - F: Fn(T) -> T; + /// The labels of the representing node and of the cloned node + /// will be handled automatically. + fn clone_node(&mut self, node_id: usize) -> Result; } pub mod default; diff --git a/grammar/src/label.rs b/grammar/src/label.rs index 58eaddc..05b0b1e 100644 --- a/grammar/src/label.rs +++ b/grammar/src/label.rs @@ -93,10 +93,24 @@ impl GrammarLabel { /// Return a string description with the help of the associated /// grammar. pub fn to_string(&self, grammar: &Grammar) -> Result { - // REVIEW: It needs at least 34 bytes, so we just double it. - // Of course we can also calculate the length exactly, but - // this can be postponed till later. - let mut s = String::with_capacity(68); + // First calculate the length of the resulting string. + + let mut num = 2 + 7 + 14 + 6; + + num += self.label().name(grammar)?.len(); + + num += format!("{} ", self.start()).len(); + + if let Some(end) = self.end() { + num += format!("to {end}").len(); + } else { + num += 7; + } + + let num = num; + + let mut s = String::with_capacity(num); + s.push_str("a "); if self.is_packed() { diff --git a/grammar/src/lib.rs b/grammar/src/lib.rs index 50a2b9b..b29fc69 100644 --- a/grammar/src/lib.rs +++ b/grammar/src/lib.rs @@ -213,12 +213,22 @@ pub struct Grammar { // The following attribute is empty until we call `closure` on the // NFA with `transform_label_null_epsilon` as the transformer. /// A hash map that maps a tuple `(pos1, pos2)` of positions - /// `pos1` and `pos2` in the rules to a vector of rule positions. + /// `pos1` and `pos2` in the rules to a vector of non-terminals + /// and rule positions. + /// /// This vector means that in order to expand from `pos1` to - /// `pos`, it is necessary to expand according to the positions in - /// the vector, so we need to add all these expansions into the - /// parse forest later. - expansion_map: HashMap<(usize, usize), Vec>, + /// `pos`, it is necessary to expand according to the + /// non-terminals and positions in the vector, so we need to add + /// all these expansions into the parse forest later. + expansion_map: HashMap<(usize, usize), Vec<(usize, usize)>>, + /// A hash map that maps a tuple `(pos1, pos2)` of positions + /// `pos1` and `pos2` in the rules to a vector of non-terminals. + /// + /// This vector means that in order to expand from `pos1` to + /// `pos`, it is necessary to expand according to the + /// non-terminals, so we can use this information to find out + /// where to join a new node in the parse forest later. + reduction_map: HashMap<(usize, usize), Vec>, /// The state of the grammar, which tells us what information has /// been computed for this grammar. state: GrammarState, @@ -269,6 +279,7 @@ impl Grammar { let state = Default::default(); let expansion_map = Default::default(); + let reduction_map = Default::default(); // NOTE: We cannot calculate accumulators here, as we want the // accumulators of the regular expression of the left-closure, @@ -283,6 +294,7 @@ impl Grammar { first_nodes, state, expansion_map, + reduction_map, accumulators, } } @@ -350,7 +362,7 @@ impl Grammar { /// Query if a position is the starting position of a /// non-terminal. If it is, return the non-terminal, else return - /// `None` . + /// `None`. #[inline] pub fn get_nt_start_in_nfa(&self, pos: usize) -> Option { for (index, accumulator) in self.accumulators.iter().copied().enumerate() { @@ -370,6 +382,18 @@ impl Grammar { None } + /// Query if a position is the ending position of a + /// non-terminal. If it is, return the non-terminal, else return + /// `None`. + #[inline] + pub fn get_nt_end_in_nfa(&self, pos: usize) -> Option { + if pos >= 1 { + self.get_nt_start_in_nfa(pos - 1) + } else { + None + } + } + /// Return the number of terminals. #[inline] pub fn ter_num(&self) -> usize { @@ -465,7 +489,11 @@ impl Grammar { /// Query the expansion information from the position `pos1` to /// the position `pos2` . #[inline] - pub fn query_expansion(&self, pos1: usize, pos2: usize) -> Result, Error> { + pub fn query_expansion( + &self, + pos1: usize, + pos2: usize, + ) -> Result, Error> { if pos1 >= self.total() { return Err(Error::IndexOutOfBounds(pos1, self.total())); } @@ -484,11 +512,36 @@ impl Grammar { } } - Ok(self.expansion_map.get(&(pos1, pos2)).map(|v| v.as_ref())) + Ok(self.expansion_map.get(&(pos1, pos2)).map(AsRef::as_ref)) } - // REVIEW: Do we have a better way to record expansion information - // than to compute the transitive closure? + /// Query the reduction information from the position `pos1` to + /// the position `pos2` . + #[inline] + pub fn query_reduction(&self, pos1: usize, pos2: usize) -> Result, Error> { + if pos1 >= self.total() { + return Err(Error::IndexOutOfBounds(pos1, self.total())); + } + + if pos2 >= self.total() { + return Err(Error::IndexOutOfBounds(pos2, self.total())); + } + + match self.state { + GrammarState::AfterLeftClosure => {} + _ => { + return Err(Error::WrongState( + self.state, + GrammarState::AfterLeftClosure, + )); + } + } + + Ok(self.reduction_map.get(&(pos1, pos2)).map(AsRef::as_ref)) + } + + // REVIEW: Do we have a better way to record expansion and + // reduction information than to compute the transitive closure? // REVIEW: We need a way to eliminate those left-linearly expanded // edges whose labels had already been considered, and we need to @@ -550,14 +603,28 @@ impl Grammar { (first_source, second_target), if let Some(original_expansion) = original_expansion { let mut result = original_expansion.clone(); - result.push(second_nt); + result.push((second_nt, second_label.get_moved())); result } else { - vec![second_nt] + vec![(second_nt, second_label.get_moved())] }, ); } + } else if let Some(second_nt) = self.get_nt_end_in_nfa(second_source) { + let original_reduction = self.reduction_map.get(&(second_source, second_target)); + + self.reduction_map.insert( + (first_source, second_target), + if let Some(original_reduction) = original_reduction { + let mut result = original_reduction.clone(); + result.push(second_nt); + + result + } else { + vec![second_nt] + }, + ); } NfaLabel::new(second_label.get_value(), second_label.get_moved(), left_p) diff --git a/graph/src/builder.rs b/graph/src/builder.rs index b04e7f6..c5f9252 100644 --- a/graph/src/builder.rs +++ b/graph/src/builder.rs @@ -82,6 +82,9 @@ pub trait BuilderMut { /// Add an edge from the source to the target. fn add_edge(&mut self, source: usize, target: usize, label: Self::Label) -> Result<(), Error>; + /// Set the label of an existing node to a new label. + fn set_label(&mut self, node_id: usize, label: Self::Label) -> Result<(), Error>; + /// Remove an edge from the source to the target. /// /// Since some graphs are labelled, the users are allowed to pass diff --git a/graph/src/labelled/binary.rs b/graph/src/labelled/binary.rs index e3e1943..201dda2 100644 --- a/graph/src/labelled/binary.rs +++ b/graph/src/labelled/binary.rs @@ -551,6 +551,22 @@ impl<'a, T: GraphLabel> BuilderMut for PLGBuilderMut<'a, T> { Ok(()) } + + #[inline] + fn set_label(&mut self, node_id: usize, label: Self::Label) -> Result<(), Error> { + if !self.graph.has_node(node_id) { + return Err(Error::IndexOutOfBounds(node_id, self.nodes_len())); + } + + // node_id is now guaranteed to be valid. + + self.graph.nodes.get_mut(node_id).unwrap().label = label; + + self.graph.label_index_map.remove(&label); + self.graph.label_index_map.insert(label, node_id); + + Ok(()) + } } #[cfg(test)] -- cgit v1.2.3-18-g5258