From 987c84f3454c687cca0efe0d471fcf00e052ecab Mon Sep 17 00:00:00 2001 From: JSDurand Date: Sun, 12 Feb 2023 12:07:34 +0800 Subject: Added the functionality of split or clone. I need more than the ability to clone nodes: I also need to split the nodes. Now this seems to be correctly added. --- chain/src/item/default.rs | 650 ------------------------------- chain/src/item/default/mod.rs | 810 +++++++++++++++++++++++++++++++++++++++ chain/src/item/default/splone.rs | 760 ++++++++++++++++++++++++++++++++++++ chain/src/item/genins.rs | 155 +++++++- chain/src/item/mod.rs | 56 ++- 5 files changed, 1745 insertions(+), 686 deletions(-) delete mode 100644 chain/src/item/default.rs create mode 100644 chain/src/item/default/mod.rs create mode 100644 chain/src/item/default/splone.rs (limited to 'chain/src/item') diff --git a/chain/src/item/default.rs b/chain/src/item/default.rs deleted file mode 100644 index f9d26ec..0000000 --- a/chain/src/item/default.rs +++ /dev/null @@ -1,650 +0,0 @@ -//! This module provides a default implementation of iten derivation -//! forest. - -use super::*; -use crate::atom::default::DefaultAtom; -use grammar::{GrammarLabel, GrammarLabelType}; -use graph::{ - builder::BuilderMut, labelled::binary::PLGBuilderMut, Graph, LabelGraph, PLGraph, RedirectGraph, -}; - -use core::fmt::Display; - -/// The type of errors for forest operations. -#[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd)] -pub enum Error { - /// An index is out of bounds. - /// - /// The first component is the index that is out of bounds, and - /// the second component is the current length of nodes. - IndexOutOfBounds(usize, usize), - /// The forest does not permit duplicate nodes but encounters a - /// repeated node. - DuplicatedNode(usize), - /// A node has no labels while it is required to have one. - NodeNoLabel(usize), - /// Encounter an invalid error in converting from an error of - /// graphs. - InvalidGraphError(GError), - /// Encounter an error when converting forest labels. - LabelConversion(ForestLabelError), -} - -impl Display for Error { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Error::IndexOutOfBounds(index, bound) => { - write!(f, "index {index} is out of bound {bound}") - } - Error::DuplicatedNode(n) => write!(f, "node {n} is duplicated"), - Error::InvalidGraphError(ge) => write!(f, "invalid error: {ge}"), - Error::NodeNoLabel(n) => write!(f, "node {n} has no labels, but it should have one"), - Error::LabelConversion(le) => write!(f, "fail to convert labels: {le}"), - } - } -} - -impl std::error::Error for Error {} - -impl From for Error { - fn from(ge: GError) -> Self { - match ge { - GError::IndexOutOfBounds(index, bound) => Self::IndexOutOfBounds(index, bound), - GError::DuplicatedNode(n) => Self::DuplicatedNode(n), - _ => Self::InvalidGraphError(ge), - } - } -} - -impl From for Error { - fn from(le: ForestLabelError) -> Self { - Self::LabelConversion(le) - } -} - -/// A default implementation of forest. -#[derive(Debug, Clone)] -pub struct DefaultForest { - graph: PLGraph, - root: Option, -} - -impl Default for DefaultForest { - fn default() -> Self { - let graph = Default::default(); - let root = None; - - Self { graph, root } - } -} - -impl AsRef> for DefaultForest { - fn as_ref(&self) -> &DefaultForest { - self - } -} - -impl Graph for DefaultForest { - type Iter<'a> = as Graph>::Iter<'a> - where - Self: 'a; - - #[inline] - fn is_empty(&self) -> bool { - self.graph.is_empty() - } - - #[inline] - fn nodes_len(&self) -> usize { - self.graph.nodes_len() - } - - #[inline] - fn edges_len(&self) -> Option { - self.graph.edges_len() - } - - #[inline] - fn children_of(&self, node_id: usize) -> Result, GError> { - self.graph.children_of(node_id) - } - - #[inline] - fn degree(&self, node_id: usize) -> Result { - self.graph.degree(node_id) - } - - #[inline] - fn is_empty_node(&self, node_id: usize) -> Result { - self.graph.is_empty_node(node_id) - } - - #[inline] - fn has_edge(&self, source: usize, target: usize) -> Result { - self.graph.has_edge(source, target) - } - - fn replace_by_builder(&mut self, _builder: impl graph::Builder) { - unimplemented!() - } - - #[inline] - fn print_viz(&self, filename: &str) -> Result<(), std::io::Error> { - self.graph.print_viz(filename) - } -} - -impl ParentsGraph for DefaultForest { - type Iter<'a>= as ParentsGraph>::Iter<'a> - where - Self:'a; - - #[inline] - fn parents_of(&self, node_id: usize) -> Result<::Iter<'_>, GError> { - self.graph.parents_of(node_id) - } - - #[inline] - fn nth_child(&self, node_id: usize, n: usize) -> Result { - self.graph.nth_child(node_id, n) - } - - #[inline] - fn edge_to_parent( - &self, - source: usize, - target: usize, - ) -> Result, GError> { - self.graph.edge_to_parent(source, target) - } -} - -impl LabelGraph for DefaultForest { - type Iter<'a> = std::iter::Empty - where - Self: 'a; - - type LabelIter<'a> = std::iter::Empty<(&'a T, >::Iter<'a>)> - where - Self: 'a, - T: 'a; - - type EdgeLabelIter<'a> = std::iter::Empty - where - Self: 'a, - T: 'a; - - #[inline] - fn query_label(&self, label: T) -> Option { - self.graph.query_label(label) - } - - #[inline] - fn vertex_label(&self, node_id: usize) -> Result, GError> { - self.graph.vertex_label(node_id) - } - - fn edge_label( - &self, - _source: usize, - _target: usize, - ) -> Result, GError> { - unimplemented!("edges have no labels") - } - - fn find_children_with_label( - &self, - _node_id: usize, - _label: &T, - ) -> Result<>::Iter<'_>, GError> { - unimplemented!("edges have no labels") - } - - fn labels_of(&self, _node_id: usize) -> Result, GError> { - unimplemented!("edges have no labels") - } - - fn has_edge_label(&self, _node_id: usize, _label: &T, _target: usize) -> Result { - unimplemented!("edges have no labels") - } -} - -impl Forest for DefaultForest> { - type Error = Error; - - fn root(&self) -> Option { - self.root - } - - fn new_leaf(label: T) -> Self { - let mut graph = PLGraph::default(); - - let mut builder = PLGBuilderMut::from_graph_mut(&mut graph); - - let root = Some(builder.add_vertex(ForestLabel::from(label))); - - Self { graph, root } - } - - fn is_prefix(&self, node_id: usize, fragment: F) -> Result - where - F: Borrow, - { - if !self.has_node(node_id) { - return Err(Error::IndexOutOfBounds(node_id, self.nodes_len())); - } - - // We do a depth-first traversal to determine if every node - // encountered has the same set of children (labels taken into - // the consideration). - - let fragment = fragment.borrow(); - - let mut frag_stack = Vec::with_capacity(fragment.nodes_len()); - - let mut self_stack = Vec::with_capacity(fragment.nodes_len()); - - let frag_root = if let Some(root) = fragment.root() { - root - } else { - // an empty forest is a prefix of any forest. - return Ok(true); - }; - - frag_stack.push(frag_root); - self_stack.push(node_id); - - // defer popping - while let (Some(frag_top), Some(self_top)) = - (frag_stack.last().copied(), self_stack.last().copied()) - { - frag_stack.pop(); - self_stack.pop(); - - if fragment.vertex_label(frag_top)? != self.vertex_label(self_top)? { - // not a prefix - return Ok(false); - } - - let mut self_children = self.children_of(self_top)?; - - for child in fragment.children_of(frag_top)? { - if let Some(self_child) = self_children.next() { - frag_stack.push(child); - self_stack.push(self_child); - } else { - // too few children - return Ok(false); - } - } - } - - // Check both stacks are empty at the end. - Ok(frag_stack.is_empty() && self_stack.is_empty()) - } - - fn plant(&mut self, node_id: usize, fragment: F, planted: bool) -> Result<(), Self::Error> - where - F: Borrow, - { - // Convert self to a builder_mut, and traverse fragment in a - // depth-first manner and adjoin corresponding nodes along the - // way. - - if !self.has_node(node_id) { - return Err(Error::IndexOutOfBounds(node_id, self.nodes_len())); - } - - let fragment = fragment.borrow(); - - let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); - - let root = if let Some(root) = fragment.root() { - root - } else { - // Nothing to do to plant an empty forest. - return Ok(()); - }; - - // Just a dummy label for use in adding edges. - // - // REVIEW: I probably should refactor the API for builder_mut. - let root_label = fragment - .vertex_label(root)? - .ok_or(Error::NodeNoLabel(root))?; - - let nodes_len = fragment.nodes_len(); - - /// If the fragment root has a duplicate label, the forest - /// will not grow, so we use the label to find the adjoined - /// node index. - /// - /// The nodes hava already been added to the forest, so it is - /// safe to call unwrap. - macro_rules! conversion ( - ($node:expr) => { - { - builder - .query_label( - fragment - .vertex_label($node)? - .ok_or(Error::NodeNoLabel($node))? - ).unwrap() - } - } - ); - - // If the fragment has been planted before, we just add an - // edge. - - if planted { - builder.add_edge(node_id, conversion!(root), root_label)?; - - return Ok(()); - } - - // First adjoin those nodes and join the edges later. - - for node in 0..nodes_len { - let label = fragment - .vertex_label(node)? - .ok_or(Error::NodeNoLabel(node))?; - - builder.add_vertex(label); - } - - // Don't forget to join the new sub-forest to the original - // forest, at the specified position. - - builder.add_edge(node_id, conversion!(root), root_label)?; - - // We can try to calculate the depth of fragments, if we need - // to lower the memory usage. But in our use cases, we - // usually deal with fragments where each node has at most one - // child, so the depth is supposed to be equal to the length - // in this case. - let mut stack = Vec::with_capacity(fragment.nodes_len()); - - stack.push(root); - - let mut seen_nodes = std::collections::HashSet::::new(); - - while let Some(top) = stack.pop() { - seen_nodes.insert(top); - - for child in fragment.children_of(top)? { - builder.add_edge(conversion!(top), conversion!(child), root_label)?; - - if !seen_nodes.contains(&child) { - seen_nodes.insert(child); - stack.push(child); - } - } - } - - Ok(()) - } - - fn clone_node(&mut self, node_id: usize) -> Result { - let builder = PLGBuilderMut::from_graph_mut(&mut self.graph); - - let old_label = builder - .vertex_label(node_id)? - .ok_or(Error::NodeNoLabel(node_id))?; - - if old_label.is_packed() { - return Err(ForestLabelError::ClonePack.into()); - } - - // We are sure old_label is not packed here, so it is safe to - // unwrap. - let new_label = old_label.clone(self).unwrap(); - - if old_label.clone_index().is_some() { - let mut parents = self.parents_of(node_id)?; - - // Make sure it only has one parent, which is the - // representative node. - assert_eq!(parents.len(), 1); - - let rep_node = parents.next().unwrap().node(); - - let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); - - let new_clone = builder.add_vertex(new_label); - - builder.add_edge(rep_node, new_clone, new_label)?; - - // We checked its length is 1, so it is safe to unwrap - // here. - return Ok(new_clone); - } - - let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); - - // We are sure old_label is not a clone here, so it is safe to - // unwrap. - let rep_label = old_label.pack().unwrap(); - - // Make a new node - let new_index = builder.add_vertex(rep_label); - - // Re-direct parents to the new node. - // - // This must be done before pointing the new node to the old - // node, otherwise that edge will be redirected as well. - - // Unfortunately I cannot loop through parents and mutate them - // at the same time, so I first collect them into a vector. - let parents: Vec<_> = builder.parents_of(node_id)?.collect(); - - for parent in parents.into_iter() { - builder.redirect(parent.node(), parent.edge(), new_index)?; - } - - // Point the new node to the old node. OLD_LABEL is just a - // place holder. - - builder.add_edge(new_index, node_id, old_label)?; - - // Modify the label of the old node. - - builder.set_label(node_id, new_label)?; - - // Make another clone - - // new_label is cloned, so is guaranteed not to be packed, and - // we are safe to unwrap. - let new_label = new_label.clone(self).unwrap(); - - let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); - - let new_clone = builder.add_vertex(new_label); - - builder.add_edge(new_index, new_clone, new_label)?; - - Ok(new_clone) - } -} - -impl PartialEq for DefaultForest> { - fn eq(&self, other: &Self) -> bool { - let self_root = self.root(); - let other_root = other.root(); - - if (self_root.is_some() && other_root.is_none()) - || (self_root.is_none() && other_root.is_some()) - { - return false; - } else if self_root.is_none() && other_root.is_none() { - return true; - } - - // both roots are not empty - - let self_root = self_root.unwrap(); - let other_root = other_root.unwrap(); - - self.is_prefix(self_root, other).unwrap_or(false) - && other.is_prefix(other_root, self).unwrap_or(false) - } -} - -impl Eq for DefaultForest> {} - -/// Print the labels found in the forest, so that we can easily -/// understand what those labels mean. -pub fn print_labels( - atom: impl Borrow, - forest: impl Borrow>>, -) -> Result<(), Box> { - let forest = forest.borrow(); - let atom = atom.borrow(); - - for node in forest.nodes() { - let label = forest.vertex_label(node)?; - - if let Some(label) = label { - let label = label.label.label(); - - match label { - GrammarLabelType::TNT(tnt) => { - println!("{tnt} = {}", atom.name_of_tnt(tnt)?); - } - GrammarLabelType::Rule(pos) => { - println!("pos {pos} = {}", atom.rule_pos_string(pos)?); - } - } - } else { - return Err(Error::NodeNoLabel(node).into()); - } - } - - Ok(()) -} - -#[allow(unused_macros)] -macro_rules! leaf ( - ($label:expr, $type:tt) =>{ - DefaultForest::>::new_leaf($label) - }; - ($label:expr) => { - DefaultForest::>::new_leaf($label) - } -); - -#[allow(unused_imports)] -pub(crate) use leaf; - -#[cfg(test)] -mod item_test { - use super::*; - - #[test] - fn test_forest_api() -> Result<(), Box> { - let forest: DefaultForest> = Default::default(); - - // empty forest - - assert!(forest.is_empty()); - - // leaf forest - - let mut forest = leaf!(0, usize); - - assert_eq!(forest.nodes_len(), 1); - assert_eq!(forest.root(), Some(0)); - - // add some child - - forest.plant(0, leaf!(1), false)?; - - assert_eq!(forest.nodes_len(), 2); - let mut children = forest.children_of(0)?; - assert_eq!(children.next(), Some(1)); - assert_eq!(children.next(), None); - - // add more children - - forest.plant(0, leaf!(2), false)?; - forest.plant(0, leaf!(3), false)?; - forest.plant(0, leaf!(4), false)?; - forest.plant(2, leaf!(5), false)?; - - assert_eq!(forest.nodes_len(), 6); - let mut children = forest.children_of(0)?; - assert_eq!(children.next(), Some(1)); - assert_eq!(children.next(), Some(2)); - assert_eq!(children.next(), Some(3)); - assert_eq!(children.next(), Some(4)); - let mut children = forest.children_of(2)?; - assert_eq!(children.next(), Some(5)); - assert_eq!(children.next(), None); - - let mut test_forest = leaf!(0); - test_forest.plant(0, leaf!(1), false)?; - test_forest.plant(0, leaf!(2), false)?; - test_forest.plant(0, leaf!(3), false)?; - test_forest.plant(2, leaf!(5), false)?; - - assert!(forest.is_prefix(0, &test_forest)?); - - let mut test_forest = leaf!(0); - test_forest.plant(0, leaf!(1), false)?; - test_forest.plant(0, leaf!(2), false)?; - // this child of the root should have label 3 in order to be a - // prefix. - test_forest.plant(0, leaf!(4), false)?; - test_forest.plant(2, leaf!(5), false)?; - - assert!(!forest.is_prefix(0, &test_forest)?); - - let mut test_forest = leaf!(2); - test_forest.plant(0, leaf!(5), false)?; - - assert!(forest.is_prefix(2, &test_forest)?); - - // now test cloning - - // add a duplicate label - forest.plant(3, leaf!(5), false)?; - - let _len = forest.nodes_len(); - - forest.clone_node(5)?; - - assert_eq!(forest.nodes_len(), 8); - - #[cfg(feature = "test-print-viz")] - forest.print_viz("forest.gv")?; - - Ok(()) - } - - #[test] - fn test_eq() -> Result<(), Box> { - let mut forest = leaf!(0, usize); - - forest.plant(0, leaf!(1), false)?; - forest.plant(0, leaf!(2), false)?; - forest.plant(0, leaf!(3), false)?; - forest.plant(0, leaf!(4), false)?; - forest.plant(2, leaf!(5), false)?; - - let mut test_forest = leaf!(0); - test_forest.plant(0, leaf!(1), false)?; - test_forest.plant(0, leaf!(2), false)?; - test_forest.plant(0, leaf!(3), false)?; - test_forest.plant(2, leaf!(5), false)?; - - assert_ne!(forest, test_forest); - assert_ne!(test_forest, forest); - - test_forest.plant(0, leaf!(4), false)?; - - assert_eq!(forest, test_forest); - assert_eq!(test_forest, forest); - - Ok(()) - } -} diff --git a/chain/src/item/default/mod.rs b/chain/src/item/default/mod.rs new file mode 100644 index 0000000..7ecc70d --- /dev/null +++ b/chain/src/item/default/mod.rs @@ -0,0 +1,810 @@ +//! This module provides a default implementation of iten derivation +//! forest. + +use super::*; +use crate::atom::default::DefaultAtom; +use grammar::{GrammarLabel, GrammarLabelType}; +use graph::{ + builder::BuilderMut, labelled::binary::PLGBuilderMut, Graph, LabelGraph, PLGraph, RedirectGraph, +}; + +use core::fmt::Display; + +/// The type of errors for forest operations. +#[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd)] +pub enum Error { + /// An index is out of bounds. + /// + /// The first component is the index that is out of bounds, and + /// the second component is the current length of nodes. + IndexOutOfBounds(usize, usize), + /// The forest does not permit duplicate nodes but encounters a + /// repeated node. + DuplicatedNode(usize), + /// A node has no labels while it is required to have one. + NodeNoLabel(usize), + /// Encounter an invalid error in converting from an error of + /// graphs. + InvalidGraphError(GError), + /// Encounter an error when converting forest labels. + LabelConversion(ForestLabelError), + /// Trying to split a packed node. + SplitPack(usize), +} + +impl Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Error::IndexOutOfBounds(index, bound) => { + write!(f, "index {index} is out of bound {bound}") + } + Error::DuplicatedNode(n) => write!(f, "node {n} is duplicated"), + Error::InvalidGraphError(ge) => write!(f, "invalid error: {ge}"), + Error::NodeNoLabel(n) => write!(f, "node {n} has no labels, but it should have one"), + Error::LabelConversion(le) => write!(f, "fail to convert labels: {le}"), + Error::SplitPack(n) => write!(f, "cannot split the packed node {n}"), + } + } +} + +impl std::error::Error for Error {} + +impl From for Error { + fn from(ge: GError) -> Self { + match ge { + GError::IndexOutOfBounds(index, bound) => Self::IndexOutOfBounds(index, bound), + GError::DuplicatedNode(n) => Self::DuplicatedNode(n), + _ => Self::InvalidGraphError(ge), + } + } +} + +impl From for Error { + fn from(le: ForestLabelError) -> Self { + Self::LabelConversion(le) + } +} + +/// A default implementation of forest. +#[derive(Debug, Clone)] +pub struct DefaultForest { + graph: PLGraph, + root: Option, +} + +impl Default for DefaultForest { + fn default() -> Self { + let graph = Default::default(); + let root = None; + + Self { graph, root } + } +} + +impl AsRef> for DefaultForest { + fn as_ref(&self) -> &DefaultForest { + self + } +} + +impl Graph for DefaultForest { + type Iter<'a> = as Graph>::Iter<'a> + where + Self: 'a; + + #[inline] + fn is_empty(&self) -> bool { + self.graph.is_empty() + } + + #[inline] + fn nodes_len(&self) -> usize { + self.graph.nodes_len() + } + + #[inline] + fn edges_len(&self) -> Option { + self.graph.edges_len() + } + + #[inline] + fn children_of(&self, node_id: usize) -> Result, GError> { + self.graph.children_of(node_id) + } + + #[inline] + fn degree(&self, node_id: usize) -> Result { + self.graph.degree(node_id) + } + + #[inline] + fn is_empty_node(&self, node_id: usize) -> Result { + self.graph.is_empty_node(node_id) + } + + #[inline] + fn has_edge(&self, source: usize, target: usize) -> Result { + self.graph.has_edge(source, target) + } + + fn replace_by_builder(&mut self, _builder: impl graph::Builder) { + unimplemented!() + } + + #[inline] + fn print_viz(&self, filename: &str) -> Result<(), std::io::Error> { + self.graph.print_viz(filename) + } +} + +impl ParentsGraph for DefaultForest { + type Iter<'a>= as ParentsGraph>::Iter<'a> + where + Self:'a; + + #[inline] + fn parents_of(&self, node_id: usize) -> Result<::Iter<'_>, GError> { + self.graph.parents_of(node_id) + } + + #[inline] + fn nth_child(&self, node_id: usize, n: usize) -> Result { + self.graph.nth_child(node_id, n) + } + + #[inline] + fn edge_to_parent( + &self, + source: usize, + target: usize, + ) -> Result, GError> { + self.graph.edge_to_parent(source, target) + } +} + +impl LabelGraph for DefaultForest { + type Iter<'a> = std::iter::Empty + where + Self: 'a; + + type LabelIter<'a> = std::iter::Empty<(&'a T, >::Iter<'a>)> + where + Self: 'a, + T: 'a; + + type EdgeLabelIter<'a> = std::iter::Empty + where + Self: 'a, + T: 'a; + + #[inline] + fn query_label(&self, label: T) -> Option { + self.graph.query_label(label) + } + + #[inline] + fn vertex_label(&self, node_id: usize) -> Result, GError> { + self.graph.vertex_label(node_id) + } + + fn edge_label( + &self, + _source: usize, + _target: usize, + ) -> Result, GError> { + unimplemented!("edges have no labels") + } + + fn find_children_with_label( + &self, + _node_id: usize, + _label: &T, + ) -> Result<>::Iter<'_>, GError> { + unimplemented!("edges have no labels") + } + + fn labels_of(&self, _node_id: usize) -> Result, GError> { + unimplemented!("edges have no labels") + } + + fn has_edge_label(&self, _node_id: usize, _label: &T, _target: usize) -> Result { + unimplemented!("edges have no labels") + } +} + +impl Forest for DefaultForest> { + type Error = Error; + + fn root(&self) -> Option { + self.root + } + + fn new_leaf(label: T) -> Self { + let mut graph = PLGraph::default(); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut graph); + + let root = Some(builder.add_vertex(ForestLabel::from(label))); + + Self { graph, root } + } + + fn transform_label( + &mut self, + node_id: usize, + transform: impl FnOnce(T) -> T, + ) -> Result<(), Self::Error> { + let vertex_label = self + .vertex_label(node_id)? + .ok_or(Error::NodeNoLabel(node_id))?; + + let transformed_label = transform(vertex_label.label()); + + let transformed_label = ForestLabel::new(transformed_label, vertex_label.status); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + builder + .set_label(node_id, transformed_label) + .map_err(Into::into) + } + + fn is_prefix(&self, node_id: usize, fragment: F) -> Result + where + F: Borrow, + { + if !self.has_node(node_id) { + return Err(Error::IndexOutOfBounds(node_id, self.nodes_len())); + } + + // We do a depth-first traversal to determine if every node + // encountered has the same set of children (labels taken into + // the consideration). + + let fragment = fragment.borrow(); + + let mut frag_stack = Vec::with_capacity(fragment.nodes_len()); + + let mut self_stack = Vec::with_capacity(fragment.nodes_len()); + + let frag_root = if let Some(root) = fragment.root() { + root + } else { + // an empty forest is a prefix of any forest. + return Ok(true); + }; + + frag_stack.push(frag_root); + self_stack.push(node_id); + + // defer popping + while let (Some(frag_top), Some(self_top)) = + (frag_stack.last().copied(), self_stack.last().copied()) + { + frag_stack.pop(); + self_stack.pop(); + + if fragment.vertex_label(frag_top)? != self.vertex_label(self_top)? { + // not a prefix + return Ok(false); + } + + let mut self_children = self.children_of(self_top)?; + + for child in fragment.children_of(frag_top)? { + if let Some(self_child) = self_children.next() { + frag_stack.push(child); + self_stack.push(self_child); + } else { + // too few children + return Ok(false); + } + } + } + + // Check both stacks are empty at the end. + Ok(frag_stack.is_empty() && self_stack.is_empty()) + } + + fn plant(&mut self, node_id: usize, fragment: F, planted: bool) -> Result<(), Self::Error> + where + F: Borrow, + { + // Convert self to a builder_mut, and traverse fragment in a + // depth-first manner and adjoin corresponding nodes along the + // way. + + if !self.has_node(node_id) { + return Err(Error::IndexOutOfBounds(node_id, self.nodes_len())); + } + + let fragment = fragment.borrow(); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let root = if let Some(root) = fragment.root() { + root + } else { + // Nothing to do to plant an empty forest. + return Ok(()); + }; + + // Just a dummy label for use in adding edges. + // + // REVIEW: I probably should refactor the API for builder_mut. + let root_label = fragment + .vertex_label(root)? + .ok_or(Error::NodeNoLabel(root))?; + + let nodes_len = fragment.nodes_len(); + + /// If the fragment root has a duplicate label, the forest + /// will not grow, so we use the label to find the adjoined + /// node index. + /// + /// The nodes hava already been added to the forest, so it is + /// safe to call unwrap. + macro_rules! conversion ( + ($node:expr) => { + { + builder + .query_label( + fragment + .vertex_label($node)? + .ok_or(Error::NodeNoLabel($node))? + ).unwrap() + } + } + ); + + // If the fragment has been planted before, we just add an + // edge. + + if planted { + builder.add_edge(node_id, conversion!(root), root_label)?; + + return Ok(()); + } + + // First adjoin those nodes and join the edges later. + + for node in 0..nodes_len { + let label = fragment + .vertex_label(node)? + .ok_or(Error::NodeNoLabel(node))?; + + builder.add_vertex(label); + } + + // Don't forget to join the new sub-forest to the original + // forest, at the specified position. + + builder.add_edge(node_id, conversion!(root), root_label)?; + + // We can try to calculate the depth of fragments, if we need + // to lower the memory usage. But in our use cases, we + // usually deal with fragments where each node has at most one + // child, so the depth is supposed to be equal to the length + // in this case. + let mut stack = Vec::with_capacity(fragment.nodes_len()); + + stack.push(root); + + let mut seen_nodes = std::collections::HashSet::::new(); + + while let Some(top) = stack.pop() { + seen_nodes.insert(top); + + for child in fragment.children_of(top)? { + builder.add_edge(conversion!(top), conversion!(child), root_label)?; + + if !seen_nodes.contains(&child) { + seen_nodes.insert(child); + stack.push(child); + } + } + } + + Ok(()) + } + + fn clone_node( + &mut self, + node_id: usize, + preserved_edges_num: usize, + no_new_clone: bool, + ) -> Result { + let builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let old_label = builder + .vertex_label(node_id)? + .ok_or(Error::NodeNoLabel(node_id))?; + + if old_label.is_packed() { + dbg!(node_id, old_label); + return Err(ForestLabelError::ClonePack.into()); + } + + // We are sure old_label is not packed here, so it is safe to + // unwrap. + let new_label = old_label + .clone::>>(self.borrow()) + .unwrap(); + + if old_label.clone_index().is_some() { + if no_new_clone { + return Ok(new_label.clone_index().unwrap()); + } + + let mut parents = self.parents_of(node_id)?; + + // Make sure it only has one parent, which is the + // representative node. + assert_eq!(parents.len(), 1); + + // We checked its length is 1, so it is safe to unwrap + // here. + let rep_node = parents.next().unwrap().node(); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let new_clone = builder.add_vertex(new_label); + + builder.add_edge(rep_node, new_clone, new_label)?; + + let preserve_children: Vec<_> = builder + .children_of(node_id)? + .take(preserved_edges_num) + .collect(); + + for child in preserve_children.into_iter() { + builder.add_edge(new_clone, child, new_label)?; + } + + return Ok(new_clone); + } + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + // We are sure old_label is not a clone here, so it is safe to + // unwrap. + let rep_label = old_label.pack().unwrap(); + + // Make a new node + let new_index = builder.add_vertex(rep_label); + + // Re-direct parents to the new node. + // + // This must be done before pointing the new node to the old + // node, otherwise that edge will be redirected as well. + + // Unfortunately I cannot loop through parents and mutate them + // at the same time, so I first collect them into a vector. + let parents: Vec<_> = builder.parents_of(node_id)?.collect(); + + for parent in parents.into_iter() { + builder.redirect(parent.node(), parent.edge(), new_index)?; + } + + // Point the new node to the old node. OLD_LABEL is just a + // place holder. + + builder.add_edge(new_index, node_id, old_label)?; + + // Modify the label of the old node. + + builder.set_label(node_id, new_label)?; + + // Make another clone + + // new_label is cloned, so is guaranteed not to be packed, and + // we are safe to unwrap. + let new_label = new_label + .clone::>>(self.borrow()) + .unwrap(); + + if no_new_clone { + return Ok(new_label.clone_index().unwrap()); + } + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let new_clone = builder.add_vertex(new_label); + + builder.add_edge(new_index, new_clone, new_label)?; + + let preserve_children: Vec<_> = builder + .children_of(node_id)? + .take(preserved_edges_num) + .collect(); + + for child in preserve_children { + builder.add_edge(new_clone, child, new_label)?; + } + + Ok(new_clone) + } +} + +impl PartialEq for DefaultForest> { + fn eq(&self, other: &Self) -> bool { + let self_root = self.root(); + let other_root = other.root(); + + if (self_root.is_some() && other_root.is_none()) + || (self_root.is_none() && other_root.is_some()) + { + return false; + } else if self_root.is_none() && other_root.is_none() { + return true; + } + + // both roots are not empty + + let self_root = self_root.unwrap(); + let other_root = other_root.unwrap(); + + self.is_prefix(self_root, other).unwrap_or(false) + && other.is_prefix(other_root, self).unwrap_or(false) + } +} + +pub mod splone; + +impl DefaultForest { + /// Create a forest with only one leaf from a raw label, unlike + /// `new_leaf`, which transforms the label for convenience. + pub fn new_leaf_raw(label: T) -> Self { + let mut graph = PLGraph::default(); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut graph); + + let root = Some(builder.add_vertex(label)); + + Self { graph, root } + } +} + +impl Eq for DefaultForest> {} + +impl DefaultForest> { + /// Prints the forest without nodes that do not have ending + /// positions. + /// + /// Nodes without ending positions are "unclosed nodes" that only + /// serve the role of creating nodes with ending positions. + pub fn print_closed_viz(&self, filename: &str) -> Result<(), std::io::Error> { + let discard_nodes: std::collections::HashSet<_> = self + .nodes() + .filter(|node| match self.vertex_label(*node) { + Ok(label) => { + if let Some(label) = label { + label.label().end().is_none() + } else { + true + } + } + Err(_) => true, + }) + .collect(); + + let filename = format!("output/{filename}"); + + let preamble = "digraph nfa { + fontname=\"Helvetica,Arial,sans-serif\" + node [fontname=\"Helvetica,Arial,sans-serif\"] + edge [fontname=\"Helvetica,Arial,sans-serif\"] + rankdir=LR;\n"; + + let mut post = String::new(); + + for node in self.nodes() { + if discard_nodes.contains(&node) { + continue; + } + + post.push_str(&format!( + " {node} [label = \"{}\"]\n", + match self.vertex_label(node) { + Ok(Some(label)) => { + format!("{label}") + } + _ => { + " ε ".to_owned() + } + } + )); + } + + for (source, target) in self.edges() { + if discard_nodes.contains(&source) || discard_nodes.contains(&target) { + continue; + } + + post.push_str(&format!(" {source} -> {target}\n")); + } + + post.push_str("}\n"); + + let result = format!("{preamble}{post}"); + + if std::fs::metadata(&filename).is_ok() { + std::fs::remove_file(&filename)?; + } + + let mut file = std::fs::File::options() + .write(true) + .create(true) + .open(&filename)?; + + use std::io::Write; + + file.write_all(result.as_bytes()) + } +} + +/// Print the labels found in the forest, so that we can easily +/// understand what those labels mean. +pub fn print_labels( + atom: impl Borrow, + forest: impl Borrow>>, +) -> Result<(), Box> { + let forest = forest.borrow(); + let atom = atom.borrow(); + + for node in forest.nodes() { + let label = forest.vertex_label(node)?; + + if let Some(label) = label { + let label = label.label.label(); + + match label { + GrammarLabelType::TNT(tnt) => { + println!("{tnt} = {}", atom.name_of_tnt(tnt)?); + } + GrammarLabelType::Rule(pos) => { + println!("pos {pos} = {}", atom.rule_pos_string(pos)?); + } + } + } else { + return Err(Error::NodeNoLabel(node).into()); + } + } + + Ok(()) +} + +#[allow(unused_macros)] +macro_rules! leaf ( + ($label:expr, $type:tt) =>{ + DefaultForest::>::new_leaf($label) + }; + ($label:expr) => { + DefaultForest::>::new_leaf($label) + } +); + +#[allow(unused_imports)] +pub(crate) use leaf; + +#[cfg(test)] +mod item_test { + use super::*; + + #[test] + fn test_forest_api() -> Result<(), Box> { + let forest: DefaultForest> = Default::default(); + + // empty forest + + assert!(forest.is_empty()); + + // leaf forest + + let mut forest = leaf!(0, usize); + + assert_eq!(forest.nodes_len(), 1); + assert_eq!(forest.root(), Some(0)); + + // add some child + + forest.plant(0, leaf!(1), false)?; + + assert_eq!(forest.nodes_len(), 2); + let mut children = forest.children_of(0)?; + assert_eq!(children.next(), Some(1)); + assert_eq!(children.next(), None); + + // add more children + + forest.plant(0, leaf!(2), false)?; + forest.plant(0, leaf!(3), false)?; + forest.plant(0, leaf!(4), false)?; + forest.plant(2, leaf!(5), false)?; + + assert_eq!(forest.nodes_len(), 6); + let mut children = forest.children_of(0)?; + assert_eq!(children.next(), Some(1)); + assert_eq!(children.next(), Some(2)); + assert_eq!(children.next(), Some(3)); + assert_eq!(children.next(), Some(4)); + let mut children = forest.children_of(2)?; + assert_eq!(children.next(), Some(5)); + assert_eq!(children.next(), None); + + let mut test_forest = leaf!(0); + test_forest.plant(0, leaf!(1), false)?; + test_forest.plant(0, leaf!(2), false)?; + test_forest.plant(0, leaf!(3), false)?; + test_forest.plant(2, leaf!(5), false)?; + + assert!(forest.is_prefix(0, &test_forest)?); + + let mut test_forest = leaf!(0); + test_forest.plant(0, leaf!(1), false)?; + test_forest.plant(0, leaf!(2), false)?; + // this child of the root should have label 3 in order to be a + // prefix. + test_forest.plant(0, leaf!(4), false)?; + test_forest.plant(2, leaf!(5), false)?; + + assert!(!forest.is_prefix(0, &test_forest)?); + + let mut test_forest = leaf!(2); + test_forest.plant(0, leaf!(5), false)?; + + assert!(forest.is_prefix(2, &test_forest)?); + + // now test cloning + + // add a duplicate label + forest.plant(3, leaf!(5), false)?; + + assert_eq!(forest.nodes_len(), 6); + + // add two children to 5 + + forest.plant(5, leaf!(6), false)?; + forest.plant(5, leaf!(7), false)?; + + // clone and preserve one child + let cloned = forest.clone_node(5, 1, false)?; + + assert_eq!(forest.nodes_len(), 10); + + assert_eq!(forest.degree(cloned)?, 1); + assert_eq!(forest.degree(5)?, 2); + + #[cfg(feature = "test-print-viz")] + forest.print_viz("forest.gv")?; + + Ok(()) + } + + #[test] + fn test_eq() -> Result<(), Box> { + let mut forest = leaf!(0, usize); + + forest.plant(0, leaf!(1), false)?; + forest.plant(0, leaf!(2), false)?; + forest.plant(0, leaf!(3), false)?; + forest.plant(0, leaf!(4), false)?; + forest.plant(2, leaf!(5), false)?; + + let mut test_forest = leaf!(0); + test_forest.plant(0, leaf!(1), false)?; + test_forest.plant(0, leaf!(2), false)?; + test_forest.plant(0, leaf!(3), false)?; + test_forest.plant(2, leaf!(5), false)?; + + assert_ne!(forest, test_forest); + assert_ne!(test_forest, forest); + + test_forest.plant(0, leaf!(4), false)?; + + assert_eq!(forest, test_forest); + assert_eq!(test_forest, forest); + + Ok(()) + } +} diff --git a/chain/src/item/default/splone.rs b/chain/src/item/default/splone.rs new file mode 100644 index 0000000..1168539 --- /dev/null +++ b/chain/src/item/default/splone.rs @@ -0,0 +1,760 @@ +//! This module implements a function for "closing" and "splitting" a +//! node in a forest, and hence the name. + +use super::*; +use grammar::TNT; + +fn get_nt_label(label: GrammarLabel) -> Option { + match label.label() { + GrammarLabelType::TNT(TNT::Non(nt)) => Some(nt), + _ => None, + } +} + +fn get_rule_label(label: GrammarLabel) -> Option { + match label.label() { + GrammarLabelType::Rule(rule) => Some(rule), + _ => None, + } +} + +impl DefaultForest> { + /// Either "open split" or "closed split". + /// + /// An open split is an attempt to set the end of the node to + /// `None`, and a closed split is an attempt to set the end to a + /// specific position. + /// + /// Also this function preserves `edge_index + 1` many children + /// when splitting or cloning. + /// + /// To be more precise, this function performs the following + /// actions: + /// + /// 1. Make sure it is labelled by a nonterminal. + /// + /// 2. Check the status of the node. If it is packed, return an + /// error. + /// + /// 3. Make a new label according to the given `end`. See the + /// function + /// [`create_new_label`][DefaultForest::>::create_new_label] + /// for the process of making new labels. + /// + /// 4. If the new label is equal to the old label, and if + /// `edge_index + 1` is equal to the degree of the node, then do + /// nothing. + /// + /// 5. If the new label is equal to the old label but + /// `edge_index+1` is not equal to the degree of the node, then + /// clone the node while preserving `edge_index + 1` many + /// children. + /// + /// 6. If the new label is not equal to the old label, then split + /// the node. See the function + /// [`split_node`][DefaultForest::>::split_node] + /// for details. + /// + /// # Name + /// + /// The name is a mixture of *split* and *clone*. + /// + /// # NOTE + /// + /// We want to "do the same thing" to each parent of the node, + /// that should be checked to be labelled by a rule position. + /// + /// That is to say, if we replace the label of the node, we also + /// replace the label of the "rule parent". If we split the node, + /// the rule parents are also splitted in a parallel manner with + /// the splitted node. + /// + /// Also, when we refer to the parents of the node in the + /// descriptions of procedures below, we actually refer to the + /// parents of the rule parents, which should again be checked to + /// be labelled by non-terminals. + fn splone(&mut self, node: usize, end: Option, edge_index: usize) -> Result<(), Error> { + let node_label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?; + + assert!(get_nt_label(node_label.label()).is_some()); + + if node_label.is_packed() { + return Err(Error::SplitPack(node)); + } + + let node_end = node_label.label().end(); + let node_degree = self.degree(node)?; + + // We can check the end to know whether the new label is equal + // to the old label. + if node_end == end { + if node_degree == edge_index + 1 { + return Ok(()); + } + + dbg!(); + self.clone_node(node, edge_index + 1, false)?; + + return Ok(()); + } + + let new_label = self.create_new_label(node, end, edge_index)?; + + let new_label = if let Some(label) = new_label { + label + } else { + return Ok(()); + }; + + if node_end.is_some() + || edge_index + 1 != node_degree + || node_label.clone_index().is_some() + || new_label.clone_index().is_some() + { + return self.split_node(new_label, node, edge_index); + } + + // replace the label directly + + // if new_label.clone_index().is_none() { + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + builder.set_label(node, new_label)?; + + let parents: Vec<_> = builder + .parents_of(node)? + .map(|parent| parent.node()) + .collect(); + + for parent in parents { + let mut parent_label = builder + .vertex_label(parent)? + .ok_or(Error::NodeNoLabel(parent))? + .label(); + + assert!(get_rule_label(parent_label).is_some()); + assert_eq!(builder.degree(parent)?, 1); + + if let Some(pos) = end { + parent_label.set_end(pos); + } else { + parent_label.open_end(); + } + + let parent_label = ForestLabel::from(parent_label); + + builder.set_label(parent, parent_label)?; + } + // } else { + // // REVIEW: Call `split_node` in this situation as well? + + // // If we are here, the new label should have a packed + // // parent. + // let packed = self + // .query_label(ForestLabel::new(new_label.label(), ForestLabelType::Packed)) + // .unwrap(); + + // let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + // builder.set_label(node, new_label)?; + + // let parents: Vec<_> = builder.parents_of(node)?.collect(); + + // for parent in parents.iter() { + // builder.redirect(parent.node(), parent.edge(), packed)?; + // } + + // builder.add_edge(packed, node, new_label)?; + // } + + Ok(()) + } + + /// Procedure to split the node: + /// + /// 1. Create a new node with the new label. + /// + /// 2. Preserve the old children as specified by `edge_index + 1`. + /// + /// 3. For each parent, clone the parent. Replace the original + /// child of the parent, which pointed to the old node, by this + /// new node. Other children are inherited from the old parent. + fn split_node( + &mut self, + new_label: ForestLabel, + mut node: usize, + edge_index: usize, + ) -> Result<(), Error> { + let end = new_label.label().end(); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let mut new_node = builder.add_vertex(new_label); + + let new_packed = if new_label.clone_index().is_some() { + let packed = builder + .query_label(ForestLabel::new(new_label.label(), ForestLabelType::Packed)) + .unwrap(); + + builder.add_edge(packed, new_node, new_label)?; + + Some(packed) + } else { + None + }; + + let preserve_children: Vec<_> = builder.children_of(node)?.take(edge_index + 1).collect(); + + for child in preserve_children { + builder.add_edge(new_node, child, new_label)?; + } + + let parents: Vec<_> = { + if let Some(label) = self.vertex_label(node)? { + if label.clone_index().is_some() { + let mut parents = self.parents_of(node)?; + assert_eq!(parents.len(), 1); + node = parents.next().unwrap().node(); + } + } + + let parents: Vec<_> = self.parents_of(node)?.collect(); + + let mut result: Vec<(Parent, usize)> = Vec::with_capacity( + parents + .iter() + .map(|parent| { + self.parents_of(parent.node()) + .map(|iter| iter.len()) + .unwrap_or(0) + }) + .sum(), + ); + + for parent in parents { + let mut parent_label = self + .vertex_label(parent.node())? + .ok_or_else(|| Error::NodeNoLabel(parent.node()))? + .label(); + + assert!(get_rule_label(parent_label).is_some()); + assert_eq!(self.degree(parent.node())?, 1); + + if let Some(pos) = end { + parent_label.set_end(pos); + } else { + parent_label.open_end(); + } + + let parent_label = ForestLabel::from(parent_label); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let new_parent = builder.add_vertex(parent_label); + + if let Some(packed) = new_packed { + new_node = packed; + } + + builder.add_edge(new_parent, new_node, new_label)?; + + result.extend( + self.parents_of(parent.node())? + .map(|parent_parent| (parent_parent, new_parent)), + ); + } + + result + }; + + for (parent, new_child) in parents { + if self.has_same_children_but_one( + parent.node(), + parent.node(), + parent.edge(), + new_child, + )? { + continue; + } + + // we don't add a child to parent.edge() here. + let cloned = self.clone_node(parent.node(), parent.edge(), false)?; + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + builder.add_edge(cloned, new_child, new_label)?; + + let children_to_add: Vec<_> = builder + .children_of(parent.node())? + .skip(parent.edge() + 1) + .collect(); + + for child in children_to_add { + builder.add_edge(cloned, child, new_label)?; + } + } + + Ok(()) + } + + /// Procedure for the new label: + /// + /// 1. Copy the old label. + /// + /// 2. Set the end to `pos`. + /// + /// 3. Pack the label. + /// + /// 4. Check if this label already exists. If so, clone the label and + /// return it. + /// + /// 5. Else set the label to be a plain label. + /// + /// 6. Check if this plain label already exists. If so, clone the + /// existing label, and return the clone of the cloned label. + /// + /// 7. Else return the plain label. + fn create_new_label( + &mut self, + node: usize, + end: Option, + edge_index: usize, + ) -> Result>, Error> { + let mut copied_label = self + .vertex_label(node)? + .ok_or(Error::NodeNoLabel(node))? + .label(); + + if let Some(pos) = end { + copied_label.set_end(pos); + } else { + copied_label.open_end(); + } + + let label = ForestLabel::new(copied_label, ForestLabelType::Packed); + + if let Some(packed) = self.query_label(label) { + for child in self.children_of(packed)? { + if self.has_same_children(child, node, self.degree(child)?, edge_index + 1)? { + return Ok(None); + } + } + + let mut packed_children = self.children_of(packed)?; + + let first_child = packed_children.next().unwrap(); + + let clone_index = self.clone_node(first_child, 0, true)?; + + Ok(Some(ForestLabel::new( + copied_label, + ForestLabelType::Cloned(clone_index), + ))) + } else { + let plain_label = ForestLabel::new(copied_label, ForestLabelType::Plain); + + if let Some(existing) = self.query_label(plain_label) { + if self.has_same_children(existing, node, self.degree(existing)?, edge_index + 1)? { + return Ok(None); + } + + let clone_index = self.clone_node(existing, 0, true)?; + + Ok(Some(ForestLabel::new( + copied_label, + ForestLabelType::Cloned(clone_index), + ))) + } else { + Ok(Some(plain_label)) + } + } + } + + /// Compare if two nodes have the same children in the same order. + fn has_same_children( + &self, + nodea: usize, + nodeb: usize, + edge_num_a: usize, + edge_num_b: usize, + ) -> Result { + let children_a = self.children_of(nodea)?.take(edge_num_a); + let children_b = self.children_of(nodeb)?.take(edge_num_b); + + if children_a.len() != children_b.len() { + return Ok(false); + } + + for (childa, childb) in children_a.zip(children_b) { + if childa != childb { + return Ok(false); + } + } + + Ok(true) + } + + /// Detect if a node has a branch that has the same children as + /// another node, except at a given index, where it points to + /// another given node. + fn has_same_children_but_one( + &self, + nodea: usize, + nodeb: usize, + edge_index: usize, + alternative: usize, + ) -> Result { + let labela = self.vertex_label(nodea)?.ok_or(Error::NodeNoLabel(nodea))?; + let children_b = self.children_of(nodeb)?; + + if labela.is_plain() { + let children_a = self.children_of(nodea)?; + + if children_a.len() != children_b.len() { + return Ok(false); + } + + for (index, (childa, childb)) in children_a.zip(children_b).enumerate() { + if index != edge_index { + if childa != childb { + return Ok(false); + } + } else if childa != alternative { + return Ok(false); + } + } + + Ok(true) + } else if labela.clone_index().is_some() { + let mut parentsa = self.parents_of(nodea)?; + + assert_eq!(parentsa.len(), 1); + + let parenta = parentsa.next().unwrap().node(); + + 'branch_loop: for branch in self.children_of(parenta)? { + let children_a = self.children_of(branch)?; + let children_b = children_b.clone(); + + if children_a.len() != children_b.len() { + continue 'branch_loop; + } + + for (index, (childa, childb)) in children_a.zip(children_b).enumerate() { + if index != edge_index { + if childa != childb { + continue 'branch_loop; + } + } else if childa != alternative { + continue 'branch_loop; + } + } + + return Ok(true); + } + + Ok(false) + } else { + // a packed node; this should not happen + unreachable!("should not examine children of a packed node") + } + } +} + +#[cfg(test)] +mod test_splone { + use super::*; + + use grammar::TNT::*; + + fn create_test_forest( + ) -> Result>, Box> { + let mut forest = leaf!(GrammarLabel::new(Non(0), 0), GrammarLabel); + + forest.plant( + 0, + leaf!(GrammarLabel::new_closed(6, 0, 1), GrammarLabel), + false, + )?; + forest.plant( + 1, + leaf!(GrammarLabel::new_closed(Ter(0), 0, 1), GrammarLabel), + false, + )?; + + forest.plant(0, leaf!(GrammarLabel::new(7, 1), GrammarLabel), false)?; + forest.plant(3, leaf!(GrammarLabel::new(Non(0), 1), GrammarLabel), false)?; + + forest.plant(4, leaf!(GrammarLabel::new(3, 1), GrammarLabel), false)?; + forest.plant(5, leaf!(GrammarLabel::new(Non(1), 1), GrammarLabel), false)?; + + forest.plant( + 6, + leaf!(GrammarLabel::new_closed(11, 1, 2), GrammarLabel), + false, + )?; + forest.plant( + 7, + leaf!(GrammarLabel::new_closed(Ter(2), 1, 2), GrammarLabel), + false, + )?; + + forest.plant( + 6, + leaf!(GrammarLabel::new_closed(13, 2, 3), GrammarLabel), + false, + )?; + forest.plant( + 9, + leaf!(GrammarLabel::new_closed(Ter(2), 2, 3), GrammarLabel), + false, + )?; + + forest.print_viz("test forest.gv")?; + + Ok(forest) + } + + fn splone_6_3_1() -> Result>, Box> + { + let mut forest = create_test_forest()?; + + let mut builder = PLGBuilderMut::from_graph_mut(&mut forest.graph); + + builder.set_label( + 5, + ForestLabel::new(GrammarLabel::new_closed(3, 1, 3), ForestLabelType::Plain), + )?; + + builder.set_label( + 6, + ForestLabel::new( + GrammarLabel::new_closed(Non(1), 1, 3), + ForestLabelType::Plain, + ), + )?; + + Ok(forest) + } + + fn splone_6_2_0() -> Result>, Box> + { + let mut forest = splone_6_3_1()?; + + let cloned = forest.clone_node(4, 0, false)?; + + forest.plant( + cloned, + leaf!(GrammarLabel::new_closed(3, 1, 2), GrammarLabel), + false, + )?; + + forest.plant( + cloned + 1, + leaf!(GrammarLabel::new_closed(Non(1), 1, 2), GrammarLabel), + false, + )?; + + let mut builder = PLGBuilderMut::from_graph_mut(&mut forest.graph); + + let dummy_label = ForestLabel::from(GrammarLabel::new(0, 0)); + + builder.add_edge(cloned + 2, 7, dummy_label)?; + + Ok(forest) + } + + fn splone_6_n_0() -> Result>, Box> + { + let mut forest = splone_6_2_0()?; + + let cloned = forest.clone_node(4, 0, false)?; + + forest.plant(cloned, leaf!(GrammarLabel::new(3, 1), GrammarLabel), false)?; + + forest.plant( + cloned + 1, + leaf!(GrammarLabel::new(Non(1), 1), GrammarLabel), + false, + )?; + + let mut builder = PLGBuilderMut::from_graph_mut(&mut forest.graph); + + let dummy_label = ForestLabel::from(GrammarLabel::new(0, 0)); + + builder.add_edge(cloned + 2, 7, dummy_label)?; + + Ok(forest) + } + + fn splone_4_3_0() -> Result>, Box> + { + let mut forest = splone_6_n_0()?; + + let cloned = forest.clone_node(0, 0, false)?; + + forest.plant( + cloned, + leaf!(GrammarLabel::new_closed(7, 1, 3), GrammarLabel), + false, + )?; + + forest.plant( + cloned + 1, + leaf!(GrammarLabel::new_closed(Non(0), 1, 3), GrammarLabel), + false, + )?; + + forest.plant( + cloned, + leaf!(GrammarLabel::new_closed(6, 0, 1), GrammarLabel), + true, + )?; + + let mut builder = PLGBuilderMut::from_graph_mut(&mut forest.graph); + + let dummy_label = ForestLabel::from(GrammarLabel::new(0, 0)); + + builder.add_edge(cloned + 2, 5, dummy_label)?; + + Ok(forest) + } + + fn splone_17_3_0_15_3_0( + ) -> Result>, Box> { + let mut forest = splone_4_3_0()?; + + let to_clone = forest + .query_label(ForestLabel::from(GrammarLabel::new_closed(Non(1), 1, 3))) + .unwrap(); + + let cloned = forest.clone_node(to_clone, 0, false)?; + + let mut builder = PLGBuilderMut::from_graph_mut(&mut forest.graph); + + let dummy_label = ForestLabel::from(GrammarLabel::new(0, 0)); + + let child = builder + .query_label(ForestLabel::from(GrammarLabel::new_closed(11, 1, 2))) + .unwrap(); + + builder.add_edge(cloned, child, dummy_label)?; + + let to_clone = forest + .query_label(ForestLabel::from(GrammarLabel::new_closed(Non(0), 1, 3))) + .unwrap(); + + let cloned = forest.clone_node(to_clone, 0, false)?; + + let mut builder = PLGBuilderMut::from_graph_mut(&mut forest.graph); + + let child = builder + .query_label(ForestLabel::from(GrammarLabel::new(3, 1))) + .unwrap(); + + builder.add_edge(cloned, child, dummy_label)?; + + Ok(forest) + } + + #[test] + fn test() -> Result<(), Box> { + let mut test_forest = create_test_forest()?; + + test_forest.splone(6, Some(3), 1)?; + + let answer = splone_6_3_1()?; + + if test_forest != answer { + test_forest.print_viz("sploned forest.gv")?; + answer.print_viz("splone 6 3 1.gv")?; + panic!("splone(6, Some(3), 1) produced wrong forest"); + } + + test_forest.splone(6, Some(3), 1)?; + + if test_forest != answer { + test_forest.print_viz("sploned forest.gv")?; + answer.print_viz("splone 6 3 1.gv")?; + panic!("repeated splone(6, Some(3), 1) produced wrong forest"); + } + + test_forest.splone(6, Some(2), 0)?; + + let answer = splone_6_2_0()?; + + if test_forest != answer { + test_forest.print_viz("sploned forest.gv")?; + answer.print_viz("splone 6 2 0.gv")?; + panic!("splone(6, Some(2), 0) produced wrong forest"); + } + + test_forest.splone(6, Some(2), 0)?; + + if test_forest != answer { + test_forest.print_viz("sploned forest.gv")?; + answer.print_viz("splone 6 2 0.gv")?; + panic!("repeated splone(6, Some(2), 0) produced wrong forest"); + } + + test_forest.splone(6, None, 0)?; + + let answer = splone_6_n_0()?; + + if test_forest != answer { + test_forest.print_viz("sploned forest.gv")?; + answer.print_viz("splone 6 None 0.gv")?; + panic!("splone(6, None, 0) produced wrong forest"); + } + + test_forest.splone(14, None, 0)?; + + if test_forest != answer { + test_forest.print_viz("sploned forest.gv")?; + answer.print_viz("splone 6 None 0.gv")?; + panic!("repeated splone(6, None, 0) produced wrong forest"); + } + + test_forest.splone(4, Some(3), 0)?; + + let answer = splone_4_3_0()?; + + if test_forest != answer { + test_forest.print_viz("sploned forest.gv")?; + answer.print_viz("splone 4 3 0.gv")?; + panic!("splone(4, Some(3), 0) produced wrong forest"); + } + + test_forest.splone(4, Some(3), 0)?; + + if test_forest != answer { + test_forest.print_viz("sploned forest.gv")?; + answer.print_viz("splone 4 3 0.gv")?; + panic!("repeated splone(4, Some(3), 0) produced wrong forest"); + } + + test_forest.splone(17, Some(3), 0)?; + test_forest.splone(15, Some(3), 0)?; + + let answer = splone_17_3_0_15_3_0()?; + + if test_forest != answer { + test_forest.print_viz("sploned forest.gv")?; + answer.print_viz("splone 17 3 0 15 3 0.gv")?; + panic!("splone(17, Some(3), 0) - splone(15, Some(3), 0) produced wrong forest"); + } + + test_forest.splone(17, Some(3), 0)?; + test_forest.splone(15, Some(3), 0)?; + + if test_forest != answer { + test_forest.print_viz("sploned forest.gv")?; + answer.print_viz("splone 17 3 0 15 3 0.gv")?; + panic!( + "repeated splone(17, Some(3), 0) - splone(15, Some(3), 0) produced wrong forest" + ); + } + + Ok(()) + } +} diff --git a/chain/src/item/genins.rs b/chain/src/item/genins.rs index 9ed3b74..43807f8 100644 --- a/chain/src/item/genins.rs +++ b/chain/src/item/genins.rs @@ -68,7 +68,7 @@ pub fn generate_fragment( } /// Generate a virtual fragment representing the left-linear null -/// closure [nt]^t. +/// closure \[nt\]^t. pub fn virtual_generate_fragment( atom: impl Borrow, nt: usize, @@ -130,12 +130,18 @@ impl DefaultForest> { let pase = label.forest_source(); - let _fragment_root = if let Some(root) = fragment.root() { + let fragment_root = if let Some(root) = fragment.root() { root } else { return Err(Error::EmptyItem); }; + let pos = fragment + .vertex_label(fragment_root)? + .ok_or(Error::NodeNoLabel(fragment_root))? + .label() + .start(); + // Ensure the last node in the PaSe is a terminal or a // non-terminal node, as an extra safety guard during // development. @@ -236,26 +242,83 @@ impl DefaultForest> { // locate the nodes for reduction_nt in reduction_info.iter().copied().flatten().rev() { - while let Some(node) = stack.pop() { + while let Some(mut node) = stack.pop() { // REVIEW: A lot of labels appear here. // Perhaps I need to refactor something? + let mut node_label = self + .vertex_label(node.node())? + .ok_or_else(|| Error::NodeNoLabel(node.node()))?; + if matches!( - self - .vertex_label(node.node())? - .ok_or_else(|| Error::NodeNoLabel(node.node()))? + node_label .label() .label(), GrammarLabelType::TNT(TNT::Non(nt)) if nt == *reduction_nt ) { - for parent in self.parents_of(node.node())? { - debug_assert!(matches!( - self.vertex_label(parent.node()), - Ok(Some(label)) if - label - .label() - .label() - .rule() - .is_some())); + // TODO: Try to update the end index of the + // node; if the node already has an ending + // index, clone the node, and update the + // ending index of the cloned node; otherwise, + // just set the ending index. + + if node_label.label().end().is_some() { + let cloned_node = + self.clone_node(node.node(), node.edge() + 1, false)?; + + self.transform_label(cloned_node, |mut label| { + label.set_end(pos + 1); + label + })?; + + node_label = self + .vertex_label(node.node())? + .ok_or_else(|| Error::NodeNoLabel(node.node()))?; + } else { + self.transform_label(node.node(), |mut label| { + label.set_end(pos + 1); + label + })?; + } + + if node_label.clone_index().is_some() { + let mut parent_iter = self.parents_of(node.node())?; + + #[cfg(debug_assertions)] + assert_eq!(parent_iter.len(), 1); + + node = parent_iter.next().unwrap(); + + #[cfg(debug_assertions)] + assert!(self + .vertex_label(node.node())? + .ok_or_else(|| Error::NodeNoLabel(node.node()))? + .is_packed()); + } + + let parents_iter = self.parents_of(node.node())?; + + let to_update = parents_iter + .clone() + .map(|parent| parent.node()) + .collect::>(); + + for parent in parents_iter { + let parent_label = self + .vertex_label(parent.node())? + .ok_or_else(|| Error::NodeNoLabel(parent.node()))? + .label(); + + if parent_label.label().rule().is_none() { + crate::item::default::print_labels(atom, self.borrow()).unwrap(); + self.print_viz("dbg forest.gv").unwrap(); + + dbg!(parent, parent_label, label, node); + + panic!("assumption fails"); + } + + // debug_assert!(parent_label.label().rule().is_some()); + // debug_assert!(parent_label.end().is_none()); second_stack.extend(self.parents_of(parent.node())?.filter(|n| { matches!(self.vertex_label(n.node()), @@ -265,6 +328,13 @@ impl DefaultForest> { Some(TNT::Non(_)))) })); } + + for node in to_update { + self.transform_label(node, |mut label| { + label.set_end(pos + 1); + label + })?; + } } } @@ -293,7 +363,29 @@ impl DefaultForest> { } for parent in stack.into_iter() { - if parent.edge() + 1 >= self.degree(parent.node())? { + let was_last_child = parent.edge() + 1 >= self.degree(parent.node())?; + + let overlap = { + if let Ok(child) = self.nth_child(parent.node(), parent.edge()) { + let edge_th_child = child; + let child_label = self + .vertex_label(edge_th_child)? + .ok_or(Error::NodeNoLabel(edge_th_child))?; + let child_start = child_label.label().start(); + let child_end = child_label.label().end(); + + child_start >= pos || matches!(child_end, Some(end) if end > pos) + } else { + // the root case + false + } + }; + + if overlap { + let cloned_node = self.clone_node(parent.node(), parent.edge() + 1, false)?; + + self.plant(cloned_node, fragment, non_empty)?; + } else if was_last_child { // dbg!(&fragment); self.plant(parent.node(), fragment, non_empty)?; } else { @@ -303,9 +395,18 @@ impl DefaultForest> { // do nothing continue; } - dbg!("clone?"); - let cloned_node = self.clone_node(nth_child)?; + // dbg!( + // label, + // atom_child, + // &parents, + // reduction_info, + // atom.query_reduction(label.label(), atom_child).unwrap() + // ); + + // self.print_viz("dbg forest.gv").unwrap(); + + let cloned_node = self.clone_node(parent.node(), parent.edge() + 1, false)?; self.plant(cloned_node, fragment, non_empty)?; } @@ -484,4 +585,22 @@ mod genins_test { Ok(()) } + + #[test] + fn test_reduction() -> Result<(), Box> { + let grammar = new_paren_grammar()?; + + let atom = DefaultAtom::from_grammar(grammar)?; + + #[cfg(feature = "test-print-viz")] + atom.print_nfa("genins nfa.gv")?; + + // querying reductions + + println!("{:?}", atom.query_reduction(32, 17)?); + + // assert!(matches!(atom.query_reduction(35, 25), Ok(Some(&[2])))); + + Ok(()) + } } diff --git a/chain/src/item/mod.rs b/chain/src/item/mod.rs index c614802..284d640 100644 --- a/chain/src/item/mod.rs +++ b/chain/src/item/mod.rs @@ -5,16 +5,6 @@ //! More specifically, it implements the iten derivation forests for //! the machine. -// TODO: Implement an enumeration for Parent or Segment, perhaps -// called PaSe. - -// TODO: Move functions for handling forests, currently contained -// within the method derive to this module, and make them aware of the -// enumeration PaSe. - -// TODO: Implement the Item trait from semirings for the item -// derivation forest, and then we can easily execute items later on. - use graph::{error::Error as GError, GraphLabel, LabelGraph, Parent, ParentsGraph}; use core::borrow::Borrow; @@ -88,7 +78,7 @@ impl core::fmt::Display for ForestLabelType { match self { Self::Packed => write!(f, "packed"), Self::Plain => write!(f, "plain"), - Self::Cloned(index) => write!(f, "the {index}-th clone"), + Self::Cloned(index) => write!(f, "clone({index})"), } } } @@ -102,7 +92,11 @@ pub struct ForestLabel { impl core::fmt::Display for ForestLabel { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - writeln!(f, "label: {}, status: {}", self.label, self.status) + if !matches!(self.status, ForestLabelType::Plain) { + write!(f, "{}, {}", self.label, self.status) + } else { + write!(f, "{}", self.label) + } } } @@ -128,6 +122,10 @@ impl core::fmt::Display for ForestLabelError { impl std::error::Error for ForestLabelError {} impl ForestLabel { + fn new(label: T, status: ForestLabelType) -> Self { + Self { label, status } + } + /// Retrieve the label. pub fn label(&self) -> T { self.label @@ -135,7 +133,7 @@ impl ForestLabel { /// Return true if and only if this node is packed. pub fn is_packed(&self) -> bool { - matches!(self.status, ForestLabelType::Packed) + self.status == ForestLabelType::Packed } /// Retrieve the optional clone index. @@ -146,12 +144,18 @@ impl ForestLabel { } } + /// Return true if and only if this node is a plain node. + pub fn is_plain(&self) -> bool { + self.status == ForestLabelType::Plain + } + /// Try to clone a node. pub fn clone(self, forest: &F) -> Result where - F: Forest, + F: LabelGraph>, { if self.is_packed() { + dbg!(); Err(ForestLabelError::ClonePack) } else { let clone_index = if let Some(old_index) = self.clone_index() { @@ -221,6 +225,13 @@ pub trait Forest: ParentsGraph + LabelGraph> { /// label. fn new_leaf(label: T) -> Self; + /// Transform the label at the given node. + fn transform_label( + &mut self, + node_id: usize, + transform: impl FnOnce(T) -> T, + ) -> Result<(), Self::Error>; + /// Detect if the fragment is a prefix of the sub-forest rooted at /// `node_id`. fn is_prefix(&self, node_id: usize, fragment: F) -> Result @@ -235,13 +246,22 @@ pub trait Forest: ParentsGraph + LabelGraph> { /// Clone a node by making a new node and making all the nodes /// that previously pointed to the old node now point to the new /// node, and the new node points to the old node. Return the - /// index of the new node. In addition, if the old node had - /// already been cloned, just return the index of its - /// representative. + /// index of the new node. However, if, and only if, + /// `no_new_clone` is `true`, do not make a new clone; instead + /// return the clone index that would be used if a new clone was + /// made. + /// + /// Also, `preserved_edges_num` many edges out of the old node + /// will be copied to be the children of the new node. /// /// The labels of the representing node and of the cloned node /// will be handled automatically. - fn clone_node(&mut self, node_id: usize) -> Result; + fn clone_node( + &mut self, + node_id: usize, + preserved_edges_num: usize, + no_new_clone: bool, + ) -> Result; } pub mod default; -- cgit v1.2.3-18-g5258