//! This module provides a default implementation of iten derivation //! forest. use super::*; use crate::atom::default::DefaultAtom; use grammar::{GrammarLabel, GrammarLabelType}; use graph::{ builder::BuilderMut, labelled::binary::PLGBuilderMut, Graph, LabelGraph, PLGraph, RedirectGraph, }; use core::fmt::Display; /// The type of errors for forest operations. #[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd)] pub enum Error { /// An index is out of bounds. /// /// The first component is the index that is out of bounds, and /// the second component is the current length of nodes. IndexOutOfBounds(usize, usize), /// The forest does not permit duplicate nodes but encounters a /// repeated node. DuplicatedNode(usize), /// A node has no labels while it is required to have one. NodeNoLabel(usize), /// Encounter an invalid error in converting from an error of /// graphs. InvalidGraphError(GError), /// Encounter an error when converting forest labels. LabelConversion(ForestLabelError), } impl Display for Error { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Error::IndexOutOfBounds(index, bound) => { write!(f, "index {index} is out of bound {bound}") } Error::DuplicatedNode(n) => write!(f, "node {n} is duplicated"), Error::InvalidGraphError(ge) => write!(f, "invalid error: {ge}"), Error::NodeNoLabel(n) => write!(f, "node {n} has no labels, but it should have one"), Error::LabelConversion(le) => write!(f, "fail to convert labels: {le}"), } } } impl std::error::Error for Error {} impl From for Error { fn from(ge: GError) -> Self { match ge { GError::IndexOutOfBounds(index, bound) => Self::IndexOutOfBounds(index, bound), GError::DuplicatedNode(n) => Self::DuplicatedNode(n), _ => Self::InvalidGraphError(ge), } } } impl From for Error { fn from(le: ForestLabelError) -> Self { Self::LabelConversion(le) } } /// A default implementation of forest. #[derive(Debug, Clone)] pub struct DefaultForest { graph: PLGraph, root: Option, } impl Default for DefaultForest { fn default() -> Self { let graph = Default::default(); let root = None; Self { graph, root } } } impl AsRef> for DefaultForest { fn as_ref(&self) -> &DefaultForest { self } } impl Graph for DefaultForest { type Iter<'a> = as Graph>::Iter<'a> where Self: 'a; #[inline] fn is_empty(&self) -> bool { self.graph.is_empty() } #[inline] fn nodes_len(&self) -> usize { self.graph.nodes_len() } #[inline] fn edges_len(&self) -> Option { self.graph.edges_len() } #[inline] fn children_of(&self, node_id: usize) -> Result, GError> { self.graph.children_of(node_id) } #[inline] fn degree(&self, node_id: usize) -> Result { self.graph.degree(node_id) } #[inline] fn is_empty_node(&self, node_id: usize) -> Result { self.graph.is_empty_node(node_id) } #[inline] fn has_edge(&self, source: usize, target: usize) -> Result { self.graph.has_edge(source, target) } fn replace_by_builder(&mut self, _builder: impl graph::Builder) { unimplemented!() } #[inline] fn print_viz(&self, filename: &str) -> Result<(), std::io::Error> { self.graph.print_viz(filename) } } impl ParentsGraph for DefaultForest { type Iter<'a>= as ParentsGraph>::Iter<'a> where Self:'a; #[inline] fn parents_of(&self, node_id: usize) -> Result<::Iter<'_>, GError> { self.graph.parents_of(node_id) } #[inline] fn nth_child(&self, node_id: usize, n: usize) -> Result { self.graph.nth_child(node_id, n) } #[inline] fn edge_to_parent( &self, source: usize, target: usize, ) -> Result, GError> { self.graph.edge_to_parent(source, target) } } impl LabelGraph for DefaultForest { type Iter<'a> = std::iter::Empty where Self: 'a; type LabelIter<'a> = std::iter::Empty<(&'a T, >::Iter<'a>)> where Self: 'a, T: 'a; type EdgeLabelIter<'a> = std::iter::Empty where Self: 'a, T: 'a; #[inline] fn query_label(&self, label: T) -> Option { self.graph.query_label(label) } #[inline] fn vertex_label(&self, node_id: usize) -> Result, GError> { self.graph.vertex_label(node_id) } fn edge_label( &self, _source: usize, _target: usize, ) -> Result, GError> { unimplemented!("edges have no labels") } fn find_children_with_label( &self, _node_id: usize, _label: &T, ) -> Result<>::Iter<'_>, GError> { unimplemented!("edges have no labels") } fn labels_of(&self, _node_id: usize) -> Result, GError> { unimplemented!("edges have no labels") } fn has_edge_label(&self, _node_id: usize, _label: &T, _target: usize) -> Result { unimplemented!("edges have no labels") } } impl Forest for DefaultForest> { type Error = Error; fn root(&self) -> Option { self.root } fn new_leaf(label: T) -> Self { let mut graph = PLGraph::default(); let mut builder = PLGBuilderMut::from_graph_mut(&mut graph); let root = Some(builder.add_vertex(ForestLabel::from(label))); Self { graph, root } } fn is_prefix(&self, node_id: usize, fragment: F) -> Result where F: Borrow, { if !self.has_node(node_id) { return Err(Error::IndexOutOfBounds(node_id, self.nodes_len())); } // We do a depth-first traversal to determine if every node // encountered has the same set of children (labels taken into // the consideration). let fragment = fragment.borrow(); let mut frag_stack = Vec::with_capacity(fragment.nodes_len()); let mut self_stack = Vec::with_capacity(fragment.nodes_len()); let frag_root = if let Some(root) = fragment.root() { root } else { // an empty forest is a prefix of any forest. return Ok(true); }; frag_stack.push(frag_root); self_stack.push(node_id); // defer popping while let (Some(frag_top), Some(self_top)) = (frag_stack.last().copied(), self_stack.last().copied()) { frag_stack.pop(); self_stack.pop(); if fragment.vertex_label(frag_top)? != self.vertex_label(self_top)? { // not a prefix return Ok(false); } let mut self_children = self.children_of(self_top)?; for child in fragment.children_of(frag_top)? { if let Some(self_child) = self_children.next() { frag_stack.push(child); self_stack.push(self_child); } else { // too few children return Ok(false); } } } // Check both stacks are empty at the end. Ok(frag_stack.is_empty() && self_stack.is_empty()) } fn plant(&mut self, node_id: usize, fragment: F, planted: bool) -> Result<(), Self::Error> where F: Borrow, { // Convert self to a builder_mut, and traverse fragment in a // depth-first manner and adjoin corresponding nodes along the // way. if !self.has_node(node_id) { return Err(Error::IndexOutOfBounds(node_id, self.nodes_len())); } let fragment = fragment.borrow(); let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); let root = if let Some(root) = fragment.root() { root } else { // Nothing to do to plant an empty forest. return Ok(()); }; // Just a dummy label for use in adding edges. // // REVIEW: I probably should refactor the API for builder_mut. let root_label = fragment .vertex_label(root)? .ok_or(Error::NodeNoLabel(root))?; let nodes_len = fragment.nodes_len(); /// If the fragment root has a duplicate label, the forest /// will not grow, so we use the label to find the adjoined /// node index. /// /// The nodes hava already been added to the forest, so it is /// safe to call unwrap. macro_rules! conversion ( ($node:expr) => { { builder .query_label( fragment .vertex_label($node)? .ok_or(Error::NodeNoLabel($node))? ).unwrap() } } ); // If the fragment has been planted before, we just add an // edge. if planted { builder.add_edge(node_id, conversion!(root), root_label)?; return Ok(()); } // First adjoin those nodes and join the edges later. for node in 0..nodes_len { let label = fragment .vertex_label(node)? .ok_or(Error::NodeNoLabel(node))?; builder.add_vertex(label); } // Don't forget to join the new sub-forest to the original // forest, at the specified position. builder.add_edge(node_id, conversion!(root), root_label)?; // We can try to calculate the depth of fragments, if we need // to lower the memory usage. But in our use cases, we // usually deal with fragments where each node has at most one // child, so the depth is supposed to be equal to the length // in this case. let mut stack = Vec::with_capacity(fragment.nodes_len()); stack.push(root); let mut seen_nodes = std::collections::HashSet::::new(); while let Some(top) = stack.pop() { seen_nodes.insert(top); for child in fragment.children_of(top)? { builder.add_edge(conversion!(top), conversion!(child), root_label)?; if !seen_nodes.contains(&child) { seen_nodes.insert(child); stack.push(child); } } } Ok(()) } fn clone_node(&mut self, node_id: usize) -> Result { let builder = PLGBuilderMut::from_graph_mut(&mut self.graph); let old_label = builder .vertex_label(node_id)? .ok_or(Error::NodeNoLabel(node_id))?; if old_label.is_packed() { return Err(ForestLabelError::ClonePack.into()); } // We are sure old_label is not packed here, so it is safe to // unwrap. let new_label = old_label.clone(self).unwrap(); if old_label.clone_index().is_some() { let mut parents = self.parents_of(node_id)?; // Make sure it only has one parent, which is the // representative node. assert_eq!(parents.len(), 1); let rep_node = parents.next().unwrap().node(); let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); let new_clone = builder.add_vertex(new_label); builder.add_edge(rep_node, new_clone, new_label)?; // We checked its length is 1, so it is safe to unwrap // here. return Ok(new_clone); } let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); // We are sure old_label is not a clone here, so it is safe to // unwrap. let rep_label = old_label.pack().unwrap(); // Make a new node let new_index = builder.add_vertex(rep_label); // Re-direct parents to the new node. // // This must be done before pointing the new node to the old // node, otherwise that edge will be redirected as well. // Unfortunately I cannot loop through parents and mutate them // at the same time, so I first collect them into a vector. let parents: Vec<_> = builder.parents_of(node_id)?.collect(); for parent in parents.into_iter() { builder.redirect(parent.node(), parent.edge(), new_index)?; } // Point the new node to the old node. OLD_LABEL is just a // place holder. builder.add_edge(new_index, node_id, old_label)?; // Modify the label of the old node. builder.set_label(node_id, new_label)?; // Make another clone // new_label is cloned, so is guaranteed not to be packed, and // we are safe to unwrap. let new_label = new_label.clone(self).unwrap(); let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); let new_clone = builder.add_vertex(new_label); builder.add_edge(new_index, new_clone, new_label)?; Ok(new_clone) } } impl PartialEq for DefaultForest> { fn eq(&self, other: &Self) -> bool { let self_root = self.root(); let other_root = other.root(); if (self_root.is_some() && other_root.is_none()) || (self_root.is_none() && other_root.is_some()) { return false; } else if self_root.is_none() && other_root.is_none() { return true; } // both roots are not empty let self_root = self_root.unwrap(); let other_root = other_root.unwrap(); self.is_prefix(self_root, other).unwrap_or(false) && other.is_prefix(other_root, self).unwrap_or(false) } } impl Eq for DefaultForest> {} /// Print the labels found in the forest, so that we can easily /// understand what those labels mean. pub fn print_labels( atom: impl Borrow, forest: impl Borrow>>, ) -> Result<(), Box> { let forest = forest.borrow(); let atom = atom.borrow(); for node in forest.nodes() { let label = forest.vertex_label(node)?; if let Some(label) = label { let label = label.label.label(); match label { GrammarLabelType::TNT(tnt) => { println!("{tnt} = {}", atom.name_of_tnt(tnt)?); } GrammarLabelType::Rule(pos) => { println!("pos {pos} = {}", atom.rule_pos_string(pos)?); } } } else { return Err(Error::NodeNoLabel(node).into()); } } Ok(()) } #[allow(unused_macros)] macro_rules! leaf ( ($label:expr, $type:tt) =>{ DefaultForest::>::new_leaf($label) }; ($label:expr) => { DefaultForest::>::new_leaf($label) } ); #[allow(unused_imports)] pub(crate) use leaf; #[cfg(test)] mod item_test { use super::*; #[test] fn test_forest_api() -> Result<(), Box> { let forest: DefaultForest> = Default::default(); // empty forest assert!(forest.is_empty()); // leaf forest let mut forest = leaf!(0, usize); assert_eq!(forest.nodes_len(), 1); assert_eq!(forest.root(), Some(0)); // add some child forest.plant(0, leaf!(1), false)?; assert_eq!(forest.nodes_len(), 2); let mut children = forest.children_of(0)?; assert_eq!(children.next(), Some(1)); assert_eq!(children.next(), None); // add more children forest.plant(0, leaf!(2), false)?; forest.plant(0, leaf!(3), false)?; forest.plant(0, leaf!(4), false)?; forest.plant(2, leaf!(5), false)?; assert_eq!(forest.nodes_len(), 6); let mut children = forest.children_of(0)?; assert_eq!(children.next(), Some(1)); assert_eq!(children.next(), Some(2)); assert_eq!(children.next(), Some(3)); assert_eq!(children.next(), Some(4)); let mut children = forest.children_of(2)?; assert_eq!(children.next(), Some(5)); assert_eq!(children.next(), None); let mut test_forest = leaf!(0); test_forest.plant(0, leaf!(1), false)?; test_forest.plant(0, leaf!(2), false)?; test_forest.plant(0, leaf!(3), false)?; test_forest.plant(2, leaf!(5), false)?; assert!(forest.is_prefix(0, &test_forest)?); let mut test_forest = leaf!(0); test_forest.plant(0, leaf!(1), false)?; test_forest.plant(0, leaf!(2), false)?; // this child of the root should have label 3 in order to be a // prefix. test_forest.plant(0, leaf!(4), false)?; test_forest.plant(2, leaf!(5), false)?; assert!(!forest.is_prefix(0, &test_forest)?); let mut test_forest = leaf!(2); test_forest.plant(0, leaf!(5), false)?; assert!(forest.is_prefix(2, &test_forest)?); // now test cloning // add a duplicate label forest.plant(3, leaf!(5), false)?; let _len = forest.nodes_len(); forest.clone_node(5)?; assert_eq!(forest.nodes_len(), 8); #[cfg(feature = "test-print-viz")] forest.print_viz("forest.gv")?; Ok(()) } #[test] fn test_eq() -> Result<(), Box> { let mut forest = leaf!(0, usize); forest.plant(0, leaf!(1), false)?; forest.plant(0, leaf!(2), false)?; forest.plant(0, leaf!(3), false)?; forest.plant(0, leaf!(4), false)?; forest.plant(2, leaf!(5), false)?; let mut test_forest = leaf!(0); test_forest.plant(0, leaf!(1), false)?; test_forest.plant(0, leaf!(2), false)?; test_forest.plant(0, leaf!(3), false)?; test_forest.plant(2, leaf!(5), false)?; assert_ne!(forest, test_forest); assert_ne!(test_forest, forest); test_forest.plant(0, leaf!(4), false)?; assert_eq!(forest, test_forest); assert_eq!(test_forest, forest); Ok(()) } }