From b8a2d05a3c0d835556d5ddbd44e4a1e201302af5 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Thu, 10 Aug 2023 11:14:04 +0800 Subject: Move the trait `Forest` to the crate "forest". The purpose of this change is to share this trait with other crates, such as the forth-coming "semiring" crate that will be responsible for handling some simple semiring operations as well as the querying, in my plans. --- forest/Cargo.toml | 1 + forest/src/archive_default.txt | 525 +++++++++++++++++++++++++++++++++++++++++ forest/src/bytes.rs | 176 ++++++++++++++ forest/src/default.rs | 523 ---------------------------------------- forest/src/lib.rs | 102 +++++--- 5 files changed, 772 insertions(+), 555 deletions(-) create mode 100644 forest/src/archive_default.txt create mode 100644 forest/src/bytes.rs delete mode 100644 forest/src/default.rs (limited to 'forest') diff --git a/forest/Cargo.toml b/forest/Cargo.toml index 8cc445b..5702265 100644 --- a/forest/Cargo.toml +++ b/forest/Cargo.toml @@ -13,5 +13,6 @@ test-print-viz = [] [dependencies] graph = { path = "../graph" } +grammar = { path = "../grammar" } # This should be integrated in the package, but it is not yet done. \ No newline at end of file diff --git a/forest/src/archive_default.txt b/forest/src/archive_default.txt new file mode 100644 index 0000000..5572ecc --- /dev/null +++ b/forest/src/archive_default.txt @@ -0,0 +1,525 @@ +// DEPRECATED: This file is deprecated. + +//! This file provides a default implementation for the +//! [`Forest`][crate::Forest] trait. + +use super::*; +use graph::{ + builder::BuilderMut, labelled::binary::PLGBuilderMut, Graph, LabelGraph, PLGraph, RedirectGraph, +}; + +use core::fmt::Display; + +/// The type of errors for forest operations. +#[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd)] +pub enum Error { + /// An index is out of bounds. + /// + /// The first component is the index that is out of bounds, and + /// the second component is the current length of nodes. + IndexOutOfBounds(usize, usize), + /// The forest does not permit duplicate nodes but encounters a + /// repeated node. + DuplicatedNode(usize), + /// A node has no labels while it is required to have one. + NodeNoLabel(usize), + /// Encounter an invalid error in converting from an error of + /// graphs. + InvalidGraphError(GError), + /// Encounter an error when converting forest labels. + LabelConversion(ForestLabelError), +} + +impl Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Error::IndexOutOfBounds(index, bound) => { + write!(f, "index {index} is out of bound {bound}") + } + Error::DuplicatedNode(n) => write!(f, "node {n} is duplicated"), + Error::InvalidGraphError(ge) => write!(f, "invalid error: {ge}"), + Error::NodeNoLabel(n) => write!(f, "node {n} has no labels, but it should have one"), + Error::LabelConversion(le) => write!(f, "fail to convert labels: {le}"), + } + } +} + +impl std::error::Error for Error {} + +impl From for Error { + fn from(ge: GError) -> Self { + match ge { + GError::IndexOutOfBounds(index, bound) => Self::IndexOutOfBounds(index, bound), + GError::DuplicatedNode(n) => Self::DuplicatedNode(n), + _ => Self::InvalidGraphError(ge), + } + } +} + +impl From for Error { + fn from(le: ForestLabelError) -> Self { + Self::LabelConversion(le) + } +} + +/// A default implementation of forest. +#[derive(Debug, Clone)] +pub struct DefaultForest { + graph: PLGraph, + root: Option, +} + +impl Default for DefaultForest { + fn default() -> Self { + let graph = Default::default(); + let root = None; + + Self { graph, root } + } +} + +impl AsRef> for DefaultForest { + fn as_ref(&self) -> &DefaultForest { + self + } +} + +impl Graph for DefaultForest { + type Iter<'a> = as Graph>::Iter<'a> + where + Self: 'a; + + #[inline] + fn is_empty(&self) -> bool { + self.graph.is_empty() + } + + #[inline] + fn nodes_len(&self) -> usize { + self.graph.nodes_len() + } + + #[inline] + fn edges_len(&self) -> Option { + self.graph.edges_len() + } + + #[inline] + fn children_of(&self, node_id: usize) -> Result, GError> { + self.graph.children_of(node_id) + } + + #[inline] + fn degree(&self, node_id: usize) -> Result { + self.graph.degree(node_id) + } + + #[inline] + fn is_empty_node(&self, node_id: usize) -> Result { + self.graph.is_empty_node(node_id) + } + + #[inline] + fn has_edge(&self, source: usize, target: usize) -> Result { + self.graph.has_edge(source, target) + } + + fn replace_by_builder(&mut self, _builder: impl graph::Builder) { + unimplemented!() + } +} + +impl ParentsGraph for DefaultForest { + type Iter<'a>= as ParentsGraph>::Iter<'a> + where + Self:'a; + + #[inline] + fn parents_of(&self, node_id: usize) -> Result<::Iter<'_>, GError> { + self.graph.parents_of(node_id) + } + + #[inline] + fn nth_child(&self, node_id: usize, n: usize) -> Result { + self.graph.nth_child(node_id, n) + } + + #[inline] + fn edge_to_parent( + &self, + source: usize, + target: usize, + ) -> Result, GError> { + self.graph.edge_to_parent(source, target) + } +} + +impl LabelGraph for DefaultForest { + type Iter<'a> = std::iter::Empty + where + Self: 'a; + + type LabelIter<'a> = std::iter::Empty<(&'a T, >::Iter<'a>)> + where + Self: 'a, + T: 'a; + + type EdgeLabelIter<'a> = std::iter::Empty + where + Self: 'a, + T: 'a; + + #[inline] + fn query_label(&self, label: T) -> Option { + self.graph.query_label(label) + } + + #[inline] + fn vertex_label(&self, node_id: usize) -> Result, GError> { + self.graph.vertex_label(node_id) + } + + fn edge_label( + &self, + _source: usize, + _target: usize, + ) -> Result, GError> { + unimplemented!("edges have no labels") + } + + fn find_children_with_label( + &self, + _node_id: usize, + _label: &T, + ) -> Result<>::Iter<'_>, GError> { + unimplemented!("edges have no labels") + } + + fn labels_of(&self, _node_id: usize) -> Result, GError> { + unimplemented!("edges have no labels") + } + + fn has_edge_label(&self, _node_id: usize, _label: &T, _target: usize) -> Result { + unimplemented!("edges have no labels") + } +} + +impl Forest for DefaultForest> { + type Error = Error; + + fn root(&self) -> Option { + self.root + } + + fn new_leaf(label: T) -> Self { + let mut graph = PLGraph::default(); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut graph); + + let root = Some(builder.add_vertex(ForestLabel::from(label))); + + Self { graph, root } + } + + fn is_prefix(&self, node_id: usize, fragment: F) -> Result + where + F: AsRef, + { + if !self.has_node(node_id) { + return Err(Error::IndexOutOfBounds(node_id, self.nodes_len())); + } + + // We do a depth-first traversal to determine if every node + // encountered has the same set of children (labels taken into + // the consideration). + + let fragment = fragment.as_ref(); + + let mut frag_stack = Vec::with_capacity(fragment.nodes_len()); + + let mut self_stack = Vec::with_capacity(fragment.nodes_len()); + + let frag_root = if let Some(root) = fragment.root() { + root + } else { + // an empty forest is a prefix of any forest. + return Ok(true); + }; + + frag_stack.push(frag_root); + self_stack.push(node_id); + + // defer popping + while let (Some(frag_top), Some(self_top)) = + (frag_stack.last().copied(), self_stack.last().copied()) + { + frag_stack.pop(); + self_stack.pop(); + + if fragment.vertex_label(frag_top)? != self.vertex_label(self_top)? { + // not a prefix + return Ok(false); + } + + let mut self_children = self.children_of(self_top)?; + + for child in fragment.children_of(frag_top)? { + if let Some(self_child) = self_children.next() { + frag_stack.push(child); + self_stack.push(self_child); + } else { + // too few children + return Ok(false); + } + } + } + + // Check both stacks are empty at the end. + Ok(frag_stack.is_empty() && self_stack.is_empty()) + } + + fn plant(&mut self, node_id: usize, fragment: F, planted: bool) -> Result<(), Self::Error> + where + F: AsRef, + { + // Convert self to a builder_mut, and traverse fragment in a + // depth-first manner and adjoin corresponding nodes along the + // way. + + if !self.has_node(node_id) { + return Err(Error::IndexOutOfBounds(node_id, self.nodes_len())); + } + + let fragment = fragment.as_ref(); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let root = if let Some(root) = fragment.root() { + root + } else { + // Nothing to do to plant an empty forest. + return Ok(()); + }; + + // Just a dummy label for use in adding edges. + // + // REVIEW: I probably should refactor the API for builder_mut. + let root_label = fragment + .vertex_label(root)? + .ok_or(Error::NodeNoLabel(root))?; + + let nodes_len = fragment.nodes_len(); + + /// If the fragment root has a duplicate label, the forest + /// will not grow, so we use the label to find the adjoined + /// node index. + /// + /// The nodes hava already been added to the forest, so it is + /// safe to call unwrap. + macro_rules! conversion ( + ($node:expr) => { + { + builder + .query_label( + fragment + .vertex_label($node)? + .ok_or(Error::NodeNoLabel($node))? + ).unwrap() + } + } + ); + + // If the fragment has been planted before, we just add an + // edge. + + if planted { + builder.add_edge(node_id, conversion!(root), root_label)?; + + return Ok(()); + } + + // First adjoin those nodes and join the edges later. + + for node in 0..nodes_len { + let label = fragment + .vertex_label(node)? + .ok_or(Error::NodeNoLabel(node))?; + + builder.add_vertex(label); + } + + // Don't forget to join the new sub-forest to the original + // forest, at the specified position. + + builder.add_edge(node_id, conversion!(root), root_label)?; + + // We can try to calculate the depth of fragments, if we need + // to lower the memory usage. But in our use cases, we + // usually deal with fragments where each node has at most one + // child, so the depth is supposed to be equal to the length + // in this case. + let mut stack = Vec::with_capacity(fragment.nodes_len()); + + stack.push(root); + + while let Some(top) = stack.pop() { + for child in fragment.children_of(top)? { + builder.add_edge(conversion!(top), conversion!(child), root_label)?; + } + } + + Ok(()) + } + + fn clone_node(&mut self, node_id: usize) -> Result { + let builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let old_label = builder + .vertex_label(node_id)? + .ok_or(Error::NodeNoLabel(node_id))?; + + if old_label.is_packed() { + return Err(ForestLabelError::ClonePack.into()); + } + + if old_label.is_cloned().is_some() { + let mut parents = self.parents_of(node_id)?; + + // Make sure it only has one parent, which is the + // representative node. + assert_eq!(parents.len(), 1); + + // We checked its length is 1, so it is safe to unwrap + // here. + return Ok(parents.next().unwrap().node()); + } + + let rep_label = old_label.pack()?; + + let new_label = old_label.clone(self)?; + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + // Make a new node + let new_index = builder.add_vertex(rep_label); + + // Re-direct parents to the new node. + // + // This must be done before pointing the new node to the old + // node, otherwise that edge will be redirected as well. + + // Unfortunately I cannot loop through parents and mutate them + // at the same time, so I first collect them into a vector. + let parents: Vec<_> = builder.parents_of(node_id)?.collect(); + + for parent in parents.into_iter() { + builder.redirect(parent.node(), parent.edge(), new_index)?; + } + + // Point the new node to the old node. OLD_LABEL is just a + // place holder. + + builder.add_edge(new_index, node_id, old_label)?; + + // Modify the label of the old node. + + builder.set_label(node_id, new_label)?; + + Ok(new_index) + } +} + +#[cfg(test)] +mod forest_test { + use super::*; + + macro_rules! leaf ( + ($label:expr, $type:tt) =>{ + DefaultForest::>::new_leaf($label) + }; + ($label:expr) => { + DefaultForest::>::new_leaf($label) + } + ); + + #[test] + fn test_forest_api() -> Result<(), Box> { + let forest: DefaultForest = Default::default(); + + // empty forest + + assert!(forest.is_empty()); + + // leaf forest + + let mut forest = leaf!(0, usize); + + assert_eq!(forest.nodes_len(), 1); + assert_eq!(forest.root(), Some(0)); + + // add some child + + forest.plant(0, leaf!(1), false)?; + + assert_eq!(forest.nodes_len(), 2); + let mut children = forest.children_of(0)?; + assert_eq!(children.next(), Some(1)); + assert_eq!(children.next(), None); + + // add more children + + forest.plant(0, leaf!(2), false)?; + forest.plant(0, leaf!(3), false)?; + forest.plant(0, leaf!(4), false)?; + forest.plant(2, leaf!(5), false)?; + + assert_eq!(forest.nodes_len(), 6); + let mut children = forest.children_of(0)?; + assert_eq!(children.next(), Some(1)); + assert_eq!(children.next(), Some(2)); + assert_eq!(children.next(), Some(3)); + assert_eq!(children.next(), Some(4)); + let mut children = forest.children_of(2)?; + assert_eq!(children.next(), Some(5)); + assert_eq!(children.next(), None); + + let mut test_forest = leaf!(0); + test_forest.plant(0, leaf!(1), false)?; + test_forest.plant(0, leaf!(2), false)?; + test_forest.plant(0, leaf!(3), false)?; + test_forest.plant(2, leaf!(5), false)?; + + assert!(forest.is_prefix(0, &test_forest)?); + + let mut test_forest = leaf!(0); + test_forest.plant(0, leaf!(1), false)?; + test_forest.plant(0, leaf!(2), false)?; + // this child of the root should have label 3 in order to be a + // prefix. + test_forest.plant(0, leaf!(4), false)?; + test_forest.plant(2, leaf!(5), false)?; + + assert!(!forest.is_prefix(0, &test_forest)?); + + let mut test_forest = leaf!(2); + test_forest.plant(0, leaf!(5), false)?; + + assert!(forest.is_prefix(2, &test_forest)?); + + // now test cloning + + // add a duplicate label + forest.plant(3, leaf!(5), false)?; + + let _len = forest.nodes_len(); + + forest.clone_node(5)?; + + assert_eq!(forest.nodes_len(), 7); + + #[cfg(feature = "test-print-viz")] + forest.print_viz("forest.gv")?; + + Ok(()) + } +} diff --git a/forest/src/bytes.rs b/forest/src/bytes.rs new file mode 100644 index 0000000..951abad --- /dev/null +++ b/forest/src/bytes.rs @@ -0,0 +1,176 @@ +//! This module defines functions to turn a forest of forest labels +//! into a sequence of bytes. +//! +//! To be more specific, a forest consists of two parts: a vector of +//! node labels and a graph specifying the relations between nodes. +//! +//! # Number format (endianness) +//! +//! Every number mentionned in this format is an unsigned integer of +//! 64 bits, or 8 bytes. Every such number is specified in the big +//! endian format. +//! +//! # Parts of the sequence of bytes +//! +//! The sequence of bytes has three parts: +//! +//! ## Header +//! +//! The header specifies metadata of this forest. +//! +//! ### Special mark +//! +//! The first three bytes form the string "rep", as a special mark. +//! +//! ### Number of nodes +//! +//! The next 8 bytes specifies the number of nodes of this forest. +//! +//! ## Graph +//! +//! Next comes the underlying graph for this forest. This part +//! consists of a vector of vectors of numbers. Each vector of +//! numbers represents the list of children of a node. So the number +//! of vectors of numbers is equal to the number of nodes. +//! +//! ### Vector of vectors of numbers +//! +//! The vectors are not simply concatenated one after another, as that +//! way one cannot read a random node in a constant amount of time. +//! +//! Instead, we first specify the number of children of each node +//! first, along with the offset for that node of the vector of +//! children. +//! +//! As an example, if a graph has three nodes, represented as the +//! adjacency list: `[[1, 2], [2], []]`, then its representation as a +//! sequence of bytes is as follows: +//! ```text +//! 2, x, 1, y, 0, z, 1, 2, 2, +//! ^ ^ ^ +//! x y z +//! ``` +//! +//! This has the advantage that we can read the children of the `n`-th +//! node in constant time. +//! +//! ## Vector of labels +//! +//! Each label occupies a fixed number of bytes, so we simply put the +//! labels one after another. The only thing to note here is the +//! format of the labels. +//! +//! ### Labels +//! +//! Each label has 3 parts: +//! +//! 1. Status: either Packed, Cloned, or Plain. If the node is +//! cloned, it has a clone index. So in total this part occupies 1 +//! byte for the status and 8 bytes for the clone index. +//! 2. Start and end: the range in the input sentence. We just store +//! two numbers here. Hence this part occupies 16 bytes. +//! 3. Grammar label: either a terminal, a non-terminal, or a rule. +//! Each variant needs a number to speify its index. So in total this +//! part occupies 1 byte for the variant discriminant and 8 bytes for +//! the number. +//! +//! To sum up, each label occupies 34 bytes. + +use super::Forest; +use grammar::{GrammarLabel, GrammarLabelType, TNT}; + +/// Convert any type that implements the `Forest` trait into a +/// sequence of bytes. +pub fn forest_to_bytes>(forest: &F) -> Vec { + // First calculate the total number of bytes. + let nodes_len = forest.nodes_len(); + + let degrees: Vec<_> = forest + .nodes() + .map(|node| forest.degree(node).unwrap_or(0)) + .collect(); + + let total_degree: usize = degrees.iter().copied().sum(); + + let len: usize = 8 // total number of bytes at the start + + 3 // special mark + + 8 // number of nodes + + 8 // offset of labels + + 16 * nodes_len // degree & offset for each node + + 8 * total_degree // children of each node + + 34 * nodes_len // labels + ; + + // Then fill in the bytes. + + let mut bytes: Vec = Vec::with_capacity(len); + + // First the headers + + bytes.extend(len.to_be_bytes()); + bytes.extend([114, 101, 112]); // rep + bytes.extend(nodes_len.to_be_bytes()); + bytes.extend((len - 34 * nodes_len).to_be_bytes()); + + let mut accumulated: usize = 0; + + // Then degrees and offsets + + for degree in degrees.iter().copied() { + bytes.extend(degree.to_be_bytes()); + bytes.extend((8 + 3 + 8 + 8 + 16 * nodes_len + 8 * accumulated).to_be_bytes()); + + accumulated += degree; + } + + // Then the children + + bytes.extend(forest.nodes().flat_map(|node| { + forest + .children_of(node) + .unwrap() + .flat_map(|child| child.to_be_bytes()) + })); + + // Finally the labels + + 'nodes_loop: for node in forest.nodes() { + let label = match forest.vertex_label(node) { + Ok(Some(label)) => label, + _ => continue 'nodes_loop, + }; + + if label.is_plain() { + bytes.extend(std::iter::repeat(0).take(9)); + } else if label.is_packed() { + bytes.extend(std::iter::once(1).chain(std::iter::repeat(0).take(8))); + } else if let Some(index) = label.clone_index() { + bytes.extend(std::iter::once(2).chain(index.to_be_bytes())); + } + + let label = label.label(); + + bytes.extend(label.start().to_be_bytes()); + bytes.extend(label.end().unwrap_or(0).to_be_bytes()); + + let label = label.label(); + + match label { + GrammarLabelType::TNT(TNT::Ter(t)) => { + bytes.extend(std::iter::once(0).chain(t.to_be_bytes())); + } + GrammarLabelType::TNT(TNT::Non(n)) => { + bytes.extend(std::iter::once(1).chain(n.to_be_bytes())); + } + GrammarLabelType::Rule(r) => { + bytes.extend(std::iter::once(2).chain(r.to_be_bytes())); + } + } + } + + if bytes.len() != len { + dbg!(); + } + + bytes +} diff --git a/forest/src/default.rs b/forest/src/default.rs deleted file mode 100644 index b96439f..0000000 --- a/forest/src/default.rs +++ /dev/null @@ -1,523 +0,0 @@ -//! This file provides a default implementation for the -//! [`Forest`][crate::Forest] trait. - -use super::*; -use graph::{ - builder::BuilderMut, labelled::binary::PLGBuilderMut, Graph, LabelGraph, PLGraph, RedirectGraph, -}; - -use core::fmt::Display; - -/// The type of errors for forest operations. -#[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd)] -pub enum Error { - /// An index is out of bounds. - /// - /// The first component is the index that is out of bounds, and - /// the second component is the current length of nodes. - IndexOutOfBounds(usize, usize), - /// The forest does not permit duplicate nodes but encounters a - /// repeated node. - DuplicatedNode(usize), - /// A node has no labels while it is required to have one. - NodeNoLabel(usize), - /// Encounter an invalid error in converting from an error of - /// graphs. - InvalidGraphError(GError), - /// Encounter an error when converting forest labels. - LabelConversion(ForestLabelError), -} - -impl Display for Error { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Error::IndexOutOfBounds(index, bound) => { - write!(f, "index {index} is out of bound {bound}") - } - Error::DuplicatedNode(n) => write!(f, "node {n} is duplicated"), - Error::InvalidGraphError(ge) => write!(f, "invalid error: {ge}"), - Error::NodeNoLabel(n) => write!(f, "node {n} has no labels, but it should have one"), - Error::LabelConversion(le) => write!(f, "fail to convert labels: {le}"), - } - } -} - -impl std::error::Error for Error {} - -impl From for Error { - fn from(ge: GError) -> Self { - match ge { - GError::IndexOutOfBounds(index, bound) => Self::IndexOutOfBounds(index, bound), - GError::DuplicatedNode(n) => Self::DuplicatedNode(n), - _ => Self::InvalidGraphError(ge), - } - } -} - -impl From for Error { - fn from(le: ForestLabelError) -> Self { - Self::LabelConversion(le) - } -} - -/// A default implementation of forest. -#[derive(Debug, Clone)] -pub struct DefaultForest { - graph: PLGraph, - root: Option, -} - -impl Default for DefaultForest { - fn default() -> Self { - let graph = Default::default(); - let root = None; - - Self { graph, root } - } -} - -impl AsRef> for DefaultForest { - fn as_ref(&self) -> &DefaultForest { - self - } -} - -impl Graph for DefaultForest { - type Iter<'a> = as Graph>::Iter<'a> - where - Self: 'a; - - #[inline] - fn is_empty(&self) -> bool { - self.graph.is_empty() - } - - #[inline] - fn nodes_len(&self) -> usize { - self.graph.nodes_len() - } - - #[inline] - fn edges_len(&self) -> Option { - self.graph.edges_len() - } - - #[inline] - fn children_of(&self, node_id: usize) -> Result, GError> { - self.graph.children_of(node_id) - } - - #[inline] - fn degree(&self, node_id: usize) -> Result { - self.graph.degree(node_id) - } - - #[inline] - fn is_empty_node(&self, node_id: usize) -> Result { - self.graph.is_empty_node(node_id) - } - - #[inline] - fn has_edge(&self, source: usize, target: usize) -> Result { - self.graph.has_edge(source, target) - } - - fn replace_by_builder(&mut self, _builder: impl graph::Builder) { - unimplemented!() - } -} - -impl ParentsGraph for DefaultForest { - type Iter<'a>= as ParentsGraph>::Iter<'a> - where - Self:'a; - - #[inline] - fn parents_of(&self, node_id: usize) -> Result<::Iter<'_>, GError> { - self.graph.parents_of(node_id) - } - - #[inline] - fn nth_child(&self, node_id: usize, n: usize) -> Result { - self.graph.nth_child(node_id, n) - } - - #[inline] - fn edge_to_parent( - &self, - source: usize, - target: usize, - ) -> Result, GError> { - self.graph.edge_to_parent(source, target) - } -} - -impl LabelGraph for DefaultForest { - type Iter<'a> = std::iter::Empty - where - Self: 'a; - - type LabelIter<'a> = std::iter::Empty<(&'a T, >::Iter<'a>)> - where - Self: 'a, - T: 'a; - - type EdgeLabelIter<'a> = std::iter::Empty - where - Self: 'a, - T: 'a; - - #[inline] - fn query_label(&self, label: T) -> Option { - self.graph.query_label(label) - } - - #[inline] - fn vertex_label(&self, node_id: usize) -> Result, GError> { - self.graph.vertex_label(node_id) - } - - fn edge_label( - &self, - _source: usize, - _target: usize, - ) -> Result, GError> { - unimplemented!("edges have no labels") - } - - fn find_children_with_label( - &self, - _node_id: usize, - _label: &T, - ) -> Result<>::Iter<'_>, GError> { - unimplemented!("edges have no labels") - } - - fn labels_of(&self, _node_id: usize) -> Result, GError> { - unimplemented!("edges have no labels") - } - - fn has_edge_label(&self, _node_id: usize, _label: &T, _target: usize) -> Result { - unimplemented!("edges have no labels") - } -} - -impl Forest for DefaultForest> { - type Error = Error; - - fn root(&self) -> Option { - self.root - } - - fn new_leaf(label: T) -> Self { - let mut graph = PLGraph::default(); - - let mut builder = PLGBuilderMut::from_graph_mut(&mut graph); - - let root = Some(builder.add_vertex(ForestLabel::from(label))); - - Self { graph, root } - } - - fn is_prefix(&self, node_id: usize, fragment: F) -> Result - where - F: AsRef, - { - if !self.has_node(node_id) { - return Err(Error::IndexOutOfBounds(node_id, self.nodes_len())); - } - - // We do a depth-first traversal to determine if every node - // encountered has the same set of children (labels taken into - // the consideration). - - let fragment = fragment.as_ref(); - - let mut frag_stack = Vec::with_capacity(fragment.nodes_len()); - - let mut self_stack = Vec::with_capacity(fragment.nodes_len()); - - let frag_root = if let Some(root) = fragment.root() { - root - } else { - // an empty forest is a prefix of any forest. - return Ok(true); - }; - - frag_stack.push(frag_root); - self_stack.push(node_id); - - // defer popping - while let (Some(frag_top), Some(self_top)) = - (frag_stack.last().copied(), self_stack.last().copied()) - { - frag_stack.pop(); - self_stack.pop(); - - if fragment.vertex_label(frag_top)? != self.vertex_label(self_top)? { - // not a prefix - return Ok(false); - } - - let mut self_children = self.children_of(self_top)?; - - for child in fragment.children_of(frag_top)? { - if let Some(self_child) = self_children.next() { - frag_stack.push(child); - self_stack.push(self_child); - } else { - // too few children - return Ok(false); - } - } - } - - // Check both stacks are empty at the end. - Ok(frag_stack.is_empty() && self_stack.is_empty()) - } - - fn plant(&mut self, node_id: usize, fragment: F, planted: bool) -> Result<(), Self::Error> - where - F: AsRef, - { - // Convert self to a builder_mut, and traverse fragment in a - // depth-first manner and adjoin corresponding nodes along the - // way. - - if !self.has_node(node_id) { - return Err(Error::IndexOutOfBounds(node_id, self.nodes_len())); - } - - let fragment = fragment.as_ref(); - - let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); - - let root = if let Some(root) = fragment.root() { - root - } else { - // Nothing to do to plant an empty forest. - return Ok(()); - }; - - // Just a dummy label for use in adding edges. - // - // REVIEW: I probably should refactor the API for builder_mut. - let root_label = fragment - .vertex_label(root)? - .ok_or(Error::NodeNoLabel(root))?; - - let nodes_len = fragment.nodes_len(); - - /// If the fragment root has a duplicate label, the forest - /// will not grow, so we use the label to find the adjoined - /// node index. - /// - /// The nodes hava already been added to the forest, so it is - /// safe to call unwrap. - macro_rules! conversion ( - ($node:expr) => { - { - builder - .query_label( - fragment - .vertex_label($node)? - .ok_or(Error::NodeNoLabel($node))? - ).unwrap() - } - } - ); - - // If the fragment has been planted before, we just add an - // edge. - - if planted { - builder.add_edge(node_id, conversion!(root), root_label)?; - - return Ok(()); - } - - // First adjoin those nodes and join the edges later. - - for node in 0..nodes_len { - let label = fragment - .vertex_label(node)? - .ok_or(Error::NodeNoLabel(node))?; - - builder.add_vertex(label); - } - - // Don't forget to join the new sub-forest to the original - // forest, at the specified position. - - builder.add_edge(node_id, conversion!(root), root_label)?; - - // We can try to calculate the depth of fragments, if we need - // to lower the memory usage. But in our use cases, we - // usually deal with fragments where each node has at most one - // child, so the depth is supposed to be equal to the length - // in this case. - let mut stack = Vec::with_capacity(fragment.nodes_len()); - - stack.push(root); - - while let Some(top) = stack.pop() { - for child in fragment.children_of(top)? { - builder.add_edge(conversion!(top), conversion!(child), root_label)?; - } - } - - Ok(()) - } - - fn clone_node(&mut self, node_id: usize) -> Result { - let builder = PLGBuilderMut::from_graph_mut(&mut self.graph); - - let old_label = builder - .vertex_label(node_id)? - .ok_or(Error::NodeNoLabel(node_id))?; - - if old_label.is_packed() { - return Err(ForestLabelError::ClonePack.into()); - } - - if old_label.is_cloned().is_some() { - let mut parents = self.parents_of(node_id)?; - - // Make sure it only has one parent, which is the - // representative node. - assert_eq!(parents.len(), 1); - - // We checked its length is 1, so it is safe to unwrap - // here. - return Ok(parents.next().unwrap().node()); - } - - let rep_label = old_label.pack()?; - - let new_label = old_label.clone(self)?; - - let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); - - // Make a new node - let new_index = builder.add_vertex(rep_label); - - // Re-direct parents to the new node. - // - // This must be done before pointing the new node to the old - // node, otherwise that edge will be redirected as well. - - // Unfortunately I cannot loop through parents and mutate them - // at the same time, so I first collect them into a vector. - let parents: Vec<_> = builder.parents_of(node_id)?.collect(); - - for parent in parents.into_iter() { - builder.redirect(parent.node(), parent.edge(), new_index)?; - } - - // Point the new node to the old node. OLD_LABEL is just a - // place holder. - - builder.add_edge(new_index, node_id, old_label)?; - - // Modify the label of the old node. - - builder.set_label(node_id, new_label)?; - - Ok(new_index) - } -} - -#[cfg(test)] -mod forest_test { - use super::*; - - macro_rules! leaf ( - ($label:expr, $type:tt) =>{ - DefaultForest::>::new_leaf($label) - }; - ($label:expr) => { - DefaultForest::>::new_leaf($label) - } - ); - - #[test] - fn test_forest_api() -> Result<(), Box> { - let forest: DefaultForest = Default::default(); - - // empty forest - - assert!(forest.is_empty()); - - // leaf forest - - let mut forest = leaf!(0, usize); - - assert_eq!(forest.nodes_len(), 1); - assert_eq!(forest.root(), Some(0)); - - // add some child - - forest.plant(0, leaf!(1), false)?; - - assert_eq!(forest.nodes_len(), 2); - let mut children = forest.children_of(0)?; - assert_eq!(children.next(), Some(1)); - assert_eq!(children.next(), None); - - // add more children - - forest.plant(0, leaf!(2), false)?; - forest.plant(0, leaf!(3), false)?; - forest.plant(0, leaf!(4), false)?; - forest.plant(2, leaf!(5), false)?; - - assert_eq!(forest.nodes_len(), 6); - let mut children = forest.children_of(0)?; - assert_eq!(children.next(), Some(1)); - assert_eq!(children.next(), Some(2)); - assert_eq!(children.next(), Some(3)); - assert_eq!(children.next(), Some(4)); - let mut children = forest.children_of(2)?; - assert_eq!(children.next(), Some(5)); - assert_eq!(children.next(), None); - - let mut test_forest = leaf!(0); - test_forest.plant(0, leaf!(1), false)?; - test_forest.plant(0, leaf!(2), false)?; - test_forest.plant(0, leaf!(3), false)?; - test_forest.plant(2, leaf!(5), false)?; - - assert!(forest.is_prefix(0, &test_forest)?); - - let mut test_forest = leaf!(0); - test_forest.plant(0, leaf!(1), false)?; - test_forest.plant(0, leaf!(2), false)?; - // this child of the root should have label 3 in order to be a - // prefix. - test_forest.plant(0, leaf!(4), false)?; - test_forest.plant(2, leaf!(5), false)?; - - assert!(!forest.is_prefix(0, &test_forest)?); - - let mut test_forest = leaf!(2); - test_forest.plant(0, leaf!(5), false)?; - - assert!(forest.is_prefix(2, &test_forest)?); - - // now test cloning - - // add a duplicate label - forest.plant(3, leaf!(5), false)?; - - let _len = forest.nodes_len(); - - forest.clone_node(5)?; - - assert_eq!(forest.nodes_len(), 7); - - #[cfg(feature = "test-print-viz")] - forest.print_viz("forest.gv")?; - - Ok(()) - } -} diff --git a/forest/src/lib.rs b/forest/src/lib.rs index 0b1092f..a888fae 100644 --- a/forest/src/lib.rs +++ b/forest/src/lib.rs @@ -1,26 +1,27 @@ #![warn(missing_docs)] -//! This file defines the expected behaviours of forests, and provides -//! a default implementation for forests. +//! This file defines the expected behaviours of forests. //! -//! The default forest actually implements the so-called shared packed -//! parse forest, or SPPF. It packs sub-trees with the same parents -//! under the same branch, and lets nodes from different branches -//! share the same children, and hence the name. +//! The forests are the basis of the `semiring` crate. use graph::{error::Error as GError, GraphLabel, LabelGraph, ParentsGraph}; +use std::borrow::Borrow; + /// An internal type that indicates the status of a node as either a /// packed, cloned, or plain node. #[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Default)] -enum ForestLabelType { +pub enum ForestLabelType { + /// A packed node Packed, + /// A plain node #[default] Plain, + /// A cloned node Cloned(usize), } -impl core::fmt::Display for ForestLabelType { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { +impl std::fmt::Display for ForestLabelType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::Packed => write!(f, "packed"), Self::Plain => write!(f, "plain"), @@ -36,14 +37,17 @@ pub struct ForestLabel { status: ForestLabelType, } -impl core::fmt::Display for ForestLabel { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - writeln!(f, "label: {}, status: {}", self.label, self.status) +impl std::fmt::Display for ForestLabel { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if !matches!(self.status, ForestLabelType::Plain) { + write!(f, "{}, {}", self.label, self.status) + } else { + write!(f, "{}", self.label) + } } } /// The type of erros for converting forest labels. -#[non_exhaustive] #[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] pub enum ForestLabelError { /// Try to pack a cloned node. @@ -52,8 +56,8 @@ pub enum ForestLabelError { ClonePack, } -impl core::fmt::Display for ForestLabelError { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { +impl std::fmt::Display for ForestLabelError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::PackClone => write!(f, "cannot pack a cloned node"), Self::ClonePack => write!(f, "cannot clone a packed node"), @@ -64,33 +68,50 @@ impl core::fmt::Display for ForestLabelError { impl std::error::Error for ForestLabelError {} impl ForestLabel { + /// Construct a new label with the inner `label` and `status`. + pub fn new(label: T, status: ForestLabelType) -> Self { + Self { label, status } + } + /// Retrieve the label. pub fn label(&self) -> T { self.label } + /// Retrieve the status. + pub fn status(&self) -> ForestLabelType { + self.status + } + /// Return true if and only if this node is packed. pub fn is_packed(&self) -> bool { - matches!(self.status, ForestLabelType::Packed) + self.status == ForestLabelType::Packed } /// Retrieve the optional clone index. - pub fn is_cloned(&self) -> Option { - match self.status { - ForestLabelType::Cloned(index) => Some(index), - _ => None, + pub fn clone_index(&self) -> Option { + if let ForestLabelType::Cloned(index) = self.status { + Some(index) + } else { + None } } + /// Return true if and only if this node is a plain node. + pub fn is_plain(&self) -> bool { + self.status == ForestLabelType::Plain + } + /// Try to clone a node. pub fn clone(self, forest: &F) -> Result where - F: Forest, + F: LabelGraph>, { if self.is_packed() { + dbg!(); Err(ForestLabelError::ClonePack) } else { - let clone_index = if let Some(old_index) = self.is_cloned() { + let clone_index = if let Some(old_index) = self.clone_index() { let mut new_index: usize = old_index + 1; let mut new_label = Self { @@ -120,7 +141,7 @@ impl ForestLabel { /// Try to pack a node. pub fn pack(self) -> Result { - if self.is_cloned().is_some() { + if self.clone_index().is_some() { Err(ForestLabelError::PackClone) } else { let new_label = Self { @@ -140,7 +161,7 @@ impl From for ForestLabel { } } -/// The expected behaviours of a forest. +/// The expected behaviours of an item derivation forest. /// /// Note that it requires only a subset of the functionalities of /// labelled graphs. @@ -157,28 +178,45 @@ pub trait Forest: ParentsGraph + LabelGraph> { /// label. fn new_leaf(label: T) -> Self; + /// Transform the label at the given node. + fn transform_label( + &mut self, + node_id: usize, + transform: impl FnOnce(T) -> T, + ) -> Result<(), Self::Error>; + /// Detect if the fragment is a prefix of the sub-forest rooted at /// `node_id`. fn is_prefix(&self, node_id: usize, fragment: F) -> Result where - F: AsRef; + F: Borrow; /// Extend the forest by adjoining another forest at a given node. fn plant(&mut self, node_id: usize, fragment: F, planted: bool) -> Result<(), Self::Error> where - F: AsRef; + F: Borrow; /// Clone a node by making a new node and making all the nodes /// that previously pointed to the old node now point to the new /// node, and the new node points to the old node. Return the - /// index of the new node. In addition, if the old node had - /// already been cloned, just return the index of its - /// representitave. + /// index of the new node. However, if, and only if, + /// `no_new_clone` is `true`, do not make a new clone; instead + /// return the clone index that would be used if a new clone was + /// made. + /// + /// Also, `preserved_edges_num` many edges out of the old node + /// will be copied to be the children of the new node. /// /// The labels of the representing node and of the cloned node /// will be handled automatically. - fn clone_node(&mut self, node_id: usize) -> Result; + fn clone_node( + &mut self, + node_id: usize, + preserved_edges_num: usize, + no_new_clone: bool, + ) -> Result; } -pub mod default; -pub use default::Error; +pub mod bytes; + +pub use bytes::forest_to_bytes; -- cgit v1.2.3-18-g5258