From b8a2d05a3c0d835556d5ddbd44e4a1e201302af5 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Thu, 10 Aug 2023 11:14:04 +0800 Subject: Move the trait `Forest` to the crate "forest". The purpose of this change is to share this trait with other crates, such as the forth-coming "semiring" crate that will be responsible for handling some simple semiring operations as well as the querying, in my plans. --- Cargo.toml | 1 + chain/Cargo.toml | 1 + chain/src/atom/default.rs | 3 +- chain/src/default.rs | 6 +- chain/src/item/archive.txt | 207 ++++++++++++++++ chain/src/item/default/mod.rs | 8 +- chain/src/item/mod.rs | 209 +--------------- chain/src/lib.rs | 2 +- forest/Cargo.toml | 1 + forest/src/archive_default.txt | 525 +++++++++++++++++++++++++++++++++++++++++ forest/src/bytes.rs | 176 ++++++++++++++ forest/src/default.rs | 523 ---------------------------------------- forest/src/lib.rs | 102 +++++--- src/bytes.rs | 175 -------------- src/lib.rs | 8 +- 15 files changed, 998 insertions(+), 949 deletions(-) create mode 100644 chain/src/item/archive.txt create mode 100644 forest/src/archive_default.txt create mode 100644 forest/src/bytes.rs delete mode 100644 forest/src/default.rs delete mode 100644 src/bytes.rs diff --git a/Cargo.toml b/Cargo.toml index 399eafb..ef6f2da 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,6 +27,7 @@ crate-type = ["cdylib"] chain = { path = "./chain" } grammar = { path = "./grammar" } graph = { path = "./graph" } +forest = { path = "./forest" } [features] default = [] diff --git a/chain/Cargo.toml b/chain/Cargo.toml index e5499ee..6a1b089 100644 --- a/chain/Cargo.toml +++ b/chain/Cargo.toml @@ -14,6 +14,7 @@ nfa = { path = "../nfa" } graph = { path = "../graph" } graph_macro = { path = "../graph_macro" } grammar = { path = "../grammar" } +forest = { path = "../forest" } [dev-dependencies] grammar = { path = "../grammar", features = ["test-helper"] } diff --git a/chain/src/atom/default.rs b/chain/src/atom/default.rs index fa0cc3b..df61b6f 100644 --- a/chain/src/atom/default.rs +++ b/chain/src/atom/default.rs @@ -18,7 +18,8 @@ use std::{ iter::Copied, }; -use crate::item::{default::DefaultForest, ForestLabel}; +use crate::item::default::DefaultForest; +use forest::ForestLabel; /// A virtual node represents the derivative of a non-terminal symbol /// `s` with respect to a terminal symbol `t`. diff --git a/chain/src/default.rs b/chain/src/default.rs index 11f0c93..85e3c8f 100644 --- a/chain/src/default.rs +++ b/chain/src/default.rs @@ -8,9 +8,11 @@ use super::*; use crate::atom::{Atom, DefaultAtom}; use crate::item::{ - default::DefaultForest, generate_fragment, genins::index_out_of_bounds_conversion, Forest, - ForestLabel, ForestLabelError, + default::DefaultForest, generate_fragment, genins::index_out_of_bounds_conversion, }; + +use forest::{Forest, ForestLabel, ForestLabelError}; + use grammar::{GrammarLabel, GrammarLabelType, START_NONTERMINAL, TNT}; #[allow(unused_imports)] use graph::{ diff --git a/chain/src/item/archive.txt b/chain/src/item/archive.txt new file mode 100644 index 0000000..fab3814 --- /dev/null +++ b/chain/src/item/archive.txt @@ -0,0 +1,207 @@ +// /// An internal type that indicates the status of a node as either a +// /// packed, cloned, or plain node. +// #[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Default)] +// pub enum ForestLabelType { +// /// A packed node +// Packed, +// /// A plain node +// #[default] +// Plain, +// /// A cloned node +// Cloned(usize), +// } +// +// impl std::fmt::Display for ForestLabelType { +// fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { +// match self { +// Self::Packed => write!(f, "packed"), +// Self::Plain => write!(f, "plain"), +// Self::Cloned(index) => write!(f, "clone({index})"), +// } +// } +// } +// +// /// A type that encodes the properties demanded by a forest. +// #[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +// pub struct ForestLabel { +// label: T, +// status: ForestLabelType, +// } +// +// impl std::fmt::Display for ForestLabel { +// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +// if !matches!(self.status, ForestLabelType::Plain) { +// write!(f, "{}, {}", self.label, self.status) +// } else { +// write!(f, "{}", self.label) +// } +// } +// } +// +// /// The type of erros for converting forest labels. +// #[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] +// pub enum ForestLabelError { +// /// Try to pack a cloned node. +// PackClone, +// /// Try to clone a packed node. +// ClonePack, +// } +// +// impl std::fmt::Display for ForestLabelError { +// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +// match self { +// Self::PackClone => write!(f, "cannot pack a cloned node"), +// Self::ClonePack => write!(f, "cannot clone a packed node"), +// } +// } +// } +// +// impl std::error::Error for ForestLabelError {} +// +// impl ForestLabel { +// /// Construct a new label with the inner `label` and `status`. +// pub fn new(label: T, status: ForestLabelType) -> Self { +// Self { label, status } +// } +// +// /// Retrieve the label. +// pub fn label(&self) -> T { +// self.label +// } +// +// /// Return true if and only if this node is packed. +// pub fn is_packed(&self) -> bool { +// self.status == ForestLabelType::Packed +// } +// +// /// Retrieve the optional clone index. +// pub fn clone_index(&self) -> Option { +// match self.status { +// ForestLabelType::Cloned(index) => Some(index), +// _ => None, +// } +// } +// +// /// Return true if and only if this node is a plain node. +// pub fn is_plain(&self) -> bool { +// self.status == ForestLabelType::Plain +// } +// +// /// Try to clone a node. +// pub fn clone(self, forest: &F) -> Result +// where +// F: LabelGraph>, +// { +// if self.is_packed() { +// dbg!(); +// Err(ForestLabelError::ClonePack) +// } else { +// let clone_index = if let Some(old_index) = self.clone_index() { +// let mut new_index: usize = old_index + 1; +// +// let mut new_label = Self { +// status: ForestLabelType::Cloned(new_index), +// ..self +// }; +// +// while forest.query_label(new_label).is_some() { +// new_index += 1; +// new_label = Self { +// status: ForestLabelType::Cloned(new_index), +// ..self +// }; +// } +// +// new_index +// } else { +// 0 +// }; +// +// Ok(Self { +// status: ForestLabelType::Cloned(clone_index), +// ..self +// }) +// } +// } +// +// /// Try to pack a node. +// pub fn pack(self) -> Result { +// if self.clone_index().is_some() { +// Err(ForestLabelError::PackClone) +// } else { +// let new_label = Self { +// status: ForestLabelType::Packed, +// ..self +// }; +// +// Ok(new_label) +// } +// } +// } +// +// impl From for ForestLabel { +// fn from(label: T) -> Self { +// let status = ForestLabelType::default(); +// Self { label, status } +// } +// } +// +// // REVIEW: Should we move this trait (and only this trait) to a +// // separate crate, or is it enough to keep it here? +// +// /// The expected behaviours of an item derivation forest. +// /// +// /// Note that it requires only a subset of the functionalities of +// /// labelled graphs. +// pub trait Forest: ParentsGraph + LabelGraph> { +// /// The type of errors for operations on the forest. +// type Error: std::error::Error + From + From; +// +// /// Return the root of the forest. +// /// +// /// A forest without a root is regarded as empty. +// fn root(&self) -> Option; +// +// /// Construct a forest consisting of one leaf node with the given +// /// label. +// fn new_leaf(label: T) -> Self; +// +// /// Transform the label at the given node. +// fn transform_label( +// &mut self, +// node_id: usize, +// transform: impl FnOnce(T) -> T, +// ) -> Result<(), Self::Error>; +// +// /// Detect if the fragment is a prefix of the sub-forest rooted at +// /// `node_id`. +// fn is_prefix(&self, node_id: usize, fragment: F) -> Result +// where +// F: Borrow; +// +// /// Extend the forest by adjoining another forest at a given node. +// fn plant(&mut self, node_id: usize, fragment: F, planted: bool) -> Result<(), Self::Error> +// where +// F: Borrow; +// +// /// Clone a node by making a new node and making all the nodes +// /// that previously pointed to the old node now point to the new +// /// node, and the new node points to the old node. Return the +// /// index of the new node. However, if, and only if, +// /// `no_new_clone` is `true`, do not make a new clone; instead +// /// return the clone index that would be used if a new clone was +// /// made. +// /// +// /// Also, `preserved_edges_num` many edges out of the old node +// /// will be copied to be the children of the new node. +// /// +// /// The labels of the representing node and of the cloned node +// /// will be handled automatically. +// fn clone_node( +// &mut self, +// node_id: usize, +// preserved_edges_num: usize, +// no_new_clone: bool, +// ) -> Result; +// } +// diff --git a/chain/src/item/default/mod.rs b/chain/src/item/default/mod.rs index de43f37..01e463e 100644 --- a/chain/src/item/default/mod.rs +++ b/chain/src/item/default/mod.rs @@ -200,7 +200,7 @@ impl Forest for DefaultForest> { let transformed_label = transform(vertex_label.label()); - let transformed_label = ForestLabel::new(transformed_label, vertex_label.status); + let transformed_label = ForestLabel::new(transformed_label, vertex_label.status()); let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); @@ -1239,7 +1239,7 @@ impl DefaultForest> { label_label.set_start(pos); - let new_label = ForestLabel::new(label_label, label.status); + let new_label = ForestLabel::new(label_label, label.status()); builder.set_label(node, new_label)?; } @@ -1306,7 +1306,7 @@ impl DefaultForest> { label_label.set_end(pos + 1); } - let new_label = ForestLabel::new(label_label, label.status); + let new_label = ForestLabel::new(label_label, label.status()); builder.set_label(node, new_label)?; } @@ -1339,7 +1339,7 @@ pub fn print_labels( let label = forest.vertex_label(node)?; if let Some(label) = label { - let label = label.label.label(); + let label = label.label().label(); match label { GrammarLabelType::TNT(tnt) => { diff --git a/chain/src/item/mod.rs b/chain/src/item/mod.rs index ed41c61..54ea168 100644 --- a/chain/src/item/mod.rs +++ b/chain/src/item/mod.rs @@ -9,6 +9,8 @@ use graph::{error::Error as GError, GraphLabel, LabelGraph, Parent, ParentsGraph use std::borrow::Borrow; +use forest::{Forest, ForestLabel, ForestLabelError, ForestLabelType}; + /// A parent or a virtual segment. /// /// # Parent @@ -86,213 +88,6 @@ impl PaVi { } } -/// An internal type that indicates the status of a node as either a -/// packed, cloned, or plain node. -#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Default)] -pub enum ForestLabelType { - /// A packed node - Packed, - /// A plain node - #[default] - Plain, - /// A cloned node - Cloned(usize), -} - -impl std::fmt::Display for ForestLabelType { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - match self { - Self::Packed => write!(f, "packed"), - Self::Plain => write!(f, "plain"), - Self::Cloned(index) => write!(f, "clone({index})"), - } - } -} - -/// A type that encodes the properties demanded by a forest. -#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] -pub struct ForestLabel { - label: T, - status: ForestLabelType, -} - -impl std::fmt::Display for ForestLabel { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - if !matches!(self.status, ForestLabelType::Plain) { - write!(f, "{}, {}", self.label, self.status) - } else { - write!(f, "{}", self.label) - } - } -} - -/// The type of erros for converting forest labels. -#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] -pub enum ForestLabelError { - /// Try to pack a cloned node. - PackClone, - /// Try to clone a packed node. - ClonePack, -} - -impl std::fmt::Display for ForestLabelError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Self::PackClone => write!(f, "cannot pack a cloned node"), - Self::ClonePack => write!(f, "cannot clone a packed node"), - } - } -} - -impl std::error::Error for ForestLabelError {} - -impl ForestLabel { - /// Construct a new label with the inner `label` and `status`. - pub fn new(label: T, status: ForestLabelType) -> Self { - Self { label, status } - } - - /// Retrieve the label. - pub fn label(&self) -> T { - self.label - } - - /// Return true if and only if this node is packed. - pub fn is_packed(&self) -> bool { - self.status == ForestLabelType::Packed - } - - /// Retrieve the optional clone index. - pub fn clone_index(&self) -> Option { - match self.status { - ForestLabelType::Cloned(index) => Some(index), - _ => None, - } - } - - /// Return true if and only if this node is a plain node. - pub fn is_plain(&self) -> bool { - self.status == ForestLabelType::Plain - } - - /// Try to clone a node. - pub fn clone(self, forest: &F) -> Result - where - F: LabelGraph>, - { - if self.is_packed() { - dbg!(); - Err(ForestLabelError::ClonePack) - } else { - let clone_index = if let Some(old_index) = self.clone_index() { - let mut new_index: usize = old_index + 1; - - let mut new_label = Self { - status: ForestLabelType::Cloned(new_index), - ..self - }; - - while forest.query_label(new_label).is_some() { - new_index += 1; - new_label = Self { - status: ForestLabelType::Cloned(new_index), - ..self - }; - } - - new_index - } else { - 0 - }; - - Ok(Self { - status: ForestLabelType::Cloned(clone_index), - ..self - }) - } - } - - /// Try to pack a node. - pub fn pack(self) -> Result { - if self.clone_index().is_some() { - Err(ForestLabelError::PackClone) - } else { - let new_label = Self { - status: ForestLabelType::Packed, - ..self - }; - - Ok(new_label) - } - } -} - -impl From for ForestLabel { - fn from(label: T) -> Self { - let status = ForestLabelType::default(); - Self { label, status } - } -} - -// REVIEW: Should we move this trait (and only this trait) to a -// separate crate, or is it enough to keep it here? - -/// The expected behaviours of an item derivation forest. -/// -/// Note that it requires only a subset of the functionalities of -/// labelled graphs. -pub trait Forest: ParentsGraph + LabelGraph> { - /// The type of errors for operations on the forest. - type Error: std::error::Error + From + From; - - /// Return the root of the forest. - /// - /// A forest without a root is regarded as empty. - fn root(&self) -> Option; - - /// Construct a forest consisting of one leaf node with the given - /// label. - fn new_leaf(label: T) -> Self; - - /// Transform the label at the given node. - fn transform_label( - &mut self, - node_id: usize, - transform: impl FnOnce(T) -> T, - ) -> Result<(), Self::Error>; - - /// Detect if the fragment is a prefix of the sub-forest rooted at - /// `node_id`. - fn is_prefix(&self, node_id: usize, fragment: F) -> Result - where - F: Borrow; - - /// Extend the forest by adjoining another forest at a given node. - fn plant(&mut self, node_id: usize, fragment: F, planted: bool) -> Result<(), Self::Error> - where - F: Borrow; - - /// Clone a node by making a new node and making all the nodes - /// that previously pointed to the old node now point to the new - /// node, and the new node points to the old node. Return the - /// index of the new node. However, if, and only if, - /// `no_new_clone` is `true`, do not make a new clone; instead - /// return the clone index that would be used if a new clone was - /// made. - /// - /// Also, `preserved_edges_num` many edges out of the old node - /// will be copied to be the children of the new node. - /// - /// The labels of the representing node and of the cloned node - /// will be handled automatically. - fn clone_node( - &mut self, - node_id: usize, - preserved_edges_num: usize, - no_new_clone: bool, - ) -> Result; -} - pub mod default; pub mod genins; diff --git a/chain/src/lib.rs b/chain/src/lib.rs index f16ebb2..10143dd 100644 --- a/chain/src/lib.rs +++ b/chain/src/lib.rs @@ -321,7 +321,7 @@ pub trait Chain: LabelExtGraph { // purpose, but I have not figured out yet wha the correct trait // should be. /// The type of output item that will be produced by this machine. - type Item: item::Forest; + type Item: forest::Forest; /// Signal to the parser that the end of the input is reached, so /// that the parser knows to generate suitable forests. diff --git a/forest/Cargo.toml b/forest/Cargo.toml index 8cc445b..5702265 100644 --- a/forest/Cargo.toml +++ b/forest/Cargo.toml @@ -13,5 +13,6 @@ test-print-viz = [] [dependencies] graph = { path = "../graph" } +grammar = { path = "../grammar" } # This should be integrated in the package, but it is not yet done. \ No newline at end of file diff --git a/forest/src/archive_default.txt b/forest/src/archive_default.txt new file mode 100644 index 0000000..5572ecc --- /dev/null +++ b/forest/src/archive_default.txt @@ -0,0 +1,525 @@ +// DEPRECATED: This file is deprecated. + +//! This file provides a default implementation for the +//! [`Forest`][crate::Forest] trait. + +use super::*; +use graph::{ + builder::BuilderMut, labelled::binary::PLGBuilderMut, Graph, LabelGraph, PLGraph, RedirectGraph, +}; + +use core::fmt::Display; + +/// The type of errors for forest operations. +#[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd)] +pub enum Error { + /// An index is out of bounds. + /// + /// The first component is the index that is out of bounds, and + /// the second component is the current length of nodes. + IndexOutOfBounds(usize, usize), + /// The forest does not permit duplicate nodes but encounters a + /// repeated node. + DuplicatedNode(usize), + /// A node has no labels while it is required to have one. + NodeNoLabel(usize), + /// Encounter an invalid error in converting from an error of + /// graphs. + InvalidGraphError(GError), + /// Encounter an error when converting forest labels. + LabelConversion(ForestLabelError), +} + +impl Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Error::IndexOutOfBounds(index, bound) => { + write!(f, "index {index} is out of bound {bound}") + } + Error::DuplicatedNode(n) => write!(f, "node {n} is duplicated"), + Error::InvalidGraphError(ge) => write!(f, "invalid error: {ge}"), + Error::NodeNoLabel(n) => write!(f, "node {n} has no labels, but it should have one"), + Error::LabelConversion(le) => write!(f, "fail to convert labels: {le}"), + } + } +} + +impl std::error::Error for Error {} + +impl From for Error { + fn from(ge: GError) -> Self { + match ge { + GError::IndexOutOfBounds(index, bound) => Self::IndexOutOfBounds(index, bound), + GError::DuplicatedNode(n) => Self::DuplicatedNode(n), + _ => Self::InvalidGraphError(ge), + } + } +} + +impl From for Error { + fn from(le: ForestLabelError) -> Self { + Self::LabelConversion(le) + } +} + +/// A default implementation of forest. +#[derive(Debug, Clone)] +pub struct DefaultForest { + graph: PLGraph, + root: Option, +} + +impl Default for DefaultForest { + fn default() -> Self { + let graph = Default::default(); + let root = None; + + Self { graph, root } + } +} + +impl AsRef> for DefaultForest { + fn as_ref(&self) -> &DefaultForest { + self + } +} + +impl Graph for DefaultForest { + type Iter<'a> = as Graph>::Iter<'a> + where + Self: 'a; + + #[inline] + fn is_empty(&self) -> bool { + self.graph.is_empty() + } + + #[inline] + fn nodes_len(&self) -> usize { + self.graph.nodes_len() + } + + #[inline] + fn edges_len(&self) -> Option { + self.graph.edges_len() + } + + #[inline] + fn children_of(&self, node_id: usize) -> Result, GError> { + self.graph.children_of(node_id) + } + + #[inline] + fn degree(&self, node_id: usize) -> Result { + self.graph.degree(node_id) + } + + #[inline] + fn is_empty_node(&self, node_id: usize) -> Result { + self.graph.is_empty_node(node_id) + } + + #[inline] + fn has_edge(&self, source: usize, target: usize) -> Result { + self.graph.has_edge(source, target) + } + + fn replace_by_builder(&mut self, _builder: impl graph::Builder) { + unimplemented!() + } +} + +impl ParentsGraph for DefaultForest { + type Iter<'a>= as ParentsGraph>::Iter<'a> + where + Self:'a; + + #[inline] + fn parents_of(&self, node_id: usize) -> Result<::Iter<'_>, GError> { + self.graph.parents_of(node_id) + } + + #[inline] + fn nth_child(&self, node_id: usize, n: usize) -> Result { + self.graph.nth_child(node_id, n) + } + + #[inline] + fn edge_to_parent( + &self, + source: usize, + target: usize, + ) -> Result, GError> { + self.graph.edge_to_parent(source, target) + } +} + +impl LabelGraph for DefaultForest { + type Iter<'a> = std::iter::Empty + where + Self: 'a; + + type LabelIter<'a> = std::iter::Empty<(&'a T, >::Iter<'a>)> + where + Self: 'a, + T: 'a; + + type EdgeLabelIter<'a> = std::iter::Empty + where + Self: 'a, + T: 'a; + + #[inline] + fn query_label(&self, label: T) -> Option { + self.graph.query_label(label) + } + + #[inline] + fn vertex_label(&self, node_id: usize) -> Result, GError> { + self.graph.vertex_label(node_id) + } + + fn edge_label( + &self, + _source: usize, + _target: usize, + ) -> Result, GError> { + unimplemented!("edges have no labels") + } + + fn find_children_with_label( + &self, + _node_id: usize, + _label: &T, + ) -> Result<>::Iter<'_>, GError> { + unimplemented!("edges have no labels") + } + + fn labels_of(&self, _node_id: usize) -> Result, GError> { + unimplemented!("edges have no labels") + } + + fn has_edge_label(&self, _node_id: usize, _label: &T, _target: usize) -> Result { + unimplemented!("edges have no labels") + } +} + +impl Forest for DefaultForest> { + type Error = Error; + + fn root(&self) -> Option { + self.root + } + + fn new_leaf(label: T) -> Self { + let mut graph = PLGraph::default(); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut graph); + + let root = Some(builder.add_vertex(ForestLabel::from(label))); + + Self { graph, root } + } + + fn is_prefix(&self, node_id: usize, fragment: F) -> Result + where + F: AsRef, + { + if !self.has_node(node_id) { + return Err(Error::IndexOutOfBounds(node_id, self.nodes_len())); + } + + // We do a depth-first traversal to determine if every node + // encountered has the same set of children (labels taken into + // the consideration). + + let fragment = fragment.as_ref(); + + let mut frag_stack = Vec::with_capacity(fragment.nodes_len()); + + let mut self_stack = Vec::with_capacity(fragment.nodes_len()); + + let frag_root = if let Some(root) = fragment.root() { + root + } else { + // an empty forest is a prefix of any forest. + return Ok(true); + }; + + frag_stack.push(frag_root); + self_stack.push(node_id); + + // defer popping + while let (Some(frag_top), Some(self_top)) = + (frag_stack.last().copied(), self_stack.last().copied()) + { + frag_stack.pop(); + self_stack.pop(); + + if fragment.vertex_label(frag_top)? != self.vertex_label(self_top)? { + // not a prefix + return Ok(false); + } + + let mut self_children = self.children_of(self_top)?; + + for child in fragment.children_of(frag_top)? { + if let Some(self_child) = self_children.next() { + frag_stack.push(child); + self_stack.push(self_child); + } else { + // too few children + return Ok(false); + } + } + } + + // Check both stacks are empty at the end. + Ok(frag_stack.is_empty() && self_stack.is_empty()) + } + + fn plant(&mut self, node_id: usize, fragment: F, planted: bool) -> Result<(), Self::Error> + where + F: AsRef, + { + // Convert self to a builder_mut, and traverse fragment in a + // depth-first manner and adjoin corresponding nodes along the + // way. + + if !self.has_node(node_id) { + return Err(Error::IndexOutOfBounds(node_id, self.nodes_len())); + } + + let fragment = fragment.as_ref(); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let root = if let Some(root) = fragment.root() { + root + } else { + // Nothing to do to plant an empty forest. + return Ok(()); + }; + + // Just a dummy label for use in adding edges. + // + // REVIEW: I probably should refactor the API for builder_mut. + let root_label = fragment + .vertex_label(root)? + .ok_or(Error::NodeNoLabel(root))?; + + let nodes_len = fragment.nodes_len(); + + /// If the fragment root has a duplicate label, the forest + /// will not grow, so we use the label to find the adjoined + /// node index. + /// + /// The nodes hava already been added to the forest, so it is + /// safe to call unwrap. + macro_rules! conversion ( + ($node:expr) => { + { + builder + .query_label( + fragment + .vertex_label($node)? + .ok_or(Error::NodeNoLabel($node))? + ).unwrap() + } + } + ); + + // If the fragment has been planted before, we just add an + // edge. + + if planted { + builder.add_edge(node_id, conversion!(root), root_label)?; + + return Ok(()); + } + + // First adjoin those nodes and join the edges later. + + for node in 0..nodes_len { + let label = fragment + .vertex_label(node)? + .ok_or(Error::NodeNoLabel(node))?; + + builder.add_vertex(label); + } + + // Don't forget to join the new sub-forest to the original + // forest, at the specified position. + + builder.add_edge(node_id, conversion!(root), root_label)?; + + // We can try to calculate the depth of fragments, if we need + // to lower the memory usage. But in our use cases, we + // usually deal with fragments where each node has at most one + // child, so the depth is supposed to be equal to the length + // in this case. + let mut stack = Vec::with_capacity(fragment.nodes_len()); + + stack.push(root); + + while let Some(top) = stack.pop() { + for child in fragment.children_of(top)? { + builder.add_edge(conversion!(top), conversion!(child), root_label)?; + } + } + + Ok(()) + } + + fn clone_node(&mut self, node_id: usize) -> Result { + let builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let old_label = builder + .vertex_label(node_id)? + .ok_or(Error::NodeNoLabel(node_id))?; + + if old_label.is_packed() { + return Err(ForestLabelError::ClonePack.into()); + } + + if old_label.is_cloned().is_some() { + let mut parents = self.parents_of(node_id)?; + + // Make sure it only has one parent, which is the + // representative node. + assert_eq!(parents.len(), 1); + + // We checked its length is 1, so it is safe to unwrap + // here. + return Ok(parents.next().unwrap().node()); + } + + let rep_label = old_label.pack()?; + + let new_label = old_label.clone(self)?; + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + // Make a new node + let new_index = builder.add_vertex(rep_label); + + // Re-direct parents to the new node. + // + // This must be done before pointing the new node to the old + // node, otherwise that edge will be redirected as well. + + // Unfortunately I cannot loop through parents and mutate them + // at the same time, so I first collect them into a vector. + let parents: Vec<_> = builder.parents_of(node_id)?.collect(); + + for parent in parents.into_iter() { + builder.redirect(parent.node(), parent.edge(), new_index)?; + } + + // Point the new node to the old node. OLD_LABEL is just a + // place holder. + + builder.add_edge(new_index, node_id, old_label)?; + + // Modify the label of the old node. + + builder.set_label(node_id, new_label)?; + + Ok(new_index) + } +} + +#[cfg(test)] +mod forest_test { + use super::*; + + macro_rules! leaf ( + ($label:expr, $type:tt) =>{ + DefaultForest::>::new_leaf($label) + }; + ($label:expr) => { + DefaultForest::>::new_leaf($label) + } + ); + + #[test] + fn test_forest_api() -> Result<(), Box> { + let forest: DefaultForest = Default::default(); + + // empty forest + + assert!(forest.is_empty()); + + // leaf forest + + let mut forest = leaf!(0, usize); + + assert_eq!(forest.nodes_len(), 1); + assert_eq!(forest.root(), Some(0)); + + // add some child + + forest.plant(0, leaf!(1), false)?; + + assert_eq!(forest.nodes_len(), 2); + let mut children = forest.children_of(0)?; + assert_eq!(children.next(), Some(1)); + assert_eq!(children.next(), None); + + // add more children + + forest.plant(0, leaf!(2), false)?; + forest.plant(0, leaf!(3), false)?; + forest.plant(0, leaf!(4), false)?; + forest.plant(2, leaf!(5), false)?; + + assert_eq!(forest.nodes_len(), 6); + let mut children = forest.children_of(0)?; + assert_eq!(children.next(), Some(1)); + assert_eq!(children.next(), Some(2)); + assert_eq!(children.next(), Some(3)); + assert_eq!(children.next(), Some(4)); + let mut children = forest.children_of(2)?; + assert_eq!(children.next(), Some(5)); + assert_eq!(children.next(), None); + + let mut test_forest = leaf!(0); + test_forest.plant(0, leaf!(1), false)?; + test_forest.plant(0, leaf!(2), false)?; + test_forest.plant(0, leaf!(3), false)?; + test_forest.plant(2, leaf!(5), false)?; + + assert!(forest.is_prefix(0, &test_forest)?); + + let mut test_forest = leaf!(0); + test_forest.plant(0, leaf!(1), false)?; + test_forest.plant(0, leaf!(2), false)?; + // this child of the root should have label 3 in order to be a + // prefix. + test_forest.plant(0, leaf!(4), false)?; + test_forest.plant(2, leaf!(5), false)?; + + assert!(!forest.is_prefix(0, &test_forest)?); + + let mut test_forest = leaf!(2); + test_forest.plant(0, leaf!(5), false)?; + + assert!(forest.is_prefix(2, &test_forest)?); + + // now test cloning + + // add a duplicate label + forest.plant(3, leaf!(5), false)?; + + let _len = forest.nodes_len(); + + forest.clone_node(5)?; + + assert_eq!(forest.nodes_len(), 7); + + #[cfg(feature = "test-print-viz")] + forest.print_viz("forest.gv")?; + + Ok(()) + } +} diff --git a/forest/src/bytes.rs b/forest/src/bytes.rs new file mode 100644 index 0000000..951abad --- /dev/null +++ b/forest/src/bytes.rs @@ -0,0 +1,176 @@ +//! This module defines functions to turn a forest of forest labels +//! into a sequence of bytes. +//! +//! To be more specific, a forest consists of two parts: a vector of +//! node labels and a graph specifying the relations between nodes. +//! +//! # Number format (endianness) +//! +//! Every number mentionned in this format is an unsigned integer of +//! 64 bits, or 8 bytes. Every such number is specified in the big +//! endian format. +//! +//! # Parts of the sequence of bytes +//! +//! The sequence of bytes has three parts: +//! +//! ## Header +//! +//! The header specifies metadata of this forest. +//! +//! ### Special mark +//! +//! The first three bytes form the string "rep", as a special mark. +//! +//! ### Number of nodes +//! +//! The next 8 bytes specifies the number of nodes of this forest. +//! +//! ## Graph +//! +//! Next comes the underlying graph for this forest. This part +//! consists of a vector of vectors of numbers. Each vector of +//! numbers represents the list of children of a node. So the number +//! of vectors of numbers is equal to the number of nodes. +//! +//! ### Vector of vectors of numbers +//! +//! The vectors are not simply concatenated one after another, as that +//! way one cannot read a random node in a constant amount of time. +//! +//! Instead, we first specify the number of children of each node +//! first, along with the offset for that node of the vector of +//! children. +//! +//! As an example, if a graph has three nodes, represented as the +//! adjacency list: `[[1, 2], [2], []]`, then its representation as a +//! sequence of bytes is as follows: +//! ```text +//! 2, x, 1, y, 0, z, 1, 2, 2, +//! ^ ^ ^ +//! x y z +//! ``` +//! +//! This has the advantage that we can read the children of the `n`-th +//! node in constant time. +//! +//! ## Vector of labels +//! +//! Each label occupies a fixed number of bytes, so we simply put the +//! labels one after another. The only thing to note here is the +//! format of the labels. +//! +//! ### Labels +//! +//! Each label has 3 parts: +//! +//! 1. Status: either Packed, Cloned, or Plain. If the node is +//! cloned, it has a clone index. So in total this part occupies 1 +//! byte for the status and 8 bytes for the clone index. +//! 2. Start and end: the range in the input sentence. We just store +//! two numbers here. Hence this part occupies 16 bytes. +//! 3. Grammar label: either a terminal, a non-terminal, or a rule. +//! Each variant needs a number to speify its index. So in total this +//! part occupies 1 byte for the variant discriminant and 8 bytes for +//! the number. +//! +//! To sum up, each label occupies 34 bytes. + +use super::Forest; +use grammar::{GrammarLabel, GrammarLabelType, TNT}; + +/// Convert any type that implements the `Forest` trait into a +/// sequence of bytes. +pub fn forest_to_bytes>(forest: &F) -> Vec { + // First calculate the total number of bytes. + let nodes_len = forest.nodes_len(); + + let degrees: Vec<_> = forest + .nodes() + .map(|node| forest.degree(node).unwrap_or(0)) + .collect(); + + let total_degree: usize = degrees.iter().copied().sum(); + + let len: usize = 8 // total number of bytes at the start + + 3 // special mark + + 8 // number of nodes + + 8 // offset of labels + + 16 * nodes_len // degree & offset for each node + + 8 * total_degree // children of each node + + 34 * nodes_len // labels + ; + + // Then fill in the bytes. + + let mut bytes: Vec = Vec::with_capacity(len); + + // First the headers + + bytes.extend(len.to_be_bytes()); + bytes.extend([114, 101, 112]); // rep + bytes.extend(nodes_len.to_be_bytes()); + bytes.extend((len - 34 * nodes_len).to_be_bytes()); + + let mut accumulated: usize = 0; + + // Then degrees and offsets + + for degree in degrees.iter().copied() { + bytes.extend(degree.to_be_bytes()); + bytes.extend((8 + 3 + 8 + 8 + 16 * nodes_len + 8 * accumulated).to_be_bytes()); + + accumulated += degree; + } + + // Then the children + + bytes.extend(forest.nodes().flat_map(|node| { + forest + .children_of(node) + .unwrap() + .flat_map(|child| child.to_be_bytes()) + })); + + // Finally the labels + + 'nodes_loop: for node in forest.nodes() { + let label = match forest.vertex_label(node) { + Ok(Some(label)) => label, + _ => continue 'nodes_loop, + }; + + if label.is_plain() { + bytes.extend(std::iter::repeat(0).take(9)); + } else if label.is_packed() { + bytes.extend(std::iter::once(1).chain(std::iter::repeat(0).take(8))); + } else if let Some(index) = label.clone_index() { + bytes.extend(std::iter::once(2).chain(index.to_be_bytes())); + } + + let label = label.label(); + + bytes.extend(label.start().to_be_bytes()); + bytes.extend(label.end().unwrap_or(0).to_be_bytes()); + + let label = label.label(); + + match label { + GrammarLabelType::TNT(TNT::Ter(t)) => { + bytes.extend(std::iter::once(0).chain(t.to_be_bytes())); + } + GrammarLabelType::TNT(TNT::Non(n)) => { + bytes.extend(std::iter::once(1).chain(n.to_be_bytes())); + } + GrammarLabelType::Rule(r) => { + bytes.extend(std::iter::once(2).chain(r.to_be_bytes())); + } + } + } + + if bytes.len() != len { + dbg!(); + } + + bytes +} diff --git a/forest/src/default.rs b/forest/src/default.rs deleted file mode 100644 index b96439f..0000000 --- a/forest/src/default.rs +++ /dev/null @@ -1,523 +0,0 @@ -//! This file provides a default implementation for the -//! [`Forest`][crate::Forest] trait. - -use super::*; -use graph::{ - builder::BuilderMut, labelled::binary::PLGBuilderMut, Graph, LabelGraph, PLGraph, RedirectGraph, -}; - -use core::fmt::Display; - -/// The type of errors for forest operations. -#[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd)] -pub enum Error { - /// An index is out of bounds. - /// - /// The first component is the index that is out of bounds, and - /// the second component is the current length of nodes. - IndexOutOfBounds(usize, usize), - /// The forest does not permit duplicate nodes but encounters a - /// repeated node. - DuplicatedNode(usize), - /// A node has no labels while it is required to have one. - NodeNoLabel(usize), - /// Encounter an invalid error in converting from an error of - /// graphs. - InvalidGraphError(GError), - /// Encounter an error when converting forest labels. - LabelConversion(ForestLabelError), -} - -impl Display for Error { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Error::IndexOutOfBounds(index, bound) => { - write!(f, "index {index} is out of bound {bound}") - } - Error::DuplicatedNode(n) => write!(f, "node {n} is duplicated"), - Error::InvalidGraphError(ge) => write!(f, "invalid error: {ge}"), - Error::NodeNoLabel(n) => write!(f, "node {n} has no labels, but it should have one"), - Error::LabelConversion(le) => write!(f, "fail to convert labels: {le}"), - } - } -} - -impl std::error::Error for Error {} - -impl From for Error { - fn from(ge: GError) -> Self { - match ge { - GError::IndexOutOfBounds(index, bound) => Self::IndexOutOfBounds(index, bound), - GError::DuplicatedNode(n) => Self::DuplicatedNode(n), - _ => Self::InvalidGraphError(ge), - } - } -} - -impl From for Error { - fn from(le: ForestLabelError) -> Self { - Self::LabelConversion(le) - } -} - -/// A default implementation of forest. -#[derive(Debug, Clone)] -pub struct DefaultForest { - graph: PLGraph, - root: Option, -} - -impl Default for DefaultForest { - fn default() -> Self { - let graph = Default::default(); - let root = None; - - Self { graph, root } - } -} - -impl AsRef> for DefaultForest { - fn as_ref(&self) -> &DefaultForest { - self - } -} - -impl Graph for DefaultForest { - type Iter<'a> = as Graph>::Iter<'a> - where - Self: 'a; - - #[inline] - fn is_empty(&self) -> bool { - self.graph.is_empty() - } - - #[inline] - fn nodes_len(&self) -> usize { - self.graph.nodes_len() - } - - #[inline] - fn edges_len(&self) -> Option { - self.graph.edges_len() - } - - #[inline] - fn children_of(&self, node_id: usize) -> Result, GError> { - self.graph.children_of(node_id) - } - - #[inline] - fn degree(&self, node_id: usize) -> Result { - self.graph.degree(node_id) - } - - #[inline] - fn is_empty_node(&self, node_id: usize) -> Result { - self.graph.is_empty_node(node_id) - } - - #[inline] - fn has_edge(&self, source: usize, target: usize) -> Result { - self.graph.has_edge(source, target) - } - - fn replace_by_builder(&mut self, _builder: impl graph::Builder) { - unimplemented!() - } -} - -impl ParentsGraph for DefaultForest { - type Iter<'a>= as ParentsGraph>::Iter<'a> - where - Self:'a; - - #[inline] - fn parents_of(&self, node_id: usize) -> Result<::Iter<'_>, GError> { - self.graph.parents_of(node_id) - } - - #[inline] - fn nth_child(&self, node_id: usize, n: usize) -> Result { - self.graph.nth_child(node_id, n) - } - - #[inline] - fn edge_to_parent( - &self, - source: usize, - target: usize, - ) -> Result, GError> { - self.graph.edge_to_parent(source, target) - } -} - -impl LabelGraph for DefaultForest { - type Iter<'a> = std::iter::Empty - where - Self: 'a; - - type LabelIter<'a> = std::iter::Empty<(&'a T, >::Iter<'a>)> - where - Self: 'a, - T: 'a; - - type EdgeLabelIter<'a> = std::iter::Empty - where - Self: 'a, - T: 'a; - - #[inline] - fn query_label(&self, label: T) -> Option { - self.graph.query_label(label) - } - - #[inline] - fn vertex_label(&self, node_id: usize) -> Result, GError> { - self.graph.vertex_label(node_id) - } - - fn edge_label( - &self, - _source: usize, - _target: usize, - ) -> Result, GError> { - unimplemented!("edges have no labels") - } - - fn find_children_with_label( - &self, - _node_id: usize, - _label: &T, - ) -> Result<>::Iter<'_>, GError> { - unimplemented!("edges have no labels") - } - - fn labels_of(&self, _node_id: usize) -> Result, GError> { - unimplemented!("edges have no labels") - } - - fn has_edge_label(&self, _node_id: usize, _label: &T, _target: usize) -> Result { - unimplemented!("edges have no labels") - } -} - -impl Forest for DefaultForest> { - type Error = Error; - - fn root(&self) -> Option { - self.root - } - - fn new_leaf(label: T) -> Self { - let mut graph = PLGraph::default(); - - let mut builder = PLGBuilderMut::from_graph_mut(&mut graph); - - let root = Some(builder.add_vertex(ForestLabel::from(label))); - - Self { graph, root } - } - - fn is_prefix(&self, node_id: usize, fragment: F) -> Result - where - F: AsRef, - { - if !self.has_node(node_id) { - return Err(Error::IndexOutOfBounds(node_id, self.nodes_len())); - } - - // We do a depth-first traversal to determine if every node - // encountered has the same set of children (labels taken into - // the consideration). - - let fragment = fragment.as_ref(); - - let mut frag_stack = Vec::with_capacity(fragment.nodes_len()); - - let mut self_stack = Vec::with_capacity(fragment.nodes_len()); - - let frag_root = if let Some(root) = fragment.root() { - root - } else { - // an empty forest is a prefix of any forest. - return Ok(true); - }; - - frag_stack.push(frag_root); - self_stack.push(node_id); - - // defer popping - while let (Some(frag_top), Some(self_top)) = - (frag_stack.last().copied(), self_stack.last().copied()) - { - frag_stack.pop(); - self_stack.pop(); - - if fragment.vertex_label(frag_top)? != self.vertex_label(self_top)? { - // not a prefix - return Ok(false); - } - - let mut self_children = self.children_of(self_top)?; - - for child in fragment.children_of(frag_top)? { - if let Some(self_child) = self_children.next() { - frag_stack.push(child); - self_stack.push(self_child); - } else { - // too few children - return Ok(false); - } - } - } - - // Check both stacks are empty at the end. - Ok(frag_stack.is_empty() && self_stack.is_empty()) - } - - fn plant(&mut self, node_id: usize, fragment: F, planted: bool) -> Result<(), Self::Error> - where - F: AsRef, - { - // Convert self to a builder_mut, and traverse fragment in a - // depth-first manner and adjoin corresponding nodes along the - // way. - - if !self.has_node(node_id) { - return Err(Error::IndexOutOfBounds(node_id, self.nodes_len())); - } - - let fragment = fragment.as_ref(); - - let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); - - let root = if let Some(root) = fragment.root() { - root - } else { - // Nothing to do to plant an empty forest. - return Ok(()); - }; - - // Just a dummy label for use in adding edges. - // - // REVIEW: I probably should refactor the API for builder_mut. - let root_label = fragment - .vertex_label(root)? - .ok_or(Error::NodeNoLabel(root))?; - - let nodes_len = fragment.nodes_len(); - - /// If the fragment root has a duplicate label, the forest - /// will not grow, so we use the label to find the adjoined - /// node index. - /// - /// The nodes hava already been added to the forest, so it is - /// safe to call unwrap. - macro_rules! conversion ( - ($node:expr) => { - { - builder - .query_label( - fragment - .vertex_label($node)? - .ok_or(Error::NodeNoLabel($node))? - ).unwrap() - } - } - ); - - // If the fragment has been planted before, we just add an - // edge. - - if planted { - builder.add_edge(node_id, conversion!(root), root_label)?; - - return Ok(()); - } - - // First adjoin those nodes and join the edges later. - - for node in 0..nodes_len { - let label = fragment - .vertex_label(node)? - .ok_or(Error::NodeNoLabel(node))?; - - builder.add_vertex(label); - } - - // Don't forget to join the new sub-forest to the original - // forest, at the specified position. - - builder.add_edge(node_id, conversion!(root), root_label)?; - - // We can try to calculate the depth of fragments, if we need - // to lower the memory usage. But in our use cases, we - // usually deal with fragments where each node has at most one - // child, so the depth is supposed to be equal to the length - // in this case. - let mut stack = Vec::with_capacity(fragment.nodes_len()); - - stack.push(root); - - while let Some(top) = stack.pop() { - for child in fragment.children_of(top)? { - builder.add_edge(conversion!(top), conversion!(child), root_label)?; - } - } - - Ok(()) - } - - fn clone_node(&mut self, node_id: usize) -> Result { - let builder = PLGBuilderMut::from_graph_mut(&mut self.graph); - - let old_label = builder - .vertex_label(node_id)? - .ok_or(Error::NodeNoLabel(node_id))?; - - if old_label.is_packed() { - return Err(ForestLabelError::ClonePack.into()); - } - - if old_label.is_cloned().is_some() { - let mut parents = self.parents_of(node_id)?; - - // Make sure it only has one parent, which is the - // representative node. - assert_eq!(parents.len(), 1); - - // We checked its length is 1, so it is safe to unwrap - // here. - return Ok(parents.next().unwrap().node()); - } - - let rep_label = old_label.pack()?; - - let new_label = old_label.clone(self)?; - - let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); - - // Make a new node - let new_index = builder.add_vertex(rep_label); - - // Re-direct parents to the new node. - // - // This must be done before pointing the new node to the old - // node, otherwise that edge will be redirected as well. - - // Unfortunately I cannot loop through parents and mutate them - // at the same time, so I first collect them into a vector. - let parents: Vec<_> = builder.parents_of(node_id)?.collect(); - - for parent in parents.into_iter() { - builder.redirect(parent.node(), parent.edge(), new_index)?; - } - - // Point the new node to the old node. OLD_LABEL is just a - // place holder. - - builder.add_edge(new_index, node_id, old_label)?; - - // Modify the label of the old node. - - builder.set_label(node_id, new_label)?; - - Ok(new_index) - } -} - -#[cfg(test)] -mod forest_test { - use super::*; - - macro_rules! leaf ( - ($label:expr, $type:tt) =>{ - DefaultForest::>::new_leaf($label) - }; - ($label:expr) => { - DefaultForest::>::new_leaf($label) - } - ); - - #[test] - fn test_forest_api() -> Result<(), Box> { - let forest: DefaultForest = Default::default(); - - // empty forest - - assert!(forest.is_empty()); - - // leaf forest - - let mut forest = leaf!(0, usize); - - assert_eq!(forest.nodes_len(), 1); - assert_eq!(forest.root(), Some(0)); - - // add some child - - forest.plant(0, leaf!(1), false)?; - - assert_eq!(forest.nodes_len(), 2); - let mut children = forest.children_of(0)?; - assert_eq!(children.next(), Some(1)); - assert_eq!(children.next(), None); - - // add more children - - forest.plant(0, leaf!(2), false)?; - forest.plant(0, leaf!(3), false)?; - forest.plant(0, leaf!(4), false)?; - forest.plant(2, leaf!(5), false)?; - - assert_eq!(forest.nodes_len(), 6); - let mut children = forest.children_of(0)?; - assert_eq!(children.next(), Some(1)); - assert_eq!(children.next(), Some(2)); - assert_eq!(children.next(), Some(3)); - assert_eq!(children.next(), Some(4)); - let mut children = forest.children_of(2)?; - assert_eq!(children.next(), Some(5)); - assert_eq!(children.next(), None); - - let mut test_forest = leaf!(0); - test_forest.plant(0, leaf!(1), false)?; - test_forest.plant(0, leaf!(2), false)?; - test_forest.plant(0, leaf!(3), false)?; - test_forest.plant(2, leaf!(5), false)?; - - assert!(forest.is_prefix(0, &test_forest)?); - - let mut test_forest = leaf!(0); - test_forest.plant(0, leaf!(1), false)?; - test_forest.plant(0, leaf!(2), false)?; - // this child of the root should have label 3 in order to be a - // prefix. - test_forest.plant(0, leaf!(4), false)?; - test_forest.plant(2, leaf!(5), false)?; - - assert!(!forest.is_prefix(0, &test_forest)?); - - let mut test_forest = leaf!(2); - test_forest.plant(0, leaf!(5), false)?; - - assert!(forest.is_prefix(2, &test_forest)?); - - // now test cloning - - // add a duplicate label - forest.plant(3, leaf!(5), false)?; - - let _len = forest.nodes_len(); - - forest.clone_node(5)?; - - assert_eq!(forest.nodes_len(), 7); - - #[cfg(feature = "test-print-viz")] - forest.print_viz("forest.gv")?; - - Ok(()) - } -} diff --git a/forest/src/lib.rs b/forest/src/lib.rs index 0b1092f..a888fae 100644 --- a/forest/src/lib.rs +++ b/forest/src/lib.rs @@ -1,26 +1,27 @@ #![warn(missing_docs)] -//! This file defines the expected behaviours of forests, and provides -//! a default implementation for forests. +//! This file defines the expected behaviours of forests. //! -//! The default forest actually implements the so-called shared packed -//! parse forest, or SPPF. It packs sub-trees with the same parents -//! under the same branch, and lets nodes from different branches -//! share the same children, and hence the name. +//! The forests are the basis of the `semiring` crate. use graph::{error::Error as GError, GraphLabel, LabelGraph, ParentsGraph}; +use std::borrow::Borrow; + /// An internal type that indicates the status of a node as either a /// packed, cloned, or plain node. #[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Default)] -enum ForestLabelType { +pub enum ForestLabelType { + /// A packed node Packed, + /// A plain node #[default] Plain, + /// A cloned node Cloned(usize), } -impl core::fmt::Display for ForestLabelType { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { +impl std::fmt::Display for ForestLabelType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::Packed => write!(f, "packed"), Self::Plain => write!(f, "plain"), @@ -36,14 +37,17 @@ pub struct ForestLabel { status: ForestLabelType, } -impl core::fmt::Display for ForestLabel { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { - writeln!(f, "label: {}, status: {}", self.label, self.status) +impl std::fmt::Display for ForestLabel { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if !matches!(self.status, ForestLabelType::Plain) { + write!(f, "{}, {}", self.label, self.status) + } else { + write!(f, "{}", self.label) + } } } /// The type of erros for converting forest labels. -#[non_exhaustive] #[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] pub enum ForestLabelError { /// Try to pack a cloned node. @@ -52,8 +56,8 @@ pub enum ForestLabelError { ClonePack, } -impl core::fmt::Display for ForestLabelError { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { +impl std::fmt::Display for ForestLabelError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::PackClone => write!(f, "cannot pack a cloned node"), Self::ClonePack => write!(f, "cannot clone a packed node"), @@ -64,33 +68,50 @@ impl core::fmt::Display for ForestLabelError { impl std::error::Error for ForestLabelError {} impl ForestLabel { + /// Construct a new label with the inner `label` and `status`. + pub fn new(label: T, status: ForestLabelType) -> Self { + Self { label, status } + } + /// Retrieve the label. pub fn label(&self) -> T { self.label } + /// Retrieve the status. + pub fn status(&self) -> ForestLabelType { + self.status + } + /// Return true if and only if this node is packed. pub fn is_packed(&self) -> bool { - matches!(self.status, ForestLabelType::Packed) + self.status == ForestLabelType::Packed } /// Retrieve the optional clone index. - pub fn is_cloned(&self) -> Option { - match self.status { - ForestLabelType::Cloned(index) => Some(index), - _ => None, + pub fn clone_index(&self) -> Option { + if let ForestLabelType::Cloned(index) = self.status { + Some(index) + } else { + None } } + /// Return true if and only if this node is a plain node. + pub fn is_plain(&self) -> bool { + self.status == ForestLabelType::Plain + } + /// Try to clone a node. pub fn clone(self, forest: &F) -> Result where - F: Forest, + F: LabelGraph>, { if self.is_packed() { + dbg!(); Err(ForestLabelError::ClonePack) } else { - let clone_index = if let Some(old_index) = self.is_cloned() { + let clone_index = if let Some(old_index) = self.clone_index() { let mut new_index: usize = old_index + 1; let mut new_label = Self { @@ -120,7 +141,7 @@ impl ForestLabel { /// Try to pack a node. pub fn pack(self) -> Result { - if self.is_cloned().is_some() { + if self.clone_index().is_some() { Err(ForestLabelError::PackClone) } else { let new_label = Self { @@ -140,7 +161,7 @@ impl From for ForestLabel { } } -/// The expected behaviours of a forest. +/// The expected behaviours of an item derivation forest. /// /// Note that it requires only a subset of the functionalities of /// labelled graphs. @@ -157,28 +178,45 @@ pub trait Forest: ParentsGraph + LabelGraph> { /// label. fn new_leaf(label: T) -> Self; + /// Transform the label at the given node. + fn transform_label( + &mut self, + node_id: usize, + transform: impl FnOnce(T) -> T, + ) -> Result<(), Self::Error>; + /// Detect if the fragment is a prefix of the sub-forest rooted at /// `node_id`. fn is_prefix(&self, node_id: usize, fragment: F) -> Result where - F: AsRef; + F: Borrow; /// Extend the forest by adjoining another forest at a given node. fn plant(&mut self, node_id: usize, fragment: F, planted: bool) -> Result<(), Self::Error> where - F: AsRef; + F: Borrow; /// Clone a node by making a new node and making all the nodes /// that previously pointed to the old node now point to the new /// node, and the new node points to the old node. Return the - /// index of the new node. In addition, if the old node had - /// already been cloned, just return the index of its - /// representitave. + /// index of the new node. However, if, and only if, + /// `no_new_clone` is `true`, do not make a new clone; instead + /// return the clone index that would be used if a new clone was + /// made. + /// + /// Also, `preserved_edges_num` many edges out of the old node + /// will be copied to be the children of the new node. /// /// The labels of the representing node and of the cloned node /// will be handled automatically. - fn clone_node(&mut self, node_id: usize) -> Result; + fn clone_node( + &mut self, + node_id: usize, + preserved_edges_num: usize, + no_new_clone: bool, + ) -> Result; } -pub mod default; -pub use default::Error; +pub mod bytes; + +pub use bytes::forest_to_bytes; diff --git a/src/bytes.rs b/src/bytes.rs deleted file mode 100644 index f764077..0000000 --- a/src/bytes.rs +++ /dev/null @@ -1,175 +0,0 @@ -//! This module defines functions to turn a forest of forest labels -//! into a sequence of bytes. -//! -//! To be more specific, a forest consists of two parts: a vector of -//! node labels and a graph specifying the relations between nodes. -//! -//! # Number format (endianness) -//! -//! Every number mentionned in this format is an unsigned integer of -//! 64 bits, or 8 bytes. Every such number is specified in the big -//! endian format. -//! -//! # Parts of the sequence of bytes -//! -//! The sequence of bytes has three parts: -//! -//! ## Header -//! -//! The header specifies metadata of this forest. -//! -//! ### Special mark -//! -//! The first three bytes form the string "rep", as a special mark. -//! -//! ### Number of nodes -//! -//! The next 8 bytes specifies the number of nodes of this forest. -//! -//! ## Graph -//! -//! Next comes the underlying graph for this forest. This part -//! consists of a vector of vectors of numbers. Each vector of -//! numbers represents the list of children of a node. So the number -//! of vectors of numbers is equal to the number of nodes. -//! -//! ### Vector of vectors of numbers -//! -//! The vectors are not simply concatenated one after another, as that -//! way one cannot read a random node in a constant amount of time. -//! -//! Instead, we first specify the number of children of each node -//! first, along with the offset for that node of the vector of -//! children. -//! -//! As an example, if a graph has three nodes, represented as the -//! adjacency list: `[[1, 2], [2], []]`, then its representation as a -//! sequence of bytes is as follows: -//! ```text -//! 2, x, 1, y, 0, z, 1, 2, 2, -//! ^ ^ ^ -//! x y z -//! ``` -//! -//! This has the advantage that we can read the children of the `n`-th -//! node in constant time. -//! -//! ## Vector of labels -//! -//! Each label occupies a fixed number of bytes, so we simply put the -//! labels one after another. The only thing to note here is the -//! format of the labels. -//! -//! ### Labels -//! -//! Each label has 3 parts: -//! -//! 1. Status: either Packed, Cloned, or Plain. If the node is -//! cloned, it has a clone index. So in total this part occupies 1 -//! byte for the status and 8 bytes for the clone index. -//! 2. Start and end: the range in the input sentence. We just store -//! two numbers here. Hence this part occupies 16 bytes. -//! 3. Grammar label: either a terminal, a non-terminal, or a rule. -//! Each variant needs a number to speify its index. So in total this -//! part occupies 1 byte for the variant discriminant and 8 bytes for -//! the number. -//! -//! To sum up, each label occupies 34 bytes. - -use chain::item::{default::DefaultForest, ForestLabel}; -use grammar::{GrammarLabel, GrammarLabelType, TNT}; -use graph::{Graph, LabelGraph}; - -pub(super) fn forest_to_bytes(forest: &DefaultForest>) -> Vec { - // First calculate the total number of bytes. - let nodes_len = forest.nodes_len(); - - let degrees: Vec<_> = forest - .nodes() - .map(|node| forest.degree(node).unwrap_or(0)) - .collect(); - - let total_degree: usize = degrees.iter().copied().sum(); - - let len: usize = 8 // total number of bytes at the start - + 3 // special mark - + 8 // number of nodes - + 8 // offset of labels - + 16 * nodes_len // degree & offset for each node - + 8 * total_degree // children of each node - + 34 * nodes_len // labels - ; - - // Then fill in the bytes. - - let mut bytes: Vec = Vec::with_capacity(len); - - // First the headers - - bytes.extend(len.to_be_bytes()); - bytes.extend([114, 101, 112]); // rep - bytes.extend(nodes_len.to_be_bytes()); - bytes.extend((len - 34 * nodes_len).to_be_bytes()); - - let mut accumulated: usize = 0; - - // Then degrees and offsets - - for degree in degrees.iter().copied() { - bytes.extend(degree.to_be_bytes()); - bytes.extend((8 + 3 + 8 + 8 + 16 * nodes_len + 8 * accumulated).to_be_bytes()); - - accumulated += degree; - } - - // Then the children - - bytes.extend(forest.nodes().flat_map(|node| { - forest - .children_of(node) - .unwrap_or_default() - .flat_map(|child| child.to_be_bytes()) - })); - - // Finally the labels - - 'nodes_loop: for node in forest.nodes() { - let label = match forest.vertex_label(node) { - Ok(Some(label)) => label, - _ => continue 'nodes_loop, - }; - - if label.is_plain() { - bytes.extend(std::iter::repeat(0).take(9)); - } else if label.is_packed() { - bytes.extend(std::iter::once(1).chain(std::iter::repeat(0).take(8))); - } else if let Some(index) = label.clone_index() { - bytes.extend(std::iter::once(2).chain(index.to_be_bytes())); - } - - let label = label.label(); - - bytes.extend(label.start().to_be_bytes()); - bytes.extend(label.end().unwrap_or(0).to_be_bytes()); - - let label = label.label(); - - match label { - GrammarLabelType::TNT(TNT::Ter(t)) => { - bytes.extend(std::iter::once(0).chain(t.to_be_bytes())); - } - GrammarLabelType::TNT(TNT::Non(n)) => { - bytes.extend(std::iter::once(1).chain(n.to_be_bytes())); - } - GrammarLabelType::Rule(r) => { - bytes.extend(std::iter::once(2).chain(r.to_be_bytes())); - } - } - } - - if bytes.len() != len { - dbg!(); - } - - bytes -} diff --git a/src/lib.rs b/src/lib.rs index 3483847..69be41f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,10 +9,12 @@ use chain::{atom::DefaultAtom, default::DefaultChain, Chain}; use grammar::Grammar; // For printing forests -use chain::item::{default::DefaultForest, ForestLabel, ForestLabelType}; +use chain::item::default::DefaultForest; use grammar::{GrammarLabel, TNT}; use graph::LabelGraph; +use forest::{ForestLabel, ForestLabelType}; + /// This struct is the representation of a parser. /// /// When the user constructs a parser, an instance of this struct will @@ -485,7 +487,7 @@ extern "C" fn parser_parse( Ok(forest) => { Box::leak(parser_box); - let mut bytes = bytes::forest_to_bytes(&forest); + let mut bytes = forest::forest_to_bytes(&forest); let bytes_len = bytes.len().to_be_bytes().to_vec(); @@ -835,5 +837,3 @@ extern "C" fn print_forest_node(forest: *mut CForest, node: *mut std::os::raw::c println!("node {node} has label {label}"); } - -pub mod bytes; -- cgit v1.2.3-18-g5258