diff options
Diffstat (limited to 'chain/src/item/default/mod.rs')
-rw-r--r-- | chain/src/item/default/mod.rs | 810 |
1 files changed, 810 insertions, 0 deletions
diff --git a/chain/src/item/default/mod.rs b/chain/src/item/default/mod.rs new file mode 100644 index 0000000..7ecc70d --- /dev/null +++ b/chain/src/item/default/mod.rs @@ -0,0 +1,810 @@ +//! This module provides a default implementation of iten derivation +//! forest. + +use super::*; +use crate::atom::default::DefaultAtom; +use grammar::{GrammarLabel, GrammarLabelType}; +use graph::{ + builder::BuilderMut, labelled::binary::PLGBuilderMut, Graph, LabelGraph, PLGraph, RedirectGraph, +}; + +use core::fmt::Display; + +/// The type of errors for forest operations. +#[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd)] +pub enum Error { + /// An index is out of bounds. + /// + /// The first component is the index that is out of bounds, and + /// the second component is the current length of nodes. + IndexOutOfBounds(usize, usize), + /// The forest does not permit duplicate nodes but encounters a + /// repeated node. + DuplicatedNode(usize), + /// A node has no labels while it is required to have one. + NodeNoLabel(usize), + /// Encounter an invalid error in converting from an error of + /// graphs. + InvalidGraphError(GError), + /// Encounter an error when converting forest labels. + LabelConversion(ForestLabelError), + /// Trying to split a packed node. + SplitPack(usize), +} + +impl Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Error::IndexOutOfBounds(index, bound) => { + write!(f, "index {index} is out of bound {bound}") + } + Error::DuplicatedNode(n) => write!(f, "node {n} is duplicated"), + Error::InvalidGraphError(ge) => write!(f, "invalid error: {ge}"), + Error::NodeNoLabel(n) => write!(f, "node {n} has no labels, but it should have one"), + Error::LabelConversion(le) => write!(f, "fail to convert labels: {le}"), + Error::SplitPack(n) => write!(f, "cannot split the packed node {n}"), + } + } +} + +impl std::error::Error for Error {} + +impl From<GError> for Error { + fn from(ge: GError) -> Self { + match ge { + GError::IndexOutOfBounds(index, bound) => Self::IndexOutOfBounds(index, bound), + GError::DuplicatedNode(n) => Self::DuplicatedNode(n), + _ => Self::InvalidGraphError(ge), + } + } +} + +impl From<ForestLabelError> for Error { + fn from(le: ForestLabelError) -> Self { + Self::LabelConversion(le) + } +} + +/// A default implementation of forest. +#[derive(Debug, Clone)] +pub struct DefaultForest<T: GraphLabel> { + graph: PLGraph<T>, + root: Option<usize>, +} + +impl<T: GraphLabel> Default for DefaultForest<T> { + fn default() -> Self { + let graph = Default::default(); + let root = None; + + Self { graph, root } + } +} + +impl<T: GraphLabel> AsRef<DefaultForest<T>> for DefaultForest<T> { + fn as_ref(&self) -> &DefaultForest<T> { + self + } +} + +impl<T: GraphLabel> Graph for DefaultForest<T> { + type Iter<'a> = <PLGraph<T> as Graph>::Iter<'a> + where + Self: 'a; + + #[inline] + fn is_empty(&self) -> bool { + self.graph.is_empty() + } + + #[inline] + fn nodes_len(&self) -> usize { + self.graph.nodes_len() + } + + #[inline] + fn edges_len(&self) -> Option<usize> { + self.graph.edges_len() + } + + #[inline] + fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, GError> { + self.graph.children_of(node_id) + } + + #[inline] + fn degree(&self, node_id: usize) -> Result<usize, GError> { + self.graph.degree(node_id) + } + + #[inline] + fn is_empty_node(&self, node_id: usize) -> Result<bool, GError> { + self.graph.is_empty_node(node_id) + } + + #[inline] + fn has_edge(&self, source: usize, target: usize) -> Result<bool, GError> { + self.graph.has_edge(source, target) + } + + fn replace_by_builder(&mut self, _builder: impl graph::Builder<Result = Self>) { + unimplemented!() + } + + #[inline] + fn print_viz(&self, filename: &str) -> Result<(), std::io::Error> { + self.graph.print_viz(filename) + } +} + +impl<T: GraphLabel> ParentsGraph for DefaultForest<T> { + type Iter<'a>= <PLGraph<T> as ParentsGraph>::Iter<'a> + where + Self:'a; + + #[inline] + fn parents_of(&self, node_id: usize) -> Result<<Self as ParentsGraph>::Iter<'_>, GError> { + self.graph.parents_of(node_id) + } + + #[inline] + fn nth_child(&self, node_id: usize, n: usize) -> Result<usize, GError> { + self.graph.nth_child(node_id, n) + } + + #[inline] + fn edge_to_parent( + &self, + source: usize, + target: usize, + ) -> Result<Option<graph::Parent>, GError> { + self.graph.edge_to_parent(source, target) + } +} + +impl<T: GraphLabel> LabelGraph<T> for DefaultForest<T> { + type Iter<'a> = std::iter::Empty<usize> + where + Self: 'a; + + type LabelIter<'a> = std::iter::Empty<(&'a T, <Self as LabelGraph<T>>::Iter<'a>)> + where + Self: 'a, + T: 'a; + + type EdgeLabelIter<'a> = std::iter::Empty<T> + where + Self: 'a, + T: 'a; + + #[inline] + fn query_label(&self, label: T) -> Option<usize> { + self.graph.query_label(label) + } + + #[inline] + fn vertex_label(&self, node_id: usize) -> Result<Option<T>, GError> { + self.graph.vertex_label(node_id) + } + + fn edge_label( + &self, + _source: usize, + _target: usize, + ) -> Result<Self::EdgeLabelIter<'_>, GError> { + unimplemented!("edges have no labels") + } + + fn find_children_with_label( + &self, + _node_id: usize, + _label: &T, + ) -> Result<<Self as LabelGraph<T>>::Iter<'_>, GError> { + unimplemented!("edges have no labels") + } + + fn labels_of(&self, _node_id: usize) -> Result<Self::LabelIter<'_>, GError> { + unimplemented!("edges have no labels") + } + + fn has_edge_label(&self, _node_id: usize, _label: &T, _target: usize) -> Result<bool, GError> { + unimplemented!("edges have no labels") + } +} + +impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> { + type Error = Error; + + fn root(&self) -> Option<usize> { + self.root + } + + fn new_leaf(label: T) -> Self { + let mut graph = PLGraph::default(); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut graph); + + let root = Some(builder.add_vertex(ForestLabel::from(label))); + + Self { graph, root } + } + + fn transform_label( + &mut self, + node_id: usize, + transform: impl FnOnce(T) -> T, + ) -> Result<(), Self::Error> { + let vertex_label = self + .vertex_label(node_id)? + .ok_or(Error::NodeNoLabel(node_id))?; + + let transformed_label = transform(vertex_label.label()); + + let transformed_label = ForestLabel::new(transformed_label, vertex_label.status); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + builder + .set_label(node_id, transformed_label) + .map_err(Into::into) + } + + fn is_prefix<F>(&self, node_id: usize, fragment: F) -> Result<bool, Self::Error> + where + F: Borrow<Self>, + { + if !self.has_node(node_id) { + return Err(Error::IndexOutOfBounds(node_id, self.nodes_len())); + } + + // We do a depth-first traversal to determine if every node + // encountered has the same set of children (labels taken into + // the consideration). + + let fragment = fragment.borrow(); + + let mut frag_stack = Vec::with_capacity(fragment.nodes_len()); + + let mut self_stack = Vec::with_capacity(fragment.nodes_len()); + + let frag_root = if let Some(root) = fragment.root() { + root + } else { + // an empty forest is a prefix of any forest. + return Ok(true); + }; + + frag_stack.push(frag_root); + self_stack.push(node_id); + + // defer popping + while let (Some(frag_top), Some(self_top)) = + (frag_stack.last().copied(), self_stack.last().copied()) + { + frag_stack.pop(); + self_stack.pop(); + + if fragment.vertex_label(frag_top)? != self.vertex_label(self_top)? { + // not a prefix + return Ok(false); + } + + let mut self_children = self.children_of(self_top)?; + + for child in fragment.children_of(frag_top)? { + if let Some(self_child) = self_children.next() { + frag_stack.push(child); + self_stack.push(self_child); + } else { + // too few children + return Ok(false); + } + } + } + + // Check both stacks are empty at the end. + Ok(frag_stack.is_empty() && self_stack.is_empty()) + } + + fn plant<F>(&mut self, node_id: usize, fragment: F, planted: bool) -> Result<(), Self::Error> + where + F: Borrow<Self>, + { + // Convert self to a builder_mut, and traverse fragment in a + // depth-first manner and adjoin corresponding nodes along the + // way. + + if !self.has_node(node_id) { + return Err(Error::IndexOutOfBounds(node_id, self.nodes_len())); + } + + let fragment = fragment.borrow(); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let root = if let Some(root) = fragment.root() { + root + } else { + // Nothing to do to plant an empty forest. + return Ok(()); + }; + + // Just a dummy label for use in adding edges. + // + // REVIEW: I probably should refactor the API for builder_mut. + let root_label = fragment + .vertex_label(root)? + .ok_or(Error::NodeNoLabel(root))?; + + let nodes_len = fragment.nodes_len(); + + /// If the fragment root has a duplicate label, the forest + /// will not grow, so we use the label to find the adjoined + /// node index. + /// + /// The nodes hava already been added to the forest, so it is + /// safe to call unwrap. + macro_rules! conversion ( + ($node:expr) => { + { + builder + .query_label( + fragment + .vertex_label($node)? + .ok_or(Error::NodeNoLabel($node))? + ).unwrap() + } + } + ); + + // If the fragment has been planted before, we just add an + // edge. + + if planted { + builder.add_edge(node_id, conversion!(root), root_label)?; + + return Ok(()); + } + + // First adjoin those nodes and join the edges later. + + for node in 0..nodes_len { + let label = fragment + .vertex_label(node)? + .ok_or(Error::NodeNoLabel(node))?; + + builder.add_vertex(label); + } + + // Don't forget to join the new sub-forest to the original + // forest, at the specified position. + + builder.add_edge(node_id, conversion!(root), root_label)?; + + // We can try to calculate the depth of fragments, if we need + // to lower the memory usage. But in our use cases, we + // usually deal with fragments where each node has at most one + // child, so the depth is supposed to be equal to the length + // in this case. + let mut stack = Vec::with_capacity(fragment.nodes_len()); + + stack.push(root); + + let mut seen_nodes = std::collections::HashSet::<usize>::new(); + + while let Some(top) = stack.pop() { + seen_nodes.insert(top); + + for child in fragment.children_of(top)? { + builder.add_edge(conversion!(top), conversion!(child), root_label)?; + + if !seen_nodes.contains(&child) { + seen_nodes.insert(child); + stack.push(child); + } + } + } + + Ok(()) + } + + fn clone_node( + &mut self, + node_id: usize, + preserved_edges_num: usize, + no_new_clone: bool, + ) -> Result<usize, Self::Error> { + let builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let old_label = builder + .vertex_label(node_id)? + .ok_or(Error::NodeNoLabel(node_id))?; + + if old_label.is_packed() { + dbg!(node_id, old_label); + return Err(ForestLabelError::ClonePack.into()); + } + + // We are sure old_label is not packed here, so it is safe to + // unwrap. + let new_label = old_label + .clone::<DefaultForest<ForestLabel<T>>>(self.borrow()) + .unwrap(); + + if old_label.clone_index().is_some() { + if no_new_clone { + return Ok(new_label.clone_index().unwrap()); + } + + let mut parents = self.parents_of(node_id)?; + + // Make sure it only has one parent, which is the + // representative node. + assert_eq!(parents.len(), 1); + + // We checked its length is 1, so it is safe to unwrap + // here. + let rep_node = parents.next().unwrap().node(); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let new_clone = builder.add_vertex(new_label); + + builder.add_edge(rep_node, new_clone, new_label)?; + + let preserve_children: Vec<_> = builder + .children_of(node_id)? + .take(preserved_edges_num) + .collect(); + + for child in preserve_children.into_iter() { + builder.add_edge(new_clone, child, new_label)?; + } + + return Ok(new_clone); + } + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + // We are sure old_label is not a clone here, so it is safe to + // unwrap. + let rep_label = old_label.pack().unwrap(); + + // Make a new node + let new_index = builder.add_vertex(rep_label); + + // Re-direct parents to the new node. + // + // This must be done before pointing the new node to the old + // node, otherwise that edge will be redirected as well. + + // Unfortunately I cannot loop through parents and mutate them + // at the same time, so I first collect them into a vector. + let parents: Vec<_> = builder.parents_of(node_id)?.collect(); + + for parent in parents.into_iter() { + builder.redirect(parent.node(), parent.edge(), new_index)?; + } + + // Point the new node to the old node. OLD_LABEL is just a + // place holder. + + builder.add_edge(new_index, node_id, old_label)?; + + // Modify the label of the old node. + + builder.set_label(node_id, new_label)?; + + // Make another clone + + // new_label is cloned, so is guaranteed not to be packed, and + // we are safe to unwrap. + let new_label = new_label + .clone::<DefaultForest<ForestLabel<T>>>(self.borrow()) + .unwrap(); + + if no_new_clone { + return Ok(new_label.clone_index().unwrap()); + } + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let new_clone = builder.add_vertex(new_label); + + builder.add_edge(new_index, new_clone, new_label)?; + + let preserve_children: Vec<_> = builder + .children_of(node_id)? + .take(preserved_edges_num) + .collect(); + + for child in preserve_children { + builder.add_edge(new_clone, child, new_label)?; + } + + Ok(new_clone) + } +} + +impl<T: GraphLabel> PartialEq for DefaultForest<ForestLabel<T>> { + fn eq(&self, other: &Self) -> bool { + let self_root = self.root(); + let other_root = other.root(); + + if (self_root.is_some() && other_root.is_none()) + || (self_root.is_none() && other_root.is_some()) + { + return false; + } else if self_root.is_none() && other_root.is_none() { + return true; + } + + // both roots are not empty + + let self_root = self_root.unwrap(); + let other_root = other_root.unwrap(); + + self.is_prefix(self_root, other).unwrap_or(false) + && other.is_prefix(other_root, self).unwrap_or(false) + } +} + +pub mod splone; + +impl<T: GraphLabel> DefaultForest<T> { + /// Create a forest with only one leaf from a raw label, unlike + /// `new_leaf`, which transforms the label for convenience. + pub fn new_leaf_raw(label: T) -> Self { + let mut graph = PLGraph::default(); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut graph); + + let root = Some(builder.add_vertex(label)); + + Self { graph, root } + } +} + +impl<T: GraphLabel> Eq for DefaultForest<ForestLabel<T>> {} + +impl DefaultForest<ForestLabel<GrammarLabel>> { + /// Prints the forest without nodes that do not have ending + /// positions. + /// + /// Nodes without ending positions are "unclosed nodes" that only + /// serve the role of creating nodes with ending positions. + pub fn print_closed_viz(&self, filename: &str) -> Result<(), std::io::Error> { + let discard_nodes: std::collections::HashSet<_> = self + .nodes() + .filter(|node| match self.vertex_label(*node) { + Ok(label) => { + if let Some(label) = label { + label.label().end().is_none() + } else { + true + } + } + Err(_) => true, + }) + .collect(); + + let filename = format!("output/{filename}"); + + let preamble = "digraph nfa { + fontname=\"Helvetica,Arial,sans-serif\" + node [fontname=\"Helvetica,Arial,sans-serif\"] + edge [fontname=\"Helvetica,Arial,sans-serif\"] + rankdir=LR;\n"; + + let mut post = String::new(); + + for node in self.nodes() { + if discard_nodes.contains(&node) { + continue; + } + + post.push_str(&format!( + " {node} [label = \"{}\"]\n", + match self.vertex_label(node) { + Ok(Some(label)) => { + format!("{label}") + } + _ => { + " ε ".to_owned() + } + } + )); + } + + for (source, target) in self.edges() { + if discard_nodes.contains(&source) || discard_nodes.contains(&target) { + continue; + } + + post.push_str(&format!(" {source} -> {target}\n")); + } + + post.push_str("}\n"); + + let result = format!("{preamble}{post}"); + + if std::fs::metadata(&filename).is_ok() { + std::fs::remove_file(&filename)?; + } + + let mut file = std::fs::File::options() + .write(true) + .create(true) + .open(&filename)?; + + use std::io::Write; + + file.write_all(result.as_bytes()) + } +} + +/// Print the labels found in the forest, so that we can easily +/// understand what those labels mean. +pub fn print_labels( + atom: impl Borrow<DefaultAtom>, + forest: impl Borrow<DefaultForest<ForestLabel<GrammarLabel>>>, +) -> Result<(), Box<dyn std::error::Error>> { + let forest = forest.borrow(); + let atom = atom.borrow(); + + for node in forest.nodes() { + let label = forest.vertex_label(node)?; + + if let Some(label) = label { + let label = label.label.label(); + + match label { + GrammarLabelType::TNT(tnt) => { + println!("{tnt} = {}", atom.name_of_tnt(tnt)?); + } + GrammarLabelType::Rule(pos) => { + println!("pos {pos} = {}", atom.rule_pos_string(pos)?); + } + } + } else { + return Err(Error::NodeNoLabel(node).into()); + } + } + + Ok(()) +} + +#[allow(unused_macros)] +macro_rules! leaf ( + ($label:expr, $type:tt) =>{ + DefaultForest::<ForestLabel<$type>>::new_leaf($label) + }; + ($label:expr) => { + DefaultForest::<ForestLabel<usize>>::new_leaf($label) + } +); + +#[allow(unused_imports)] +pub(crate) use leaf; + +#[cfg(test)] +mod item_test { + use super::*; + + #[test] + fn test_forest_api() -> Result<(), Box<dyn std::error::Error>> { + let forest: DefaultForest<ForestLabel<usize>> = Default::default(); + + // empty forest + + assert!(forest.is_empty()); + + // leaf forest + + let mut forest = leaf!(0, usize); + + assert_eq!(forest.nodes_len(), 1); + assert_eq!(forest.root(), Some(0)); + + // add some child + + forest.plant(0, leaf!(1), false)?; + + assert_eq!(forest.nodes_len(), 2); + let mut children = forest.children_of(0)?; + assert_eq!(children.next(), Some(1)); + assert_eq!(children.next(), None); + + // add more children + + forest.plant(0, leaf!(2), false)?; + forest.plant(0, leaf!(3), false)?; + forest.plant(0, leaf!(4), false)?; + forest.plant(2, leaf!(5), false)?; + + assert_eq!(forest.nodes_len(), 6); + let mut children = forest.children_of(0)?; + assert_eq!(children.next(), Some(1)); + assert_eq!(children.next(), Some(2)); + assert_eq!(children.next(), Some(3)); + assert_eq!(children.next(), Some(4)); + let mut children = forest.children_of(2)?; + assert_eq!(children.next(), Some(5)); + assert_eq!(children.next(), None); + + let mut test_forest = leaf!(0); + test_forest.plant(0, leaf!(1), false)?; + test_forest.plant(0, leaf!(2), false)?; + test_forest.plant(0, leaf!(3), false)?; + test_forest.plant(2, leaf!(5), false)?; + + assert!(forest.is_prefix(0, &test_forest)?); + + let mut test_forest = leaf!(0); + test_forest.plant(0, leaf!(1), false)?; + test_forest.plant(0, leaf!(2), false)?; + // this child of the root should have label 3 in order to be a + // prefix. + test_forest.plant(0, leaf!(4), false)?; + test_forest.plant(2, leaf!(5), false)?; + + assert!(!forest.is_prefix(0, &test_forest)?); + + let mut test_forest = leaf!(2); + test_forest.plant(0, leaf!(5), false)?; + + assert!(forest.is_prefix(2, &test_forest)?); + + // now test cloning + + // add a duplicate label + forest.plant(3, leaf!(5), false)?; + + assert_eq!(forest.nodes_len(), 6); + + // add two children to 5 + + forest.plant(5, leaf!(6), false)?; + forest.plant(5, leaf!(7), false)?; + + // clone and preserve one child + let cloned = forest.clone_node(5, 1, false)?; + + assert_eq!(forest.nodes_len(), 10); + + assert_eq!(forest.degree(cloned)?, 1); + assert_eq!(forest.degree(5)?, 2); + + #[cfg(feature = "test-print-viz")] + forest.print_viz("forest.gv")?; + + Ok(()) + } + + #[test] + fn test_eq() -> Result<(), Box<dyn std::error::Error>> { + let mut forest = leaf!(0, usize); + + forest.plant(0, leaf!(1), false)?; + forest.plant(0, leaf!(2), false)?; + forest.plant(0, leaf!(3), false)?; + forest.plant(0, leaf!(4), false)?; + forest.plant(2, leaf!(5), false)?; + + let mut test_forest = leaf!(0); + test_forest.plant(0, leaf!(1), false)?; + test_forest.plant(0, leaf!(2), false)?; + test_forest.plant(0, leaf!(3), false)?; + test_forest.plant(2, leaf!(5), false)?; + + assert_ne!(forest, test_forest); + assert_ne!(test_forest, forest); + + test_forest.plant(0, leaf!(4), false)?; + + assert_eq!(forest, test_forest); + assert_eq!(test_forest, forest); + + Ok(()) + } +} |