diff options
Diffstat (limited to 'chain/src/item/default')
-rw-r--r-- | chain/src/item/default/mod.rs | 810 | ||||
-rw-r--r-- | chain/src/item/default/splone.rs | 760 |
2 files changed, 1570 insertions, 0 deletions
diff --git a/chain/src/item/default/mod.rs b/chain/src/item/default/mod.rs new file mode 100644 index 0000000..7ecc70d --- /dev/null +++ b/chain/src/item/default/mod.rs @@ -0,0 +1,810 @@ +//! This module provides a default implementation of iten derivation +//! forest. + +use super::*; +use crate::atom::default::DefaultAtom; +use grammar::{GrammarLabel, GrammarLabelType}; +use graph::{ + builder::BuilderMut, labelled::binary::PLGBuilderMut, Graph, LabelGraph, PLGraph, RedirectGraph, +}; + +use core::fmt::Display; + +/// The type of errors for forest operations. +#[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd)] +pub enum Error { + /// An index is out of bounds. + /// + /// The first component is the index that is out of bounds, and + /// the second component is the current length of nodes. + IndexOutOfBounds(usize, usize), + /// The forest does not permit duplicate nodes but encounters a + /// repeated node. + DuplicatedNode(usize), + /// A node has no labels while it is required to have one. + NodeNoLabel(usize), + /// Encounter an invalid error in converting from an error of + /// graphs. + InvalidGraphError(GError), + /// Encounter an error when converting forest labels. + LabelConversion(ForestLabelError), + /// Trying to split a packed node. + SplitPack(usize), +} + +impl Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Error::IndexOutOfBounds(index, bound) => { + write!(f, "index {index} is out of bound {bound}") + } + Error::DuplicatedNode(n) => write!(f, "node {n} is duplicated"), + Error::InvalidGraphError(ge) => write!(f, "invalid error: {ge}"), + Error::NodeNoLabel(n) => write!(f, "node {n} has no labels, but it should have one"), + Error::LabelConversion(le) => write!(f, "fail to convert labels: {le}"), + Error::SplitPack(n) => write!(f, "cannot split the packed node {n}"), + } + } +} + +impl std::error::Error for Error {} + +impl From<GError> for Error { + fn from(ge: GError) -> Self { + match ge { + GError::IndexOutOfBounds(index, bound) => Self::IndexOutOfBounds(index, bound), + GError::DuplicatedNode(n) => Self::DuplicatedNode(n), + _ => Self::InvalidGraphError(ge), + } + } +} + +impl From<ForestLabelError> for Error { + fn from(le: ForestLabelError) -> Self { + Self::LabelConversion(le) + } +} + +/// A default implementation of forest. +#[derive(Debug, Clone)] +pub struct DefaultForest<T: GraphLabel> { + graph: PLGraph<T>, + root: Option<usize>, +} + +impl<T: GraphLabel> Default for DefaultForest<T> { + fn default() -> Self { + let graph = Default::default(); + let root = None; + + Self { graph, root } + } +} + +impl<T: GraphLabel> AsRef<DefaultForest<T>> for DefaultForest<T> { + fn as_ref(&self) -> &DefaultForest<T> { + self + } +} + +impl<T: GraphLabel> Graph for DefaultForest<T> { + type Iter<'a> = <PLGraph<T> as Graph>::Iter<'a> + where + Self: 'a; + + #[inline] + fn is_empty(&self) -> bool { + self.graph.is_empty() + } + + #[inline] + fn nodes_len(&self) -> usize { + self.graph.nodes_len() + } + + #[inline] + fn edges_len(&self) -> Option<usize> { + self.graph.edges_len() + } + + #[inline] + fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, GError> { + self.graph.children_of(node_id) + } + + #[inline] + fn degree(&self, node_id: usize) -> Result<usize, GError> { + self.graph.degree(node_id) + } + + #[inline] + fn is_empty_node(&self, node_id: usize) -> Result<bool, GError> { + self.graph.is_empty_node(node_id) + } + + #[inline] + fn has_edge(&self, source: usize, target: usize) -> Result<bool, GError> { + self.graph.has_edge(source, target) + } + + fn replace_by_builder(&mut self, _builder: impl graph::Builder<Result = Self>) { + unimplemented!() + } + + #[inline] + fn print_viz(&self, filename: &str) -> Result<(), std::io::Error> { + self.graph.print_viz(filename) + } +} + +impl<T: GraphLabel> ParentsGraph for DefaultForest<T> { + type Iter<'a>= <PLGraph<T> as ParentsGraph>::Iter<'a> + where + Self:'a; + + #[inline] + fn parents_of(&self, node_id: usize) -> Result<<Self as ParentsGraph>::Iter<'_>, GError> { + self.graph.parents_of(node_id) + } + + #[inline] + fn nth_child(&self, node_id: usize, n: usize) -> Result<usize, GError> { + self.graph.nth_child(node_id, n) + } + + #[inline] + fn edge_to_parent( + &self, + source: usize, + target: usize, + ) -> Result<Option<graph::Parent>, GError> { + self.graph.edge_to_parent(source, target) + } +} + +impl<T: GraphLabel> LabelGraph<T> for DefaultForest<T> { + type Iter<'a> = std::iter::Empty<usize> + where + Self: 'a; + + type LabelIter<'a> = std::iter::Empty<(&'a T, <Self as LabelGraph<T>>::Iter<'a>)> + where + Self: 'a, + T: 'a; + + type EdgeLabelIter<'a> = std::iter::Empty<T> + where + Self: 'a, + T: 'a; + + #[inline] + fn query_label(&self, label: T) -> Option<usize> { + self.graph.query_label(label) + } + + #[inline] + fn vertex_label(&self, node_id: usize) -> Result<Option<T>, GError> { + self.graph.vertex_label(node_id) + } + + fn edge_label( + &self, + _source: usize, + _target: usize, + ) -> Result<Self::EdgeLabelIter<'_>, GError> { + unimplemented!("edges have no labels") + } + + fn find_children_with_label( + &self, + _node_id: usize, + _label: &T, + ) -> Result<<Self as LabelGraph<T>>::Iter<'_>, GError> { + unimplemented!("edges have no labels") + } + + fn labels_of(&self, _node_id: usize) -> Result<Self::LabelIter<'_>, GError> { + unimplemented!("edges have no labels") + } + + fn has_edge_label(&self, _node_id: usize, _label: &T, _target: usize) -> Result<bool, GError> { + unimplemented!("edges have no labels") + } +} + +impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> { + type Error = Error; + + fn root(&self) -> Option<usize> { + self.root + } + + fn new_leaf(label: T) -> Self { + let mut graph = PLGraph::default(); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut graph); + + let root = Some(builder.add_vertex(ForestLabel::from(label))); + + Self { graph, root } + } + + fn transform_label( + &mut self, + node_id: usize, + transform: impl FnOnce(T) -> T, + ) -> Result<(), Self::Error> { + let vertex_label = self + .vertex_label(node_id)? + .ok_or(Error::NodeNoLabel(node_id))?; + + let transformed_label = transform(vertex_label.label()); + + let transformed_label = ForestLabel::new(transformed_label, vertex_label.status); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + builder + .set_label(node_id, transformed_label) + .map_err(Into::into) + } + + fn is_prefix<F>(&self, node_id: usize, fragment: F) -> Result<bool, Self::Error> + where + F: Borrow<Self>, + { + if !self.has_node(node_id) { + return Err(Error::IndexOutOfBounds(node_id, self.nodes_len())); + } + + // We do a depth-first traversal to determine if every node + // encountered has the same set of children (labels taken into + // the consideration). + + let fragment = fragment.borrow(); + + let mut frag_stack = Vec::with_capacity(fragment.nodes_len()); + + let mut self_stack = Vec::with_capacity(fragment.nodes_len()); + + let frag_root = if let Some(root) = fragment.root() { + root + } else { + // an empty forest is a prefix of any forest. + return Ok(true); + }; + + frag_stack.push(frag_root); + self_stack.push(node_id); + + // defer popping + while let (Some(frag_top), Some(self_top)) = + (frag_stack.last().copied(), self_stack.last().copied()) + { + frag_stack.pop(); + self_stack.pop(); + + if fragment.vertex_label(frag_top)? != self.vertex_label(self_top)? { + // not a prefix + return Ok(false); + } + + let mut self_children = self.children_of(self_top)?; + + for child in fragment.children_of(frag_top)? { + if let Some(self_child) = self_children.next() { + frag_stack.push(child); + self_stack.push(self_child); + } else { + // too few children + return Ok(false); + } + } + } + + // Check both stacks are empty at the end. + Ok(frag_stack.is_empty() && self_stack.is_empty()) + } + + fn plant<F>(&mut self, node_id: usize, fragment: F, planted: bool) -> Result<(), Self::Error> + where + F: Borrow<Self>, + { + // Convert self to a builder_mut, and traverse fragment in a + // depth-first manner and adjoin corresponding nodes along the + // way. + + if !self.has_node(node_id) { + return Err(Error::IndexOutOfBounds(node_id, self.nodes_len())); + } + + let fragment = fragment.borrow(); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let root = if let Some(root) = fragment.root() { + root + } else { + // Nothing to do to plant an empty forest. + return Ok(()); + }; + + // Just a dummy label for use in adding edges. + // + // REVIEW: I probably should refactor the API for builder_mut. + let root_label = fragment + .vertex_label(root)? + .ok_or(Error::NodeNoLabel(root))?; + + let nodes_len = fragment.nodes_len(); + + /// If the fragment root has a duplicate label, the forest + /// will not grow, so we use the label to find the adjoined + /// node index. + /// + /// The nodes hava already been added to the forest, so it is + /// safe to call unwrap. + macro_rules! conversion ( + ($node:expr) => { + { + builder + .query_label( + fragment + .vertex_label($node)? + .ok_or(Error::NodeNoLabel($node))? + ).unwrap() + } + } + ); + + // If the fragment has been planted before, we just add an + // edge. + + if planted { + builder.add_edge(node_id, conversion!(root), root_label)?; + + return Ok(()); + } + + // First adjoin those nodes and join the edges later. + + for node in 0..nodes_len { + let label = fragment + .vertex_label(node)? + .ok_or(Error::NodeNoLabel(node))?; + + builder.add_vertex(label); + } + + // Don't forget to join the new sub-forest to the original + // forest, at the specified position. + + builder.add_edge(node_id, conversion!(root), root_label)?; + + // We can try to calculate the depth of fragments, if we need + // to lower the memory usage. But in our use cases, we + // usually deal with fragments where each node has at most one + // child, so the depth is supposed to be equal to the length + // in this case. + let mut stack = Vec::with_capacity(fragment.nodes_len()); + + stack.push(root); + + let mut seen_nodes = std::collections::HashSet::<usize>::new(); + + while let Some(top) = stack.pop() { + seen_nodes.insert(top); + + for child in fragment.children_of(top)? { + builder.add_edge(conversion!(top), conversion!(child), root_label)?; + + if !seen_nodes.contains(&child) { + seen_nodes.insert(child); + stack.push(child); + } + } + } + + Ok(()) + } + + fn clone_node( + &mut self, + node_id: usize, + preserved_edges_num: usize, + no_new_clone: bool, + ) -> Result<usize, Self::Error> { + let builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let old_label = builder + .vertex_label(node_id)? + .ok_or(Error::NodeNoLabel(node_id))?; + + if old_label.is_packed() { + dbg!(node_id, old_label); + return Err(ForestLabelError::ClonePack.into()); + } + + // We are sure old_label is not packed here, so it is safe to + // unwrap. + let new_label = old_label + .clone::<DefaultForest<ForestLabel<T>>>(self.borrow()) + .unwrap(); + + if old_label.clone_index().is_some() { + if no_new_clone { + return Ok(new_label.clone_index().unwrap()); + } + + let mut parents = self.parents_of(node_id)?; + + // Make sure it only has one parent, which is the + // representative node. + assert_eq!(parents.len(), 1); + + // We checked its length is 1, so it is safe to unwrap + // here. + let rep_node = parents.next().unwrap().node(); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let new_clone = builder.add_vertex(new_label); + + builder.add_edge(rep_node, new_clone, new_label)?; + + let preserve_children: Vec<_> = builder + .children_of(node_id)? + .take(preserved_edges_num) + .collect(); + + for child in preserve_children.into_iter() { + builder.add_edge(new_clone, child, new_label)?; + } + + return Ok(new_clone); + } + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + // We are sure old_label is not a clone here, so it is safe to + // unwrap. + let rep_label = old_label.pack().unwrap(); + + // Make a new node + let new_index = builder.add_vertex(rep_label); + + // Re-direct parents to the new node. + // + // This must be done before pointing the new node to the old + // node, otherwise that edge will be redirected as well. + + // Unfortunately I cannot loop through parents and mutate them + // at the same time, so I first collect them into a vector. + let parents: Vec<_> = builder.parents_of(node_id)?.collect(); + + for parent in parents.into_iter() { + builder.redirect(parent.node(), parent.edge(), new_index)?; + } + + // Point the new node to the old node. OLD_LABEL is just a + // place holder. + + builder.add_edge(new_index, node_id, old_label)?; + + // Modify the label of the old node. + + builder.set_label(node_id, new_label)?; + + // Make another clone + + // new_label is cloned, so is guaranteed not to be packed, and + // we are safe to unwrap. + let new_label = new_label + .clone::<DefaultForest<ForestLabel<T>>>(self.borrow()) + .unwrap(); + + if no_new_clone { + return Ok(new_label.clone_index().unwrap()); + } + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let new_clone = builder.add_vertex(new_label); + + builder.add_edge(new_index, new_clone, new_label)?; + + let preserve_children: Vec<_> = builder + .children_of(node_id)? + .take(preserved_edges_num) + .collect(); + + for child in preserve_children { + builder.add_edge(new_clone, child, new_label)?; + } + + Ok(new_clone) + } +} + +impl<T: GraphLabel> PartialEq for DefaultForest<ForestLabel<T>> { + fn eq(&self, other: &Self) -> bool { + let self_root = self.root(); + let other_root = other.root(); + + if (self_root.is_some() && other_root.is_none()) + || (self_root.is_none() && other_root.is_some()) + { + return false; + } else if self_root.is_none() && other_root.is_none() { + return true; + } + + // both roots are not empty + + let self_root = self_root.unwrap(); + let other_root = other_root.unwrap(); + + self.is_prefix(self_root, other).unwrap_or(false) + && other.is_prefix(other_root, self).unwrap_or(false) + } +} + +pub mod splone; + +impl<T: GraphLabel> DefaultForest<T> { + /// Create a forest with only one leaf from a raw label, unlike + /// `new_leaf`, which transforms the label for convenience. + pub fn new_leaf_raw(label: T) -> Self { + let mut graph = PLGraph::default(); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut graph); + + let root = Some(builder.add_vertex(label)); + + Self { graph, root } + } +} + +impl<T: GraphLabel> Eq for DefaultForest<ForestLabel<T>> {} + +impl DefaultForest<ForestLabel<GrammarLabel>> { + /// Prints the forest without nodes that do not have ending + /// positions. + /// + /// Nodes without ending positions are "unclosed nodes" that only + /// serve the role of creating nodes with ending positions. + pub fn print_closed_viz(&self, filename: &str) -> Result<(), std::io::Error> { + let discard_nodes: std::collections::HashSet<_> = self + .nodes() + .filter(|node| match self.vertex_label(*node) { + Ok(label) => { + if let Some(label) = label { + label.label().end().is_none() + } else { + true + } + } + Err(_) => true, + }) + .collect(); + + let filename = format!("output/{filename}"); + + let preamble = "digraph nfa { + fontname=\"Helvetica,Arial,sans-serif\" + node [fontname=\"Helvetica,Arial,sans-serif\"] + edge [fontname=\"Helvetica,Arial,sans-serif\"] + rankdir=LR;\n"; + + let mut post = String::new(); + + for node in self.nodes() { + if discard_nodes.contains(&node) { + continue; + } + + post.push_str(&format!( + " {node} [label = \"{}\"]\n", + match self.vertex_label(node) { + Ok(Some(label)) => { + format!("{label}") + } + _ => { + " ε ".to_owned() + } + } + )); + } + + for (source, target) in self.edges() { + if discard_nodes.contains(&source) || discard_nodes.contains(&target) { + continue; + } + + post.push_str(&format!(" {source} -> {target}\n")); + } + + post.push_str("}\n"); + + let result = format!("{preamble}{post}"); + + if std::fs::metadata(&filename).is_ok() { + std::fs::remove_file(&filename)?; + } + + let mut file = std::fs::File::options() + .write(true) + .create(true) + .open(&filename)?; + + use std::io::Write; + + file.write_all(result.as_bytes()) + } +} + +/// Print the labels found in the forest, so that we can easily +/// understand what those labels mean. +pub fn print_labels( + atom: impl Borrow<DefaultAtom>, + forest: impl Borrow<DefaultForest<ForestLabel<GrammarLabel>>>, +) -> Result<(), Box<dyn std::error::Error>> { + let forest = forest.borrow(); + let atom = atom.borrow(); + + for node in forest.nodes() { + let label = forest.vertex_label(node)?; + + if let Some(label) = label { + let label = label.label.label(); + + match label { + GrammarLabelType::TNT(tnt) => { + println!("{tnt} = {}", atom.name_of_tnt(tnt)?); + } + GrammarLabelType::Rule(pos) => { + println!("pos {pos} = {}", atom.rule_pos_string(pos)?); + } + } + } else { + return Err(Error::NodeNoLabel(node).into()); + } + } + + Ok(()) +} + +#[allow(unused_macros)] +macro_rules! leaf ( + ($label:expr, $type:tt) =>{ + DefaultForest::<ForestLabel<$type>>::new_leaf($label) + }; + ($label:expr) => { + DefaultForest::<ForestLabel<usize>>::new_leaf($label) + } +); + +#[allow(unused_imports)] +pub(crate) use leaf; + +#[cfg(test)] +mod item_test { + use super::*; + + #[test] + fn test_forest_api() -> Result<(), Box<dyn std::error::Error>> { + let forest: DefaultForest<ForestLabel<usize>> = Default::default(); + + // empty forest + + assert!(forest.is_empty()); + + // leaf forest + + let mut forest = leaf!(0, usize); + + assert_eq!(forest.nodes_len(), 1); + assert_eq!(forest.root(), Some(0)); + + // add some child + + forest.plant(0, leaf!(1), false)?; + + assert_eq!(forest.nodes_len(), 2); + let mut children = forest.children_of(0)?; + assert_eq!(children.next(), Some(1)); + assert_eq!(children.next(), None); + + // add more children + + forest.plant(0, leaf!(2), false)?; + forest.plant(0, leaf!(3), false)?; + forest.plant(0, leaf!(4), false)?; + forest.plant(2, leaf!(5), false)?; + + assert_eq!(forest.nodes_len(), 6); + let mut children = forest.children_of(0)?; + assert_eq!(children.next(), Some(1)); + assert_eq!(children.next(), Some(2)); + assert_eq!(children.next(), Some(3)); + assert_eq!(children.next(), Some(4)); + let mut children = forest.children_of(2)?; + assert_eq!(children.next(), Some(5)); + assert_eq!(children.next(), None); + + let mut test_forest = leaf!(0); + test_forest.plant(0, leaf!(1), false)?; + test_forest.plant(0, leaf!(2), false)?; + test_forest.plant(0, leaf!(3), false)?; + test_forest.plant(2, leaf!(5), false)?; + + assert!(forest.is_prefix(0, &test_forest)?); + + let mut test_forest = leaf!(0); + test_forest.plant(0, leaf!(1), false)?; + test_forest.plant(0, leaf!(2), false)?; + // this child of the root should have label 3 in order to be a + // prefix. + test_forest.plant(0, leaf!(4), false)?; + test_forest.plant(2, leaf!(5), false)?; + + assert!(!forest.is_prefix(0, &test_forest)?); + + let mut test_forest = leaf!(2); + test_forest.plant(0, leaf!(5), false)?; + + assert!(forest.is_prefix(2, &test_forest)?); + + // now test cloning + + // add a duplicate label + forest.plant(3, leaf!(5), false)?; + + assert_eq!(forest.nodes_len(), 6); + + // add two children to 5 + + forest.plant(5, leaf!(6), false)?; + forest.plant(5, leaf!(7), false)?; + + // clone and preserve one child + let cloned = forest.clone_node(5, 1, false)?; + + assert_eq!(forest.nodes_len(), 10); + + assert_eq!(forest.degree(cloned)?, 1); + assert_eq!(forest.degree(5)?, 2); + + #[cfg(feature = "test-print-viz")] + forest.print_viz("forest.gv")?; + + Ok(()) + } + + #[test] + fn test_eq() -> Result<(), Box<dyn std::error::Error>> { + let mut forest = leaf!(0, usize); + + forest.plant(0, leaf!(1), false)?; + forest.plant(0, leaf!(2), false)?; + forest.plant(0, leaf!(3), false)?; + forest.plant(0, leaf!(4), false)?; + forest.plant(2, leaf!(5), false)?; + + let mut test_forest = leaf!(0); + test_forest.plant(0, leaf!(1), false)?; + test_forest.plant(0, leaf!(2), false)?; + test_forest.plant(0, leaf!(3), false)?; + test_forest.plant(2, leaf!(5), false)?; + + assert_ne!(forest, test_forest); + assert_ne!(test_forest, forest); + + test_forest.plant(0, leaf!(4), false)?; + + assert_eq!(forest, test_forest); + assert_eq!(test_forest, forest); + + Ok(()) + } +} diff --git a/chain/src/item/default/splone.rs b/chain/src/item/default/splone.rs new file mode 100644 index 0000000..1168539 --- /dev/null +++ b/chain/src/item/default/splone.rs @@ -0,0 +1,760 @@ +//! This module implements a function for "closing" and "splitting" a +//! node in a forest, and hence the name. + +use super::*; +use grammar::TNT; + +fn get_nt_label(label: GrammarLabel) -> Option<usize> { + match label.label() { + GrammarLabelType::TNT(TNT::Non(nt)) => Some(nt), + _ => None, + } +} + +fn get_rule_label(label: GrammarLabel) -> Option<usize> { + match label.label() { + GrammarLabelType::Rule(rule) => Some(rule), + _ => None, + } +} + +impl DefaultForest<ForestLabel<GrammarLabel>> { + /// Either "open split" or "closed split". + /// + /// An open split is an attempt to set the end of the node to + /// `None`, and a closed split is an attempt to set the end to a + /// specific position. + /// + /// Also this function preserves `edge_index + 1` many children + /// when splitting or cloning. + /// + /// To be more precise, this function performs the following + /// actions: + /// + /// 1. Make sure it is labelled by a nonterminal. + /// + /// 2. Check the status of the node. If it is packed, return an + /// error. + /// + /// 3. Make a new label according to the given `end`. See the + /// function + /// [`create_new_label`][DefaultForest::<ForestLabel<GrammarLabel>>::create_new_label] + /// for the process of making new labels. + /// + /// 4. If the new label is equal to the old label, and if + /// `edge_index + 1` is equal to the degree of the node, then do + /// nothing. + /// + /// 5. If the new label is equal to the old label but + /// `edge_index+1` is not equal to the degree of the node, then + /// clone the node while preserving `edge_index + 1` many + /// children. + /// + /// 6. If the new label is not equal to the old label, then split + /// the node. See the function + /// [`split_node`][DefaultForest::<ForestLabel<GrammarLabel>>::split_node] + /// for details. + /// + /// # Name + /// + /// The name is a mixture of *split* and *clone*. + /// + /// # NOTE + /// + /// We want to "do the same thing" to each parent of the node, + /// that should be checked to be labelled by a rule position. + /// + /// That is to say, if we replace the label of the node, we also + /// replace the label of the "rule parent". If we split the node, + /// the rule parents are also splitted in a parallel manner with + /// the splitted node. + /// + /// Also, when we refer to the parents of the node in the + /// descriptions of procedures below, we actually refer to the + /// parents of the rule parents, which should again be checked to + /// be labelled by non-terminals. + fn splone(&mut self, node: usize, end: Option<usize>, edge_index: usize) -> Result<(), Error> { + let node_label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?; + + assert!(get_nt_label(node_label.label()).is_some()); + + if node_label.is_packed() { + return Err(Error::SplitPack(node)); + } + + let node_end = node_label.label().end(); + let node_degree = self.degree(node)?; + + // We can check the end to know whether the new label is equal + // to the old label. + if node_end == end { + if node_degree == edge_index + 1 { + return Ok(()); + } + + dbg!(); + self.clone_node(node, edge_index + 1, false)?; + + return Ok(()); + } + + let new_label = self.create_new_label(node, end, edge_index)?; + + let new_label = if let Some(label) = new_label { + label + } else { + return Ok(()); + }; + + if node_end.is_some() + || edge_index + 1 != node_degree + || node_label.clone_index().is_some() + || new_label.clone_index().is_some() + { + return self.split_node(new_label, node, edge_index); + } + + // replace the label directly + + // if new_label.clone_index().is_none() { + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + builder.set_label(node, new_label)?; + + let parents: Vec<_> = builder + .parents_of(node)? + .map(|parent| parent.node()) + .collect(); + + for parent in parents { + let mut parent_label = builder + .vertex_label(parent)? + .ok_or(Error::NodeNoLabel(parent))? + .label(); + + assert!(get_rule_label(parent_label).is_some()); + assert_eq!(builder.degree(parent)?, 1); + + if let Some(pos) = end { + parent_label.set_end(pos); + } else { + parent_label.open_end(); + } + + let parent_label = ForestLabel::from(parent_label); + + builder.set_label(parent, parent_label)?; + } + // } else { + // // REVIEW: Call `split_node` in this situation as well? + + // // If we are here, the new label should have a packed + // // parent. + // let packed = self + // .query_label(ForestLabel::new(new_label.label(), ForestLabelType::Packed)) + // .unwrap(); + + // let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + // builder.set_label(node, new_label)?; + + // let parents: Vec<_> = builder.parents_of(node)?.collect(); + + // for parent in parents.iter() { + // builder.redirect(parent.node(), parent.edge(), packed)?; + // } + + // builder.add_edge(packed, node, new_label)?; + // } + + Ok(()) + } + + /// Procedure to split the node: + /// + /// 1. Create a new node with the new label. + /// + /// 2. Preserve the old children as specified by `edge_index + 1`. + /// + /// 3. For each parent, clone the parent. Replace the original + /// child of the parent, which pointed to the old node, by this + /// new node. Other children are inherited from the old parent. + fn split_node( + &mut self, + new_label: ForestLabel<GrammarLabel>, + mut node: usize, + edge_index: usize, + ) -> Result<(), Error> { + let end = new_label.label().end(); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let mut new_node = builder.add_vertex(new_label); + + let new_packed = if new_label.clone_index().is_some() { + let packed = builder + .query_label(ForestLabel::new(new_label.label(), ForestLabelType::Packed)) + .unwrap(); + + builder.add_edge(packed, new_node, new_label)?; + + Some(packed) + } else { + None + }; + + let preserve_children: Vec<_> = builder.children_of(node)?.take(edge_index + 1).collect(); + + for child in preserve_children { + builder.add_edge(new_node, child, new_label)?; + } + + let parents: Vec<_> = { + if let Some(label) = self.vertex_label(node)? { + if label.clone_index().is_some() { + let mut parents = self.parents_of(node)?; + assert_eq!(parents.len(), 1); + node = parents.next().unwrap().node(); + } + } + + let parents: Vec<_> = self.parents_of(node)?.collect(); + + let mut result: Vec<(Parent, usize)> = Vec::with_capacity( + parents + .iter() + .map(|parent| { + self.parents_of(parent.node()) + .map(|iter| iter.len()) + .unwrap_or(0) + }) + .sum(), + ); + + for parent in parents { + let mut parent_label = self + .vertex_label(parent.node())? + .ok_or_else(|| Error::NodeNoLabel(parent.node()))? + .label(); + + assert!(get_rule_label(parent_label).is_some()); + assert_eq!(self.degree(parent.node())?, 1); + + if let Some(pos) = end { + parent_label.set_end(pos); + } else { + parent_label.open_end(); + } + + let parent_label = ForestLabel::from(parent_label); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let new_parent = builder.add_vertex(parent_label); + + if let Some(packed) = new_packed { + new_node = packed; + } + + builder.add_edge(new_parent, new_node, new_label)?; + + result.extend( + self.parents_of(parent.node())? + .map(|parent_parent| (parent_parent, new_parent)), + ); + } + + result + }; + + for (parent, new_child) in parents { + if self.has_same_children_but_one( + parent.node(), + parent.node(), + parent.edge(), + new_child, + )? { + continue; + } + + // we don't add a child to parent.edge() here. + let cloned = self.clone_node(parent.node(), parent.edge(), false)?; + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + builder.add_edge(cloned, new_child, new_label)?; + + let children_to_add: Vec<_> = builder + .children_of(parent.node())? + .skip(parent.edge() + 1) + .collect(); + + for child in children_to_add { + builder.add_edge(cloned, child, new_label)?; + } + } + + Ok(()) + } + + /// Procedure for the new label: + /// + /// 1. Copy the old label. + /// + /// 2. Set the end to `pos`. + /// + /// 3. Pack the label. + /// + /// 4. Check if this label already exists. If so, clone the label and + /// return it. + /// + /// 5. Else set the label to be a plain label. + /// + /// 6. Check if this plain label already exists. If so, clone the + /// existing label, and return the clone of the cloned label. + /// + /// 7. Else return the plain label. + fn create_new_label( + &mut self, + node: usize, + end: Option<usize>, + edge_index: usize, + ) -> Result<Option<ForestLabel<GrammarLabel>>, Error> { + let mut copied_label = self + .vertex_label(node)? + .ok_or(Error::NodeNoLabel(node))? + .label(); + + if let Some(pos) = end { + copied_label.set_end(pos); + } else { + copied_label.open_end(); + } + + let label = ForestLabel::new(copied_label, ForestLabelType::Packed); + + if let Some(packed) = self.query_label(label) { + for child in self.children_of(packed)? { + if self.has_same_children(child, node, self.degree(child)?, edge_index + 1)? { + return Ok(None); + } + } + + let mut packed_children = self.children_of(packed)?; + + let first_child = packed_children.next().unwrap(); + + let clone_index = self.clone_node(first_child, 0, true)?; + + Ok(Some(ForestLabel::new( + copied_label, + ForestLabelType::Cloned(clone_index), + ))) + } else { + let plain_label = ForestLabel::new(copied_label, ForestLabelType::Plain); + + if let Some(existing) = self.query_label(plain_label) { + if self.has_same_children(existing, node, self.degree(existing)?, edge_index + 1)? { + return Ok(None); + } + + let clone_index = self.clone_node(existing, 0, true)?; + + Ok(Some(ForestLabel::new( + copied_label, + ForestLabelType::Cloned(clone_index), + ))) + } else { + Ok(Some(plain_label)) + } + } + } + + /// Compare if two nodes have the same children in the same order. + fn has_same_children( + &self, + nodea: usize, + nodeb: usize, + edge_num_a: usize, + edge_num_b: usize, + ) -> Result<bool, Error> { + let children_a = self.children_of(nodea)?.take(edge_num_a); + let children_b = self.children_of(nodeb)?.take(edge_num_b); + + if children_a.len() != children_b.len() { + return Ok(false); + } + + for (childa, childb) in children_a.zip(children_b) { + if childa != childb { + return Ok(false); + } + } + + Ok(true) + } + + /// Detect if a node has a branch that has the same children as + /// another node, except at a given index, where it points to + /// another given node. + fn has_same_children_but_one( + &self, + nodea: usize, + nodeb: usize, + edge_index: usize, + alternative: usize, + ) -> Result<bool, Error> { + let labela = self.vertex_label(nodea)?.ok_or(Error::NodeNoLabel(nodea))?; + let children_b = self.children_of(nodeb)?; + + if labela.is_plain() { + let children_a = self.children_of(nodea)?; + + if children_a.len() != children_b.len() { + return Ok(false); + } + + for (index, (childa, childb)) in children_a.zip(children_b).enumerate() { + if index != edge_index { + if childa != childb { + return Ok(false); + } + } else if childa != alternative { + return Ok(false); + } + } + + Ok(true) + } else if labela.clone_index().is_some() { + let mut parentsa = self.parents_of(nodea)?; + + assert_eq!(parentsa.len(), 1); + + let parenta = parentsa.next().unwrap().node(); + + 'branch_loop: for branch in self.children_of(parenta)? { + let children_a = self.children_of(branch)?; + let children_b = children_b.clone(); + + if children_a.len() != children_b.len() { + continue 'branch_loop; + } + + for (index, (childa, childb)) in children_a.zip(children_b).enumerate() { + if index != edge_index { + if childa != childb { + continue 'branch_loop; + } + } else if childa != alternative { + continue 'branch_loop; + } + } + + return Ok(true); + } + + Ok(false) + } else { + // a packed node; this should not happen + unreachable!("should not examine children of a packed node") + } + } +} + +#[cfg(test)] +mod test_splone { + use super::*; + + use grammar::TNT::*; + + fn create_test_forest( + ) -> Result<DefaultForest<ForestLabel<GrammarLabel>>, Box<dyn std::error::Error>> { + let mut forest = leaf!(GrammarLabel::new(Non(0), 0), GrammarLabel); + + forest.plant( + 0, + leaf!(GrammarLabel::new_closed(6, 0, 1), GrammarLabel), + false, + )?; + forest.plant( + 1, + leaf!(GrammarLabel::new_closed(Ter(0), 0, 1), GrammarLabel), + false, + )?; + + forest.plant(0, leaf!(GrammarLabel::new(7, 1), GrammarLabel), false)?; + forest.plant(3, leaf!(GrammarLabel::new(Non(0), 1), GrammarLabel), false)?; + + forest.plant(4, leaf!(GrammarLabel::new(3, 1), GrammarLabel), false)?; + forest.plant(5, leaf!(GrammarLabel::new(Non(1), 1), GrammarLabel), false)?; + + forest.plant( + 6, + leaf!(GrammarLabel::new_closed(11, 1, 2), GrammarLabel), + false, + )?; + forest.plant( + 7, + leaf!(GrammarLabel::new_closed(Ter(2), 1, 2), GrammarLabel), + false, + )?; + + forest.plant( + 6, + leaf!(GrammarLabel::new_closed(13, 2, 3), GrammarLabel), + false, + )?; + forest.plant( + 9, + leaf!(GrammarLabel::new_closed(Ter(2), 2, 3), GrammarLabel), + false, + )?; + + forest.print_viz("test forest.gv")?; + + Ok(forest) + } + + fn splone_6_3_1() -> Result<DefaultForest<ForestLabel<GrammarLabel>>, Box<dyn std::error::Error>> + { + let mut forest = create_test_forest()?; + + let mut builder = PLGBuilderMut::from_graph_mut(&mut forest.graph); + + builder.set_label( + 5, + ForestLabel::new(GrammarLabel::new_closed(3, 1, 3), ForestLabelType::Plain), + )?; + + builder.set_label( + 6, + ForestLabel::new( + GrammarLabel::new_closed(Non(1), 1, 3), + ForestLabelType::Plain, + ), + )?; + + Ok(forest) + } + + fn splone_6_2_0() -> Result<DefaultForest<ForestLabel<GrammarLabel>>, Box<dyn std::error::Error>> + { + let mut forest = splone_6_3_1()?; + + let cloned = forest.clone_node(4, 0, false)?; + + forest.plant( + cloned, + leaf!(GrammarLabel::new_closed(3, 1, 2), GrammarLabel), + false, + )?; + + forest.plant( + cloned + 1, + leaf!(GrammarLabel::new_closed(Non(1), 1, 2), GrammarLabel), + false, + )?; + + let mut builder = PLGBuilderMut::from_graph_mut(&mut forest.graph); + + let dummy_label = ForestLabel::from(GrammarLabel::new(0, 0)); + + builder.add_edge(cloned + 2, 7, dummy_label)?; + + Ok(forest) + } + + fn splone_6_n_0() -> Result<DefaultForest<ForestLabel<GrammarLabel>>, Box<dyn std::error::Error>> + { + let mut forest = splone_6_2_0()?; + + let cloned = forest.clone_node(4, 0, false)?; + + forest.plant(cloned, leaf!(GrammarLabel::new(3, 1), GrammarLabel), false)?; + + forest.plant( + cloned + 1, + leaf!(GrammarLabel::new(Non(1), 1), GrammarLabel), + false, + )?; + + let mut builder = PLGBuilderMut::from_graph_mut(&mut forest.graph); + + let dummy_label = ForestLabel::from(GrammarLabel::new(0, 0)); + + builder.add_edge(cloned + 2, 7, dummy_label)?; + + Ok(forest) + } + + fn splone_4_3_0() -> Result<DefaultForest<ForestLabel<GrammarLabel>>, Box<dyn std::error::Error>> + { + let mut forest = splone_6_n_0()?; + + let cloned = forest.clone_node(0, 0, false)?; + + forest.plant( + cloned, + leaf!(GrammarLabel::new_closed(7, 1, 3), GrammarLabel), + false, + )?; + + forest.plant( + cloned + 1, + leaf!(GrammarLabel::new_closed(Non(0), 1, 3), GrammarLabel), + false, + )?; + + forest.plant( + cloned, + leaf!(GrammarLabel::new_closed(6, 0, 1), GrammarLabel), + true, + )?; + + let mut builder = PLGBuilderMut::from_graph_mut(&mut forest.graph); + + let dummy_label = ForestLabel::from(GrammarLabel::new(0, 0)); + + builder.add_edge(cloned + 2, 5, dummy_label)?; + + Ok(forest) + } + + fn splone_17_3_0_15_3_0( + ) -> Result<DefaultForest<ForestLabel<GrammarLabel>>, Box<dyn std::error::Error>> { + let mut forest = splone_4_3_0()?; + + let to_clone = forest + .query_label(ForestLabel::from(GrammarLabel::new_closed(Non(1), 1, 3))) + .unwrap(); + + let cloned = forest.clone_node(to_clone, 0, false)?; + + let mut builder = PLGBuilderMut::from_graph_mut(&mut forest.graph); + + let dummy_label = ForestLabel::from(GrammarLabel::new(0, 0)); + + let child = builder + .query_label(ForestLabel::from(GrammarLabel::new_closed(11, 1, 2))) + .unwrap(); + + builder.add_edge(cloned, child, dummy_label)?; + + let to_clone = forest + .query_label(ForestLabel::from(GrammarLabel::new_closed(Non(0), 1, 3))) + .unwrap(); + + let cloned = forest.clone_node(to_clone, 0, false)?; + + let mut builder = PLGBuilderMut::from_graph_mut(&mut forest.graph); + + let child = builder + .query_label(ForestLabel::from(GrammarLabel::new(3, 1))) + .unwrap(); + + builder.add_edge(cloned, child, dummy_label)?; + + Ok(forest) + } + + #[test] + fn test() -> Result<(), Box<dyn std::error::Error>> { + let mut test_forest = create_test_forest()?; + + test_forest.splone(6, Some(3), 1)?; + + let answer = splone_6_3_1()?; + + if test_forest != answer { + test_forest.print_viz("sploned forest.gv")?; + answer.print_viz("splone 6 3 1.gv")?; + panic!("splone(6, Some(3), 1) produced wrong forest"); + } + + test_forest.splone(6, Some(3), 1)?; + + if test_forest != answer { + test_forest.print_viz("sploned forest.gv")?; + answer.print_viz("splone 6 3 1.gv")?; + panic!("repeated splone(6, Some(3), 1) produced wrong forest"); + } + + test_forest.splone(6, Some(2), 0)?; + + let answer = splone_6_2_0()?; + + if test_forest != answer { + test_forest.print_viz("sploned forest.gv")?; + answer.print_viz("splone 6 2 0.gv")?; + panic!("splone(6, Some(2), 0) produced wrong forest"); + } + + test_forest.splone(6, Some(2), 0)?; + + if test_forest != answer { + test_forest.print_viz("sploned forest.gv")?; + answer.print_viz("splone 6 2 0.gv")?; + panic!("repeated splone(6, Some(2), 0) produced wrong forest"); + } + + test_forest.splone(6, None, 0)?; + + let answer = splone_6_n_0()?; + + if test_forest != answer { + test_forest.print_viz("sploned forest.gv")?; + answer.print_viz("splone 6 None 0.gv")?; + panic!("splone(6, None, 0) produced wrong forest"); + } + + test_forest.splone(14, None, 0)?; + + if test_forest != answer { + test_forest.print_viz("sploned forest.gv")?; + answer.print_viz("splone 6 None 0.gv")?; + panic!("repeated splone(6, None, 0) produced wrong forest"); + } + + test_forest.splone(4, Some(3), 0)?; + + let answer = splone_4_3_0()?; + + if test_forest != answer { + test_forest.print_viz("sploned forest.gv")?; + answer.print_viz("splone 4 3 0.gv")?; + panic!("splone(4, Some(3), 0) produced wrong forest"); + } + + test_forest.splone(4, Some(3), 0)?; + + if test_forest != answer { + test_forest.print_viz("sploned forest.gv")?; + answer.print_viz("splone 4 3 0.gv")?; + panic!("repeated splone(4, Some(3), 0) produced wrong forest"); + } + + test_forest.splone(17, Some(3), 0)?; + test_forest.splone(15, Some(3), 0)?; + + let answer = splone_17_3_0_15_3_0()?; + + if test_forest != answer { + test_forest.print_viz("sploned forest.gv")?; + answer.print_viz("splone 17 3 0 15 3 0.gv")?; + panic!("splone(17, Some(3), 0) - splone(15, Some(3), 0) produced wrong forest"); + } + + test_forest.splone(17, Some(3), 0)?; + test_forest.splone(15, Some(3), 0)?; + + if test_forest != answer { + test_forest.print_viz("sploned forest.gv")?; + answer.print_viz("splone 17 3 0 15 3 0.gv")?; + panic!( + "repeated splone(17, Some(3), 0) - splone(15, Some(3), 0) produced wrong forest" + ); + } + + Ok(()) + } +} |