diff options
author | JSDurand <mmemmew@gmail.com> | 2023-01-13 14:26:28 +0800 |
---|---|---|
committer | JSDurand <mmemmew@gmail.com> | 2023-01-13 14:26:28 +0800 |
commit | 8f8d3d1a3c276be4be2e5d2e767ada564c47279a (patch) | |
tree | daba317c8d381f7159f9a34d957291472bad2873 | |
parent | 3c6511f69c7639abff60ac9999a08ce2daa24a7d (diff) |
forest seems to be completed
I seem to have finished the implementation of forests. Now it remains
the implementation of the chain-rule machine, of which I have a rough
plan now.
-rw-r--r-- | Cargo.toml | 2 | ||||
-rw-r--r-- | chain/src/plan.org | 19 | ||||
-rw-r--r-- | forest/Cargo.toml | 6 | ||||
-rw-r--r-- | forest/src/default.rs | 484 | ||||
-rw-r--r-- | forest/src/lib.rs | 81 | ||||
-rw-r--r-- | grammar/Cargo.toml | 3 | ||||
-rw-r--r-- | grammar/src/lib.rs | 15 | ||||
-rw-r--r-- | grammar/src/tests/test_grammar_left_closure.rs | 38 | ||||
-rw-r--r-- | graph/src/labelled/binary.rs | 171 | ||||
-rw-r--r-- | graph/src/labelled/mod.rs | 1 | ||||
-rw-r--r-- | graph/src/lib.rs | 18 |
11 files changed, 727 insertions, 111 deletions
@@ -13,7 +13,7 @@ rust-version = "1.65" [workspace] members = [ "graph", "receme", "nfa", "chain", "viz", "grammar", - "forest", "semiring"] + "forest", "semiring", "graph_macro" ] # testing the new resolver, even though this has no dependencies ;p resolver = "2" diff --git a/chain/src/plan.org b/chain/src/plan.org index b708413..84192f0 100644 --- a/chain/src/plan.org +++ b/chain/src/plan.org @@ -2,7 +2,7 @@ #+AUTHOR: Durand #+DATE: <2022-11-18 Ven 19:57> -* Things to do [6/10] +* Things to do [7/10] - [X] Implement builders for graphs - [X] Find sets of the first terminals for each non-terminal, in the @@ -79,6 +79,15 @@ lack of this step might be the root cause of the failure of the previous version of this project. + [X] Test atoms +- [X] Implement forests [4/4] + + [X] Design a format of forests. This should be the most crucial + thing to do, in order to have a better understanding of the whole + picture. I need to have a clear understanding of the details of + the binarised shared packed parsed forests, that reflects the + regular expressions in the grammar equations. + + [X] Define a trait with the expected behaviours. + + [X] Implement them as parents-knowing graphs. + + [X] Test - [-] Implement languages. [1/4] + [X] First define a trait with the expected behaviours. + [ ] Then implement them as doubly labelled graphs. @@ -86,14 +95,6 @@ + [ ] Each edge in the chain-rule machine needs to be labelled also with a position in the forest. This perfectly solves the problem of those "plugs"! -- [-] Implement forests [2/3] - + [X] Design a format of forests. This should be the most crucial - thing to do, in order to have a better understanding of the whole - picture. I need to have a clear understanding of the details of - the binarised shared packed parsed forests, that reflects the - regular expressions in the grammar equations. - + [X] Define a trait with the expected behaviours. - + [-] Implement them as parents-knowing graphs. - [ ] Implement semiring. [0/5] + [ ] Define the trait. + [ ] Implement the boolean semiring. diff --git a/forest/Cargo.toml b/forest/Cargo.toml index b88451d..c51bf36 100644 --- a/forest/Cargo.toml +++ b/forest/Cargo.toml @@ -5,5 +5,11 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[features] +default = [] + +# This flag controls whether to print GraphViz files during testing. +test-print-viz = [] + [dependencies] graph = { path = "../graph" } diff --git a/forest/src/default.rs b/forest/src/default.rs index d3970e9..d79c1c7 100644 --- a/forest/src/default.rs +++ b/forest/src/default.rs @@ -1,21 +1,471 @@ //! This file provides a default implementation for the //! [`Forest`][crate::Forest] trait. -#[allow(unused_imports)] use super::*; -#[allow(unused_imports)] -use graph::{error::Error as GraphError, ALGraph, Graph}; - -#[allow(unused_imports)] -use std::collections::{hash_set::Iter, HashMap, HashSet}; - -// TODO: Use PLGraph instead. - -// #[derive(Debug, Clone)] -// pub struct DefaultForest<NodeLabel: GraphLabel, EdgeLabel: GraphLabel> { -// graph: ALGraph, -// vertex_labels: HashMap<usize, NodeLabel>, -// edge_labels: HashMap<(usize, usize), EdgeLabel>, -// plugins: HashSet<usize>, -// plugouts: HashSet<usize>, -// } +use graph::{ + builder::BuilderMut, labelled::binary::PLGBuilderMut, Graph, LabelGraph, PLGraph, RedirectGraph, +}; + +use core::fmt::Display; + +/// The type of errors for forest operations. +#[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd)] +pub enum Error { + /// An index is out of bounds. + /// + /// The first component is the index that is out of bounds, and + /// the second component is the current length of nodes. + IndexOutOfBounds(usize, usize), + /// The forest does not permit duplicate nodes but encounters a + /// repeated node. + DuplicatedNode(usize), + /// A node has no labels while it is required to have one. + NodeNoLabel(usize), + /// Encounter an invalid error in converting from an error of + /// graphs. + InvalidGraphError(GError), +} + +impl Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Error::IndexOutOfBounds(index, bound) => { + write!(f, "index {index} is out of bound {bound}") + } + Error::DuplicatedNode(n) => write!(f, "node {n} is duplicated"), + Error::InvalidGraphError(ge) => write!(f, "invalid error: {ge}"), + Error::NodeNoLabel(n) => write!(f, "node {n} has no labels, but it should have one"), + } + } +} + +impl std::error::Error for Error {} + +impl From<GError> for Error { + fn from(ge: GError) -> Self { + match ge { + GError::IndexOutOfBounds(index, bound) => Self::IndexOutOfBounds(index, bound), + GError::DuplicatedNode(n) => Self::DuplicatedNode(n), + _ => Self::InvalidGraphError(ge), + } + } +} + +/// A default implementation of forest. +#[derive(Debug, Clone)] +pub struct DefaultForest<T: GraphLabel> { + graph: PLGraph<T>, + root: Option<usize>, +} + +impl<T: GraphLabel> Default for DefaultForest<T> { + fn default() -> Self { + let graph = Default::default(); + let root = None; + + Self { graph, root } + } +} + +impl<T: GraphLabel> AsRef<DefaultForest<T>> for DefaultForest<T> { + fn as_ref(&self) -> &DefaultForest<T> { + &self + } +} + +impl<T: GraphLabel> Graph for DefaultForest<T> { + type Iter<'a> = <PLGraph<T> as Graph>::Iter<'a> + where + Self: 'a; + + #[inline] + fn is_empty(&self) -> bool { + self.graph.is_empty() + } + + #[inline] + fn nodes_len(&self) -> usize { + self.graph.nodes_len() + } + + #[inline] + fn edges_len(&self) -> Option<usize> { + self.graph.edges_len() + } + + #[inline] + fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, GError> { + self.graph.children_of(node_id) + } + + #[inline] + fn degree(&self, node_id: usize) -> Result<usize, GError> { + self.graph.degree(node_id) + } + + #[inline] + fn is_empty_node(&self, node_id: usize) -> Result<bool, GError> { + self.graph.is_empty_node(node_id) + } + + #[inline] + fn has_edge(&self, source: usize, target: usize) -> Result<bool, GError> { + self.graph.has_edge(source, target) + } + + fn replace_by_builder(&mut self, _builder: impl graph::Builder<Result = Self>) { + unimplemented!() + } +} + +impl<T: GraphLabel> ParentsGraph for DefaultForest<T> { + type Iter<'a>= <PLGraph<T> as ParentsGraph>::Iter<'a> + where + Self:'a; + + fn parents_of(&self, node_id: usize) -> Result<<Self as ParentsGraph>::Iter<'_>, GError> { + self.graph.parents_of(node_id) + } +} + +impl<T: GraphLabel> LabelGraph<T> for DefaultForest<T> { + type Iter<'a> = std::iter::Empty<usize> + where + Self: 'a; + + type LabelIter<'a> = std::iter::Empty<(&'a T, <Self as LabelGraph<T>>::Iter<'a>)> + where + Self: 'a, + T: 'a; + + type EdgeLabelIter<'a> = std::iter::Empty<T> + where + Self: 'a, + T: 'a; + + #[inline] + fn query_label(&self, label: T) -> Option<usize> { + self.graph.query_label(label) + } + + #[inline] + fn vertex_label(&self, node_id: usize) -> Result<Option<T>, GError> { + self.graph.vertex_label(node_id) + } + + fn edge_label( + &self, + _source: usize, + _target: usize, + ) -> Result<Self::EdgeLabelIter<'_>, GError> { + unimplemented!("edges have no labels") + } + + fn find_children_with_label( + &self, + _node_id: usize, + _label: &T, + ) -> Result<<Self as LabelGraph<T>>::Iter<'_>, GError> { + unimplemented!("edges have no labels") + } + + fn labels_of(&self, _node_id: usize) -> Result<Self::LabelIter<'_>, GError> { + unimplemented!("edges have no labels") + } + + fn has_edge_label(&self, _node_id: usize, _label: &T, _target: usize) -> Result<bool, GError> { + unimplemented!("edges have no labels") + } +} + +impl<T: GraphLabel> Forest<T> for DefaultForest<T> { + type Error = Error; + + fn root(&self) -> Option<usize> { + self.root + } + + fn new_leaf(label: T) -> Self { + let mut graph = PLGraph::default(); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut graph); + + let root = Some(builder.add_vertex(label)); + + Self { graph, root } + } + + fn is_prefix<F>(&self, node_id: usize, fragment: F) -> Result<bool, Self::Error> + where + F: AsRef<Self>, + { + if !self.has_node(node_id) { + return Err(Error::IndexOutOfBounds(node_id, self.nodes_len())); + } + + // We do a depth-first traversal to determine if every node + // encountered has the same set of children (labels taken into + // the consideration). + + let fragment = fragment.as_ref(); + + let mut frag_stack = Vec::with_capacity(fragment.nodes_len()); + + let mut self_stack = Vec::with_capacity(fragment.nodes_len()); + + let frag_root = if let Some(root) = fragment.root() { + root + } else { + // an empty forest is a prefix of any forest. + return Ok(true); + }; + + frag_stack.push(frag_root); + self_stack.push(node_id); + + // defer popping + while let (Some(frag_top), Some(self_top)) = + (frag_stack.last().copied(), self_stack.last().copied()) + { + frag_stack.pop(); + self_stack.pop(); + + if fragment.vertex_label(frag_top)? != self.vertex_label(self_top)? { + // not a prefix + return Ok(false); + } + + let mut self_children = self.children_of(self_top)?; + + for child in fragment.children_of(frag_top)? { + if let Some(self_child) = self_children.next() { + frag_stack.push(child); + self_stack.push(self_child); + } else { + // too few children + return Ok(false); + } + } + } + + // Check both stacks are empty at the end. + Ok(frag_stack.is_empty() && self_stack.is_empty()) + } + + fn plant<F>(&mut self, node_id: usize, fragment: F) -> Result<(), Self::Error> + where + F: AsRef<Self>, + { + // Convert self to a builder_mut, and traverse fragment in a + // depth-first manner and adjoin corresponding nodes along the + // way. + + if !self.has_node(node_id) { + return Err(Error::IndexOutOfBounds(node_id, self.nodes_len())); + } + + let fragment = fragment.as_ref(); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let root = if let Some(root) = fragment.root() { + root + } else { + // Nothing to do to plant an empty forest. + return Ok(()); + }; + + // Just a dummy label for use in adding edges. + // + // REVIEW: I probably should refactor the API for builder_mut. + let root_label = fragment + .vertex_label(root)? + .ok_or(Error::NodeNoLabel(root))?; + + let nodes_len = fragment.nodes_len(); + + // First adjoin those nodes and join the edges later. + + for node in 0..nodes_len { + let label = fragment + .vertex_label(node)? + .ok_or(Error::NodeNoLabel(node))?; + + builder.add_vertex(label); + } + + // If the fragment root has a duplicate label, the forest will + // not grow, so we use the label to find the adjoined node + // index. + + // the nodes hava already been added to the forest, so it is + // safe to call unwrap. + macro_rules! conversion ( + ($node:expr) => { + { + builder + .query_label( + fragment + .vertex_label($node)? + .ok_or(Error::NodeNoLabel($node))? + ).unwrap() + } + } + ); + + // Don't forget to join the new sub-forest to the original + // forest, at the specified position. + + builder.add_edge(node_id, conversion!(root), root_label)?; + + // We can try to calculate the depth of fragments, if we need + // to lower the memory usage. But in our use cases, we + // usually deal with fragments where each node has at most one + // child, so the depth is supposed to be equal to the length + // in this case. + let mut stack = Vec::with_capacity(fragment.nodes_len()); + + stack.push(root); + + while let Some(top) = stack.pop() { + for child in fragment.children_of(top)? { + builder.add_edge(conversion!(top), conversion!(child), root_label)?; + } + } + + Ok(()) + } + + fn clone_node<F>(&mut self, node_id: usize, clone_transform: F) -> Result<usize, Self::Error> + where + F: Fn(T) -> T, + { + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let old_label = builder + .vertex_label(node_id)? + .ok_or(Error::NodeNoLabel(node_id))?; + + let new_label = clone_transform(old_label); + + // Make a new node + let new_index = builder.add_vertex(new_label); + + // Re-direct parents to the new node. + // + // This must be done before pointing the new node to the old + // node, otherwise that edge will be redirected as well. + + // Unfortunately I cannot loop through parents and mutate them + // at the same time, so I first collect them into a vector. + let parents: Vec<_> = builder.parents_of(node_id)?.collect(); + + for parent in parents.into_iter() { + builder.redirect(parent.node(), parent.edge(), new_index)?; + } + + // Point the new node to the old node. OLD_LABEL is just a + // place holder. + + builder.add_edge(new_index, node_id, old_label)?; + + Ok(new_index) + } +} + +#[cfg(test)] +mod forest_test { + use super::*; + + macro_rules! leaf ( + ($label:expr, $type:tt) =>{ + DefaultForest::<$type>::new_leaf($label) + }; + ($label:expr) => { + DefaultForest::<usize>::new_leaf($label) + } + ); + + #[test] + fn test_forest_api() -> Result<(), Box<dyn std::error::Error>> { + let forest: DefaultForest<usize> = Default::default(); + + // empty forest + + assert!(forest.is_empty()); + + // leaf forest + + let mut forest = leaf!(0, usize); + + assert_eq!(forest.nodes_len(), 1); + assert_eq!(forest.root(), Some(0)); + + // add some child + + forest.plant(0, leaf!(1))?; + + assert_eq!(forest.nodes_len(), 2); + let mut children = forest.children_of(0)?; + assert_eq!(children.next(), Some(1)); + assert_eq!(children.next(), None); + + // add more children + + forest.plant(0, leaf!(2))?; + forest.plant(0, leaf!(3))?; + forest.plant(0, leaf!(4))?; + forest.plant(2, leaf!(5))?; + + assert_eq!(forest.nodes_len(), 6); + let mut children = forest.children_of(0)?; + assert_eq!(children.next(), Some(1)); + assert_eq!(children.next(), Some(2)); + assert_eq!(children.next(), Some(3)); + assert_eq!(children.next(), Some(4)); + let mut children = forest.children_of(2)?; + assert_eq!(children.next(), Some(5)); + assert_eq!(children.next(), None); + + let mut test_forest = leaf!(0); + test_forest.plant(0, leaf!(1))?; + test_forest.plant(0, leaf!(2))?; + test_forest.plant(0, leaf!(3))?; + test_forest.plant(2, leaf!(5))?; + + assert!(forest.is_prefix(0, &test_forest)?); + + let mut test_forest = leaf!(0); + test_forest.plant(0, leaf!(1))?; + test_forest.plant(0, leaf!(2))?; + // this child of the root should have label 3 in order to be a + // prefix. + test_forest.plant(0, leaf!(4))?; + test_forest.plant(2, leaf!(5))?; + + assert!(!forest.is_prefix(0, &test_forest)?); + + let mut test_forest = leaf!(2); + test_forest.plant(0, leaf!(5))?; + + assert!(forest.is_prefix(2, &test_forest)?); + + // now test cloning + + // add a duplicate label + forest.plant(3, leaf!(5))?; + + let len = forest.nodes_len(); + + let clone_transform = |label| label + len; + + forest.clone_node(5, clone_transform)?; + + assert_eq!(forest.nodes_len(), 7); + + #[cfg(feature = "test-print-viz")] + forest.print_viz("forest.gv")?; + + Ok(()) + } +} diff --git a/forest/src/lib.rs b/forest/src/lib.rs index c2f4988..f843bc1 100644 --- a/forest/src/lib.rs +++ b/forest/src/lib.rs @@ -11,57 +11,7 @@ //! out-coming and in-coming plugs. These plugs are used to join //! different fragments of forests together. -use graph::{GraphLabel, LabelGraph, ParentsGraph}; - -use core::fmt::Display; - -// TODO: move this to default - -/// The type of errors for forest operations. -#[derive(Debug)] -pub enum Error { - /// An index is out of bounds. - IndexOutOfBounds(usize, usize), -} - -impl Display for Error { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Error::IndexOutOfBounds(index, bound) => { - write!(f, "index {index} is out of bound {bound}") - } - } - } -} - -impl std::error::Error for Error {} - -// /// A builder of a forest. -// pub trait ForestBuilder<NodeLabel: GraphLabel, EdgeLabel: GraphLabel> { -// /// The type of the resulting forest. -// type Output: Forest<NodeLabel, EdgeLabel>; - -// /// Construct a new builder with only one node with the given -// /// label. -// fn new_leaf(label: NodeLabel) -> Self; - -// /// Add a child to the builder the given labels for the new node -// /// and the added edge. -// /// -// /// All plug-out nodes within the builder should have a new child -// /// with the specified labels, and hence multiple children might -// /// be added, and the plug-out nodes should be modified -// /// accordingly. -// fn add_children(&mut self, vertex_label: NodeLabel, edge_labe: EdgeLabel); - -// /// Build the forest. -// fn build(self) -> Self::Output; - -// /// Build the forest from a reference. -// fn build_ref(&self) -> Self::Output; -// } - -// FIXME: The trait should be re-designed. +use graph::{error::Error as GError, GraphLabel, LabelGraph, ParentsGraph}; /// The expected behaviours of a forest. /// @@ -69,19 +19,38 @@ impl std::error::Error for Error {} /// labelled graphs. pub trait Forest<T: GraphLabel>: ParentsGraph + LabelGraph<T> { /// The type of errors for operations on the forest. - type Error: std::error::Error + From<graph::error::Error>; + type Error: std::error::Error + From<GError>; + + /// Return the root of the forest. + /// + /// A forest without a root is regarded as empty. + fn root(&self) -> Option<usize>; + + /// Construct a forest consisting of one leaf node with the given + /// label. + fn new_leaf(label: T) -> Self; /// Detect if the fragment is a prefix of the sub-forest rooted at /// `node_id`. - fn is_prefix<F: AsRef<Self>>(&self, node_id: usize, fragment: F) -> Result<bool, Self::Error>; + fn is_prefix<F>(&self, node_id: usize, fragment: F) -> Result<bool, Self::Error> + where + F: AsRef<Self>; /// Extend the forest by adjoining another forest at a given node. - fn plant<F: AsRef<Self>>(&mut self, node_id: usize, fragment: F) -> Result<(), Self::Error>; + fn plant<F>(&mut self, node_id: usize, fragment: F) -> Result<(), Self::Error> + where + F: AsRef<Self>; /// Clone a node by making a new node and making all the nodes /// that previously pointed to the old node now point to the new - /// node, and the new node points to the old node. - fn clone(&mut self, node_id: usize) -> Result<(), Self::Error>; + /// node, and the new node points to the old node. Return the + /// index of the new node. + /// + /// The label of the new node will be given by the label + /// transformer. + fn clone_node<F>(&mut self, node_id: usize, clone_transform: F) -> Result<usize, Self::Error> + where + F: Fn(T) -> T; } pub mod default; diff --git a/grammar/Cargo.toml b/grammar/Cargo.toml index 20c4b48..793ab5a 100644 --- a/grammar/Cargo.toml +++ b/grammar/Cargo.toml @@ -12,6 +12,9 @@ default = [] # some grammars for testing. test-helper = [] +# This flag controls whether to print GraphViz files during testing. +test-print-viz = [] + [dependencies] nfa = { path = "../nfa" } graph = { path = "../graph" } diff --git a/grammar/src/lib.rs b/grammar/src/lib.rs index 627ae6f..297cb66 100644 --- a/grammar/src/lib.rs +++ b/grammar/src/lib.rs @@ -462,6 +462,19 @@ impl Grammar { // REVIEW: Do we have a better way to record expansion information // than to compute the transitive closure? + // REVIEW: We need a way to eliminate those left-linearly expanded + // edges whose labels had already been considered, and we need to + // preserve the transition of the `left_p` property at the same + // time. + // + // Maybe we could decide to delete those edges in the + // `remove_predicate`? But we cannot access the states of NFA in + // that predicate, in the current design, thus we need to refactor + // some codes, it seems: we need a way to "compactify" an NFA, by + // a key function, in such a way that if two entries have the same + // key (determined by the key function), then only one, determined + // by another function, remains in the NFA. + /// A transformer of labels to be fed into /// [`closure`][nfa::default::nfa::DefaultNFA::closure], with the /// predicate that returns true if and only if the label of the @@ -483,7 +496,7 @@ impl Grammar { } // Compute if this is from left-linear expansion: it is so if - // and only if one if either the edges comes from left-linear + // and only if either one of the edges comes from left-linear // expansion or we are moving across a non-terminal expansion, // that is to say, the source of the second edge is the // starting edge of a non-terminal. diff --git a/grammar/src/tests/test_grammar_left_closure.rs b/grammar/src/tests/test_grammar_left_closure.rs index 003c211..ffc7c0f 100644 --- a/grammar/src/tests/test_grammar_left_closure.rs +++ b/grammar/src/tests/test_grammar_left_closure.rs @@ -33,9 +33,6 @@ fn test_regex() -> Result<(), Box<dyn std::error::Error>> { Ok(()) } -// We ignore this test by default as it is possibly involved with -// printing to a graphviz file. -#[ignore] #[test] fn test_nfa() -> Result<(), Box<dyn std::error::Error>> { let mut grammar = new_notes_grammar()?; @@ -69,19 +66,22 @@ fn test_nfa() -> Result<(), Box<dyn std::error::Error>> { grammar .left_closure_to_nfa(&closure) .map(|_| ()) - .map_err(Into::into) + .map_err(Into::<Box<dyn std::error::Error>>::into)?; // let _nfa = grammar.left_closure_to_nfa(&closure)?; - // writeln!(lock, "Not printing nfa to nfa.gv")?; + #[cfg(features = "test-print-viz")] + { + writeln!(lock, "Not printing nfa to nfa.gv")?; - // nfa.print_viz("nfa.gv").map_err(Into::into) + nfa.print_viz("nfa.gv") + .map_err(Into::<Box<dyn std::error::Error>>::into)?; + } - // Ok(()) + Ok(()) } #[test] -#[ignore] fn test_remove_epsilon() -> Result<(), Box<dyn std::error::Error>> { let mut lock = stdout().lock(); @@ -123,17 +123,18 @@ fn test_remove_epsilon() -> Result<(), Box<dyn std::error::Error>> { let mut nfa = grammar.left_closure_to_nfa(&closure)?; + #[cfg(features = "test-print-viz")] nfa.print_viz("nfa_orig.gv")?; nfa.remove_epsilon(|label| label.get_value().is_none())?; + #[cfg(features = "test-print-viz")] nfa.print_viz("nfa_no_epsilon.gv")?; Ok(()) } #[test] -#[ignore] fn test_remove_dead() -> Result<(), Box<dyn std::error::Error>> { let mut grammar = new_paren_grammar()?; let closure = new_closure_regex(&mut grammar)?; @@ -173,9 +174,10 @@ fn test_remove_dead() -> Result<(), Box<dyn std::error::Error>> { let mut nfa = grammar.left_closure_to_nfa(&closure)?; + #[cfg(features = "test-print-viz")] nfa.print_viz("nfa_orig.gv")?; - nfa.remove_epsilon(|label| label.get_value().is_none())?; + // nfa.remove_epsilon(|label| label.get_value().is_none())?; let accumulators: HashSet<usize> = accumulators.into_iter().collect(); @@ -183,15 +185,22 @@ fn test_remove_dead() -> Result<(), Box<dyn std::error::Error>> { let grammar_reserve_node = |node| accumulators.contains(&node); + nfa.closure( + |label| label.get_value().is_none(), + true, + |two_edges| two_edges.second_edge().2, + |label| label.get_value().is_none(), + )?; + nfa.remove_dead(grammar_reserve_node)?; + #[cfg(features = "test-print-viz")] nfa.print_viz("nfa_no_dead.gv")?; Ok(()) } #[test] -#[ignore] fn test_nulling() -> Result<(), Box<dyn std::error::Error>> { // TODO: Test more grammars. let mut grammar = new_right_recursive_grammar()?; @@ -273,9 +282,12 @@ fn test_nulling() -> Result<(), Box<dyn std::error::Error>> { nfa.remove_dead(grammar_reserve_nodes)?; - writeln!(lock, "Printing nfa to nfa.gv")?; + #[cfg(features = "test-print-viz")] + { + writeln!(lock, "Printing nfa to nfa.gv")?; - nfa.print_viz("nfa.gv")?; + nfa.print_viz("nfa.gv")?; + } Ok(()) } diff --git a/graph/src/labelled/binary.rs b/graph/src/labelled/binary.rs index 67d86f9..bfd8109 100644 --- a/graph/src/labelled/binary.rs +++ b/graph/src/labelled/binary.rs @@ -2,7 +2,7 @@ //! labels and each node knows about its parents. use super::*; -use crate::{Parent, ParentsGraph}; +use crate::{Parent, ParentsGraph, RedirectGraph}; use std::{ collections::{hash_map::Iter as MapIter, HashMap as Map}, @@ -142,6 +142,11 @@ impl<T: GraphLabel> Graph for PLGraph<T> { } #[inline] + fn edges_len(&self) -> Option<usize> { + Some(self.nodes.iter().map(|node| node.children.len()).sum()) + } + + #[inline] fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, Error> { if let Some(node) = self.nodes.get(node_id) { Ok(node.children.iter().copied()) @@ -243,6 +248,76 @@ impl<T: GraphLabel> ParentsGraph for PLGraph<T> { } } +impl<T: GraphLabel> RedirectGraph for PLGraph<T> { + fn redirect( + &mut self, + node_id: usize, + child_index: usize, + new_child: usize, + ) -> Result<(), Error> { + let nodes_len = self.nodes.len(); + + if !self.has_node(new_child) { + return Err(Error::IndexOutOfBounds(new_child, nodes_len)); + } + + if let Some(node) = self.nodes.get_mut(node_id) { + if node.children.len() <= child_index { + return Err(Error::IndexOutOfBounds(child_index, node.children.len())); + } + + // Check if `new_child` is already pointed to by the node. + if let Some(index) = node.children.indices.get(&new_child) { + // This should not happen in our use case, but we + // should still do somthing: as the edges cannot + // duplicate, we simply remove the original edge, + // unless index = child_index, in which case the old + // child is equal to the new child, and we have + // nothing to do. + if *index != child_index { + node.children.remove(new_child); + } + } else { + // The index has been checked above, so it is safe to + // call `unwrap` here. + let old_child = std::mem::replace( + node.children.children.get_mut(child_index).unwrap(), + new_child, + ); + + node.children.indices.remove(&old_child); + + node.children.indices.insert(new_child, child_index); + + // Don't forget to remove `node` from the parents of + // the old child. + + if let Some(old_child_node) = self.nodes.get_mut(old_child) { + old_child_node.parents.remove(&node_id); + } else { + // The node contained an invalid child. + return Err(Error::IndexOutOfBounds(old_child, nodes_len)); + } + + // Don't forget to add node as a parent to the new + // child. + + // new_child has been checked at the beginning of the + // function, so it is safe to call `unwrap`. + self.nodes + .get_mut(new_child) + .unwrap() + .parents + .insert(node_id, child_index); + } + + Ok(()) + } else { + Err(Error::IndexOutOfBounds(node_id, nodes_len)) + } + } +} + impl<T: GraphLabel> LabelGraph<T> for PLGraph<T> { type Iter<'a> = std::iter::Empty<usize> where @@ -299,6 +374,20 @@ pub struct PLGBuilderMut<'a, T: GraphLabel> { graph: &'a mut PLGraph<T>, } +impl<'a, T: GraphLabel> std::ops::Deref for PLGBuilderMut<'a, T> { + type Target = PLGraph<T>; + + fn deref(&self) -> &Self::Target { + &self.graph + } +} + +impl<'a, T: GraphLabel> std::ops::DerefMut for PLGBuilderMut<'a, T> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.graph + } +} + impl<'a, T: GraphLabel> BuilderMut for PLGBuilderMut<'a, T> { type Label = T; @@ -467,6 +556,8 @@ mod binary_test { builder.add_vertex(4); builder.add_vertex(5); + // These labels are not used on edges: they are just place + // holders. builder.add_edge(1, 0, 0)?; builder.add_edge(2, 0, 0)?; builder.add_edge(2, 1, 0)?; @@ -481,43 +572,73 @@ mod binary_test { // testing adding a duplicatedly labelled node assert_eq!(builder.add_vertex(0), 0); - let graph = graph; - // ensuring the correct length - assert_eq!(graph.nodes_len(), 6); + assert_eq!(builder.nodes_len(), 6); // testing children_of - assert_eq!(graph.children_of(5)?.collect::<Set<_>>(), set!(1, 3, 2)); + assert_eq!(builder.children_of(5)?.collect::<Set<_>>(), set!(1, 3, 2)); // testing parents_of assert_eq!( - graph.parents_of(0)?.collect::<Set<_>>(), + builder.parents_of(0)?.collect::<Set<_>>(), set!(Parent::new(1, 0), Parent::new(2, 0), Parent::new(3, 0)) ); assert_eq!( - graph.parents_of(1)?.collect::<Set<_>>(), + builder.parents_of(1)?.collect::<Set<_>>(), set!(Parent::new(2, 1), Parent::new(4, 0), Parent::new(5, 2)) ); - assert_eq!(graph.parents_of(5)?.len(), 0); + assert_eq!(builder.parents_of(5)?.len(), 0); // testing degree - assert_eq!(graph.degree(4)?, 2); + assert_eq!(builder.degree(4)?, 2); // testing is_empty_node - assert!(graph.is_empty_node(0)?); - assert!(!graph.is_empty_node(1)?); + assert!(builder.is_empty_node(0)?); + assert!(!builder.is_empty_node(1)?); // testing has_edge - assert!(graph.has_edge(3, 2)?); - assert!(!graph.has_edge(3, 1)?); + assert!(builder.has_edge(3, 2)?); + assert!(!builder.has_edge(3, 1)?); assert!(matches!( - graph.has_edge(3, 6), + builder.has_edge(3, 6), Err(Error::IndexOutOfBounds(6, 6)) )); + // testing redirect + builder.redirect(5, 2, 0)?; + assert_eq!(builder.children_of(5)?.collect::<Set<_>>(), set!(0, 3, 2)); + + assert_eq!( + builder.parents_of(0)?.collect::<Set<_>>(), + set!( + Parent::new(1, 0), + Parent::new(2, 0), + Parent::new(3, 0), + Parent::new(5, 2) + ) + ); + + builder.redirect(5, 0, 1)?; + + assert_eq!(builder.children_of(5)?.collect::<Set<_>>(), set!(1, 0, 3)); + + assert_eq!( + builder.parents_of(1)?.collect::<Set<_>>(), + set!(Parent::new(2, 1), Parent::new(4, 0), Parent::new(5, 0)) + ); + + builder.redirect(5, 0, 1)?; // should be no-op + + assert_eq!(builder.children_of(5)?.collect::<Set<_>>(), set!(1, 0, 3)); + + assert_eq!( + builder.parents_of(1)?.collect::<Set<_>>(), + set!(Parent::new(2, 1), Parent::new(4, 0), Parent::new(5, 0)) + ); + Ok(()) } } @@ -636,6 +757,28 @@ mod test_plgraph_builder { // println!(); + // source out of bounds + assert!(matches!( + builder.redirect(5, 0, 0), + Err(Error::IndexOutOfBounds(5, 5)) + )); + + // child_index out of bounds + assert!(matches!( + builder.redirect(4, 0, 0), + Err(Error::IndexOutOfBounds(0, 0)) + )); + + // new_child out of bounds + assert!(matches!( + builder.redirect(4, 0, 10), + Err(Error::IndexOutOfBounds(10, 5)) + )); + + // println!("Correct errors when redirecting"); + + // println!(); + let graph = graph; println!("final graph: {graph:?}"); diff --git a/graph/src/labelled/mod.rs b/graph/src/labelled/mod.rs index fa26bc4..2bbc7ec 100644 --- a/graph/src/labelled/mod.rs +++ b/graph/src/labelled/mod.rs @@ -16,5 +16,6 @@ pub mod double; pub use double::{DLGBuilder, DLGraph}; pub mod binary; +pub use binary::{PLGBuilderMut, PLGraph}; // pub use binary::BLGraph; diff --git a/graph/src/lib.rs b/graph/src/lib.rs index 6813df3..6af7889 100644 --- a/graph/src/lib.rs +++ b/graph/src/lib.rs @@ -23,6 +23,7 @@ pub use adlist::ALGraph; pub mod labelled; pub use labelled::DLGraph; +pub use labelled::PLGraph; pub mod builder; @@ -252,6 +253,23 @@ pub trait ParentsGraph: Graph { fn parents_of(&self, node_id: usize) -> Result<<Self as ParentsGraph>::Iter<'_>, Error>; } +// TODO: Design a trait of graphs which can "replace" a certain child +// by another child. To re-direct children, so to speak. + +/// An /exended/ graph in the sense that it offers the ability to +/// "redirect" children of a node to another node. +pub trait RedirectGraph: Graph { + /// Replace the edge that points from `node_id` to the + /// `child_index`-th child by a new edge that points to + /// `new_child`. + fn redirect( + &mut self, + node_id: usize, + child_index: usize, + new_child: usize, + ) -> Result<(), Error>; +} + /// A labelled graph is just a graph with labels associated to /// vertices and / or edges. /// |