summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2023-01-13 14:26:28 +0800
committerJSDurand <mmemmew@gmail.com>2023-01-13 14:26:28 +0800
commit8f8d3d1a3c276be4be2e5d2e767ada564c47279a (patch)
treedaba317c8d381f7159f9a34d957291472bad2873
parent3c6511f69c7639abff60ac9999a08ce2daa24a7d (diff)
forest seems to be completed
I seem to have finished the implementation of forests. Now it remains the implementation of the chain-rule machine, of which I have a rough plan now.
-rw-r--r--Cargo.toml2
-rw-r--r--chain/src/plan.org19
-rw-r--r--forest/Cargo.toml6
-rw-r--r--forest/src/default.rs484
-rw-r--r--forest/src/lib.rs81
-rw-r--r--grammar/Cargo.toml3
-rw-r--r--grammar/src/lib.rs15
-rw-r--r--grammar/src/tests/test_grammar_left_closure.rs38
-rw-r--r--graph/src/labelled/binary.rs171
-rw-r--r--graph/src/labelled/mod.rs1
-rw-r--r--graph/src/lib.rs18
11 files changed, 727 insertions, 111 deletions
diff --git a/Cargo.toml b/Cargo.toml
index e34a8cf..1ddd0a0 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,7 +13,7 @@ rust-version = "1.65"
[workspace]
members = [ "graph", "receme", "nfa", "chain", "viz", "grammar",
- "forest", "semiring"]
+ "forest", "semiring", "graph_macro" ]
# testing the new resolver, even though this has no dependencies ;p
resolver = "2"
diff --git a/chain/src/plan.org b/chain/src/plan.org
index b708413..84192f0 100644
--- a/chain/src/plan.org
+++ b/chain/src/plan.org
@@ -2,7 +2,7 @@
#+AUTHOR: Durand
#+DATE: <2022-11-18 Ven 19:57>
-* Things to do [6/10]
+* Things to do [7/10]
- [X] Implement builders for graphs
- [X] Find sets of the first terminals for each non-terminal, in the
@@ -79,6 +79,15 @@
lack of this step might be the root cause of the failure of the
previous version of this project.
+ [X] Test atoms
+- [X] Implement forests [4/4]
+ + [X] Design a format of forests. This should be the most crucial
+ thing to do, in order to have a better understanding of the whole
+ picture. I need to have a clear understanding of the details of
+ the binarised shared packed parsed forests, that reflects the
+ regular expressions in the grammar equations.
+ + [X] Define a trait with the expected behaviours.
+ + [X] Implement them as parents-knowing graphs.
+ + [X] Test
- [-] Implement languages. [1/4]
+ [X] First define a trait with the expected behaviours.
+ [ ] Then implement them as doubly labelled graphs.
@@ -86,14 +95,6 @@
+ [ ] Each edge in the chain-rule machine needs to be labelled also
with a position in the forest. This perfectly solves the problem
of those "plugs"!
-- [-] Implement forests [2/3]
- + [X] Design a format of forests. This should be the most crucial
- thing to do, in order to have a better understanding of the whole
- picture. I need to have a clear understanding of the details of
- the binarised shared packed parsed forests, that reflects the
- regular expressions in the grammar equations.
- + [X] Define a trait with the expected behaviours.
- + [-] Implement them as parents-knowing graphs.
- [ ] Implement semiring. [0/5]
+ [ ] Define the trait.
+ [ ] Implement the boolean semiring.
diff --git a/forest/Cargo.toml b/forest/Cargo.toml
index b88451d..c51bf36 100644
--- a/forest/Cargo.toml
+++ b/forest/Cargo.toml
@@ -5,5 +5,11 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+[features]
+default = []
+
+# This flag controls whether to print GraphViz files during testing.
+test-print-viz = []
+
[dependencies]
graph = { path = "../graph" }
diff --git a/forest/src/default.rs b/forest/src/default.rs
index d3970e9..d79c1c7 100644
--- a/forest/src/default.rs
+++ b/forest/src/default.rs
@@ -1,21 +1,471 @@
//! This file provides a default implementation for the
//! [`Forest`][crate::Forest] trait.
-#[allow(unused_imports)]
use super::*;
-#[allow(unused_imports)]
-use graph::{error::Error as GraphError, ALGraph, Graph};
-
-#[allow(unused_imports)]
-use std::collections::{hash_set::Iter, HashMap, HashSet};
-
-// TODO: Use PLGraph instead.
-
-// #[derive(Debug, Clone)]
-// pub struct DefaultForest<NodeLabel: GraphLabel, EdgeLabel: GraphLabel> {
-// graph: ALGraph,
-// vertex_labels: HashMap<usize, NodeLabel>,
-// edge_labels: HashMap<(usize, usize), EdgeLabel>,
-// plugins: HashSet<usize>,
-// plugouts: HashSet<usize>,
-// }
+use graph::{
+ builder::BuilderMut, labelled::binary::PLGBuilderMut, Graph, LabelGraph, PLGraph, RedirectGraph,
+};
+
+use core::fmt::Display;
+
+/// The type of errors for forest operations.
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd)]
+pub enum Error {
+ /// An index is out of bounds.
+ ///
+ /// The first component is the index that is out of bounds, and
+ /// the second component is the current length of nodes.
+ IndexOutOfBounds(usize, usize),
+ /// The forest does not permit duplicate nodes but encounters a
+ /// repeated node.
+ DuplicatedNode(usize),
+ /// A node has no labels while it is required to have one.
+ NodeNoLabel(usize),
+ /// Encounter an invalid error in converting from an error of
+ /// graphs.
+ InvalidGraphError(GError),
+}
+
+impl Display for Error {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ match self {
+ Error::IndexOutOfBounds(index, bound) => {
+ write!(f, "index {index} is out of bound {bound}")
+ }
+ Error::DuplicatedNode(n) => write!(f, "node {n} is duplicated"),
+ Error::InvalidGraphError(ge) => write!(f, "invalid error: {ge}"),
+ Error::NodeNoLabel(n) => write!(f, "node {n} has no labels, but it should have one"),
+ }
+ }
+}
+
+impl std::error::Error for Error {}
+
+impl From<GError> for Error {
+ fn from(ge: GError) -> Self {
+ match ge {
+ GError::IndexOutOfBounds(index, bound) => Self::IndexOutOfBounds(index, bound),
+ GError::DuplicatedNode(n) => Self::DuplicatedNode(n),
+ _ => Self::InvalidGraphError(ge),
+ }
+ }
+}
+
+/// A default implementation of forest.
+#[derive(Debug, Clone)]
+pub struct DefaultForest<T: GraphLabel> {
+ graph: PLGraph<T>,
+ root: Option<usize>,
+}
+
+impl<T: GraphLabel> Default for DefaultForest<T> {
+ fn default() -> Self {
+ let graph = Default::default();
+ let root = None;
+
+ Self { graph, root }
+ }
+}
+
+impl<T: GraphLabel> AsRef<DefaultForest<T>> for DefaultForest<T> {
+ fn as_ref(&self) -> &DefaultForest<T> {
+ &self
+ }
+}
+
+impl<T: GraphLabel> Graph for DefaultForest<T> {
+ type Iter<'a> = <PLGraph<T> as Graph>::Iter<'a>
+ where
+ Self: 'a;
+
+ #[inline]
+ fn is_empty(&self) -> bool {
+ self.graph.is_empty()
+ }
+
+ #[inline]
+ fn nodes_len(&self) -> usize {
+ self.graph.nodes_len()
+ }
+
+ #[inline]
+ fn edges_len(&self) -> Option<usize> {
+ self.graph.edges_len()
+ }
+
+ #[inline]
+ fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, GError> {
+ self.graph.children_of(node_id)
+ }
+
+ #[inline]
+ fn degree(&self, node_id: usize) -> Result<usize, GError> {
+ self.graph.degree(node_id)
+ }
+
+ #[inline]
+ fn is_empty_node(&self, node_id: usize) -> Result<bool, GError> {
+ self.graph.is_empty_node(node_id)
+ }
+
+ #[inline]
+ fn has_edge(&self, source: usize, target: usize) -> Result<bool, GError> {
+ self.graph.has_edge(source, target)
+ }
+
+ fn replace_by_builder(&mut self, _builder: impl graph::Builder<Result = Self>) {
+ unimplemented!()
+ }
+}
+
+impl<T: GraphLabel> ParentsGraph for DefaultForest<T> {
+ type Iter<'a>= <PLGraph<T> as ParentsGraph>::Iter<'a>
+ where
+ Self:'a;
+
+ fn parents_of(&self, node_id: usize) -> Result<<Self as ParentsGraph>::Iter<'_>, GError> {
+ self.graph.parents_of(node_id)
+ }
+}
+
+impl<T: GraphLabel> LabelGraph<T> for DefaultForest<T> {
+ type Iter<'a> = std::iter::Empty<usize>
+ where
+ Self: 'a;
+
+ type LabelIter<'a> = std::iter::Empty<(&'a T, <Self as LabelGraph<T>>::Iter<'a>)>
+ where
+ Self: 'a,
+ T: 'a;
+
+ type EdgeLabelIter<'a> = std::iter::Empty<T>
+ where
+ Self: 'a,
+ T: 'a;
+
+ #[inline]
+ fn query_label(&self, label: T) -> Option<usize> {
+ self.graph.query_label(label)
+ }
+
+ #[inline]
+ fn vertex_label(&self, node_id: usize) -> Result<Option<T>, GError> {
+ self.graph.vertex_label(node_id)
+ }
+
+ fn edge_label(
+ &self,
+ _source: usize,
+ _target: usize,
+ ) -> Result<Self::EdgeLabelIter<'_>, GError> {
+ unimplemented!("edges have no labels")
+ }
+
+ fn find_children_with_label(
+ &self,
+ _node_id: usize,
+ _label: &T,
+ ) -> Result<<Self as LabelGraph<T>>::Iter<'_>, GError> {
+ unimplemented!("edges have no labels")
+ }
+
+ fn labels_of(&self, _node_id: usize) -> Result<Self::LabelIter<'_>, GError> {
+ unimplemented!("edges have no labels")
+ }
+
+ fn has_edge_label(&self, _node_id: usize, _label: &T, _target: usize) -> Result<bool, GError> {
+ unimplemented!("edges have no labels")
+ }
+}
+
+impl<T: GraphLabel> Forest<T> for DefaultForest<T> {
+ type Error = Error;
+
+ fn root(&self) -> Option<usize> {
+ self.root
+ }
+
+ fn new_leaf(label: T) -> Self {
+ let mut graph = PLGraph::default();
+
+ let mut builder = PLGBuilderMut::from_graph_mut(&mut graph);
+
+ let root = Some(builder.add_vertex(label));
+
+ Self { graph, root }
+ }
+
+ fn is_prefix<F>(&self, node_id: usize, fragment: F) -> Result<bool, Self::Error>
+ where
+ F: AsRef<Self>,
+ {
+ if !self.has_node(node_id) {
+ return Err(Error::IndexOutOfBounds(node_id, self.nodes_len()));
+ }
+
+ // We do a depth-first traversal to determine if every node
+ // encountered has the same set of children (labels taken into
+ // the consideration).
+
+ let fragment = fragment.as_ref();
+
+ let mut frag_stack = Vec::with_capacity(fragment.nodes_len());
+
+ let mut self_stack = Vec::with_capacity(fragment.nodes_len());
+
+ let frag_root = if let Some(root) = fragment.root() {
+ root
+ } else {
+ // an empty forest is a prefix of any forest.
+ return Ok(true);
+ };
+
+ frag_stack.push(frag_root);
+ self_stack.push(node_id);
+
+ // defer popping
+ while let (Some(frag_top), Some(self_top)) =
+ (frag_stack.last().copied(), self_stack.last().copied())
+ {
+ frag_stack.pop();
+ self_stack.pop();
+
+ if fragment.vertex_label(frag_top)? != self.vertex_label(self_top)? {
+ // not a prefix
+ return Ok(false);
+ }
+
+ let mut self_children = self.children_of(self_top)?;
+
+ for child in fragment.children_of(frag_top)? {
+ if let Some(self_child) = self_children.next() {
+ frag_stack.push(child);
+ self_stack.push(self_child);
+ } else {
+ // too few children
+ return Ok(false);
+ }
+ }
+ }
+
+ // Check both stacks are empty at the end.
+ Ok(frag_stack.is_empty() && self_stack.is_empty())
+ }
+
+ fn plant<F>(&mut self, node_id: usize, fragment: F) -> Result<(), Self::Error>
+ where
+ F: AsRef<Self>,
+ {
+ // Convert self to a builder_mut, and traverse fragment in a
+ // depth-first manner and adjoin corresponding nodes along the
+ // way.
+
+ if !self.has_node(node_id) {
+ return Err(Error::IndexOutOfBounds(node_id, self.nodes_len()));
+ }
+
+ let fragment = fragment.as_ref();
+
+ let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph);
+
+ let root = if let Some(root) = fragment.root() {
+ root
+ } else {
+ // Nothing to do to plant an empty forest.
+ return Ok(());
+ };
+
+ // Just a dummy label for use in adding edges.
+ //
+ // REVIEW: I probably should refactor the API for builder_mut.
+ let root_label = fragment
+ .vertex_label(root)?
+ .ok_or(Error::NodeNoLabel(root))?;
+
+ let nodes_len = fragment.nodes_len();
+
+ // First adjoin those nodes and join the edges later.
+
+ for node in 0..nodes_len {
+ let label = fragment
+ .vertex_label(node)?
+ .ok_or(Error::NodeNoLabel(node))?;
+
+ builder.add_vertex(label);
+ }
+
+ // If the fragment root has a duplicate label, the forest will
+ // not grow, so we use the label to find the adjoined node
+ // index.
+
+ // the nodes hava already been added to the forest, so it is
+ // safe to call unwrap.
+ macro_rules! conversion (
+ ($node:expr) => {
+ {
+ builder
+ .query_label(
+ fragment
+ .vertex_label($node)?
+ .ok_or(Error::NodeNoLabel($node))?
+ ).unwrap()
+ }
+ }
+ );
+
+ // Don't forget to join the new sub-forest to the original
+ // forest, at the specified position.
+
+ builder.add_edge(node_id, conversion!(root), root_label)?;
+
+ // We can try to calculate the depth of fragments, if we need
+ // to lower the memory usage. But in our use cases, we
+ // usually deal with fragments where each node has at most one
+ // child, so the depth is supposed to be equal to the length
+ // in this case.
+ let mut stack = Vec::with_capacity(fragment.nodes_len());
+
+ stack.push(root);
+
+ while let Some(top) = stack.pop() {
+ for child in fragment.children_of(top)? {
+ builder.add_edge(conversion!(top), conversion!(child), root_label)?;
+ }
+ }
+
+ Ok(())
+ }
+
+ fn clone_node<F>(&mut self, node_id: usize, clone_transform: F) -> Result<usize, Self::Error>
+ where
+ F: Fn(T) -> T,
+ {
+ let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph);
+
+ let old_label = builder
+ .vertex_label(node_id)?
+ .ok_or(Error::NodeNoLabel(node_id))?;
+
+ let new_label = clone_transform(old_label);
+
+ // Make a new node
+ let new_index = builder.add_vertex(new_label);
+
+ // Re-direct parents to the new node.
+ //
+ // This must be done before pointing the new node to the old
+ // node, otherwise that edge will be redirected as well.
+
+ // Unfortunately I cannot loop through parents and mutate them
+ // at the same time, so I first collect them into a vector.
+ let parents: Vec<_> = builder.parents_of(node_id)?.collect();
+
+ for parent in parents.into_iter() {
+ builder.redirect(parent.node(), parent.edge(), new_index)?;
+ }
+
+ // Point the new node to the old node. OLD_LABEL is just a
+ // place holder.
+
+ builder.add_edge(new_index, node_id, old_label)?;
+
+ Ok(new_index)
+ }
+}
+
+#[cfg(test)]
+mod forest_test {
+ use super::*;
+
+ macro_rules! leaf (
+ ($label:expr, $type:tt) =>{
+ DefaultForest::<$type>::new_leaf($label)
+ };
+ ($label:expr) => {
+ DefaultForest::<usize>::new_leaf($label)
+ }
+ );
+
+ #[test]
+ fn test_forest_api() -> Result<(), Box<dyn std::error::Error>> {
+ let forest: DefaultForest<usize> = Default::default();
+
+ // empty forest
+
+ assert!(forest.is_empty());
+
+ // leaf forest
+
+ let mut forest = leaf!(0, usize);
+
+ assert_eq!(forest.nodes_len(), 1);
+ assert_eq!(forest.root(), Some(0));
+
+ // add some child
+
+ forest.plant(0, leaf!(1))?;
+
+ assert_eq!(forest.nodes_len(), 2);
+ let mut children = forest.children_of(0)?;
+ assert_eq!(children.next(), Some(1));
+ assert_eq!(children.next(), None);
+
+ // add more children
+
+ forest.plant(0, leaf!(2))?;
+ forest.plant(0, leaf!(3))?;
+ forest.plant(0, leaf!(4))?;
+ forest.plant(2, leaf!(5))?;
+
+ assert_eq!(forest.nodes_len(), 6);
+ let mut children = forest.children_of(0)?;
+ assert_eq!(children.next(), Some(1));
+ assert_eq!(children.next(), Some(2));
+ assert_eq!(children.next(), Some(3));
+ assert_eq!(children.next(), Some(4));
+ let mut children = forest.children_of(2)?;
+ assert_eq!(children.next(), Some(5));
+ assert_eq!(children.next(), None);
+
+ let mut test_forest = leaf!(0);
+ test_forest.plant(0, leaf!(1))?;
+ test_forest.plant(0, leaf!(2))?;
+ test_forest.plant(0, leaf!(3))?;
+ test_forest.plant(2, leaf!(5))?;
+
+ assert!(forest.is_prefix(0, &test_forest)?);
+
+ let mut test_forest = leaf!(0);
+ test_forest.plant(0, leaf!(1))?;
+ test_forest.plant(0, leaf!(2))?;
+ // this child of the root should have label 3 in order to be a
+ // prefix.
+ test_forest.plant(0, leaf!(4))?;
+ test_forest.plant(2, leaf!(5))?;
+
+ assert!(!forest.is_prefix(0, &test_forest)?);
+
+ let mut test_forest = leaf!(2);
+ test_forest.plant(0, leaf!(5))?;
+
+ assert!(forest.is_prefix(2, &test_forest)?);
+
+ // now test cloning
+
+ // add a duplicate label
+ forest.plant(3, leaf!(5))?;
+
+ let len = forest.nodes_len();
+
+ let clone_transform = |label| label + len;
+
+ forest.clone_node(5, clone_transform)?;
+
+ assert_eq!(forest.nodes_len(), 7);
+
+ #[cfg(feature = "test-print-viz")]
+ forest.print_viz("forest.gv")?;
+
+ Ok(())
+ }
+}
diff --git a/forest/src/lib.rs b/forest/src/lib.rs
index c2f4988..f843bc1 100644
--- a/forest/src/lib.rs
+++ b/forest/src/lib.rs
@@ -11,57 +11,7 @@
//! out-coming and in-coming plugs. These plugs are used to join
//! different fragments of forests together.
-use graph::{GraphLabel, LabelGraph, ParentsGraph};
-
-use core::fmt::Display;
-
-// TODO: move this to default
-
-/// The type of errors for forest operations.
-#[derive(Debug)]
-pub enum Error {
- /// An index is out of bounds.
- IndexOutOfBounds(usize, usize),
-}
-
-impl Display for Error {
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- match self {
- Error::IndexOutOfBounds(index, bound) => {
- write!(f, "index {index} is out of bound {bound}")
- }
- }
- }
-}
-
-impl std::error::Error for Error {}
-
-// /// A builder of a forest.
-// pub trait ForestBuilder<NodeLabel: GraphLabel, EdgeLabel: GraphLabel> {
-// /// The type of the resulting forest.
-// type Output: Forest<NodeLabel, EdgeLabel>;
-
-// /// Construct a new builder with only one node with the given
-// /// label.
-// fn new_leaf(label: NodeLabel) -> Self;
-
-// /// Add a child to the builder the given labels for the new node
-// /// and the added edge.
-// ///
-// /// All plug-out nodes within the builder should have a new child
-// /// with the specified labels, and hence multiple children might
-// /// be added, and the plug-out nodes should be modified
-// /// accordingly.
-// fn add_children(&mut self, vertex_label: NodeLabel, edge_labe: EdgeLabel);
-
-// /// Build the forest.
-// fn build(self) -> Self::Output;
-
-// /// Build the forest from a reference.
-// fn build_ref(&self) -> Self::Output;
-// }
-
-// FIXME: The trait should be re-designed.
+use graph::{error::Error as GError, GraphLabel, LabelGraph, ParentsGraph};
/// The expected behaviours of a forest.
///
@@ -69,19 +19,38 @@ impl std::error::Error for Error {}
/// labelled graphs.
pub trait Forest<T: GraphLabel>: ParentsGraph + LabelGraph<T> {
/// The type of errors for operations on the forest.
- type Error: std::error::Error + From<graph::error::Error>;
+ type Error: std::error::Error + From<GError>;
+
+ /// Return the root of the forest.
+ ///
+ /// A forest without a root is regarded as empty.
+ fn root(&self) -> Option<usize>;
+
+ /// Construct a forest consisting of one leaf node with the given
+ /// label.
+ fn new_leaf(label: T) -> Self;
/// Detect if the fragment is a prefix of the sub-forest rooted at
/// `node_id`.
- fn is_prefix<F: AsRef<Self>>(&self, node_id: usize, fragment: F) -> Result<bool, Self::Error>;
+ fn is_prefix<F>(&self, node_id: usize, fragment: F) -> Result<bool, Self::Error>
+ where
+ F: AsRef<Self>;
/// Extend the forest by adjoining another forest at a given node.
- fn plant<F: AsRef<Self>>(&mut self, node_id: usize, fragment: F) -> Result<(), Self::Error>;
+ fn plant<F>(&mut self, node_id: usize, fragment: F) -> Result<(), Self::Error>
+ where
+ F: AsRef<Self>;
/// Clone a node by making a new node and making all the nodes
/// that previously pointed to the old node now point to the new
- /// node, and the new node points to the old node.
- fn clone(&mut self, node_id: usize) -> Result<(), Self::Error>;
+ /// node, and the new node points to the old node. Return the
+ /// index of the new node.
+ ///
+ /// The label of the new node will be given by the label
+ /// transformer.
+ fn clone_node<F>(&mut self, node_id: usize, clone_transform: F) -> Result<usize, Self::Error>
+ where
+ F: Fn(T) -> T;
}
pub mod default;
diff --git a/grammar/Cargo.toml b/grammar/Cargo.toml
index 20c4b48..793ab5a 100644
--- a/grammar/Cargo.toml
+++ b/grammar/Cargo.toml
@@ -12,6 +12,9 @@ default = []
# some grammars for testing.
test-helper = []
+# This flag controls whether to print GraphViz files during testing.
+test-print-viz = []
+
[dependencies]
nfa = { path = "../nfa" }
graph = { path = "../graph" }
diff --git a/grammar/src/lib.rs b/grammar/src/lib.rs
index 627ae6f..297cb66 100644
--- a/grammar/src/lib.rs
+++ b/grammar/src/lib.rs
@@ -462,6 +462,19 @@ impl Grammar {
// REVIEW: Do we have a better way to record expansion information
// than to compute the transitive closure?
+ // REVIEW: We need a way to eliminate those left-linearly expanded
+ // edges whose labels had already been considered, and we need to
+ // preserve the transition of the `left_p` property at the same
+ // time.
+ //
+ // Maybe we could decide to delete those edges in the
+ // `remove_predicate`? But we cannot access the states of NFA in
+ // that predicate, in the current design, thus we need to refactor
+ // some codes, it seems: we need a way to "compactify" an NFA, by
+ // a key function, in such a way that if two entries have the same
+ // key (determined by the key function), then only one, determined
+ // by another function, remains in the NFA.
+
/// A transformer of labels to be fed into
/// [`closure`][nfa::default::nfa::DefaultNFA::closure], with the
/// predicate that returns true if and only if the label of the
@@ -483,7 +496,7 @@ impl Grammar {
}
// Compute if this is from left-linear expansion: it is so if
- // and only if one if either the edges comes from left-linear
+ // and only if either one of the edges comes from left-linear
// expansion or we are moving across a non-terminal expansion,
// that is to say, the source of the second edge is the
// starting edge of a non-terminal.
diff --git a/grammar/src/tests/test_grammar_left_closure.rs b/grammar/src/tests/test_grammar_left_closure.rs
index 003c211..ffc7c0f 100644
--- a/grammar/src/tests/test_grammar_left_closure.rs
+++ b/grammar/src/tests/test_grammar_left_closure.rs
@@ -33,9 +33,6 @@ fn test_regex() -> Result<(), Box<dyn std::error::Error>> {
Ok(())
}
-// We ignore this test by default as it is possibly involved with
-// printing to a graphviz file.
-#[ignore]
#[test]
fn test_nfa() -> Result<(), Box<dyn std::error::Error>> {
let mut grammar = new_notes_grammar()?;
@@ -69,19 +66,22 @@ fn test_nfa() -> Result<(), Box<dyn std::error::Error>> {
grammar
.left_closure_to_nfa(&closure)
.map(|_| ())
- .map_err(Into::into)
+ .map_err(Into::<Box<dyn std::error::Error>>::into)?;
// let _nfa = grammar.left_closure_to_nfa(&closure)?;
- // writeln!(lock, "Not printing nfa to nfa.gv")?;
+ #[cfg(features = "test-print-viz")]
+ {
+ writeln!(lock, "Not printing nfa to nfa.gv")?;
- // nfa.print_viz("nfa.gv").map_err(Into::into)
+ nfa.print_viz("nfa.gv")
+ .map_err(Into::<Box<dyn std::error::Error>>::into)?;
+ }
- // Ok(())
+ Ok(())
}
#[test]
-#[ignore]
fn test_remove_epsilon() -> Result<(), Box<dyn std::error::Error>> {
let mut lock = stdout().lock();
@@ -123,17 +123,18 @@ fn test_remove_epsilon() -> Result<(), Box<dyn std::error::Error>> {
let mut nfa = grammar.left_closure_to_nfa(&closure)?;
+ #[cfg(features = "test-print-viz")]
nfa.print_viz("nfa_orig.gv")?;
nfa.remove_epsilon(|label| label.get_value().is_none())?;
+ #[cfg(features = "test-print-viz")]
nfa.print_viz("nfa_no_epsilon.gv")?;
Ok(())
}
#[test]
-#[ignore]
fn test_remove_dead() -> Result<(), Box<dyn std::error::Error>> {
let mut grammar = new_paren_grammar()?;
let closure = new_closure_regex(&mut grammar)?;
@@ -173,9 +174,10 @@ fn test_remove_dead() -> Result<(), Box<dyn std::error::Error>> {
let mut nfa = grammar.left_closure_to_nfa(&closure)?;
+ #[cfg(features = "test-print-viz")]
nfa.print_viz("nfa_orig.gv")?;
- nfa.remove_epsilon(|label| label.get_value().is_none())?;
+ // nfa.remove_epsilon(|label| label.get_value().is_none())?;
let accumulators: HashSet<usize> = accumulators.into_iter().collect();
@@ -183,15 +185,22 @@ fn test_remove_dead() -> Result<(), Box<dyn std::error::Error>> {
let grammar_reserve_node = |node| accumulators.contains(&node);
+ nfa.closure(
+ |label| label.get_value().is_none(),
+ true,
+ |two_edges| two_edges.second_edge().2,
+ |label| label.get_value().is_none(),
+ )?;
+
nfa.remove_dead(grammar_reserve_node)?;
+ #[cfg(features = "test-print-viz")]
nfa.print_viz("nfa_no_dead.gv")?;
Ok(())
}
#[test]
-#[ignore]
fn test_nulling() -> Result<(), Box<dyn std::error::Error>> {
// TODO: Test more grammars.
let mut grammar = new_right_recursive_grammar()?;
@@ -273,9 +282,12 @@ fn test_nulling() -> Result<(), Box<dyn std::error::Error>> {
nfa.remove_dead(grammar_reserve_nodes)?;
- writeln!(lock, "Printing nfa to nfa.gv")?;
+ #[cfg(features = "test-print-viz")]
+ {
+ writeln!(lock, "Printing nfa to nfa.gv")?;
- nfa.print_viz("nfa.gv")?;
+ nfa.print_viz("nfa.gv")?;
+ }
Ok(())
}
diff --git a/graph/src/labelled/binary.rs b/graph/src/labelled/binary.rs
index 67d86f9..bfd8109 100644
--- a/graph/src/labelled/binary.rs
+++ b/graph/src/labelled/binary.rs
@@ -2,7 +2,7 @@
//! labels and each node knows about its parents.
use super::*;
-use crate::{Parent, ParentsGraph};
+use crate::{Parent, ParentsGraph, RedirectGraph};
use std::{
collections::{hash_map::Iter as MapIter, HashMap as Map},
@@ -142,6 +142,11 @@ impl<T: GraphLabel> Graph for PLGraph<T> {
}
#[inline]
+ fn edges_len(&self) -> Option<usize> {
+ Some(self.nodes.iter().map(|node| node.children.len()).sum())
+ }
+
+ #[inline]
fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, Error> {
if let Some(node) = self.nodes.get(node_id) {
Ok(node.children.iter().copied())
@@ -243,6 +248,76 @@ impl<T: GraphLabel> ParentsGraph for PLGraph<T> {
}
}
+impl<T: GraphLabel> RedirectGraph for PLGraph<T> {
+ fn redirect(
+ &mut self,
+ node_id: usize,
+ child_index: usize,
+ new_child: usize,
+ ) -> Result<(), Error> {
+ let nodes_len = self.nodes.len();
+
+ if !self.has_node(new_child) {
+ return Err(Error::IndexOutOfBounds(new_child, nodes_len));
+ }
+
+ if let Some(node) = self.nodes.get_mut(node_id) {
+ if node.children.len() <= child_index {
+ return Err(Error::IndexOutOfBounds(child_index, node.children.len()));
+ }
+
+ // Check if `new_child` is already pointed to by the node.
+ if let Some(index) = node.children.indices.get(&new_child) {
+ // This should not happen in our use case, but we
+ // should still do somthing: as the edges cannot
+ // duplicate, we simply remove the original edge,
+ // unless index = child_index, in which case the old
+ // child is equal to the new child, and we have
+ // nothing to do.
+ if *index != child_index {
+ node.children.remove(new_child);
+ }
+ } else {
+ // The index has been checked above, so it is safe to
+ // call `unwrap` here.
+ let old_child = std::mem::replace(
+ node.children.children.get_mut(child_index).unwrap(),
+ new_child,
+ );
+
+ node.children.indices.remove(&old_child);
+
+ node.children.indices.insert(new_child, child_index);
+
+ // Don't forget to remove `node` from the parents of
+ // the old child.
+
+ if let Some(old_child_node) = self.nodes.get_mut(old_child) {
+ old_child_node.parents.remove(&node_id);
+ } else {
+ // The node contained an invalid child.
+ return Err(Error::IndexOutOfBounds(old_child, nodes_len));
+ }
+
+ // Don't forget to add node as a parent to the new
+ // child.
+
+ // new_child has been checked at the beginning of the
+ // function, so it is safe to call `unwrap`.
+ self.nodes
+ .get_mut(new_child)
+ .unwrap()
+ .parents
+ .insert(node_id, child_index);
+ }
+
+ Ok(())
+ } else {
+ Err(Error::IndexOutOfBounds(node_id, nodes_len))
+ }
+ }
+}
+
impl<T: GraphLabel> LabelGraph<T> for PLGraph<T> {
type Iter<'a> = std::iter::Empty<usize>
where
@@ -299,6 +374,20 @@ pub struct PLGBuilderMut<'a, T: GraphLabel> {
graph: &'a mut PLGraph<T>,
}
+impl<'a, T: GraphLabel> std::ops::Deref for PLGBuilderMut<'a, T> {
+ type Target = PLGraph<T>;
+
+ fn deref(&self) -> &Self::Target {
+ &self.graph
+ }
+}
+
+impl<'a, T: GraphLabel> std::ops::DerefMut for PLGBuilderMut<'a, T> {
+ fn deref_mut(&mut self) -> &mut Self::Target {
+ &mut self.graph
+ }
+}
+
impl<'a, T: GraphLabel> BuilderMut for PLGBuilderMut<'a, T> {
type Label = T;
@@ -467,6 +556,8 @@ mod binary_test {
builder.add_vertex(4);
builder.add_vertex(5);
+ // These labels are not used on edges: they are just place
+ // holders.
builder.add_edge(1, 0, 0)?;
builder.add_edge(2, 0, 0)?;
builder.add_edge(2, 1, 0)?;
@@ -481,43 +572,73 @@ mod binary_test {
// testing adding a duplicatedly labelled node
assert_eq!(builder.add_vertex(0), 0);
- let graph = graph;
-
// ensuring the correct length
- assert_eq!(graph.nodes_len(), 6);
+ assert_eq!(builder.nodes_len(), 6);
// testing children_of
- assert_eq!(graph.children_of(5)?.collect::<Set<_>>(), set!(1, 3, 2));
+ assert_eq!(builder.children_of(5)?.collect::<Set<_>>(), set!(1, 3, 2));
// testing parents_of
assert_eq!(
- graph.parents_of(0)?.collect::<Set<_>>(),
+ builder.parents_of(0)?.collect::<Set<_>>(),
set!(Parent::new(1, 0), Parent::new(2, 0), Parent::new(3, 0))
);
assert_eq!(
- graph.parents_of(1)?.collect::<Set<_>>(),
+ builder.parents_of(1)?.collect::<Set<_>>(),
set!(Parent::new(2, 1), Parent::new(4, 0), Parent::new(5, 2))
);
- assert_eq!(graph.parents_of(5)?.len(), 0);
+ assert_eq!(builder.parents_of(5)?.len(), 0);
// testing degree
- assert_eq!(graph.degree(4)?, 2);
+ assert_eq!(builder.degree(4)?, 2);
// testing is_empty_node
- assert!(graph.is_empty_node(0)?);
- assert!(!graph.is_empty_node(1)?);
+ assert!(builder.is_empty_node(0)?);
+ assert!(!builder.is_empty_node(1)?);
// testing has_edge
- assert!(graph.has_edge(3, 2)?);
- assert!(!graph.has_edge(3, 1)?);
+ assert!(builder.has_edge(3, 2)?);
+ assert!(!builder.has_edge(3, 1)?);
assert!(matches!(
- graph.has_edge(3, 6),
+ builder.has_edge(3, 6),
Err(Error::IndexOutOfBounds(6, 6))
));
+ // testing redirect
+ builder.redirect(5, 2, 0)?;
+ assert_eq!(builder.children_of(5)?.collect::<Set<_>>(), set!(0, 3, 2));
+
+ assert_eq!(
+ builder.parents_of(0)?.collect::<Set<_>>(),
+ set!(
+ Parent::new(1, 0),
+ Parent::new(2, 0),
+ Parent::new(3, 0),
+ Parent::new(5, 2)
+ )
+ );
+
+ builder.redirect(5, 0, 1)?;
+
+ assert_eq!(builder.children_of(5)?.collect::<Set<_>>(), set!(1, 0, 3));
+
+ assert_eq!(
+ builder.parents_of(1)?.collect::<Set<_>>(),
+ set!(Parent::new(2, 1), Parent::new(4, 0), Parent::new(5, 0))
+ );
+
+ builder.redirect(5, 0, 1)?; // should be no-op
+
+ assert_eq!(builder.children_of(5)?.collect::<Set<_>>(), set!(1, 0, 3));
+
+ assert_eq!(
+ builder.parents_of(1)?.collect::<Set<_>>(),
+ set!(Parent::new(2, 1), Parent::new(4, 0), Parent::new(5, 0))
+ );
+
Ok(())
}
}
@@ -636,6 +757,28 @@ mod test_plgraph_builder {
// println!();
+ // source out of bounds
+ assert!(matches!(
+ builder.redirect(5, 0, 0),
+ Err(Error::IndexOutOfBounds(5, 5))
+ ));
+
+ // child_index out of bounds
+ assert!(matches!(
+ builder.redirect(4, 0, 0),
+ Err(Error::IndexOutOfBounds(0, 0))
+ ));
+
+ // new_child out of bounds
+ assert!(matches!(
+ builder.redirect(4, 0, 10),
+ Err(Error::IndexOutOfBounds(10, 5))
+ ));
+
+ // println!("Correct errors when redirecting");
+
+ // println!();
+
let graph = graph;
println!("final graph: {graph:?}");
diff --git a/graph/src/labelled/mod.rs b/graph/src/labelled/mod.rs
index fa26bc4..2bbc7ec 100644
--- a/graph/src/labelled/mod.rs
+++ b/graph/src/labelled/mod.rs
@@ -16,5 +16,6 @@ pub mod double;
pub use double::{DLGBuilder, DLGraph};
pub mod binary;
+pub use binary::{PLGBuilderMut, PLGraph};
// pub use binary::BLGraph;
diff --git a/graph/src/lib.rs b/graph/src/lib.rs
index 6813df3..6af7889 100644
--- a/graph/src/lib.rs
+++ b/graph/src/lib.rs
@@ -23,6 +23,7 @@ pub use adlist::ALGraph;
pub mod labelled;
pub use labelled::DLGraph;
+pub use labelled::PLGraph;
pub mod builder;
@@ -252,6 +253,23 @@ pub trait ParentsGraph: Graph {
fn parents_of(&self, node_id: usize) -> Result<<Self as ParentsGraph>::Iter<'_>, Error>;
}
+// TODO: Design a trait of graphs which can "replace" a certain child
+// by another child. To re-direct children, so to speak.
+
+/// An /exended/ graph in the sense that it offers the ability to
+/// "redirect" children of a node to another node.
+pub trait RedirectGraph: Graph {
+ /// Replace the edge that points from `node_id` to the
+ /// `child_index`-th child by a new edge that points to
+ /// `new_child`.
+ fn redirect(
+ &mut self,
+ node_id: usize,
+ child_index: usize,
+ new_child: usize,
+ ) -> Result<(), Error>;
+}
+
/// A labelled graph is just a graph with labels associated to
/// vertices and / or edges.
///