From 1a3d346f413325ed37848a6b2526e8e729269833 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Wed, 11 Jan 2023 23:47:26 +0800 Subject: Record left-linear expansion and forest format Now the grammar will record the left-linear expansions when generating the nondeterministic finite automaton frmo its rules, and will record whether an edge in the nondeterministic finite automaton comes from a left-linear expansion. The latter is needed because while performing a chain-rule derivation, we do not need the left-linear expanded derivations in the "first layer". This might well have been the root cause of the bad performance of the previous version of this package. Also I have figured out how to properly generate and handle parse forests while manipulating the "chain-rule machine". --- graph/src/adlist.rs | 2 +- graph/src/adset.rs | 2 +- graph/src/builder.rs | 4 +- graph/src/labelled/binary.rs | 406 +++++++++++++++++++++++++++++++++++++++++-- graph/src/labelled/double.rs | 9 +- graph/src/lib.rs | 31 +++- 6 files changed, 428 insertions(+), 26 deletions(-) (limited to 'graph/src') diff --git a/graph/src/adlist.rs b/graph/src/adlist.rs index ba9afb8..ba3077e 100644 --- a/graph/src/adlist.rs +++ b/graph/src/adlist.rs @@ -201,7 +201,7 @@ impl Builder for ALGBuilder { /// implement a customized builder. fn remove_edge(&mut self, source: usize, target: usize, _predicate: F) -> Result<(), Error> where - F: Fn(Self::Label) -> bool, + F: FnMut(Self::Label) -> bool, { let nodes_len = self.nodes.len(); diff --git a/graph/src/adset.rs b/graph/src/adset.rs index adf2aaf..a72935a 100644 --- a/graph/src/adset.rs +++ b/graph/src/adset.rs @@ -215,7 +215,7 @@ impl Builder for ASGBuilder { fn remove_edge(&mut self, source: usize, target: usize, _predicate: F) -> Result<(), Error> where - F: Fn(Self::Label) -> bool, + F: FnMut(Self::Label) -> bool, { let nodes_len = self.nodes.len(); diff --git a/graph/src/builder.rs b/graph/src/builder.rs index 5505e4f..b04e7f6 100644 --- a/graph/src/builder.rs +++ b/graph/src/builder.rs @@ -40,7 +40,7 @@ pub trait Builder: Default { /// target should be removed. fn remove_edge(&mut self, source: usize, target: usize, predicate: F) -> Result<(), Error> where - F: Fn(Self::Label) -> bool; + F: FnMut(Self::Label) -> bool; /// Convert the builder into a graph. /// @@ -89,5 +89,5 @@ pub trait BuilderMut { /// target should be removed. fn remove_edge(&mut self, source: usize, target: usize, predicate: F) -> Result<(), Error> where - F: Fn(Self::Label) -> bool; + F: FnMut(Self::Label) -> bool; } diff --git a/graph/src/labelled/binary.rs b/graph/src/labelled/binary.rs index 439505d..67d86f9 100644 --- a/graph/src/labelled/binary.rs +++ b/graph/src/labelled/binary.rs @@ -2,24 +2,89 @@ //! labels and each node knows about its parents. use super::*; -use crate::ParentsGraph; +use crate::{Parent, ParentsGraph}; -use std::collections::{HashMap as Map, HashSet as Set}; +use std::{ + collections::{hash_map::Iter as MapIter, HashMap as Map}, + slice::Iter, +}; use crate::builder::BuilderMut; +/// An ad-hoc `IndexSet` for children. +#[derive(Debug, Clone, Default)] +struct PLChildren { + children: Vec, + indices: Map, +} + +impl PLChildren { + fn iter(&self) -> Iter<'_, usize> { + self.children.iter() + } + + fn len(&self) -> usize { + self.children.len() + } + + fn is_empty(&self) -> bool { + self.children.is_empty() + } + + fn contains(&self, key: &usize) -> bool { + self.indices.contains_key(key) + } + + // The return value matters not for me. + fn insert(&mut self, key: usize) { + if let Some(key_index) = self.indices.get(&key) { + debug_assert!(*key_index < self.children.len()); + + *self.children.get_mut(*key_index).unwrap() = key; + } else { + let key_index = self.children.len(); + self.indices.insert(key, key_index); + self.children.push(key); + } + } + + /// Remove an element from children. + /// + /// We must preserve the order of elements, so we have to shift + /// every element that follows it, which leads to a slow + /// performance. So try to avoid removing edges, if possible. + fn remove(&mut self, key: usize) { + let key_index = if let Some(key_index_result) = self.indices.get(&key) { + *key_index_result + } else { + // If the key is not contained in children, we have + // nothing to do. + return; + }; + + let children_len = self.children.len(); + + debug_assert!(key_index < children_len); + + for i in (key_index + 1)..children_len { + let key = self.children.get(i).unwrap(); + *self.indices.get_mut(key).unwrap() -= 1; + } + + self.children.remove(key_index); + } +} + /// A node has some children, some parents, and a label. #[derive(Debug, Clone)] struct PLNode { - children: Set, - // REVIEW: If performance for removing edges is a bottleneck, use - // a hash set here. - parents: Vec, + children: PLChildren, + parents: Map, label: T, } impl PLNode { - fn new(children: Set, parents: Vec, label: T) -> Self { + fn new(children: PLChildren, parents: Map, label: T) -> Self { Self { children, parents, @@ -62,7 +127,7 @@ impl Default for PLGraph { } impl Graph for PLGraph { - type Iter<'a> = std::iter::Copied> + type Iter<'a> = std::iter::Copied> where Self: 'a; @@ -103,6 +168,7 @@ impl Graph for PLGraph { } } + #[inline] fn has_edge(&self, source: usize, target: usize) -> Result { if let Some(node) = self.nodes.get(source) { if !self.has_node(target) { @@ -116,7 +182,7 @@ impl Graph for PLGraph { } fn replace_by_builder(&mut self, _builder: impl Builder) { - unimplemented!("for later") + todo!() } fn print_viz(&self, _filename: &str) -> Result<(), std::io::Error> { @@ -124,14 +190,53 @@ impl Graph for PLGraph { } } +/// An iterator of parents. +/// +/// This is to avoid a boxed allocation. +#[derive(Debug, Clone)] +pub struct ParentIter<'a> { + /// MapIter yields (&usize, &usize), so we need to dereference + /// that. + parents: MapIter<'a, usize, usize>, +} + +impl<'a> ParentIter<'a> { + fn new(parents: MapIter<'a, usize, usize>) -> Self { + Self { parents } + } +} + +impl<'a> Iterator for ParentIter<'a> { + type Item = Parent; + + #[inline] + fn next(&mut self) -> Option { + self.parents + .next() + .map(|(key, value)| Parent::new(*key, *value)) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.parents.size_hint() + } +} + +impl<'a> ExactSizeIterator for ParentIter<'a> { + #[inline] + fn len(&self) -> usize { + self.parents.len() + } +} + impl ParentsGraph for PLGraph { - type Iter<'a> = std::iter::Copied> + type Iter<'a> = ParentIter<'a> where Self: 'a; #[inline] fn parents_of(&self, node_id: usize) -> Result<::Iter<'_>, Error> { if let Some(node) = self.nodes.get(node_id) { - Ok(node.parents.iter().copied()) + Ok(ParentIter::new(node.parents.iter())) } else { Err(Error::IndexOutOfBounds(node_id, self.nodes.len())) } @@ -203,10 +308,12 @@ impl<'a, T: GraphLabel> BuilderMut for PLGBuilderMut<'a, T> { where Self::Label: 'b; + #[inline] fn from_graph_mut(graph: &mut Self::Graph) -> Self::ResultBuilder<'_> { PLGBuilderMut { graph } } + #[inline] fn add_vertex(&mut self, label: Self::Label) -> usize { if let Some(old) = self.graph.label_index_map.get(&label) { *old @@ -222,11 +329,16 @@ impl<'a, T: GraphLabel> BuilderMut for PLGBuilderMut<'a, T> { } } + #[inline] fn add_edge(&mut self, source: usize, target: usize, _label: Self::Label) -> Result<(), Error> { if self.graph.has_edge(source, target)? { - return Err(Error::DuplicatedEdge(source, target)); + return Ok(()); } + // The validity of source and target is guaranteed now. + + let parent_child_index = self.graph.degree(source).unwrap(); + self.graph .nodes .get_mut(source) @@ -239,35 +351,295 @@ impl<'a, T: GraphLabel> BuilderMut for PLGBuilderMut<'a, T> { .get_mut(target) .unwrap() .parents - .push(source); + .insert(source, parent_child_index); Ok(()) } + /// Remove an edge from the source to the target. + /// + /// Since some graphs are labelled, the users are allowed to pass + /// a predicate to determine if an edge from the source to the + /// target should be removed. + /// + /// # Performance + /// + /// Removal is slow since we need to keep the order of the elements. fn remove_edge(&mut self, source: usize, target: usize, _predicate: F) -> Result<(), Error> where - F: Fn(Self::Label) -> bool, + F: FnMut(Self::Label) -> bool, { if !self.graph.has_edge(source, target)? { return Ok(()); } + // Both source and target are valid indices now. + + let child_index = *self + .graph + .nodes + .get(target) + .unwrap() + .parents + .get(&source) + .unwrap(); + + let source_degree = self.graph.degree(source).unwrap(); + + // Decrement the relevant children's parents index. + for i in (child_index + 1)..source_degree { + let child = *self + .graph + .nodes + .get(source) + .unwrap() + .children + .children + .get(i) + .unwrap(); + + *self + .graph + .nodes + .get_mut(child) + .unwrap() + .parents + .get_mut(&source) + .unwrap() -= 1; + } + self.graph .nodes .get_mut(source) .unwrap() .children - .remove(&target); + .remove(target); self.graph .nodes .get_mut(target) .unwrap() .parents - .retain(|parent| *parent != source); + .remove(&source); + + Ok(()) + } +} + +#[cfg(test)] +mod binary_test { + use super::*; + use std::collections::HashSet as Set; + + macro_rules! set { + ($type:tt) => { Set::<$type>::default() }; + ($($num:expr),*) => { + { + let mut set: Set<_> = Set::default(); + $(set.insert($num);)* + set + } + }; + } + + #[test] + fn test_graph_apis() -> Result<(), Error> { + let mut graph: PLGraph = Default::default(); + + // testing empty graph + assert!(graph.is_empty()); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut graph); + + // testing adding an empty node + assert_eq!(builder.add_vertex(0), 0); + + // testing nodes_len + assert_eq!(graph.nodes_len(), 1); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut graph); + + // testing more vertices and edges + + builder.add_vertex(1); + builder.add_vertex(2); + builder.add_vertex(3); + builder.add_vertex(4); + builder.add_vertex(5); + + builder.add_edge(1, 0, 0)?; + builder.add_edge(2, 0, 0)?; + builder.add_edge(2, 1, 0)?; + builder.add_edge(3, 0, 0)?; + builder.add_edge(3, 2, 0)?; + builder.add_edge(4, 1, 0)?; + builder.add_edge(4, 2, 0)?; + builder.add_edge(5, 2, 0)?; + builder.add_edge(5, 3, 0)?; + builder.add_edge(5, 1, 0)?; + + // testing adding a duplicatedly labelled node + assert_eq!(builder.add_vertex(0), 0); + + let graph = graph; + + // ensuring the correct length + assert_eq!(graph.nodes_len(), 6); + + // testing children_of + assert_eq!(graph.children_of(5)?.collect::>(), set!(1, 3, 2)); + + // testing parents_of + + assert_eq!( + graph.parents_of(0)?.collect::>(), + set!(Parent::new(1, 0), Parent::new(2, 0), Parent::new(3, 0)) + ); + + assert_eq!( + graph.parents_of(1)?.collect::>(), + set!(Parent::new(2, 1), Parent::new(4, 0), Parent::new(5, 2)) + ); + + assert_eq!(graph.parents_of(5)?.len(), 0); + + // testing degree + assert_eq!(graph.degree(4)?, 2); + + // testing is_empty_node + assert!(graph.is_empty_node(0)?); + assert!(!graph.is_empty_node(1)?); + + // testing has_edge + assert!(graph.has_edge(3, 2)?); + assert!(!graph.has_edge(3, 1)?); + assert!(matches!( + graph.has_edge(3, 6), + Err(Error::IndexOutOfBounds(6, 6)) + )); Ok(()) } } -// TODO: tests +#[cfg(test)] +mod test_plgraph_builder { + use super::*; + + #[test] + fn test_builder() -> Result<(), Box> { + let mut graph = PLGraph::::default(); + let mut builder = PLGBuilderMut::from_graph_mut(&mut graph); + + // Add five nodes + builder.add_vertex(0); + builder.add_vertex(1); + builder.add_vertex(2); + builder.add_vertex(3); + builder.add_vertex(4); + + // println!("five empty nodes: {builder:?}"); + + // Link each node to its successor and link the last node with + // the first one to form a cycle. + for i in 0..5 { + builder.add_edge(i, if i < 4 { i + 1 } else { 0 }, 0)?; + } + + // println!("a cycle of five nodes: {builder:?}"); + + // Remove the link from the last node to the first node. + builder.remove_edge(4, 0, |_| true)?; + + // println!("a line of five nodes: {builder:?}"); + + // build a graph + + let graph = graph; + + println!("final graph: {graph:?}"); + + Ok(()) + } + + #[test] + fn test_errors() -> Result<(), Box> { + let mut graph = PLGraph::::default(); + let mut builder = PLGBuilderMut::from_graph_mut(&mut graph); + + // Add five nodes + builder.add_vertex(0); + builder.add_vertex(1); + builder.add_vertex(2); + builder.add_vertex(3); + builder.add_vertex(4); + + // println!("five empty nodes: {builder:?}"); + + // Errors in add_edge + + // println!(); + // println!("Testing errors in add_edge:"); + // println!(); + + assert!(matches!( + builder.add_edge(0, 5, 0), + Err(Error::IndexOutOfBounds(5, 5)) + )); + + // println!("Right error for an index out of bounds as the target"); + + assert!(matches!( + builder.add_edge(10, 5, 0), + Err(Error::IndexOutOfBounds(10, 5)) + )); + + // println!("Right error for an index out of bounds as the source"); + + assert!(matches!( + builder.add_edge(10, 50, 0), + Err(Error::IndexOutOfBounds(10, 5)) + )); + + // println!("Right error for both indices out of bounds"); + + // Errors in remove_edge + + // println!(); + // println!("Testing errors in remove_edge:"); + // println!(); + + assert!(matches!( + builder.remove_edge(0, 5, |_| true), + Err(Error::IndexOutOfBounds(5, 5)) + )); + + // println!("Right error for an index out of bounds as the target"); + + assert!(matches!( + builder.remove_edge(10, 5, |_| true), + Err(Error::IndexOutOfBounds(10, 5)) + )); + + // println!("Right error for an index out of bounds as the source"); + + assert!(matches!( + builder.remove_edge(10, 50, |_| true), + Err(Error::IndexOutOfBounds(10, 5)) + )); + + // println!("Right error for both indices out of bounds"); + + assert!(builder.remove_edge(0, 1, |_| true).is_ok()); + + // println!("No errors for removing a non-existing edge"); + + // println!(); + + let graph = graph; + + println!("final graph: {graph:?}"); + + Ok(()) + } +} diff --git a/graph/src/labelled/double.rs b/graph/src/labelled/double.rs index 53b5dc8..4ab8a38 100644 --- a/graph/src/labelled/double.rs +++ b/graph/src/labelled/double.rs @@ -554,9 +554,14 @@ impl Builder for DLGBuilder { Ok(()) } - fn remove_edge(&mut self, source: usize, target: usize, predicate: F) -> Result<(), Error> + fn remove_edge( + &mut self, + source: usize, + target: usize, + mut predicate: F, + ) -> Result<(), Error> where - F: Fn(Self::Label) -> bool, + F: FnMut(Self::Label) -> bool, { let nodes_len = self.nodes.len(); diff --git a/graph/src/lib.rs b/graph/src/lib.rs index 26159c6..6813df3 100644 --- a/graph/src/lib.rs +++ b/graph/src/lib.rs @@ -214,12 +214,37 @@ impl usize { + self.0 + } + + /// Return the index of the edge that points to the child. + #[inline] + pub fn edge(&self) -> usize { + self.1 + } + + /// Construct a parent strucure. + #[inline] + pub fn new(node: usize, edge: usize) -> Self { + Self(node, edge) + } +} + /// A parents-knowing graph can return an iterator of parents for any /// node. pub trait ParentsGraph: Graph { /// The type of an iterator that goes through the parents of a /// node. - type Iter<'a>: Iterator + 'a + type Iter<'a>: Iterator + 'a where Self: 'a; @@ -340,14 +365,14 @@ mod test_cycle_detection { let graph = builder.build_ref(); - assert_eq!(graph.contains_cycles()?, true); + assert!(!graph.contains_cycles()?); // Remove the link from the last node to the first node. builder.remove_edge(4, 0, |_| true)?; let graph = builder.build(); - assert_eq!(graph.contains_cycles()?, false); + assert!(!graph.contains_cycles()?); Ok(()) } -- cgit v1.2.3-18-g5258