diff options
Diffstat (limited to 'graph/src/labelled/double.rs')
-rw-r--r-- | graph/src/labelled/double.rs | 876 |
1 files changed, 876 insertions, 0 deletions
diff --git a/graph/src/labelled/double.rs b/graph/src/labelled/double.rs new file mode 100644 index 0000000..53b5dc8 --- /dev/null +++ b/graph/src/labelled/double.rs @@ -0,0 +1,876 @@ +//! This file implements a labelled graph that can index edges by +//! labels. +//! +//! Since the method +//! [`find_children_with_label`][crate::LabelGraph::find_children_with_label] +//! needs to be implemented efficiently, we store the mappings between +//! labels and edges in both directions. + +use super::*; + +// We use BTreeMap and BTreeSet here as we need to exclude duplicate +// edge sets, while an ordinary hashmap and hashset do not allow +// hashing. +use std::collections::{ + btree_map::{Iter as MapIter, Keys}, + btree_set::Iter, + BTreeMap as Map, BTreeSet as Set, HashMap as HMap, +}; + +#[derive(Debug, Clone)] +struct DLNode<T: GraphLabel> { + by_target: Map<usize, Set<T>>, + by_label: Map<T, Set<usize>>, + flat: Vec<(T, usize)>, +} + +impl<T: GraphLabel> Default for DLNode<T> { + fn default() -> Self { + Self { + by_target: Default::default(), + by_label: Default::default(), + flat: Default::default(), + } + } +} + +impl<T: GraphLabel> DLNode<T> { + fn new( + by_target: Map<usize, Set<T>>, + by_label: Map<T, Set<usize>>, + flat: Vec<(T, usize)>, + ) -> Self { + Self { + by_target, + by_label, + flat, + } + } +} + +/// Mapping a set of edges to an index of node. +type EdgeMap<T> = HMap<Set<(T, usize)>, usize>; + +/// Double direction Labelled Graph. +/// +/// Each node is supposed to have a unique edge set. Constructing +/// methods such as from the trait +/// [`LabelExtGraph`][super::LabelExtGraph] already handles the +/// elimination of duplication. +#[derive(Debug, Clone)] +pub struct DLGraph<T: GraphLabel> { + nodes: Vec<DLNode<T>>, + edges_table: EdgeMap<T>, +} + +impl<T: GraphLabel> DLGraph<T> { + #[inline] + /// Return an empty graph. + pub fn new() -> Self { + Self { + nodes: Vec::new(), + edges_table: HMap::default(), + } + } + + /// Return a builder. + #[inline] + pub fn builder(self) -> DLGBuilder<T> { + DLGBuilder(self) + } + + /// Return a builder from a reference. + #[inline] + pub fn builder_ref(&self) -> DLGBuilder<T> { + DLGBuilder(self.clone()) + } +} + +impl<T: GraphLabel> Default for DLGraph<T> { + #[inline] + fn default() -> Self { + Self::new() + } +} + +impl<T: GraphLabel> Graph for DLGraph<T> { + // Not using a boxed pointer is supposed to save some allocations. + type Iter<'a> = std::iter::Copied<Keys<'a, usize, Set<T>>> where T: 'a; + + #[inline] + fn is_empty(&self) -> bool { + self.nodes.is_empty() + } + + #[inline] + fn nodes_len(&self) -> usize { + self.nodes.len() + } + + #[inline] + fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, Error> { + match self.nodes.get(node_id) { + Some(node) => Ok(node.by_target.keys().copied()), + None => Err(Error::IndexOutOfBounds(node_id, self.nodes.len())), + } + } + + #[inline] + /// Return the number of "children" of a node, or an error if the + /// node is not a member of the graph. + /// + /// This counts edges with different labels as different edges. + fn degree(&self, node_id: usize) -> Result<usize, Error> { + self.nodes + .get(node_id) + .ok_or(Error::IndexOutOfBounds(node_id, self.nodes.len())) + .map(|node| node.flat.len()) + } + + #[inline] + fn is_empty_node(&self, node_id: usize) -> Result<bool, Error> { + self.nodes + .get(node_id) + .ok_or(Error::IndexOutOfBounds(node_id, self.nodes.len())) + .map(|node| node.flat.is_empty()) + } + + fn has_edge(&self, source: usize, target: usize) -> Result<bool, Error> { + match self.nodes.get(source) { + Some(source_node) => { + if self.nodes.get(target).is_none() { + Err(Error::IndexOutOfBounds(target, self.nodes.len())) + } else { + Ok(source_node.by_target.contains_key(&target) + && !source_node.by_target.get(&target).unwrap().is_empty()) + } + } + None => Err(Error::IndexOutOfBounds(source, self.nodes.len())), + } + } + + fn print_viz(&self, filename: &str) -> Result<(), std::io::Error> { + let preamble = "digraph nfa { + fontname=\"Helvetica,Arial,sans-serif\" + node [fontname=\"Helvetica,Arial,sans-serif\"] + edge [fontname=\"Helvetica,Arial,sans-serif\"] + rankdir=LR;\n"; + + let mut post = String::new(); + + use std::fmt::Write as FWrite; + + for (source, target) in self.edges() { + for label in self.edge_label(source, target).unwrap() { + let _ = writeln!(post, " {source} -> {target} [label = \"{label}\"]"); + } + } + + post.push_str("}\n"); + + let result = format!("{preamble}{post}"); + + if std::fs::metadata(filename).is_ok() { + std::fs::remove_file(filename)?; + } + + let mut file = std::fs::File::options() + .write(true) + .create(true) + .open(filename)?; + + use std::io::Write; + + file.write_all(result.as_bytes()) + } + + #[inline] + fn replace_by_builder(&mut self, builder: impl Builder<Result = Self>) { + let new_graph = builder.build(); + + self.nodes = new_graph.nodes; + self.edges_table = new_graph.edges_table; + } +} + +/// A delegation of iterators. +/// +/// This is used to avoid a boxed pointer to an iterator. +#[derive(Default, Debug, Clone)] +pub struct LabelIndexIter<'a> { + iter: Option<std::iter::Copied<Iter<'a, usize>>>, +} + +impl<'a> Iterator for LabelIndexIter<'a> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option<Self::Item> { + self.iter.as_mut().and_then(|iterator| iterator.next()) + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + match &self.iter { + Some(iter) => iter.size_hint(), + None => (0, Some(0)), + } + } +} + +impl<'a> ExactSizeIterator for LabelIndexIter<'a> { + #[inline] + fn len(&self) -> usize { + match &self.iter { + Some(iter) => iter.len(), + None => 0, + } + } +} + +impl<'a> LabelIndexIter<'a> { + fn new(iter: std::iter::Copied<Iter<'a, usize>>) -> Self { + let iter = Some(iter); + Self { iter } + } +} + +// A convenience method +impl<'a> From<&'a Set<usize>> for LabelIndexIter<'a> { + fn from(set: &'a Set<usize>) -> Self { + Self::new(set.iter().copied()) + } +} + +#[derive(Debug)] +/// A delegation of iterators. +/// +/// This is used to avoid a boxed pointer to an iterator. +pub struct LabelIter<'a, T> { + iter: MapIter<'a, T, Set<usize>>, +} + +impl<'a, T> ExactSizeIterator for LabelIter<'a, T> { + #[inline] + fn len(&self) -> usize { + self.iter.len() + } +} + +impl<'a, T> LabelIter<'a, T> { + fn new(iter: MapIter<'a, T, Set<usize>>) -> Self { + Self { iter } + } +} + +impl<'a, T> Iterator for LabelIter<'a, T> { + type Item = (&'a T, LabelIndexIter<'a>); + + #[inline] + fn next(&mut self) -> Option<Self::Item> { + self.iter.next().map(|(label, set)| (label, set.into())) + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + self.iter.size_hint() + } +} + +/// This is used to avoid a boxed pointer to an iterator. +#[derive(Debug)] +pub struct EdgeLabelIter<'a, T> { + iter: Option<Iter<'a, T>>, +} + +impl<'a, T> Default for EdgeLabelIter<'a, T> { + #[inline] + fn default() -> Self { + Self { iter: None } + } +} + +impl<'a, T: Copy + Clone> ExactSizeIterator for EdgeLabelIter<'a, T> { + #[inline] + fn len(&self) -> usize { + if let Some(iter) = &self.iter { + iter.len() + } else { + 0 + } + } +} + +impl<'a, T: Copy + Clone> EdgeLabelIter<'a, T> { + fn new(iter: Iter<'a, T>) -> Self { + Self { iter: Some(iter) } + } +} + +impl<'a, T: Copy + Clone> Iterator for EdgeLabelIter<'a, T> { + type Item = T; + + #[inline] + fn next(&mut self) -> Option<Self::Item> { + if let Some(iter) = &mut self.iter { + iter.next().copied() + } else { + None + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + if let Some(iter) = &self.iter { + iter.size_hint() + } else { + (0, Some(0)) + } + } +} + +impl<T: GraphLabel> LabelGraph<T> for DLGraph<T> { + type Iter<'a> = LabelIndexIter<'a> where T: 'a; + + type LabelIter<'a> = LabelIter<'a,T> where T: 'a; + + type EdgeLabelIter<'a> = EdgeLabelIter<'a,T> + where + Self: 'a, + T: 'a + Copy + Clone; + + fn edge_label(&self, source: usize, target: usize) -> Result<Self::EdgeLabelIter<'_>, Error> { + if self.has_edge(source, target)? { + Ok(EdgeLabelIter::new( + self.nodes + .get(source) + .unwrap() + .by_target + .get(&target) + .unwrap() + .iter(), + )) + } else { + Ok(Default::default()) + } + } + + fn find_children_with_label( + &self, + node_id: usize, + label: &T, + ) -> Result<<Self as LabelGraph<T>>::Iter<'_>, Error> { + match self + .nodes + .get(node_id) + .ok_or(Error::IndexOutOfBounds(node_id, self.nodes.len()))? + .by_label + .get(label) + { + Some(set) => Ok(set.into()), + None => Ok(Default::default()), + } + } + + #[inline] + fn labels_of(&self, node_id: usize) -> Result<Self::LabelIter<'_>, Error> { + match self.nodes.get(node_id) { + Some(node) => Ok(Self::LabelIter::new(node.by_label.iter())), + None => Err(Error::IndexOutOfBounds(node_id, self.nodes.len())), + } + } + + fn has_edge_label(&self, node_id: usize, label: &T, target: usize) -> Result<bool, Error> { + let nodes_len = self.nodes.len(); + + match self.nodes.get(node_id) { + Some(node) => { + if target >= nodes_len { + return Err(Error::IndexOutOfBounds(target, nodes_len)); + } + + Ok(node + .by_target + .get(&target) + .map(|labels| labels.contains(label)) + .unwrap_or(false)) + } + None => Err(Error::IndexOutOfBounds(node_id, nodes_len)), + } + } +} + +impl<T: GraphLabel> LabelExtGraph<T> for DLGraph<T> { + fn extend(&mut self, edges: impl IntoIterator<Item = (T, usize)>) -> Result<usize, Error> { + let mut by_target: Map<usize, Set<T>> = Map::default(); + let mut by_label: Map<T, Set<usize>> = Map::default(); + let mut flat = Vec::new(); + let mut edges_set = Set::new(); + + for (label, to) in edges { + if !self.has_node(to) { + return Err(Error::IndexOutOfBounds(to, self.nodes.len())); + } + + edges_set.insert((label, to)); + + if let Some(set) = by_target.get(&to) { + if !set.contains(&label) { + flat.push((label, to)); + by_target.get_mut(&to).unwrap().insert(label); + by_label + .entry(label) + .or_insert_with(Default::default) + .insert(to); + } + } else { + flat.push((label, to)); + by_target + .entry(to) + .or_insert_with(Default::default) + .insert(label); + by_label + .entry(label) + .or_insert_with(Default::default) + .insert(to); + } + } + + match self.edges_table.get(&edges_set) { + Some(old_index) => Ok(*old_index), + None => { + let new_node = DLNode::new(by_target, by_label, flat); + let new_index = self.nodes_len(); + + self.edges_table.insert(edges_set, new_index); + + self.nodes.push(new_node); + + Ok(new_index) + } + } + } +} + +// Builder for this implementation of graph + +/// A builder for labelled adjacency doubly linked graphs. +#[derive(Debug, Clone)] +pub struct DLGBuilder<T: GraphLabel>(DLGraph<T>); + +impl<T: GraphLabel> Default for DLGBuilder<T> { + fn default() -> Self { + Self(Default::default()) + } +} + +impl<T: GraphLabel> Deref for DLGBuilder<T> { + type Target = DLGraph<T>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl<T: GraphLabel> DerefMut for DLGBuilder<T> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl<T: GraphLabel> Builder for DLGBuilder<T> { + type Label = T; + + type Result = DLGraph<T>; + + #[inline] + fn with_capacity(size: usize) -> Self { + Self(DLGraph { + nodes: Vec::with_capacity(size), + edges_table: Default::default(), + }) + } + + #[inline] + fn add_vertex(&mut self) -> usize { + self.nodes.push(DLNode::default()); + self.nodes.len() - 1 + } + + #[inline] + fn add_vertices(&mut self, n: usize) { + self.nodes + .extend(std::iter::repeat_with(Default::default).take(n)); + } + + fn add_edge(&mut self, source: usize, target: usize, label: Self::Label) -> Result<(), Error> { + let nodes_len = self.nodes.len(); + + let source_node = self + .nodes + .get_mut(source) + .ok_or(Error::IndexOutOfBounds(source, nodes_len))?; + + if !(0..nodes_len).contains(&target) { + return Err(Error::IndexOutOfBounds(target, nodes_len)); + } + + let new_edge_p = !matches!( + source_node + .by_target + .get(&target) + .map(|set| set.contains(&label)), + Some(true) + ); + + if new_edge_p { + let old_children_set: Set<(T, usize)> = source_node.flat.iter().copied().collect(); + + source_node + .by_target + .entry(target) + .or_insert_with(Set::default) + .insert(label); + + source_node + .by_label + .entry(label) + .or_insert_with(Set::default) + .insert(target); + + source_node.flat.push((label, target)); + + let new_children_set: Set<(T, usize)> = source_node.flat.iter().copied().collect(); + + // When source_node is in use, we cannot borrow self + // mutably again, so we move the following two statements + // to after all uses of source_node. + + self.edges_table.remove(&old_children_set); + + self.edges_table.insert(new_children_set, source); + } + + Ok(()) + } + + fn remove_edge<F>(&mut self, source: usize, target: usize, predicate: F) -> Result<(), Error> + where + F: Fn(Self::Label) -> bool, + { + let nodes_len = self.nodes.len(); + + let source_node = self + .nodes + .get_mut(source) + .ok_or(Error::IndexOutOfBounds(source, nodes_len))?; + + if !(0..nodes_len).contains(&target) { + return Err(Error::IndexOutOfBounds(target, nodes_len)); + } + + if let Some(target_labels_set) = source_node.by_target.get(&target) { + let mut to_remove = Vec::new(); + + for label in target_labels_set.iter() { + if predicate(*label) { + to_remove.push(*label); + } + } + + if !to_remove.is_empty() { + let old_children_set: Set<(T, usize)> = source_node.flat.iter().copied().collect(); + + for label in to_remove.iter().copied() { + // This must be "Some", as the outer "if" checks + source_node + .by_target + .get_mut(&target) + .map(|set| set.remove(&label)); + + source_node + .by_label + .get_mut(&label) + .map(|set| set.remove(&target)); + + source_node.flat.retain(|(child_label, child_target)| { + (*child_label, *child_target) != (label, target) + }); + } + + // If after removal no labels remain for the target, + // we remove the edge entirely, to avoid false empty + // edges. + + if let Some(set) = source_node.by_target.get(&target) { + if set.is_empty() { + source_node.by_target.remove(&target); + } + } + + for label in to_remove.iter() { + if let Some(set) = source_node.by_label.get(label) { + if set.is_empty() { + source_node.by_label.remove(label); + } + } + } + + // if source_node.flat.is_empty() { + // source_node.by_target.remove(&target); + + // for label in to_remove.iter() { + // source_node.by_label.remove(label); + // } + // } + + let new_children_set: Set<(T, usize)> = source_node.flat.iter().copied().collect(); + + // When source_node is in use, we cannot borrow self + // mutably again, so we move the following two + // statements to after all uses of source_node. + + self.edges_table.remove(&old_children_set); + + self.edges_table.insert(new_children_set, source); + } + } + + Ok(()) + } + + fn build_ref(&self) -> Self::Result { + self.0.clone() + } + + fn build(self) -> Self::Result { + self.0 + } +} + +#[cfg(test)] +mod label_test { + use super::*; + + macro_rules! set { + () => { Set::<usize>::default() }; + ($($num:literal),*) => { + { + let mut set: Set<usize> = Set::default(); + $(set.insert($num);)* + set + } + }; + } + + macro_rules! map { + () => { Map::<usize, Set<usize>>::default() }; + ($(($key:literal, $value:expr)),*) => { + { + let mut map: Map<usize, Set<usize>> = Map::default(); + $(map.insert($key, $value);)* + map + } + }; + } + + #[test] + fn test_graph_apis() -> Result<(), Error> { + let mut graph: DLGraph<usize> = Default::default(); + + // testing empty graph + assert!(graph.is_empty()); + + // testing adding an empty node + assert_eq!(graph.extend(std::iter::empty())?, 0); + + // testing nodes_len + assert_eq!(graph.nodes_len(), 1); + + // testing extension + + assert_eq!(graph.extend([(0, 0)].iter().copied())?, 1); + assert_eq!(graph.extend([(1, 0), (1, 1)].iter().copied())?, 2); + assert_eq!(graph.extend([(3, 0), (3, 2)].iter().copied())?, 3); + assert_eq!(graph.extend([(1, 1), (1, 2)].iter().copied())?, 4); + assert_eq!(graph.extend([(2, 1), (3, 2), (2, 3)].iter().copied())?, 5); + + // testing adding a duplicated edge set + assert_eq!(graph.extend([(2, 1), (2, 3), (3, 2)].iter().copied())?, 5); + assert_eq!(graph.extend([(3, 2), (3, 0)].iter().copied())?, 3); + + let graph = graph; + + // ensuring the correct length + assert_eq!(graph.nodes_len(), 6); + + // testing children_of + assert_eq!(graph.children_of(5)?.collect::<Set<_>>(), set!(1, 3, 2)); + + // testing find_children_with_label + assert_eq!( + graph.find_children_with_label(5, &2)?.collect::<Set<_>>(), + set!(1, 3) + ); + + // testing edge_label + assert_eq!(graph.edge_label(5, 2)?.collect::<Set<_>>(), set!(3)); + assert!(matches!( + graph.edge_label(6, 2), + Err(Error::IndexOutOfBounds(6, 6)) + )); + + // testing degree + assert_eq!(graph.degree(4)?, 2); + + // testing is_empty_node + assert!(graph.is_empty_node(0)?); + assert!(!graph.is_empty_node(1)?); + + // testing has_edge + assert!(graph.has_edge(3, 2)?); + assert!(!graph.has_edge(3, 1)?); + assert!(matches!( + graph.has_edge(3, 6), + Err(Error::IndexOutOfBounds(6, 6)) + )); + + // testing labels_of + let mut label_map: Map<usize, Set<usize>> = Map::default(); + + for (label, children) in graph.labels_of(5)? { + label_map.insert(*label, children.collect()); + } + + let compare_map = map!((2, set!(1, 3)), (3, set!(2))); + + assert_eq!(label_map, compare_map); + + assert!(matches!( + graph.labels_of(6), + Err(Error::IndexOutOfBounds(6, 6)) + )); + + Ok(()) + } +} + +#[cfg(test)] +mod test_dlgraph_builder { + use super::*; + + #[test] + fn test_builder() -> Result<(), Box<dyn std::error::Error>> { + let mut builder = DLGBuilder::<usize>::default(); + + // Add five nodes + builder.add_vertex(); + builder.add_vertex(); + builder.add_vertex(); + builder.add_vertex(); + builder.add_vertex(); + + println!("five empty nodes: {builder:?}"); + + // Link each node to its successor and link the last node with + // the first one to form a cycle. + for i in 0..5 { + builder.add_edge(i, if i < 4 { i + 1 } else { 0 }, i)?; + } + + println!("a cycle of five nodes: {builder:?}"); + + // Remove the link from the last node to the first node. + builder.remove_edge(4, 0, |_| true)?; + + println!("a line of five nodes: {builder:?}"); + + // build a graph + + let graph = builder.build(); + + println!("final graph: {graph:?}"); + + Ok(()) + } + + #[test] + fn test_errors() -> Result<(), Box<dyn std::error::Error>> { + let mut builder = DLGBuilder::default(); + + // Add five nodes + builder.add_vertex(); + builder.add_vertex(); + builder.add_vertex(); + builder.add_vertex(); + builder.add_vertex(); + + println!("five empty nodes: {builder:?}"); + + // Errors in add_edge + + println!(); + println!("Testing errors in add_edge:"); + println!(); + + assert!(matches!( + builder.add_edge(0, 5, 0), + Err(Error::IndexOutOfBounds(5, 5)) + )); + + println!("Right error for an index out of bounds as the target"); + + assert!(matches!( + builder.add_edge(10, 5, 0), + Err(Error::IndexOutOfBounds(10, 5)) + )); + + println!("Right error for an index out of bounds as the source"); + + assert!(matches!( + builder.add_edge(10, 50, 0), + Err(Error::IndexOutOfBounds(10, 5)) + )); + + println!("Right error for both indices out of bounds"); + + // Errors in remove_edge + + println!(); + println!("Testing errors in remove_edge:"); + println!(); + + assert!(matches!( + builder.remove_edge(0, 5, |_| true), + Err(Error::IndexOutOfBounds(5, 5)) + )); + + println!("Right error for an index out of bounds as the target"); + + assert!(matches!( + builder.remove_edge(10, 5, |_| true), + Err(Error::IndexOutOfBounds(10, 5)) + )); + + println!("Right error for an index out of bounds as the source"); + + assert!(matches!( + builder.remove_edge(10, 50, |_| true), + Err(Error::IndexOutOfBounds(10, 5)) + )); + + println!("Right error for both indices out of bounds"); + + assert!(matches!(builder.remove_edge(0, 1, |_| true), Ok(()),)); + + println!("No errors for removing a non-existing edge"); + + println!(); + + let graph = builder.build(); + + println!("final graph: {graph:?}"); + + Ok(()) + } +} |