//! This file implements a labelled graph that can index vertices by //! labels and each node knows about its parents. use super::*; use crate::{Parent, ParentsGraph, RedirectGraph}; use std::{ collections::{hash_map::Iter as MapIter, HashMap as Map}, slice::Iter, }; use crate::builder::BuilderMut; /// An ad-hoc `IndexSet` for children. #[derive(Debug, Clone, Default)] struct PLChildren { children: Vec, indices: Map, } impl PLChildren { #[inline] fn iter(&self) -> Iter<'_, usize> { self.children.iter() } #[inline] fn len(&self) -> usize { self.children.len() } #[inline] fn is_empty(&self) -> bool { self.children.is_empty() } #[inline] fn contains(&self, key: &usize) -> bool { self.indices.contains_key(key) } // The return value matters not for me. fn insert(&mut self, key: usize) { if let Some(key_index) = self.indices.get(&key) { debug_assert!(*key_index < self.children.len()); *self.children.get_mut(*key_index).unwrap() = key; } else { let key_index = self.children.len(); self.indices.insert(key, key_index); self.children.push(key); } } /// Remove an element from children. /// /// We must preserve the order of elements, so we have to shift /// every element that follows it, which leads to a slow /// performance. So try to avoid removing edges, if possible. fn remove(&mut self, key: usize) { let key_index = if let Some(key_index_result) = self.indices.get(&key) { *key_index_result } else { // If the key is not contained in children, we have // nothing to do. return; }; let children_len = self.children.len(); debug_assert!(key_index < children_len); for i in (key_index + 1)..children_len { let key = self.children.get(i).unwrap(); *self.indices.get_mut(key).unwrap() -= 1; } self.children.remove(key_index); } /// Retrieve the `n`-th child. #[inline] fn nth(&self, n: usize) -> Result { self.children .get(n) .copied() .ok_or(Error::IndexOutOfBounds(n, self.children.len())) } } /// A node has some children, some parents, and a label. #[derive(Debug, Clone)] struct PLNode { children: PLChildren, parents: Map, label: T, } impl PLNode { fn new(children: PLChildren, parents: Map, label: T) -> Self { Self { children, parents, label, } } } impl Default for PLNode { #[inline] fn default() -> Self { let children = Default::default(); let parents = Default::default(); let label = Default::default(); Self { children, label, parents, } } } /// A Parents-knowing, vertex-Labelled Graph. #[derive(Debug, Clone)] pub struct PLGraph { nodes: Vec>, label_index_map: Map, } impl Default for PLGraph { #[inline] fn default() -> Self { let nodes = Default::default(); let label_index_map = Default::default(); Self { nodes, label_index_map, } } } impl Graph for PLGraph { type Iter<'a> = std::iter::Copied> where Self: 'a; #[inline] fn is_empty(&self) -> bool { self.nodes.is_empty() } #[inline] fn nodes_len(&self) -> usize { self.nodes.len() } #[inline] fn edges_len(&self) -> Option { Some(self.nodes.iter().map(|node| node.children.len()).sum()) } #[inline] fn children_of(&self, node_id: usize) -> Result, Error> { if let Some(node) = self.nodes.get(node_id) { Ok(node.children.iter().copied()) } else { Err(Error::IndexOutOfBounds(node_id, self.nodes.len())) } } #[inline] fn degree(&self, node_id: usize) -> Result { if let Some(node) = self.nodes.get(node_id) { Ok(node.children.len()) } else { Err(Error::IndexOutOfBounds(node_id, self.nodes.len())) } } #[inline] fn is_empty_node(&self, node_id: usize) -> Result { if let Some(node) = self.nodes.get(node_id) { Ok(node.children.is_empty()) } else { Err(Error::IndexOutOfBounds(node_id, self.nodes.len())) } } #[inline] fn has_edge(&self, source: usize, target: usize) -> Result { if let Some(node) = self.nodes.get(source) { if !self.has_node(target) { return Err(Error::IndexOutOfBounds(target, self.nodes.len())); } Ok(node.children.contains(&target)) } else { Err(Error::IndexOutOfBounds(source, self.nodes.len())) } } fn replace_by_builder(&mut self, _builder: impl Builder) { unimplemented!("use a `PLGBuilderMut` instead") } fn print_viz(&self, filename: &str) -> Result<(), std::io::Error> { let filename = format!("output/{filename}"); let preamble = "digraph nfa { fontname=\"Helvetica,Arial,sans-serif\" node [fontname=\"Helvetica,Arial,sans-serif\"] edge [fontname=\"Helvetica,Arial,sans-serif\"] rankdir=LR;\n"; let mut post = String::new(); // FIXME: Find a way to print only used nodes. Maybe remove // unwanted edges from unwanted nodes, so that we can print // out only those used nodes. for node in self.nodes() { post.push_str(&format!( " {node} [label = \"{}\"]\n", match self.vertex_label(node) { Ok(Some(label)) => { format!("{label}") } _ => { " ε ".to_owned() } } )); } for (source, target) in self.edges() { post.push_str(&format!(" {source} -> {target}\n")); } post.push_str("}\n"); let result = format!("{preamble}{post}"); if std::fs::metadata(&filename).is_ok() { std::fs::remove_file(&filename)?; } let mut file = std::fs::File::options() .write(true) .create(true) .open(&filename)?; use std::io::Write; file.write_all(result.as_bytes()) } } /// An iterator of parents. /// /// This is to avoid a boxed allocation. #[derive(Debug, Clone)] pub struct ParentIter<'a> { /// MapIter yields (&usize, &usize), so we need to dereference /// that. parents: MapIter<'a, usize, usize>, } impl<'a> ParentIter<'a> { fn new(parents: MapIter<'a, usize, usize>) -> Self { Self { parents } } } impl<'a> Iterator for ParentIter<'a> { type Item = Parent; #[inline] fn next(&mut self) -> Option { self.parents .next() .map(|(key, value)| Parent::new(*key, *value)) } #[inline] fn size_hint(&self) -> (usize, Option) { self.parents.size_hint() } } impl<'a> ExactSizeIterator for ParentIter<'a> { #[inline] fn len(&self) -> usize { self.parents.len() } } impl ParentsGraph for PLGraph { type Iter<'a> = ParentIter<'a> where Self: 'a; #[inline] fn parents_of(&self, node_id: usize) -> Result<::Iter<'_>, Error> { if let Some(node) = self.nodes.get(node_id) { Ok(ParentIter::new(node.parents.iter())) } else { Err(Error::IndexOutOfBounds(node_id, self.nodes.len())) } } #[inline] fn nth_child(&self, node_id: usize, n: usize) -> Result { if let Some(node) = self.nodes.get(node_id) { node.children.nth(n) } else { Err(Error::IndexOutOfBounds(node_id, self.nodes_len())) } } #[inline] fn edge_to_parent(&self, source: usize, target: usize) -> Result, Error> { if !self.has_edge(source, target)? { return Ok(None); } Ok(self .nodes .get(source) .unwrap() .children .indices .get(&target) .map(|index| Parent::new(source, *index))) } } impl RedirectGraph for PLGraph { fn redirect( &mut self, node_id: usize, child_index: usize, new_child: usize, ) -> Result<(), Error> { let nodes_len = self.nodes.len(); if !self.has_node(new_child) { return Err(Error::IndexOutOfBounds(new_child, nodes_len)); } if let Some(node) = self.nodes.get_mut(node_id) { if node.children.len() <= child_index { return Err(Error::IndexOutOfBounds(child_index, node.children.len())); } // Check if `new_child` is already pointed to by the node. if let Some(index) = node.children.indices.get(&new_child) { // This should not happen in our use case, but we // should still do somthing: as the edges cannot // duplicate, we simply remove the original edge, // unless index = child_index, in which case the old // child is equal to the new child, and we have // nothing to do. if *index != child_index { node.children.remove(new_child); } } else { // The index has been checked above, so it is safe to // call `unwrap` here. let old_child = std::mem::replace( node.children.children.get_mut(child_index).unwrap(), new_child, ); node.children.indices.remove(&old_child); node.children.indices.insert(new_child, child_index); // Don't forget to remove `node` from the parents of // the old child. if let Some(old_child_node) = self.nodes.get_mut(old_child) { old_child_node.parents.remove(&node_id); } else { // The node contained an invalid child. return Err(Error::IndexOutOfBounds(old_child, nodes_len)); } // Don't forget to add node as a parent to the new // child. // new_child has been checked at the beginning of the // function, so it is safe to call `unwrap`. self.nodes .get_mut(new_child) .unwrap() .parents .insert(node_id, child_index); } Ok(()) } else { Err(Error::IndexOutOfBounds(node_id, nodes_len)) } } } impl LabelGraph for PLGraph { type Iter<'a> = std::iter::Empty where Self: 'a; type LabelIter<'a> = std::iter::Empty<(&'a T, >::Iter<'a>)> where Self: 'a, T: 'a; type EdgeLabelIter<'a> = std::iter::Empty where Self: 'a, T: 'a; #[inline] fn query_label(&self, label: T) -> Option { self.label_index_map.get(&label).copied() } #[inline] fn vertex_label(&self, node_id: usize) -> Result, Error> { if let Some(node) = self.nodes.get(node_id) { Ok(Some(node.label)) } else { Err(Error::IndexOutOfBounds(node_id, self.nodes.len())) } } fn edge_label(&self, _source: usize, _target: usize) -> Result, Error> { unimplemented!("Edges have no labels") } fn find_children_with_label( &self, _node_id: usize, _label: &T, ) -> Result<>::Iter<'_>, Error> { unimplemented!("Edges have no labels") } fn labels_of(&self, _node_id: usize) -> Result, Error> { unimplemented!("Edges have no labels") } fn has_edge_label(&self, _node_id: usize, _label: &T, _target: usize) -> Result { unimplemented!("Edges have no labels") } } /// A builder that modifies PLGraph in place. #[derive(Debug)] pub struct PLGBuilderMut<'a, T: GraphLabel> { graph: &'a mut PLGraph, } impl<'a, T: GraphLabel> std::ops::Deref for PLGBuilderMut<'a, T> { type Target = PLGraph; fn deref(&self) -> &Self::Target { self.graph } } impl<'a, T: GraphLabel> std::ops::DerefMut for PLGBuilderMut<'a, T> { fn deref_mut(&mut self) -> &mut Self::Target { self.graph } } impl<'a, T: GraphLabel> BuilderMut for PLGBuilderMut<'a, T> { type Label = T; type Graph = PLGraph; type ResultBuilder<'b> = PLGBuilderMut<'b, T> where Self::Label: 'b; #[inline] fn from_graph_mut(graph: &mut Self::Graph) -> Self::ResultBuilder<'_> { PLGBuilderMut { graph } } #[inline] fn add_vertex(&mut self, label: Self::Label) -> usize { if let Some(old) = self.graph.label_index_map.get(&label) { *old } else { let new_node = PLNode::new(Default::default(), Default::default(), label); self.graph.nodes.push(new_node); let new_index = self.graph.nodes.len() - 1; self.graph.label_index_map.insert(label, new_index); new_index } } #[inline] fn add_edge(&mut self, source: usize, target: usize, _label: Self::Label) -> Result<(), Error> { if self.graph.has_edge(source, target)? { return Ok(()); } // The validity of source and target is guaranteed now. let parent_child_index = self.graph.degree(source).unwrap(); self.graph .nodes .get_mut(source) .unwrap() .children .insert(target); self.graph .nodes .get_mut(target) .unwrap() .parents .insert(source, parent_child_index); Ok(()) } /// Remove an edge from the source to the target. /// /// Since some graphs are labelled, the users are allowed to pass /// a predicate to determine if an edge from the source to the /// target should be removed. /// /// # Performance /// /// Removal is slow since we need to keep the order of the elements. fn remove_edge(&mut self, source: usize, target: usize, _predicate: F) -> Result<(), Error> where F: FnMut(Self::Label) -> bool, { if !self.graph.has_edge(source, target)? { return Ok(()); } // Both source and target are valid indices now. let child_index = *self .graph .nodes .get(target) .unwrap() .parents .get(&source) .unwrap(); let source_degree = self.graph.degree(source).unwrap(); // Decrement the relevant children's parents index. for i in (child_index + 1)..source_degree { let child = *self .graph .nodes .get(source) .unwrap() .children .children .get(i) .unwrap(); *self .graph .nodes .get_mut(child) .unwrap() .parents .get_mut(&source) .unwrap() -= 1; } self.graph .nodes .get_mut(source) .unwrap() .children .remove(target); self.graph .nodes .get_mut(target) .unwrap() .parents .remove(&source); Ok(()) } #[inline] fn set_label(&mut self, node_id: usize, label: Self::Label) -> Result<(), Error> { if !self.graph.has_node(node_id) { return Err(Error::IndexOutOfBounds(node_id, self.nodes_len())); } // node_id is now guaranteed to be valid. let old_label = self.graph.nodes.get(node_id).unwrap().label; self.graph.nodes.get_mut(node_id).unwrap().label = label; self.graph.label_index_map.remove(&old_label); self.graph.label_index_map.insert(label, node_id); Ok(()) } } #[cfg(test)] mod binary_test { use super::*; use std::collections::HashSet as Set; macro_rules! set { ($type:tt) => { Set::<$type>::default() }; ($($num:expr),*) => { { let mut set: Set<_> = Set::default(); $(set.insert($num);)* set } }; } #[test] fn test_graph_apis() -> Result<(), Error> { let mut graph: PLGraph = Default::default(); // testing empty graph assert!(graph.is_empty()); let mut builder = PLGBuilderMut::from_graph_mut(&mut graph); // testing adding an empty node assert_eq!(builder.add_vertex(0), 0); // testing nodes_len assert_eq!(graph.nodes_len(), 1); let mut builder = PLGBuilderMut::from_graph_mut(&mut graph); // testing more vertices and edges builder.add_vertex(1); builder.add_vertex(2); builder.add_vertex(3); builder.add_vertex(4); builder.add_vertex(5); // These labels are not used on edges: they are just place // holders. builder.add_edge(1, 0, 0)?; builder.add_edge(2, 0, 0)?; builder.add_edge(2, 1, 0)?; builder.add_edge(3, 0, 0)?; builder.add_edge(3, 2, 0)?; builder.add_edge(4, 1, 0)?; builder.add_edge(4, 2, 0)?; builder.add_edge(5, 2, 0)?; builder.add_edge(5, 3, 0)?; builder.add_edge(5, 1, 0)?; // testing adding a duplicatedly labelled node assert_eq!(builder.add_vertex(0), 0); // ensuring the correct length assert_eq!(builder.nodes_len(), 6); // testing children_of assert_eq!(builder.children_of(5)?.collect::>(), set!(1, 3, 2)); // testing parents_of assert_eq!( builder.parents_of(0)?.collect::>(), set!(Parent::new(1, 0), Parent::new(2, 0), Parent::new(3, 0)) ); assert_eq!( builder.parents_of(1)?.collect::>(), set!(Parent::new(2, 1), Parent::new(4, 0), Parent::new(5, 2)) ); assert_eq!(builder.parents_of(5)?.len(), 0); // testing degree assert_eq!(builder.degree(4)?, 2); // testing is_empty_node assert!(builder.is_empty_node(0)?); assert!(!builder.is_empty_node(1)?); // testing has_edge assert!(builder.has_edge(3, 2)?); assert!(!builder.has_edge(3, 1)?); assert!(matches!( builder.has_edge(3, 6), Err(Error::IndexOutOfBounds(6, 6)) )); // testing redirect builder.redirect(5, 2, 0)?; assert_eq!(builder.children_of(5)?.collect::>(), set!(0, 3, 2)); assert_eq!( builder.parents_of(0)?.collect::>(), set!( Parent::new(1, 0), Parent::new(2, 0), Parent::new(3, 0), Parent::new(5, 2) ) ); builder.redirect(5, 0, 1)?; assert_eq!(builder.children_of(5)?.collect::>(), set!(1, 0, 3)); assert_eq!( builder.parents_of(1)?.collect::>(), set!(Parent::new(2, 1), Parent::new(4, 0), Parent::new(5, 0)) ); builder.redirect(5, 0, 1)?; // should be no-op assert_eq!(builder.children_of(5)?.collect::>(), set!(1, 0, 3)); assert_eq!( builder.parents_of(1)?.collect::>(), set!(Parent::new(2, 1), Parent::new(4, 0), Parent::new(5, 0)) ); Ok(()) } } #[cfg(test)] mod test_plgraph_builder { use super::*; #[test] fn test_builder() -> Result<(), Box> { let mut graph = PLGraph::::default(); let mut builder = PLGBuilderMut::from_graph_mut(&mut graph); // Add five nodes builder.add_vertex(0); builder.add_vertex(1); builder.add_vertex(2); builder.add_vertex(3); builder.add_vertex(4); // println!("five empty nodes: {builder:?}"); // Link each node to its successor and link the last node with // the first one to form a cycle. for i in 0..5 { builder.add_edge(i, if i < 4 { i + 1 } else { 0 }, 0)?; } // println!("a cycle of five nodes: {builder:?}"); // Remove the link from the last node to the first node. builder.remove_edge(4, 0, |_| true)?; // println!("a line of five nodes: {builder:?}"); // build a graph let graph = graph; println!("final graph: {graph:?}"); Ok(()) } #[test] fn test_errors() -> Result<(), Box> { let mut graph = PLGraph::::default(); let mut builder = PLGBuilderMut::from_graph_mut(&mut graph); // Add five nodes builder.add_vertex(0); builder.add_vertex(1); builder.add_vertex(2); builder.add_vertex(3); builder.add_vertex(4); // println!("five empty nodes: {builder:?}"); // Errors in add_edge // println!(); // println!("Testing errors in add_edge:"); // println!(); assert!(matches!( builder.add_edge(0, 5, 0), Err(Error::IndexOutOfBounds(5, 5)) )); // println!("Right error for an index out of bounds as the target"); assert!(matches!( builder.add_edge(10, 5, 0), Err(Error::IndexOutOfBounds(10, 5)) )); // println!("Right error for an index out of bounds as the source"); assert!(matches!( builder.add_edge(10, 50, 0), Err(Error::IndexOutOfBounds(10, 5)) )); // println!("Right error for both indices out of bounds"); // Errors in remove_edge // println!(); // println!("Testing errors in remove_edge:"); // println!(); assert!(matches!( builder.remove_edge(0, 5, |_| true), Err(Error::IndexOutOfBounds(5, 5)) )); // println!("Right error for an index out of bounds as the target"); assert!(matches!( builder.remove_edge(10, 5, |_| true), Err(Error::IndexOutOfBounds(10, 5)) )); // println!("Right error for an index out of bounds as the source"); assert!(matches!( builder.remove_edge(10, 50, |_| true), Err(Error::IndexOutOfBounds(10, 5)) )); // println!("Right error for both indices out of bounds"); assert!(builder.remove_edge(0, 1, |_| true).is_ok()); // println!("No errors for removing a non-existing edge"); // println!(); // source out of bounds assert!(matches!( builder.redirect(5, 0, 0), Err(Error::IndexOutOfBounds(5, 5)) )); // child_index out of bounds assert!(matches!( builder.redirect(4, 0, 0), Err(Error::IndexOutOfBounds(0, 0)) )); // new_child out of bounds assert!(matches!( builder.redirect(4, 0, 10), Err(Error::IndexOutOfBounds(10, 5)) )); // println!("Correct errors when redirecting"); // println!(); let graph = graph; println!("final graph: {graph:?}"); Ok(()) } }