From 7dd4935230e303aef8d295d992239d59d95b32d7 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Thu, 5 Jan 2023 10:24:39 +0800 Subject: singly labelled graphs Now I have a new type of labelled graphs, which can index vertices by labels, but not index edges by labels. The biggest difference is that I do not have to keep a hashmap of edge targets by labels, and I do not have to guard against the duplication of nodes with the same set of edges. I guard against nodes with the same label, though. Also, in this graph, both vertices and edges have one label at a time, whereas in the previous labelled graph there can be a multitude of edges between the same source and target nodes, but with different labels. Now it remains to test this type of graphs, and to think through how we attach forest fragments to nondeterministic finite automata edges, and how to join forest fragments together while skipping nullable edges, in order to finish the "compilation" part. --- chain/src/plan.org | 21 +- forest/src/default.rs | 4 +- grammar/src/lib.rs | 2 +- graph/src/builder.rs | 37 ++ graph/src/error.rs | 17 +- graph/src/labelled.rs | 879 ------------------------------------------- graph/src/labelled/double.rs | 876 ++++++++++++++++++++++++++++++++++++++++++ graph/src/labelled/mod.rs | 20 + graph/src/labelled/single.rs | 343 +++++++++++++++++ graph/src/lib.rs | 11 +- 10 files changed, 1315 insertions(+), 895 deletions(-) delete mode 100644 graph/src/labelled.rs create mode 100644 graph/src/labelled/double.rs create mode 100644 graph/src/labelled/mod.rs create mode 100644 graph/src/labelled/single.rs diff --git a/chain/src/plan.org b/chain/src/plan.org index 61738a9..75acf30 100644 --- a/chain/src/plan.org +++ b/chain/src/plan.org @@ -7,7 +7,7 @@ - [X] Implement builders for graphs - [X] Find sets of the first terminals for each non-terminal, in the grammar module. -- [-] Establish a testing framework of various grammars. [5/6] +- [-] Collect various grammars for testing. [5/6] + [X] One simple grammar + [X] Notes + [X] Parentheses @@ -38,7 +38,11 @@ finite automata. * [X] Test the removal of dead states, where a state is dead if and only if no other states have an edge into that state. -- [ ] Refactor [0/3] +- [-] Refactor [2/5] + + [X] Implement a data type for graphs with labels on vertices and + edges, but do not need to index edges by labels, which can index + vertices by labels instead. + + [X] Implement a builder that borrows a graph mutably. + [ ] When we pull in some regular language because of the left-linear expansion, we need to mark that branch as coming from left-linear expansions. This is necessary because we cannot @@ -62,13 +66,12 @@ + [X] First define a trait with the expected behaviours. + [ ] Then implement them as doubly labelled graphs. + [ ] Thenimplement finding derivatives by use of the chain rule. -- [-] Implement forests [0/2] - + [-] Define a trait with the expected behaviours. - + [ ] Implement them using adjacency list: we only need one label - per edge, and we do not wish to exclude duplicate edges, and we do - not need to index edges by the labels. All in all, the labels can - be implemented by a simple hash map that maps edges to labels, so - a special data type is not needed here. +- [-] Implement forests [1/2] + + [X] Define a trait with the expected behaviours. + + [-] Implement them using adjacency map: we only need one label per + edge, and we do not wish to exclude duplicate edges, and we do not + need to index edges by the labels. All in all, we implement them + using a vector of hashmaps. - [ ] Implement semiring. [0/5] + [ ] Define the trait. + [ ] Implement the boolean semiring. diff --git a/forest/src/default.rs b/forest/src/default.rs index 36145f4..456413b 100644 --- a/forest/src/default.rs +++ b/forest/src/default.rs @@ -122,8 +122,8 @@ impl Forest } fn plug(&mut self, other: &Self) -> Result<(), Error> { - // PLAN: Clone the `other` forest to `self`, adjust the - // indices for edges, and then add edges between plugs. + // PLAN: Produce a BuilderMut, adjust the indices for edges, + // and then add edges between plugs. // // Since I cannot touch the underlying nodes directly, I have // to add the nodes and edges individually. diff --git a/grammar/src/lib.rs b/grammar/src/lib.rs index 55adc9a..4e544c9 100644 --- a/grammar/src/lib.rs +++ b/grammar/src/lib.rs @@ -8,7 +8,7 @@ // words, the current focus is not on the optimisations, whereas // scanners are for optimisations only, so to speak. -// TODO: Separate contents into modules. +// REVIEW: Separate contents into modules. use nfa::{ default::{ diff --git a/graph/src/builder.rs b/graph/src/builder.rs index 9ab5895..4a480a5 100644 --- a/graph/src/builder.rs +++ b/graph/src/builder.rs @@ -54,3 +54,40 @@ pub trait Builder: Default { /// still be used later on. fn build_ref(&self) -> Self::Result; } + +/// The type of builders that actually reference the underlying graph +/// instead of owe it. +/// +/// To finish the task of the builder, just do not use it anymore. +/// The building happens right when the building methods are called. +pub trait BuilderMut { + /// Some graphs are labelled. This type should be the type of the + /// labels. + type Label; + + /// The type of the underlying graph. + type Graph: Graph; + + /// The type of the builder from a borrow. + type ResultBuilder<'a>: BuilderMut + where + Self::Label: 'a; + + /// Borrow a graph to create a builder without copying. + fn from_graph_mut(graph: &mut Self::Graph) -> Self::ResultBuilder<'_>; + + /// Add a new vertex. + fn add_vertex(&mut self, label: Self::Label) -> Result; + + /// Add an edge from the source to the target. + fn add_edge(&mut self, source: usize, target: usize, label: Self::Label) -> Result<(), Error>; + + /// Remove an edge from the source to the target. + /// + /// Since some graphs are labelled, the users are allowed to pass + /// a predicate to determine if an edge from the source to the + /// target should be removed. + fn remove_edge(&mut self, source: usize, target: usize, predicate: F) -> Result<(), Error> + where + F: Fn(Self::Label) -> bool; +} diff --git a/graph/src/error.rs b/graph/src/error.rs index 3600005..bf2714b 100644 --- a/graph/src/error.rs +++ b/graph/src/error.rs @@ -13,8 +13,11 @@ pub enum Error { /// the second component is the current length of nodes. IndexOutOfBounds(usize, usize), /// The graph does not permit duplicate nodes but encounters a - /// repeated node - DuplicatedNode, + /// repeated node. + DuplicatedNode(usize), + /// The graph does not permit duplicate edges but encounters a + /// repeated edge. + DuplicatedEdge(usize, usize), } impl Display for Error { @@ -23,7 +26,15 @@ impl Display for Error { Error::IndexOutOfBounds(index, len) => { write!(f, "index {index} out of bounds {len} ") } - Error::DuplicatedNode => write!(f, "No duplicate nodes permitted, but found one"), + Error::DuplicatedNode(node) => { + write!(f, "No duplicate nodes permitted, but found one: {node}") + } + Error::DuplicatedEdge(source, target) => { + write!( + f, + "No duplicate edges permitted, but found one from {source} to {target}" + ) + } } } } diff --git a/graph/src/labelled.rs b/graph/src/labelled.rs deleted file mode 100644 index 6341787..0000000 --- a/graph/src/labelled.rs +++ /dev/null @@ -1,879 +0,0 @@ -#![warn(missing_docs)] -//! This file implements a labelled graph. See the -//! [trait][super::LabelGraph] for details. -//! -//! Since the method -//! [`find_children_with_label`][super::LabelGraph::find_children_with_label] -//! needs to be implemented efficiently, we store the mappings between -//! labels and edges in both directions. - -use std::ops::{Deref, DerefMut}; - -use crate::{builder::Builder, error::Error, Graph, GraphLabel, LabelExtGraph, LabelGraph}; - -// We use BTreeMap and BTreeSet here as we need to exclude duplicate -// edge sets, while an ordinary hashmap and hashset do not allow -// hashing. -use std::collections::{ - btree_map::{Iter as MapIter, Keys}, - btree_set::Iter, - BTreeMap as Map, BTreeSet as Set, HashMap as HMap, -}; - -#[derive(Debug, Clone)] -struct DLNode { - by_target: Map>, - by_label: Map>, - flat: Vec<(T, usize)>, -} - -impl Default for DLNode { - fn default() -> Self { - Self { - by_target: Default::default(), - by_label: Default::default(), - flat: Default::default(), - } - } -} - -impl DLNode { - fn new( - by_target: Map>, - by_label: Map>, - flat: Vec<(T, usize)>, - ) -> Self { - Self { - by_target, - by_label, - flat, - } - } -} - -/// Mapping a set of edges to an index of node. -type EdgeMap = HMap, usize>; - -/// Double direction Labelled Graph. -/// -/// Each node is supposed to have a unique edge set. Constructing -/// methods such as from the trait -/// [`LabelExtGraph`][super::LabelExtGraph] already handles the -/// elimination of duplication. -#[derive(Debug, Clone)] -pub struct DLGraph { - nodes: Vec>, - edges_table: EdgeMap, -} - -impl DLGraph { - #[inline] - /// Return an empty graph. - pub fn new() -> Self { - Self { - nodes: Vec::new(), - edges_table: HMap::default(), - } - } - - /// Return a builder. - #[inline] - pub fn builder(self) -> DLGBuilder { - DLGBuilder(self) - } - - /// Return a builder from a reference. - #[inline] - pub fn builder_ref(&self) -> DLGBuilder { - DLGBuilder(self.clone()) - } -} - -impl Default for DLGraph { - #[inline] - fn default() -> Self { - Self::new() - } -} - -impl Graph for DLGraph { - // Not using a boxed pointer is supposed to save some allocations. - type Iter<'a> = std::iter::Copied>> where T: 'a; - - #[inline] - fn is_empty(&self) -> bool { - self.nodes.is_empty() - } - - #[inline] - fn nodes_len(&self) -> usize { - self.nodes.len() - } - - #[inline] - fn children_of(&self, node_id: usize) -> Result, Error> { - match self.nodes.get(node_id) { - Some(node) => Ok(node.by_target.keys().copied()), - None => Err(Error::IndexOutOfBounds(node_id, self.nodes.len())), - } - } - - #[inline] - /// Return the number of "children" of a node, or an error if the - /// node is not a member of the graph. - /// - /// This counts edges with different labels as different edges. - fn degree(&self, node_id: usize) -> Result { - self.nodes - .get(node_id) - .ok_or(Error::IndexOutOfBounds(node_id, self.nodes.len())) - .map(|node| node.flat.len()) - } - - #[inline] - fn is_empty_node(&self, node_id: usize) -> Result { - self.nodes - .get(node_id) - .ok_or(Error::IndexOutOfBounds(node_id, self.nodes.len())) - .map(|node| node.flat.is_empty()) - } - - fn has_edge(&self, source: usize, target: usize) -> Result { - match self.nodes.get(source) { - Some(source_node) => { - if self.nodes.get(target).is_none() { - Err(Error::IndexOutOfBounds(target, self.nodes.len())) - } else { - Ok(source_node.by_target.contains_key(&target) - && !source_node.by_target.get(&target).unwrap().is_empty()) - } - } - None => Err(Error::IndexOutOfBounds(source, self.nodes.len())), - } - } - - fn print_viz(&self, filename: &str) -> Result<(), std::io::Error> { - let preamble = "digraph nfa { - fontname=\"Helvetica,Arial,sans-serif\" - node [fontname=\"Helvetica,Arial,sans-serif\"] - edge [fontname=\"Helvetica,Arial,sans-serif\"] - rankdir=LR;\n"; - - let mut post = String::new(); - - use std::fmt::Write as FWrite; - - for (source, target) in self.edges() { - for label in self.edge_label(source, target).unwrap() { - let _ = writeln!(post, " {source} -> {target} [label = \"{label}\"]"); - } - } - - post.push_str("}\n"); - - let result = format!("{preamble}{post}"); - - if std::fs::metadata(filename).is_ok() { - std::fs::remove_file(filename)?; - } - - let mut file = std::fs::File::options() - .write(true) - .create(true) - .open(filename)?; - - use std::io::Write; - - file.write_all(result.as_bytes()) - } - - #[inline] - fn replace_by_builder(&mut self, builder: impl Builder) { - let new_graph = builder.build(); - - self.nodes = new_graph.nodes; - self.edges_table = new_graph.edges_table; - } -} - -/// A delegation of iterators. -/// -/// This is used to avoid a boxed pointer to an iterator. -#[derive(Default, Debug, Clone)] -pub struct LabelIndexIter<'a> { - iter: Option>>, -} - -impl<'a> Iterator for LabelIndexIter<'a> { - type Item = usize; - - #[inline] - fn next(&mut self) -> Option { - self.iter.as_mut().and_then(|iterator| iterator.next()) - } - - #[inline] - fn size_hint(&self) -> (usize, Option) { - match &self.iter { - Some(iter) => iter.size_hint(), - None => (0, Some(0)), - } - } -} - -impl<'a> ExactSizeIterator for LabelIndexIter<'a> { - #[inline] - fn len(&self) -> usize { - match &self.iter { - Some(iter) => iter.len(), - None => 0, - } - } -} - -impl<'a> LabelIndexIter<'a> { - fn new(iter: std::iter::Copied>) -> Self { - let iter = Some(iter); - Self { iter } - } -} - -// A convenience method -impl<'a> From<&'a Set> for LabelIndexIter<'a> { - fn from(set: &'a Set) -> Self { - Self::new(set.iter().copied()) - } -} - -#[derive(Debug)] -/// A delegation of iterators. -/// -/// This is used to avoid a boxed pointer to an iterator. -pub struct LabelIter<'a, T> { - iter: MapIter<'a, T, Set>, -} - -impl<'a, T> ExactSizeIterator for LabelIter<'a, T> { - #[inline] - fn len(&self) -> usize { - self.iter.len() - } -} - -impl<'a, T> LabelIter<'a, T> { - fn new(iter: MapIter<'a, T, Set>) -> Self { - Self { iter } - } -} - -impl<'a, T> Iterator for LabelIter<'a, T> { - type Item = (&'a T, LabelIndexIter<'a>); - - #[inline] - fn next(&mut self) -> Option { - self.iter.next().map(|(label, set)| (label, set.into())) - } - - #[inline] - fn size_hint(&self) -> (usize, Option) { - self.iter.size_hint() - } -} - -/// This is used to avoid a boxed pointer to an iterator. -#[derive(Debug)] -pub struct EdgeLabelIter<'a, T> { - iter: Option>, -} - -impl<'a, T> Default for EdgeLabelIter<'a, T> { - #[inline] - fn default() -> Self { - Self { iter: None } - } -} - -impl<'a, T: Copy + Clone> ExactSizeIterator for EdgeLabelIter<'a, T> { - #[inline] - fn len(&self) -> usize { - if let Some(iter) = &self.iter { - iter.len() - } else { - 0 - } - } -} - -impl<'a, T: Copy + Clone> EdgeLabelIter<'a, T> { - fn new(iter: Iter<'a, T>) -> Self { - Self { iter: Some(iter) } - } -} - -impl<'a, T: Copy + Clone> Iterator for EdgeLabelIter<'a, T> { - type Item = T; - - #[inline] - fn next(&mut self) -> Option { - if let Some(iter) = &mut self.iter { - iter.next().copied() - } else { - None - } - } - - #[inline] - fn size_hint(&self) -> (usize, Option) { - if let Some(iter) = &self.iter { - iter.size_hint() - } else { - (0, Some(0)) - } - } -} - -impl LabelGraph for DLGraph { - type Iter<'a> = LabelIndexIter<'a> where T: 'a; - - type LabelIter<'a> = LabelIter<'a,T> where T: 'a; - - type EdgeLabelIter<'a> = EdgeLabelIter<'a,T> - where - Self: 'a, - T: 'a + Copy + Clone; - - fn edge_label(&self, source: usize, target: usize) -> Result, Error> { - if self.has_edge(source, target)? { - Ok(EdgeLabelIter::new( - self.nodes - .get(source) - .unwrap() - .by_target - .get(&target) - .unwrap() - .iter(), - )) - } else { - Ok(Default::default()) - } - } - - fn find_children_with_label( - &self, - node_id: usize, - label: &T, - ) -> Result<>::Iter<'_>, Error> { - match self - .nodes - .get(node_id) - .ok_or(Error::IndexOutOfBounds(node_id, self.nodes.len()))? - .by_label - .get(label) - { - Some(set) => Ok(set.into()), - None => Ok(Default::default()), - } - } - - #[inline] - fn labels_of(&self, node_id: usize) -> Result, Error> { - match self.nodes.get(node_id) { - Some(node) => Ok(Self::LabelIter::new(node.by_label.iter())), - None => Err(Error::IndexOutOfBounds(node_id, self.nodes.len())), - } - } - - fn has_edge_label(&self, node_id: usize, label: &T, target: usize) -> Result { - let nodes_len = self.nodes.len(); - - match self.nodes.get(node_id) { - Some(node) => { - if target >= nodes_len { - return Err(Error::IndexOutOfBounds(target, nodes_len)); - } - - Ok(node - .by_target - .get(&target) - .map(|labels| labels.contains(label)) - .unwrap_or(false)) - } - None => Err(Error::IndexOutOfBounds(node_id, nodes_len)), - } - } -} - -impl LabelExtGraph for DLGraph { - fn extend(&mut self, edges: impl IntoIterator) -> Result { - let mut by_target: Map> = Map::default(); - let mut by_label: Map> = Map::default(); - let mut flat = Vec::new(); - let mut edges_set = Set::new(); - - for (label, to) in edges { - if !self.has_node(to) { - return Err(Error::IndexOutOfBounds(to, self.nodes.len())); - } - - edges_set.insert((label, to)); - - if let Some(set) = by_target.get(&to) { - if !set.contains(&label) { - flat.push((label, to)); - by_target.get_mut(&to).unwrap().insert(label); - by_label - .entry(label) - .or_insert_with(Default::default) - .insert(to); - } - } else { - flat.push((label, to)); - by_target - .entry(to) - .or_insert_with(Default::default) - .insert(label); - by_label - .entry(label) - .or_insert_with(Default::default) - .insert(to); - } - } - - match self.edges_table.get(&edges_set) { - Some(old_index) => Ok(*old_index), - None => { - let new_node = DLNode::new(by_target, by_label, flat); - let new_index = self.nodes_len(); - - self.edges_table.insert(edges_set, new_index); - - self.nodes.push(new_node); - - Ok(new_index) - } - } - } -} - -// Builder for this implementation of graph - -/// A builder for labelled adjacency doubly linked graphs. -#[derive(Debug, Clone)] -pub struct DLGBuilder(DLGraph); - -impl Default for DLGBuilder { - fn default() -> Self { - Self(Default::default()) - } -} - -impl Deref for DLGBuilder { - type Target = DLGraph; - - fn deref(&self) -> &Self::Target { - &self.0 - } -} - -impl DerefMut for DLGBuilder { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.0 - } -} - -impl Builder for DLGBuilder { - type Label = T; - - type Result = DLGraph; - - #[inline] - fn with_capacity(size: usize) -> Self { - Self(DLGraph { - nodes: Vec::with_capacity(size), - edges_table: Default::default(), - }) - } - - #[inline] - fn add_vertex(&mut self) -> usize { - self.nodes.push(DLNode::default()); - self.nodes.len() - 1 - } - - #[inline] - fn add_vertices(&mut self, n: usize) { - self.nodes - .extend(std::iter::repeat_with(Default::default).take(n)); - } - - fn add_edge(&mut self, source: usize, target: usize, label: Self::Label) -> Result<(), Error> { - let nodes_len = self.nodes.len(); - - let source_node = self - .nodes - .get_mut(source) - .ok_or(Error::IndexOutOfBounds(source, nodes_len))?; - - if !(0..nodes_len).contains(&target) { - return Err(Error::IndexOutOfBounds(target, nodes_len)); - } - - let new_edge_p = !matches!( - source_node - .by_target - .get(&target) - .map(|set| set.contains(&label)), - Some(true) - ); - - if new_edge_p { - let old_children_set: Set<(T, usize)> = source_node.flat.iter().copied().collect(); - - source_node - .by_target - .entry(target) - .or_insert_with(Set::default) - .insert(label); - - source_node - .by_label - .entry(label) - .or_insert_with(Set::default) - .insert(target); - - source_node.flat.push((label, target)); - - let new_children_set: Set<(T, usize)> = source_node.flat.iter().copied().collect(); - - // When source_node is in use, we cannot borrow self - // mutably again, so we move the following two statements - // to after all uses of source_node. - - self.edges_table.remove(&old_children_set); - - self.edges_table.insert(new_children_set, source); - } - - Ok(()) - } - - fn remove_edge(&mut self, source: usize, target: usize, predicate: F) -> Result<(), Error> - where - F: Fn(Self::Label) -> bool, - { - let nodes_len = self.nodes.len(); - - let source_node = self - .nodes - .get_mut(source) - .ok_or(Error::IndexOutOfBounds(source, nodes_len))?; - - if !(0..nodes_len).contains(&target) { - return Err(Error::IndexOutOfBounds(target, nodes_len)); - } - - if let Some(target_labels_set) = source_node.by_target.get(&target) { - let mut to_remove = Vec::new(); - - for label in target_labels_set.iter() { - if predicate(*label) { - to_remove.push(*label); - } - } - - if !to_remove.is_empty() { - let old_children_set: Set<(T, usize)> = source_node.flat.iter().copied().collect(); - - for label in to_remove.iter().copied() { - // This must be "Some", as the outer "if" checks - source_node - .by_target - .get_mut(&target) - .map(|set| set.remove(&label)); - - source_node - .by_label - .get_mut(&label) - .map(|set| set.remove(&target)); - - source_node.flat.retain(|(child_label, child_target)| { - (*child_label, *child_target) != (label, target) - }); - } - - // If after removal no labels remain for the target, - // we remove the edge entirely, to avoid false empty - // edges. - - if let Some(set) = source_node.by_target.get(&target) { - if set.is_empty() { - source_node.by_target.remove(&target); - } - } - - for label in to_remove.iter() { - if let Some(set) = source_node.by_label.get(label) { - if set.is_empty() { - source_node.by_label.remove(label); - } - } - } - - // if source_node.flat.is_empty() { - // source_node.by_target.remove(&target); - - // for label in to_remove.iter() { - // source_node.by_label.remove(label); - // } - // } - - let new_children_set: Set<(T, usize)> = source_node.flat.iter().copied().collect(); - - // When source_node is in use, we cannot borrow self - // mutably again, so we move the following two - // statements to after all uses of source_node. - - self.edges_table.remove(&old_children_set); - - self.edges_table.insert(new_children_set, source); - } - } - - Ok(()) - } - - fn build_ref(&self) -> Self::Result { - self.0.clone() - } - - fn build(self) -> Self::Result { - self.0 - } -} - -#[cfg(test)] -mod label_test { - use super::*; - - macro_rules! set { - () => { Set::::default() }; - ($($num:literal),*) => { - { - let mut set: Set = Set::default(); - $(set.insert($num);)* - set - } - }; - } - - macro_rules! map { - () => { Map::>::default() }; - ($(($key:literal, $value:expr)),*) => { - { - let mut map: Map> = Map::default(); - $(map.insert($key, $value);)* - map - } - }; - } - - #[test] - fn test_graph_apis() -> Result<(), Error> { - let mut graph: DLGraph = Default::default(); - - // testing empty graph - assert!(graph.is_empty()); - - // testing adding an empty node - assert_eq!(graph.extend(std::iter::empty())?, 0); - - // testing nodes_len - assert_eq!(graph.nodes_len(), 1); - - // testing extension - - assert_eq!(graph.extend([(0, 0)].iter().copied())?, 1); - assert_eq!(graph.extend([(1, 0), (1, 1)].iter().copied())?, 2); - assert_eq!(graph.extend([(3, 0), (3, 2)].iter().copied())?, 3); - assert_eq!(graph.extend([(1, 1), (1, 2)].iter().copied())?, 4); - assert_eq!(graph.extend([(2, 1), (3, 2), (2, 3)].iter().copied())?, 5); - - // testing adding a duplicated edge set - assert_eq!(graph.extend([(2, 1), (2, 3), (3, 2)].iter().copied())?, 5); - assert_eq!(graph.extend([(3, 2), (3, 0)].iter().copied())?, 3); - - let graph = graph; - - // ensuring the correct length - assert_eq!(graph.nodes_len(), 6); - - // testing children_of - assert_eq!(graph.children_of(5)?.collect::>(), set!(1, 3, 2)); - - // testing find_children_with_label - assert_eq!( - graph.find_children_with_label(5, &2)?.collect::>(), - set!(1, 3) - ); - - // testing edge_label - assert_eq!(graph.edge_label(5, 2)?.collect::>(), set!(3)); - assert!(matches!( - graph.edge_label(6, 2), - Err(Error::IndexOutOfBounds(6, 6)) - )); - - // testing degree - assert_eq!(graph.degree(4)?, 2); - - // testing is_empty_node - assert!(graph.is_empty_node(0)?); - assert!(!graph.is_empty_node(1)?); - - // testing has_edge - assert!(graph.has_edge(3, 2)?); - assert!(!graph.has_edge(3, 1)?); - assert!(matches!( - graph.has_edge(3, 6), - Err(Error::IndexOutOfBounds(6, 6)) - )); - - // testing labels_of - let mut label_map: Map> = Map::default(); - - for (label, children) in graph.labels_of(5)? { - label_map.insert(*label, children.collect()); - } - - let compare_map = map!((2, set!(1, 3)), (3, set!(2))); - - assert_eq!(label_map, compare_map); - - assert!(matches!( - graph.labels_of(6), - Err(Error::IndexOutOfBounds(6, 6)) - )); - - Ok(()) - } -} - -#[cfg(test)] -mod test_dlgraph_builder { - use super::*; - - #[test] - fn test_builder() -> Result<(), Box> { - let mut builder = DLGBuilder::::default(); - - // Add five nodes - builder.add_vertex(); - builder.add_vertex(); - builder.add_vertex(); - builder.add_vertex(); - builder.add_vertex(); - - println!("five empty nodes: {builder:?}"); - - // Link each node to its successor and link the last node with - // the first one to form a cycle. - for i in 0..5 { - builder.add_edge(i, if i < 4 { i + 1 } else { 0 }, i)?; - } - - println!("a cycle of five nodes: {builder:?}"); - - // Remove the link from the last node to the first node. - builder.remove_edge(4, 0, |_| true)?; - - println!("a line of five nodes: {builder:?}"); - - // build a graph - - let graph = builder.build(); - - println!("final graph: {graph:?}"); - - Ok(()) - } - - #[test] - fn test_errors() -> Result<(), Box> { - let mut builder = DLGBuilder::default(); - - // Add five nodes - builder.add_vertex(); - builder.add_vertex(); - builder.add_vertex(); - builder.add_vertex(); - builder.add_vertex(); - - println!("five empty nodes: {builder:?}"); - - // Errors in add_edge - - println!(); - println!("Testing errors in add_edge:"); - println!(); - - assert!(matches!( - builder.add_edge(0, 5, 0), - Err(Error::IndexOutOfBounds(5, 5)) - )); - - println!("Right error for an index out of bounds as the target"); - - assert!(matches!( - builder.add_edge(10, 5, 0), - Err(Error::IndexOutOfBounds(10, 5)) - )); - - println!("Right error for an index out of bounds as the source"); - - assert!(matches!( - builder.add_edge(10, 50, 0), - Err(Error::IndexOutOfBounds(10, 5)) - )); - - println!("Right error for both indices out of bounds"); - - // Errors in remove_edge - - println!(); - println!("Testing errors in remove_edge:"); - println!(); - - assert!(matches!( - builder.remove_edge(0, 5, |_| true), - Err(Error::IndexOutOfBounds(5, 5)) - )); - - println!("Right error for an index out of bounds as the target"); - - assert!(matches!( - builder.remove_edge(10, 5, |_| true), - Err(Error::IndexOutOfBounds(10, 5)) - )); - - println!("Right error for an index out of bounds as the source"); - - assert!(matches!( - builder.remove_edge(10, 50, |_| true), - Err(Error::IndexOutOfBounds(10, 5)) - )); - - println!("Right error for both indices out of bounds"); - - assert!(matches!(builder.remove_edge(0, 1, |_| true), Ok(()),)); - - println!("No errors for removing a non-existing edge"); - - println!(); - - let graph = builder.build(); - - println!("final graph: {graph:?}"); - - Ok(()) - } -} diff --git a/graph/src/labelled/double.rs b/graph/src/labelled/double.rs new file mode 100644 index 0000000..53b5dc8 --- /dev/null +++ b/graph/src/labelled/double.rs @@ -0,0 +1,876 @@ +//! This file implements a labelled graph that can index edges by +//! labels. +//! +//! Since the method +//! [`find_children_with_label`][crate::LabelGraph::find_children_with_label] +//! needs to be implemented efficiently, we store the mappings between +//! labels and edges in both directions. + +use super::*; + +// We use BTreeMap and BTreeSet here as we need to exclude duplicate +// edge sets, while an ordinary hashmap and hashset do not allow +// hashing. +use std::collections::{ + btree_map::{Iter as MapIter, Keys}, + btree_set::Iter, + BTreeMap as Map, BTreeSet as Set, HashMap as HMap, +}; + +#[derive(Debug, Clone)] +struct DLNode { + by_target: Map>, + by_label: Map>, + flat: Vec<(T, usize)>, +} + +impl Default for DLNode { + fn default() -> Self { + Self { + by_target: Default::default(), + by_label: Default::default(), + flat: Default::default(), + } + } +} + +impl DLNode { + fn new( + by_target: Map>, + by_label: Map>, + flat: Vec<(T, usize)>, + ) -> Self { + Self { + by_target, + by_label, + flat, + } + } +} + +/// Mapping a set of edges to an index of node. +type EdgeMap = HMap, usize>; + +/// Double direction Labelled Graph. +/// +/// Each node is supposed to have a unique edge set. Constructing +/// methods such as from the trait +/// [`LabelExtGraph`][super::LabelExtGraph] already handles the +/// elimination of duplication. +#[derive(Debug, Clone)] +pub struct DLGraph { + nodes: Vec>, + edges_table: EdgeMap, +} + +impl DLGraph { + #[inline] + /// Return an empty graph. + pub fn new() -> Self { + Self { + nodes: Vec::new(), + edges_table: HMap::default(), + } + } + + /// Return a builder. + #[inline] + pub fn builder(self) -> DLGBuilder { + DLGBuilder(self) + } + + /// Return a builder from a reference. + #[inline] + pub fn builder_ref(&self) -> DLGBuilder { + DLGBuilder(self.clone()) + } +} + +impl Default for DLGraph { + #[inline] + fn default() -> Self { + Self::new() + } +} + +impl Graph for DLGraph { + // Not using a boxed pointer is supposed to save some allocations. + type Iter<'a> = std::iter::Copied>> where T: 'a; + + #[inline] + fn is_empty(&self) -> bool { + self.nodes.is_empty() + } + + #[inline] + fn nodes_len(&self) -> usize { + self.nodes.len() + } + + #[inline] + fn children_of(&self, node_id: usize) -> Result, Error> { + match self.nodes.get(node_id) { + Some(node) => Ok(node.by_target.keys().copied()), + None => Err(Error::IndexOutOfBounds(node_id, self.nodes.len())), + } + } + + #[inline] + /// Return the number of "children" of a node, or an error if the + /// node is not a member of the graph. + /// + /// This counts edges with different labels as different edges. + fn degree(&self, node_id: usize) -> Result { + self.nodes + .get(node_id) + .ok_or(Error::IndexOutOfBounds(node_id, self.nodes.len())) + .map(|node| node.flat.len()) + } + + #[inline] + fn is_empty_node(&self, node_id: usize) -> Result { + self.nodes + .get(node_id) + .ok_or(Error::IndexOutOfBounds(node_id, self.nodes.len())) + .map(|node| node.flat.is_empty()) + } + + fn has_edge(&self, source: usize, target: usize) -> Result { + match self.nodes.get(source) { + Some(source_node) => { + if self.nodes.get(target).is_none() { + Err(Error::IndexOutOfBounds(target, self.nodes.len())) + } else { + Ok(source_node.by_target.contains_key(&target) + && !source_node.by_target.get(&target).unwrap().is_empty()) + } + } + None => Err(Error::IndexOutOfBounds(source, self.nodes.len())), + } + } + + fn print_viz(&self, filename: &str) -> Result<(), std::io::Error> { + let preamble = "digraph nfa { + fontname=\"Helvetica,Arial,sans-serif\" + node [fontname=\"Helvetica,Arial,sans-serif\"] + edge [fontname=\"Helvetica,Arial,sans-serif\"] + rankdir=LR;\n"; + + let mut post = String::new(); + + use std::fmt::Write as FWrite; + + for (source, target) in self.edges() { + for label in self.edge_label(source, target).unwrap() { + let _ = writeln!(post, " {source} -> {target} [label = \"{label}\"]"); + } + } + + post.push_str("}\n"); + + let result = format!("{preamble}{post}"); + + if std::fs::metadata(filename).is_ok() { + std::fs::remove_file(filename)?; + } + + let mut file = std::fs::File::options() + .write(true) + .create(true) + .open(filename)?; + + use std::io::Write; + + file.write_all(result.as_bytes()) + } + + #[inline] + fn replace_by_builder(&mut self, builder: impl Builder) { + let new_graph = builder.build(); + + self.nodes = new_graph.nodes; + self.edges_table = new_graph.edges_table; + } +} + +/// A delegation of iterators. +/// +/// This is used to avoid a boxed pointer to an iterator. +#[derive(Default, Debug, Clone)] +pub struct LabelIndexIter<'a> { + iter: Option>>, +} + +impl<'a> Iterator for LabelIndexIter<'a> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + self.iter.as_mut().and_then(|iterator| iterator.next()) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + match &self.iter { + Some(iter) => iter.size_hint(), + None => (0, Some(0)), + } + } +} + +impl<'a> ExactSizeIterator for LabelIndexIter<'a> { + #[inline] + fn len(&self) -> usize { + match &self.iter { + Some(iter) => iter.len(), + None => 0, + } + } +} + +impl<'a> LabelIndexIter<'a> { + fn new(iter: std::iter::Copied>) -> Self { + let iter = Some(iter); + Self { iter } + } +} + +// A convenience method +impl<'a> From<&'a Set> for LabelIndexIter<'a> { + fn from(set: &'a Set) -> Self { + Self::new(set.iter().copied()) + } +} + +#[derive(Debug)] +/// A delegation of iterators. +/// +/// This is used to avoid a boxed pointer to an iterator. +pub struct LabelIter<'a, T> { + iter: MapIter<'a, T, Set>, +} + +impl<'a, T> ExactSizeIterator for LabelIter<'a, T> { + #[inline] + fn len(&self) -> usize { + self.iter.len() + } +} + +impl<'a, T> LabelIter<'a, T> { + fn new(iter: MapIter<'a, T, Set>) -> Self { + Self { iter } + } +} + +impl<'a, T> Iterator for LabelIter<'a, T> { + type Item = (&'a T, LabelIndexIter<'a>); + + #[inline] + fn next(&mut self) -> Option { + self.iter.next().map(|(label, set)| (label, set.into())) + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } +} + +/// This is used to avoid a boxed pointer to an iterator. +#[derive(Debug)] +pub struct EdgeLabelIter<'a, T> { + iter: Option>, +} + +impl<'a, T> Default for EdgeLabelIter<'a, T> { + #[inline] + fn default() -> Self { + Self { iter: None } + } +} + +impl<'a, T: Copy + Clone> ExactSizeIterator for EdgeLabelIter<'a, T> { + #[inline] + fn len(&self) -> usize { + if let Some(iter) = &self.iter { + iter.len() + } else { + 0 + } + } +} + +impl<'a, T: Copy + Clone> EdgeLabelIter<'a, T> { + fn new(iter: Iter<'a, T>) -> Self { + Self { iter: Some(iter) } + } +} + +impl<'a, T: Copy + Clone> Iterator for EdgeLabelIter<'a, T> { + type Item = T; + + #[inline] + fn next(&mut self) -> Option { + if let Some(iter) = &mut self.iter { + iter.next().copied() + } else { + None + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + if let Some(iter) = &self.iter { + iter.size_hint() + } else { + (0, Some(0)) + } + } +} + +impl LabelGraph for DLGraph { + type Iter<'a> = LabelIndexIter<'a> where T: 'a; + + type LabelIter<'a> = LabelIter<'a,T> where T: 'a; + + type EdgeLabelIter<'a> = EdgeLabelIter<'a,T> + where + Self: 'a, + T: 'a + Copy + Clone; + + fn edge_label(&self, source: usize, target: usize) -> Result, Error> { + if self.has_edge(source, target)? { + Ok(EdgeLabelIter::new( + self.nodes + .get(source) + .unwrap() + .by_target + .get(&target) + .unwrap() + .iter(), + )) + } else { + Ok(Default::default()) + } + } + + fn find_children_with_label( + &self, + node_id: usize, + label: &T, + ) -> Result<>::Iter<'_>, Error> { + match self + .nodes + .get(node_id) + .ok_or(Error::IndexOutOfBounds(node_id, self.nodes.len()))? + .by_label + .get(label) + { + Some(set) => Ok(set.into()), + None => Ok(Default::default()), + } + } + + #[inline] + fn labels_of(&self, node_id: usize) -> Result, Error> { + match self.nodes.get(node_id) { + Some(node) => Ok(Self::LabelIter::new(node.by_label.iter())), + None => Err(Error::IndexOutOfBounds(node_id, self.nodes.len())), + } + } + + fn has_edge_label(&self, node_id: usize, label: &T, target: usize) -> Result { + let nodes_len = self.nodes.len(); + + match self.nodes.get(node_id) { + Some(node) => { + if target >= nodes_len { + return Err(Error::IndexOutOfBounds(target, nodes_len)); + } + + Ok(node + .by_target + .get(&target) + .map(|labels| labels.contains(label)) + .unwrap_or(false)) + } + None => Err(Error::IndexOutOfBounds(node_id, nodes_len)), + } + } +} + +impl LabelExtGraph for DLGraph { + fn extend(&mut self, edges: impl IntoIterator) -> Result { + let mut by_target: Map> = Map::default(); + let mut by_label: Map> = Map::default(); + let mut flat = Vec::new(); + let mut edges_set = Set::new(); + + for (label, to) in edges { + if !self.has_node(to) { + return Err(Error::IndexOutOfBounds(to, self.nodes.len())); + } + + edges_set.insert((label, to)); + + if let Some(set) = by_target.get(&to) { + if !set.contains(&label) { + flat.push((label, to)); + by_target.get_mut(&to).unwrap().insert(label); + by_label + .entry(label) + .or_insert_with(Default::default) + .insert(to); + } + } else { + flat.push((label, to)); + by_target + .entry(to) + .or_insert_with(Default::default) + .insert(label); + by_label + .entry(label) + .or_insert_with(Default::default) + .insert(to); + } + } + + match self.edges_table.get(&edges_set) { + Some(old_index) => Ok(*old_index), + None => { + let new_node = DLNode::new(by_target, by_label, flat); + let new_index = self.nodes_len(); + + self.edges_table.insert(edges_set, new_index); + + self.nodes.push(new_node); + + Ok(new_index) + } + } + } +} + +// Builder for this implementation of graph + +/// A builder for labelled adjacency doubly linked graphs. +#[derive(Debug, Clone)] +pub struct DLGBuilder(DLGraph); + +impl Default for DLGBuilder { + fn default() -> Self { + Self(Default::default()) + } +} + +impl Deref for DLGBuilder { + type Target = DLGraph; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for DLGBuilder { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl Builder for DLGBuilder { + type Label = T; + + type Result = DLGraph; + + #[inline] + fn with_capacity(size: usize) -> Self { + Self(DLGraph { + nodes: Vec::with_capacity(size), + edges_table: Default::default(), + }) + } + + #[inline] + fn add_vertex(&mut self) -> usize { + self.nodes.push(DLNode::default()); + self.nodes.len() - 1 + } + + #[inline] + fn add_vertices(&mut self, n: usize) { + self.nodes + .extend(std::iter::repeat_with(Default::default).take(n)); + } + + fn add_edge(&mut self, source: usize, target: usize, label: Self::Label) -> Result<(), Error> { + let nodes_len = self.nodes.len(); + + let source_node = self + .nodes + .get_mut(source) + .ok_or(Error::IndexOutOfBounds(source, nodes_len))?; + + if !(0..nodes_len).contains(&target) { + return Err(Error::IndexOutOfBounds(target, nodes_len)); + } + + let new_edge_p = !matches!( + source_node + .by_target + .get(&target) + .map(|set| set.contains(&label)), + Some(true) + ); + + if new_edge_p { + let old_children_set: Set<(T, usize)> = source_node.flat.iter().copied().collect(); + + source_node + .by_target + .entry(target) + .or_insert_with(Set::default) + .insert(label); + + source_node + .by_label + .entry(label) + .or_insert_with(Set::default) + .insert(target); + + source_node.flat.push((label, target)); + + let new_children_set: Set<(T, usize)> = source_node.flat.iter().copied().collect(); + + // When source_node is in use, we cannot borrow self + // mutably again, so we move the following two statements + // to after all uses of source_node. + + self.edges_table.remove(&old_children_set); + + self.edges_table.insert(new_children_set, source); + } + + Ok(()) + } + + fn remove_edge(&mut self, source: usize, target: usize, predicate: F) -> Result<(), Error> + where + F: Fn(Self::Label) -> bool, + { + let nodes_len = self.nodes.len(); + + let source_node = self + .nodes + .get_mut(source) + .ok_or(Error::IndexOutOfBounds(source, nodes_len))?; + + if !(0..nodes_len).contains(&target) { + return Err(Error::IndexOutOfBounds(target, nodes_len)); + } + + if let Some(target_labels_set) = source_node.by_target.get(&target) { + let mut to_remove = Vec::new(); + + for label in target_labels_set.iter() { + if predicate(*label) { + to_remove.push(*label); + } + } + + if !to_remove.is_empty() { + let old_children_set: Set<(T, usize)> = source_node.flat.iter().copied().collect(); + + for label in to_remove.iter().copied() { + // This must be "Some", as the outer "if" checks + source_node + .by_target + .get_mut(&target) + .map(|set| set.remove(&label)); + + source_node + .by_label + .get_mut(&label) + .map(|set| set.remove(&target)); + + source_node.flat.retain(|(child_label, child_target)| { + (*child_label, *child_target) != (label, target) + }); + } + + // If after removal no labels remain for the target, + // we remove the edge entirely, to avoid false empty + // edges. + + if let Some(set) = source_node.by_target.get(&target) { + if set.is_empty() { + source_node.by_target.remove(&target); + } + } + + for label in to_remove.iter() { + if let Some(set) = source_node.by_label.get(label) { + if set.is_empty() { + source_node.by_label.remove(label); + } + } + } + + // if source_node.flat.is_empty() { + // source_node.by_target.remove(&target); + + // for label in to_remove.iter() { + // source_node.by_label.remove(label); + // } + // } + + let new_children_set: Set<(T, usize)> = source_node.flat.iter().copied().collect(); + + // When source_node is in use, we cannot borrow self + // mutably again, so we move the following two + // statements to after all uses of source_node. + + self.edges_table.remove(&old_children_set); + + self.edges_table.insert(new_children_set, source); + } + } + + Ok(()) + } + + fn build_ref(&self) -> Self::Result { + self.0.clone() + } + + fn build(self) -> Self::Result { + self.0 + } +} + +#[cfg(test)] +mod label_test { + use super::*; + + macro_rules! set { + () => { Set::::default() }; + ($($num:literal),*) => { + { + let mut set: Set = Set::default(); + $(set.insert($num);)* + set + } + }; + } + + macro_rules! map { + () => { Map::>::default() }; + ($(($key:literal, $value:expr)),*) => { + { + let mut map: Map> = Map::default(); + $(map.insert($key, $value);)* + map + } + }; + } + + #[test] + fn test_graph_apis() -> Result<(), Error> { + let mut graph: DLGraph = Default::default(); + + // testing empty graph + assert!(graph.is_empty()); + + // testing adding an empty node + assert_eq!(graph.extend(std::iter::empty())?, 0); + + // testing nodes_len + assert_eq!(graph.nodes_len(), 1); + + // testing extension + + assert_eq!(graph.extend([(0, 0)].iter().copied())?, 1); + assert_eq!(graph.extend([(1, 0), (1, 1)].iter().copied())?, 2); + assert_eq!(graph.extend([(3, 0), (3, 2)].iter().copied())?, 3); + assert_eq!(graph.extend([(1, 1), (1, 2)].iter().copied())?, 4); + assert_eq!(graph.extend([(2, 1), (3, 2), (2, 3)].iter().copied())?, 5); + + // testing adding a duplicated edge set + assert_eq!(graph.extend([(2, 1), (2, 3), (3, 2)].iter().copied())?, 5); + assert_eq!(graph.extend([(3, 2), (3, 0)].iter().copied())?, 3); + + let graph = graph; + + // ensuring the correct length + assert_eq!(graph.nodes_len(), 6); + + // testing children_of + assert_eq!(graph.children_of(5)?.collect::>(), set!(1, 3, 2)); + + // testing find_children_with_label + assert_eq!( + graph.find_children_with_label(5, &2)?.collect::>(), + set!(1, 3) + ); + + // testing edge_label + assert_eq!(graph.edge_label(5, 2)?.collect::>(), set!(3)); + assert!(matches!( + graph.edge_label(6, 2), + Err(Error::IndexOutOfBounds(6, 6)) + )); + + // testing degree + assert_eq!(graph.degree(4)?, 2); + + // testing is_empty_node + assert!(graph.is_empty_node(0)?); + assert!(!graph.is_empty_node(1)?); + + // testing has_edge + assert!(graph.has_edge(3, 2)?); + assert!(!graph.has_edge(3, 1)?); + assert!(matches!( + graph.has_edge(3, 6), + Err(Error::IndexOutOfBounds(6, 6)) + )); + + // testing labels_of + let mut label_map: Map> = Map::default(); + + for (label, children) in graph.labels_of(5)? { + label_map.insert(*label, children.collect()); + } + + let compare_map = map!((2, set!(1, 3)), (3, set!(2))); + + assert_eq!(label_map, compare_map); + + assert!(matches!( + graph.labels_of(6), + Err(Error::IndexOutOfBounds(6, 6)) + )); + + Ok(()) + } +} + +#[cfg(test)] +mod test_dlgraph_builder { + use super::*; + + #[test] + fn test_builder() -> Result<(), Box> { + let mut builder = DLGBuilder::::default(); + + // Add five nodes + builder.add_vertex(); + builder.add_vertex(); + builder.add_vertex(); + builder.add_vertex(); + builder.add_vertex(); + + println!("five empty nodes: {builder:?}"); + + // Link each node to its successor and link the last node with + // the first one to form a cycle. + for i in 0..5 { + builder.add_edge(i, if i < 4 { i + 1 } else { 0 }, i)?; + } + + println!("a cycle of five nodes: {builder:?}"); + + // Remove the link from the last node to the first node. + builder.remove_edge(4, 0, |_| true)?; + + println!("a line of five nodes: {builder:?}"); + + // build a graph + + let graph = builder.build(); + + println!("final graph: {graph:?}"); + + Ok(()) + } + + #[test] + fn test_errors() -> Result<(), Box> { + let mut builder = DLGBuilder::default(); + + // Add five nodes + builder.add_vertex(); + builder.add_vertex(); + builder.add_vertex(); + builder.add_vertex(); + builder.add_vertex(); + + println!("five empty nodes: {builder:?}"); + + // Errors in add_edge + + println!(); + println!("Testing errors in add_edge:"); + println!(); + + assert!(matches!( + builder.add_edge(0, 5, 0), + Err(Error::IndexOutOfBounds(5, 5)) + )); + + println!("Right error for an index out of bounds as the target"); + + assert!(matches!( + builder.add_edge(10, 5, 0), + Err(Error::IndexOutOfBounds(10, 5)) + )); + + println!("Right error for an index out of bounds as the source"); + + assert!(matches!( + builder.add_edge(10, 50, 0), + Err(Error::IndexOutOfBounds(10, 5)) + )); + + println!("Right error for both indices out of bounds"); + + // Errors in remove_edge + + println!(); + println!("Testing errors in remove_edge:"); + println!(); + + assert!(matches!( + builder.remove_edge(0, 5, |_| true), + Err(Error::IndexOutOfBounds(5, 5)) + )); + + println!("Right error for an index out of bounds as the target"); + + assert!(matches!( + builder.remove_edge(10, 5, |_| true), + Err(Error::IndexOutOfBounds(10, 5)) + )); + + println!("Right error for an index out of bounds as the source"); + + assert!(matches!( + builder.remove_edge(10, 50, |_| true), + Err(Error::IndexOutOfBounds(10, 5)) + )); + + println!("Right error for both indices out of bounds"); + + assert!(matches!(builder.remove_edge(0, 1, |_| true), Ok(()),)); + + println!("No errors for removing a non-existing edge"); + + println!(); + + let graph = builder.build(); + + println!("final graph: {graph:?}"); + + Ok(()) + } +} diff --git a/graph/src/labelled/mod.rs b/graph/src/labelled/mod.rs new file mode 100644 index 0000000..61e3014 --- /dev/null +++ b/graph/src/labelled/mod.rs @@ -0,0 +1,20 @@ +#![warn(missing_docs)] +//! This file implements a labelled graph. See the trait +//! [`LabelGraph`][crate::LabelGraph] for details. +//! +//! Since the method +//! [`find_children_with_label`][super::LabelGraph::find_children_with_label] +//! needs to be implemented efficiently, we store the mappings between +//! labels and edges in both directions. + +use std::ops::{Deref, DerefMut}; + +use crate::{builder::Builder, error::Error, Graph, GraphLabel, LabelExtGraph, LabelGraph}; + +pub mod double; + +pub use double::{DLGBuilder, DLGraph}; + +pub mod single; + +pub use single::SLGraph; diff --git a/graph/src/labelled/single.rs b/graph/src/labelled/single.rs new file mode 100644 index 0000000..bed65c1 --- /dev/null +++ b/graph/src/labelled/single.rs @@ -0,0 +1,343 @@ +//! This file implements a labelled graph that can index vertices by +//! labels. +//! +//! Since we do not have to index edges by labels, the labels can be +//! stored simply by adjacency maps. This may save memory usage and +//! improve performance, potentially. + +use super::*; + +use std::collections::{hash_map::Keys, HashMap as Map}; + +use crate::builder::BuilderMut; + +/// The type of a node. +#[derive(Debug, Clone, Default)] +struct SLNode { + /// The edges are stored as an association from the target node + /// index to the label of the corresponding edge. + /// + /// An implication is that an edge can only have one label. + children: Map, + /// The label of this node. + label: T, +} + +impl SLNode { + fn new(children: Map, label: T) -> Self { + Self { children, label } + } + + /// This function just adds an edge, blindly. + /// + /// # Safety + /// + /// Only use this after making sure that `target` refers to a + /// valid node, and there was no edge from this node to the node + /// pointed to by `target` previously. + unsafe fn add_edge(&mut self, target: usize, label: T) { + self.children.insert(target, label); + } +} + +/// The type of labelled graphs implemented using adjacency maps. +#[derive(Debug, Clone)] +pub struct SLGraph { + nodes: Vec>, + label_index_map: Map, +} + +impl Default for SLGraph { + fn default() -> Self { + let nodes = Vec::new(); + let label_index_map = Default::default(); + Self { + nodes, + label_index_map, + } + } +} + +impl Graph for SLGraph { + type Iter<'a> = std::iter::Copied> + where + Self: 'a; + + #[inline] + fn is_empty(&self) -> bool { + self.nodes.is_empty() + } + + #[inline] + fn nodes_len(&self) -> usize { + self.nodes.len() + } + + #[inline] + fn children_of(&self, node_id: usize) -> Result, Error> { + if let Some(node) = self.nodes.get(node_id) { + Ok(node.children.keys().copied()) + } else { + Err(Error::IndexOutOfBounds(node_id, self.nodes.len())) + } + } + + #[inline] + fn degree(&self, node_id: usize) -> Result { + if let Some(node) = self.nodes.get(node_id) { + Ok(node.children.len()) + } else { + Err(Error::IndexOutOfBounds(node_id, self.nodes.len())) + } + } + + #[inline] + fn is_empty_node(&self, node_id: usize) -> Result { + if let Some(node) = self.nodes.get(node_id) { + Ok(node.children.is_empty()) + } else { + Err(Error::IndexOutOfBounds(node_id, self.nodes.len())) + } + } + + #[inline] + fn has_edge(&self, source: usize, target: usize) -> Result { + let nodes_len = self.nodes.len(); + + let node = if let Some(node) = self.nodes.get(source) { + node + } else { + return Err(Error::IndexOutOfBounds(source, nodes_len)); + }; + + if !self.has_node(target) { + return Err(Error::IndexOutOfBounds(target, nodes_len)); + } + + Ok(node.children.contains_key(&target)) + } + + fn replace_by_builder(&mut self, _builder: impl Builder) { + // In case this is not clear enough, I deliberately avoid + // implementing this. + unimplemented!() + } +} + +/// An iterator of edge labels. +/// +/// This is used to avoid a box allocation. +#[derive(Copy, Clone, Debug)] +pub struct EdgeLabelIter(Option); + +impl Iterator for EdgeLabelIter { + type Item = T; + + #[inline] + fn next(&mut self) -> Option { + self.0.take() + } + + #[inline] + fn size_hint(&self) -> (usize, Option) { + let len = self.0.is_some() as usize; + + (len, Some(len)) + } +} + +impl DoubleEndedIterator for EdgeLabelIter { + #[inline] + fn next_back(&mut self) -> Option { + self.0.take() + } +} + +impl ExactSizeIterator for EdgeLabelIter { + #[inline] + fn len(&self) -> usize { + // Thanks to Clippy for teaching me about coercing a boolean + // value to an integer. + self.0.is_some() as usize + } +} + +impl LabelGraph for SLGraph { + // The following two types are not used, as we do not need to + // index edges by labels. + type Iter<'a> = std::iter::Empty + where + Self: 'a; + + type LabelIter<'a> = std::iter::Empty<(&'a T, >::Iter<'a>)> + where + Self: 'a, + T: 'a; + + type EdgeLabelIter<'a> = EdgeLabelIter + where + Self: 'a, + T: 'a; + + #[inline] + fn query_label(&self, label: T) -> Option { + self.label_index_map.get(&label).copied() + } + + #[inline] + fn vertex_label(&self, node_id: usize) -> Result, Error> { + if let Some(node) = self.nodes.get(node_id) { + Ok(Some(node.label)) + } else { + Err(Error::IndexOutOfBounds(node_id, self.nodes_len())) + } + } + + #[inline] + fn edge_label(&self, source: usize, target: usize) -> Result, Error> { + let nodes_len = self.nodes.len(); + + let node = if let Some(node) = self.nodes.get(source) { + node + } else { + return Err(Error::IndexOutOfBounds(source, nodes_len)); + }; + + if !self.has_node(target) { + return Err(Error::IndexOutOfBounds(target, nodes_len)); + } + + Ok(EdgeLabelIter(node.children.get(&target).copied())) + } + + fn find_children_with_label( + &self, + _node_id: usize, + _label: &T, + ) -> Result<>::Iter<'_>, Error> { + unimplemented!() + } + + fn labels_of(&self, _node_id: usize) -> Result, Error> { + unimplemented!() + } + + #[inline] + fn has_edge_label(&self, node_id: usize, label: &T, target: usize) -> Result { + let nodes_len = self.nodes.len(); + + let node = if let Some(node) = self.nodes.get(node_id) { + node + } else { + return Err(Error::IndexOutOfBounds(node_id, nodes_len)); + }; + + if !self.has_node(target) { + return Err(Error::IndexOutOfBounds(target, nodes_len)); + } + + Ok(node.children.get(&target) == Some(label)) + } +} + +/// The type of `builder_mut` builders for this type of graphs. +#[derive(Debug)] +pub struct SLGBuilderMut<'a, T: GraphLabel> { + graph: &'a mut SLGraph, +} + +impl<'a, T: GraphLabel> BuilderMut for SLGBuilderMut<'a, T> { + type Label = T; + + type Graph = SLGraph; + + type ResultBuilder<'b> = SLGBuilderMut<'b, T> + where + T:'b; + + fn from_graph_mut(graph: &mut Self::Graph) -> Self::ResultBuilder<'_> { + SLGBuilderMut { graph } + } + + fn add_vertex(&mut self, label: T) -> Result { + if let Some(old_node) = self.graph.label_index_map.get(&label) { + dbg!(label); + return Err(Error::DuplicatedNode(*old_node)); + } + + self.graph + .nodes + .push(SLNode::new(Default::default(), label)); + + let new = self.graph.nodes_len() - 1; + + self.graph.label_index_map.insert(label, new); + + Ok(new) + } + + fn add_edge(&mut self, source: usize, target: usize, label: Self::Label) -> Result<(), Error> { + let graph = &mut self.graph; + + let nodes_len = graph.nodes.len(); + + // NOTE: We check the validity of `target` first because we + // need to borrow graph mutably later, which would prevent us + // from borrowing graph immutably to check the validity of + // `target` then. + if !graph.has_node(target) { + return Err(Error::IndexOutOfBounds(target, nodes_len)); + } + + let node = if let Some(node) = graph.nodes.get_mut(source) { + node + } else { + return Err(Error::IndexOutOfBounds(source, nodes_len)); + }; + + if node.children.get(&target).is_some() { + return Err(Error::DuplicatedEdge(source, target)); + } + + // We checked what we need to check, so this is safe. + unsafe { + node.add_edge(target, label); + } + + Ok(()) + } + + fn remove_edge(&mut self, source: usize, target: usize, predicate: F) -> Result<(), Error> + where + F: Fn(Self::Label) -> bool, + { + let graph = &mut self.graph; + + let nodes_len = graph.nodes.len(); + + // NOTE: We check the validity of `target` first because we + // need to borrow graph mutably later, which would prevent us + // from borrowing graph immutably to check the validity of + // `target` then. + if !graph.has_node(target) { + return Err(Error::IndexOutOfBounds(target, nodes_len)); + } + + let node = if let Some(node) = graph.nodes.get_mut(source) { + node + } else { + return Err(Error::IndexOutOfBounds(source, nodes_len)); + }; + + if let Some(child_label) = node.children.get(&target) { + // There is only one label, so we just check this label. + if predicate(*child_label) { + node.children.remove(&target); + } + + Ok(()) + } else { + Err(Error::DuplicatedEdge(source, target)) + } + } +} diff --git a/graph/src/lib.rs b/graph/src/lib.rs index d4f6d7c..79f9646 100644 --- a/graph/src/lib.rs +++ b/graph/src/lib.rs @@ -239,11 +239,20 @@ pub trait LabelGraph: Graph { Self: 'a, T: 'a; + /// Query the graph for a label, and return the node index if + /// found. + /// + /// The default implementation always returns `None`. + #[inline] + fn query_label(&self, _label: T) -> Option { + None + } + #[inline] /// Return the label of a vertex or an error if the node is /// invalid. /// - /// The default implementation always returns None for a valid + /// The default implementation always returns `None` for a valid /// node. fn vertex_label(&self, node_id: usize) -> Result, Error> { if self.has_node(node_id) { -- cgit v1.2.3-18-g5258