diff options
author | JSDurand <mmemmew@gmail.com> | 2023-01-05 10:24:39 +0800 |
---|---|---|
committer | JSDurand <mmemmew@gmail.com> | 2023-01-05 10:24:39 +0800 |
commit | 7dd4935230e303aef8d295d992239d59d95b32d7 (patch) | |
tree | 486104820b5f3701518c1030a0393a5cef428cb9 /graph | |
parent | bdbd4b4dc21af09711c97d3f903877443199af06 (diff) |
singly labelled graphs
Now I have a new type of labelled graphs, which can index vertices by
labels, but not index edges by labels. The biggest difference is that
I do not have to keep a hashmap of edge targets by labels, and I do
not have to guard against the duplication of nodes with the same set
of edges. I guard against nodes with the same label, though.
Also, in this graph, both vertices and edges have one label at a time,
whereas in the previous labelled graph there can be a multitude of
edges between the same source and target nodes, but with different
labels.
Now it remains to test this type of graphs, and to think through how
we attach forest fragments to nondeterministic finite automata edges,
and how to join forest fragments together while skipping nullable
edges, in order to finish the "compilation" part.
Diffstat (limited to 'graph')
-rw-r--r-- | graph/src/builder.rs | 37 | ||||
-rw-r--r-- | graph/src/error.rs | 17 | ||||
-rw-r--r-- | graph/src/labelled/double.rs (renamed from graph/src/labelled.rs) | 11 | ||||
-rw-r--r-- | graph/src/labelled/mod.rs | 20 | ||||
-rw-r--r-- | graph/src/labelled/single.rs | 343 | ||||
-rw-r--r-- | graph/src/lib.rs | 11 |
6 files changed, 428 insertions, 11 deletions
diff --git a/graph/src/builder.rs b/graph/src/builder.rs index 9ab5895..4a480a5 100644 --- a/graph/src/builder.rs +++ b/graph/src/builder.rs @@ -54,3 +54,40 @@ pub trait Builder: Default { /// still be used later on. fn build_ref(&self) -> Self::Result; } + +/// The type of builders that actually reference the underlying graph +/// instead of owe it. +/// +/// To finish the task of the builder, just do not use it anymore. +/// The building happens right when the building methods are called. +pub trait BuilderMut { + /// Some graphs are labelled. This type should be the type of the + /// labels. + type Label; + + /// The type of the underlying graph. + type Graph: Graph; + + /// The type of the builder from a borrow. + type ResultBuilder<'a>: BuilderMut + where + Self::Label: 'a; + + /// Borrow a graph to create a builder without copying. + fn from_graph_mut(graph: &mut Self::Graph) -> Self::ResultBuilder<'_>; + + /// Add a new vertex. + fn add_vertex(&mut self, label: Self::Label) -> Result<usize, Error>; + + /// Add an edge from the source to the target. + fn add_edge(&mut self, source: usize, target: usize, label: Self::Label) -> Result<(), Error>; + + /// Remove an edge from the source to the target. + /// + /// Since some graphs are labelled, the users are allowed to pass + /// a predicate to determine if an edge from the source to the + /// target should be removed. + fn remove_edge<F>(&mut self, source: usize, target: usize, predicate: F) -> Result<(), Error> + where + F: Fn(Self::Label) -> bool; +} diff --git a/graph/src/error.rs b/graph/src/error.rs index 3600005..bf2714b 100644 --- a/graph/src/error.rs +++ b/graph/src/error.rs @@ -13,8 +13,11 @@ pub enum Error { /// the second component is the current length of nodes. IndexOutOfBounds(usize, usize), /// The graph does not permit duplicate nodes but encounters a - /// repeated node - DuplicatedNode, + /// repeated node. + DuplicatedNode(usize), + /// The graph does not permit duplicate edges but encounters a + /// repeated edge. + DuplicatedEdge(usize, usize), } impl Display for Error { @@ -23,7 +26,15 @@ impl Display for Error { Error::IndexOutOfBounds(index, len) => { write!(f, "index {index} out of bounds {len} ") } - Error::DuplicatedNode => write!(f, "No duplicate nodes permitted, but found one"), + Error::DuplicatedNode(node) => { + write!(f, "No duplicate nodes permitted, but found one: {node}") + } + Error::DuplicatedEdge(source, target) => { + write!( + f, + "No duplicate edges permitted, but found one from {source} to {target}" + ) + } } } } diff --git a/graph/src/labelled.rs b/graph/src/labelled/double.rs index 6341787..53b5dc8 100644 --- a/graph/src/labelled.rs +++ b/graph/src/labelled/double.rs @@ -1,15 +1,12 @@ -#![warn(missing_docs)] -//! This file implements a labelled graph. See the -//! [trait][super::LabelGraph] for details. +//! This file implements a labelled graph that can index edges by +//! labels. //! //! Since the method -//! [`find_children_with_label`][super::LabelGraph::find_children_with_label] +//! [`find_children_with_label`][crate::LabelGraph::find_children_with_label] //! needs to be implemented efficiently, we store the mappings between //! labels and edges in both directions. -use std::ops::{Deref, DerefMut}; - -use crate::{builder::Builder, error::Error, Graph, GraphLabel, LabelExtGraph, LabelGraph}; +use super::*; // We use BTreeMap and BTreeSet here as we need to exclude duplicate // edge sets, while an ordinary hashmap and hashset do not allow diff --git a/graph/src/labelled/mod.rs b/graph/src/labelled/mod.rs new file mode 100644 index 0000000..61e3014 --- /dev/null +++ b/graph/src/labelled/mod.rs @@ -0,0 +1,20 @@ +#![warn(missing_docs)] +//! This file implements a labelled graph. See the trait +//! [`LabelGraph`][crate::LabelGraph] for details. +//! +//! Since the method +//! [`find_children_with_label`][super::LabelGraph::find_children_with_label] +//! needs to be implemented efficiently, we store the mappings between +//! labels and edges in both directions. + +use std::ops::{Deref, DerefMut}; + +use crate::{builder::Builder, error::Error, Graph, GraphLabel, LabelExtGraph, LabelGraph}; + +pub mod double; + +pub use double::{DLGBuilder, DLGraph}; + +pub mod single; + +pub use single::SLGraph; diff --git a/graph/src/labelled/single.rs b/graph/src/labelled/single.rs new file mode 100644 index 0000000..bed65c1 --- /dev/null +++ b/graph/src/labelled/single.rs @@ -0,0 +1,343 @@ +//! This file implements a labelled graph that can index vertices by +//! labels. +//! +//! Since we do not have to index edges by labels, the labels can be +//! stored simply by adjacency maps. This may save memory usage and +//! improve performance, potentially. + +use super::*; + +use std::collections::{hash_map::Keys, HashMap as Map}; + +use crate::builder::BuilderMut; + +/// The type of a node. +#[derive(Debug, Clone, Default)] +struct SLNode<T: GraphLabel> { + /// The edges are stored as an association from the target node + /// index to the label of the corresponding edge. + /// + /// An implication is that an edge can only have one label. + children: Map<usize, T>, + /// The label of this node. + label: T, +} + +impl<T: GraphLabel> SLNode<T> { + fn new(children: Map<usize, T>, label: T) -> Self { + Self { children, label } + } + + /// This function just adds an edge, blindly. + /// + /// # Safety + /// + /// Only use this after making sure that `target` refers to a + /// valid node, and there was no edge from this node to the node + /// pointed to by `target` previously. + unsafe fn add_edge(&mut self, target: usize, label: T) { + self.children.insert(target, label); + } +} + +/// The type of labelled graphs implemented using adjacency maps. +#[derive(Debug, Clone)] +pub struct SLGraph<T: GraphLabel> { + nodes: Vec<SLNode<T>>, + label_index_map: Map<T, usize>, +} + +impl<T: GraphLabel> Default for SLGraph<T> { + fn default() -> Self { + let nodes = Vec::new(); + let label_index_map = Default::default(); + Self { + nodes, + label_index_map, + } + } +} + +impl<T: GraphLabel> Graph for SLGraph<T> { + type Iter<'a> = std::iter::Copied<Keys<'a, usize, T>> + where + Self: 'a; + + #[inline] + fn is_empty(&self) -> bool { + self.nodes.is_empty() + } + + #[inline] + fn nodes_len(&self) -> usize { + self.nodes.len() + } + + #[inline] + fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, Error> { + if let Some(node) = self.nodes.get(node_id) { + Ok(node.children.keys().copied()) + } else { + Err(Error::IndexOutOfBounds(node_id, self.nodes.len())) + } + } + + #[inline] + fn degree(&self, node_id: usize) -> Result<usize, Error> { + if let Some(node) = self.nodes.get(node_id) { + Ok(node.children.len()) + } else { + Err(Error::IndexOutOfBounds(node_id, self.nodes.len())) + } + } + + #[inline] + fn is_empty_node(&self, node_id: usize) -> Result<bool, Error> { + if let Some(node) = self.nodes.get(node_id) { + Ok(node.children.is_empty()) + } else { + Err(Error::IndexOutOfBounds(node_id, self.nodes.len())) + } + } + + #[inline] + fn has_edge(&self, source: usize, target: usize) -> Result<bool, Error> { + let nodes_len = self.nodes.len(); + + let node = if let Some(node) = self.nodes.get(source) { + node + } else { + return Err(Error::IndexOutOfBounds(source, nodes_len)); + }; + + if !self.has_node(target) { + return Err(Error::IndexOutOfBounds(target, nodes_len)); + } + + Ok(node.children.contains_key(&target)) + } + + fn replace_by_builder(&mut self, _builder: impl Builder<Result = Self>) { + // In case this is not clear enough, I deliberately avoid + // implementing this. + unimplemented!() + } +} + +/// An iterator of edge labels. +/// +/// This is used to avoid a box allocation. +#[derive(Copy, Clone, Debug)] +pub struct EdgeLabelIter<T: GraphLabel>(Option<T>); + +impl<T: GraphLabel> Iterator for EdgeLabelIter<T> { + type Item = T; + + #[inline] + fn next(&mut self) -> Option<Self::Item> { + self.0.take() + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + let len = self.0.is_some() as usize; + + (len, Some(len)) + } +} + +impl<T: GraphLabel> DoubleEndedIterator for EdgeLabelIter<T> { + #[inline] + fn next_back(&mut self) -> Option<Self::Item> { + self.0.take() + } +} + +impl<T: GraphLabel> ExactSizeIterator for EdgeLabelIter<T> { + #[inline] + fn len(&self) -> usize { + // Thanks to Clippy for teaching me about coercing a boolean + // value to an integer. + self.0.is_some() as usize + } +} + +impl<T: GraphLabel> LabelGraph<T> for SLGraph<T> { + // The following two types are not used, as we do not need to + // index edges by labels. + type Iter<'a> = std::iter::Empty<usize> + where + Self: 'a; + + type LabelIter<'a> = std::iter::Empty<(&'a T, <Self as LabelGraph<T>>::Iter<'a>)> + where + Self: 'a, + T: 'a; + + type EdgeLabelIter<'a> = EdgeLabelIter<T> + where + Self: 'a, + T: 'a; + + #[inline] + fn query_label(&self, label: T) -> Option<usize> { + self.label_index_map.get(&label).copied() + } + + #[inline] + fn vertex_label(&self, node_id: usize) -> Result<Option<T>, Error> { + if let Some(node) = self.nodes.get(node_id) { + Ok(Some(node.label)) + } else { + Err(Error::IndexOutOfBounds(node_id, self.nodes_len())) + } + } + + #[inline] + fn edge_label(&self, source: usize, target: usize) -> Result<Self::EdgeLabelIter<'_>, Error> { + let nodes_len = self.nodes.len(); + + let node = if let Some(node) = self.nodes.get(source) { + node + } else { + return Err(Error::IndexOutOfBounds(source, nodes_len)); + }; + + if !self.has_node(target) { + return Err(Error::IndexOutOfBounds(target, nodes_len)); + } + + Ok(EdgeLabelIter(node.children.get(&target).copied())) + } + + fn find_children_with_label( + &self, + _node_id: usize, + _label: &T, + ) -> Result<<Self as LabelGraph<T>>::Iter<'_>, Error> { + unimplemented!() + } + + fn labels_of(&self, _node_id: usize) -> Result<Self::LabelIter<'_>, Error> { + unimplemented!() + } + + #[inline] + fn has_edge_label(&self, node_id: usize, label: &T, target: usize) -> Result<bool, Error> { + let nodes_len = self.nodes.len(); + + let node = if let Some(node) = self.nodes.get(node_id) { + node + } else { + return Err(Error::IndexOutOfBounds(node_id, nodes_len)); + }; + + if !self.has_node(target) { + return Err(Error::IndexOutOfBounds(target, nodes_len)); + } + + Ok(node.children.get(&target) == Some(label)) + } +} + +/// The type of `builder_mut` builders for this type of graphs. +#[derive(Debug)] +pub struct SLGBuilderMut<'a, T: GraphLabel> { + graph: &'a mut SLGraph<T>, +} + +impl<'a, T: GraphLabel> BuilderMut for SLGBuilderMut<'a, T> { + type Label = T; + + type Graph = SLGraph<T>; + + type ResultBuilder<'b> = SLGBuilderMut<'b, T> + where + T:'b; + + fn from_graph_mut(graph: &mut Self::Graph) -> Self::ResultBuilder<'_> { + SLGBuilderMut { graph } + } + + fn add_vertex(&mut self, label: T) -> Result<usize, Error> { + if let Some(old_node) = self.graph.label_index_map.get(&label) { + dbg!(label); + return Err(Error::DuplicatedNode(*old_node)); + } + + self.graph + .nodes + .push(SLNode::new(Default::default(), label)); + + let new = self.graph.nodes_len() - 1; + + self.graph.label_index_map.insert(label, new); + + Ok(new) + } + + fn add_edge(&mut self, source: usize, target: usize, label: Self::Label) -> Result<(), Error> { + let graph = &mut self.graph; + + let nodes_len = graph.nodes.len(); + + // NOTE: We check the validity of `target` first because we + // need to borrow graph mutably later, which would prevent us + // from borrowing graph immutably to check the validity of + // `target` then. + if !graph.has_node(target) { + return Err(Error::IndexOutOfBounds(target, nodes_len)); + } + + let node = if let Some(node) = graph.nodes.get_mut(source) { + node + } else { + return Err(Error::IndexOutOfBounds(source, nodes_len)); + }; + + if node.children.get(&target).is_some() { + return Err(Error::DuplicatedEdge(source, target)); + } + + // We checked what we need to check, so this is safe. + unsafe { + node.add_edge(target, label); + } + + Ok(()) + } + + fn remove_edge<F>(&mut self, source: usize, target: usize, predicate: F) -> Result<(), Error> + where + F: Fn(Self::Label) -> bool, + { + let graph = &mut self.graph; + + let nodes_len = graph.nodes.len(); + + // NOTE: We check the validity of `target` first because we + // need to borrow graph mutably later, which would prevent us + // from borrowing graph immutably to check the validity of + // `target` then. + if !graph.has_node(target) { + return Err(Error::IndexOutOfBounds(target, nodes_len)); + } + + let node = if let Some(node) = graph.nodes.get_mut(source) { + node + } else { + return Err(Error::IndexOutOfBounds(source, nodes_len)); + }; + + if let Some(child_label) = node.children.get(&target) { + // There is only one label, so we just check this label. + if predicate(*child_label) { + node.children.remove(&target); + } + + Ok(()) + } else { + Err(Error::DuplicatedEdge(source, target)) + } + } +} diff --git a/graph/src/lib.rs b/graph/src/lib.rs index d4f6d7c..79f9646 100644 --- a/graph/src/lib.rs +++ b/graph/src/lib.rs @@ -239,11 +239,20 @@ pub trait LabelGraph<T: GraphLabel>: Graph { Self: 'a, T: 'a; + /// Query the graph for a label, and return the node index if + /// found. + /// + /// The default implementation always returns `None`. + #[inline] + fn query_label(&self, _label: T) -> Option<usize> { + None + } + #[inline] /// Return the label of a vertex or an error if the node is /// invalid. /// - /// The default implementation always returns None for a valid + /// The default implementation always returns `None` for a valid /// node. fn vertex_label(&self, node_id: usize) -> Result<Option<T>, Error> { if self.has_node(node_id) { |