summaryrefslogtreecommitdiff
path: root/graph
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2023-01-03 23:44:02 +0800
committerJSDurand <mmemmew@gmail.com>2023-01-03 23:44:02 +0800
commitbdbd4b4dc21af09711c97d3f903877443199af06 (patch)
treec6a9602f72ee1f6fd7fd3f64b8679a4de50a0159 /graph
parent8463dd24f815fe2b8f25fe9763e0a43023bfbb20 (diff)
structural change: separate crates out
I put functionalities that are not strictly core to separate crates, so that the whole package becomes more modular, and makes it easier to try other parsing algorithms in the future. Also I have to figure the forests out before finishing the core chain-rule algorithm, as the part about forests affects the labels of the grammars directly. From my experiences in writing the previous version, it is asking for trouble to change the labels type dramatically at a later point: too many places need to be changed. Thus I decide to figure the rough part of forests out. Actually I only have to figure out how to attach forests fragments to edges of the underlying atomic languages, and the more complex parts of putting forests together can be left to the recorders, which is my vision of assembling semi-ring values during the chain-rule machine. It should be relatively easy to produce forests fragments from grammars since we are just trying to extract some information from the grammar, not to manipulate those information in some complicated way. We have to do some manipulations in the process, though, in order to make sure that the nulling and epsilon-removal processes do not invalidate these fragments.
Diffstat (limited to 'graph')
-rw-r--r--graph/src/adlist.rs6
-rw-r--r--graph/src/adset.rs6
-rw-r--r--graph/src/builder.rs3
-rw-r--r--graph/src/labelled.rs135
-rw-r--r--graph/src/lib.rs14
5 files changed, 126 insertions, 38 deletions
diff --git a/graph/src/adlist.rs b/graph/src/adlist.rs
index 6c1dcd0..ba9afb8 100644
--- a/graph/src/adlist.rs
+++ b/graph/src/adlist.rs
@@ -155,6 +155,12 @@ impl Builder for ALGBuilder {
self.nodes.len() - 1
}
+ #[inline]
+ fn add_vertices(&mut self, n: usize) {
+ self.nodes
+ .extend(std::iter::repeat_with(ALNode::default).take(n));
+ }
+
fn add_edge(&mut self, source: usize, target: usize, _label: Self::Label) -> Result<(), Error> {
let nodes_len = self.nodes.len();
diff --git a/graph/src/adset.rs b/graph/src/adset.rs
index c225986..adf2aaf 100644
--- a/graph/src/adset.rs
+++ b/graph/src/adset.rs
@@ -190,6 +190,12 @@ impl Builder for ASGBuilder {
self.nodes.len() - 1
}
+ #[inline]
+ fn add_vertices(&mut self, n: usize) {
+ self.nodes
+ .extend(std::iter::repeat_with(ASNode::default).take(n));
+ }
+
fn add_edge(&mut self, source: usize, target: usize, _label: Self::Label) -> Result<(), Error> {
let nodes_len = self.nodes.len();
diff --git a/graph/src/builder.rs b/graph/src/builder.rs
index ce85cce..9ab5895 100644
--- a/graph/src/builder.rs
+++ b/graph/src/builder.rs
@@ -27,6 +27,9 @@ pub trait Builder: Default {
/// Add a vertex without children.
fn add_vertex(&mut self) -> usize;
+ /// Add a number of vertices at the same time.
+ fn add_vertices(&mut self, n: usize);
+
/// Add an edge from the source to the target.
fn add_edge(&mut self, source: usize, target: usize, label: Self::Label) -> Result<(), Error>;
diff --git a/graph/src/labelled.rs b/graph/src/labelled.rs
index d0a02d0..6341787 100644
--- a/graph/src/labelled.rs
+++ b/graph/src/labelled.rs
@@ -142,10 +142,11 @@ impl<T: GraphLabel> Graph for DLGraph<T> {
match self.nodes.get(source) {
Some(source_node) => {
if self.nodes.get(target).is_none() {
- return Err(Error::IndexOutOfBounds(target, self.nodes.len()));
+ Err(Error::IndexOutOfBounds(target, self.nodes.len()))
+ } else {
+ Ok(source_node.by_target.contains_key(&target)
+ && !source_node.by_target.get(&target).unwrap().is_empty())
}
-
- Ok(source_node.by_target.contains_key(&target))
}
None => Err(Error::IndexOutOfBounds(source, self.nodes.len())),
}
@@ -198,7 +199,7 @@ impl<T: GraphLabel> Graph for DLGraph<T> {
/// A delegation of iterators.
///
/// This is used to avoid a boxed pointer to an iterator.
-#[derive(Default, Debug)]
+#[derive(Default, Debug, Clone)]
pub struct LabelIndexIter<'a> {
iter: Option<std::iter::Copied<Iter<'a, usize>>>,
}
@@ -279,25 +280,81 @@ impl<'a, T> Iterator for LabelIter<'a, T> {
}
}
+/// This is used to avoid a boxed pointer to an iterator.
+#[derive(Debug)]
+pub struct EdgeLabelIter<'a, T> {
+ iter: Option<Iter<'a, T>>,
+}
+
+impl<'a, T> Default for EdgeLabelIter<'a, T> {
+ #[inline]
+ fn default() -> Self {
+ Self { iter: None }
+ }
+}
+
+impl<'a, T: Copy + Clone> ExactSizeIterator for EdgeLabelIter<'a, T> {
+ #[inline]
+ fn len(&self) -> usize {
+ if let Some(iter) = &self.iter {
+ iter.len()
+ } else {
+ 0
+ }
+ }
+}
+
+impl<'a, T: Copy + Clone> EdgeLabelIter<'a, T> {
+ fn new(iter: Iter<'a, T>) -> Self {
+ Self { iter: Some(iter) }
+ }
+}
+
+impl<'a, T: Copy + Clone> Iterator for EdgeLabelIter<'a, T> {
+ type Item = T;
+
+ #[inline]
+ fn next(&mut self) -> Option<Self::Item> {
+ if let Some(iter) = &mut self.iter {
+ iter.next().copied()
+ } else {
+ None
+ }
+ }
+
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ if let Some(iter) = &self.iter {
+ iter.size_hint()
+ } else {
+ (0, Some(0))
+ }
+ }
+}
+
impl<T: GraphLabel> LabelGraph<T> for DLGraph<T> {
type Iter<'a> = LabelIndexIter<'a> where T: 'a;
type LabelIter<'a> = LabelIter<'a,T> where T: 'a;
- fn edge_label(&self, source: usize, target: usize) -> Result<Vec<T>, Error> {
+ type EdgeLabelIter<'a> = EdgeLabelIter<'a,T>
+ where
+ Self: 'a,
+ T: 'a + Copy + Clone;
+
+ fn edge_label(&self, source: usize, target: usize) -> Result<Self::EdgeLabelIter<'_>, Error> {
if self.has_edge(source, target)? {
- Ok(self
- .nodes
- .get(source)
- .unwrap()
- .by_target
- .get(&target)
- .unwrap()
- .iter()
- .copied()
- .collect())
+ Ok(EdgeLabelIter::new(
+ self.nodes
+ .get(source)
+ .unwrap()
+ .by_target
+ .get(&target)
+ .unwrap()
+ .iter(),
+ ))
} else {
- Ok(Vec::new())
+ Ok(Default::default())
}
}
@@ -335,11 +392,11 @@ impl<T: GraphLabel> LabelGraph<T> for DLGraph<T> {
return Err(Error::IndexOutOfBounds(target, nodes_len));
}
- if let Some(labels) = node.by_target.get(&target) {
- Ok(labels.contains(label))
- } else {
- Ok(false)
- }
+ Ok(node
+ .by_target
+ .get(&target)
+ .map(|labels| labels.contains(label))
+ .unwrap_or(false))
}
None => Err(Error::IndexOutOfBounds(node_id, nodes_len)),
}
@@ -443,6 +500,12 @@ impl<T: GraphLabel> Builder for DLGBuilder<T> {
self.nodes.len() - 1
}
+ #[inline]
+ fn add_vertices(&mut self, n: usize) {
+ self.nodes
+ .extend(std::iter::repeat_with(Default::default).take(n));
+ }
+
fn add_edge(&mut self, source: usize, target: usize, label: Self::Label) -> Result<(), Error> {
let nodes_len = self.nodes.len();
@@ -541,14 +604,29 @@ impl<T: GraphLabel> Builder for DLGBuilder<T> {
// If after removal no labels remain for the target,
// we remove the edge entirely, to avoid false empty
// edges.
- if source_node.flat.is_empty() {
- source_node.by_target.remove(&target);
- for label in to_remove.iter() {
- source_node.by_label.remove(label);
+ if let Some(set) = source_node.by_target.get(&target) {
+ if set.is_empty() {
+ source_node.by_target.remove(&target);
}
}
+ for label in to_remove.iter() {
+ if let Some(set) = source_node.by_label.get(label) {
+ if set.is_empty() {
+ source_node.by_label.remove(label);
+ }
+ }
+ }
+
+ // if source_node.flat.is_empty() {
+ // source_node.by_target.remove(&target);
+
+ // for label in to_remove.iter() {
+ // source_node.by_label.remove(label);
+ // }
+ // }
+
let new_children_set: Set<(T, usize)> = source_node.flat.iter().copied().collect();
// When source_node is in use, we cannot borrow self
@@ -639,10 +717,7 @@ mod label_test {
);
// testing edge_label
- assert_eq!(
- graph.edge_label(5, 2)?.into_iter().collect::<Set<_>>(),
- set!(3)
- );
+ assert_eq!(graph.edge_label(5, 2)?.collect::<Set<_>>(), set!(3));
assert!(matches!(
graph.edge_label(6, 2),
Err(Error::IndexOutOfBounds(6, 6))
@@ -683,8 +758,6 @@ mod label_test {
}
}
-// TODO: Test print_viz
-
#[cfg(test)]
mod test_dlgraph_builder {
use super::*;
diff --git a/graph/src/lib.rs b/graph/src/lib.rs
index 2d23af3..d4f6d7c 100644
--- a/graph/src/lib.rs
+++ b/graph/src/lib.rs
@@ -233,6 +233,12 @@ pub trait LabelGraph<T: GraphLabel>: Graph {
Self: 'a,
T: 'a;
+ /// A type that iterates over labels of an edge.
+ type EdgeLabelIter<'a>: Iterator<Item = T> + 'a
+ where
+ Self: 'a,
+ T: 'a;
+
#[inline]
/// Return the label of a vertex or an error if the node is
/// invalid.
@@ -247,15 +253,9 @@ pub trait LabelGraph<T: GraphLabel>: Graph {
}
}
- #[inline]
/// Return the label of an edge or an error if some node is
/// invalid.
- ///
- /// The default implementation always returns an empty vector for
- /// valid nodes.
- fn edge_label(&self, source: usize, target: usize) -> Result<Vec<T>, Error> {
- self.has_edge(source, target).map(|_| Vec::new())
- }
+ fn edge_label(&self, source: usize, target: usize) -> Result<Self::EdgeLabelIter<'_>, Error>;
/// Return an iterator of edges out of a node, whose associated
/// label is as given.