summaryrefslogtreecommitdiff
path: root/forest
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2023-01-03 23:44:02 +0800
committerJSDurand <mmemmew@gmail.com>2023-01-03 23:44:02 +0800
commitbdbd4b4dc21af09711c97d3f903877443199af06 (patch)
treec6a9602f72ee1f6fd7fd3f64b8679a4de50a0159 /forest
parent8463dd24f815fe2b8f25fe9763e0a43023bfbb20 (diff)
structural change: separate crates out
I put functionalities that are not strictly core to separate crates, so that the whole package becomes more modular, and makes it easier to try other parsing algorithms in the future. Also I have to figure the forests out before finishing the core chain-rule algorithm, as the part about forests affects the labels of the grammars directly. From my experiences in writing the previous version, it is asking for trouble to change the labels type dramatically at a later point: too many places need to be changed. Thus I decide to figure the rough part of forests out. Actually I only have to figure out how to attach forests fragments to edges of the underlying atomic languages, and the more complex parts of putting forests together can be left to the recorders, which is my vision of assembling semi-ring values during the chain-rule machine. It should be relatively easy to produce forests fragments from grammars since we are just trying to extract some information from the grammar, not to manipulate those information in some complicated way. We have to do some manipulations in the process, though, in order to make sure that the nulling and epsilon-removal processes do not invalidate these fragments.
Diffstat (limited to 'forest')
-rw-r--r--forest/Cargo.toml9
-rw-r--r--forest/src/default.rs153
-rw-r--r--forest/src/lib.rs105
3 files changed, 267 insertions, 0 deletions
diff --git a/forest/Cargo.toml b/forest/Cargo.toml
new file mode 100644
index 0000000..b88451d
--- /dev/null
+++ b/forest/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "forest"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+graph = { path = "../graph" }
diff --git a/forest/src/default.rs b/forest/src/default.rs
new file mode 100644
index 0000000..36145f4
--- /dev/null
+++ b/forest/src/default.rs
@@ -0,0 +1,153 @@
+//! This file provides a default implementation for the
+//! [`Forest`][crate::Forest] trait.
+
+use super::*;
+use graph::{error::Error as GraphError, ALGraph, Graph};
+
+use std::collections::{hash_set::Iter, HashMap, HashSet};
+
+#[derive(Debug, Clone)]
+pub struct DefaultForest<NodeLabel: GraphLabel, EdgeLabel: GraphLabel> {
+ graph: ALGraph,
+ vertex_labels: HashMap<usize, NodeLabel>,
+ edge_labels: HashMap<(usize, usize), EdgeLabel>,
+ plugins: HashSet<usize>,
+ plugouts: HashSet<usize>,
+}
+
+impl<NodeLabel: GraphLabel, EdgeLabel: GraphLabel> Default for DefaultForest<NodeLabel, EdgeLabel> {
+ fn default() -> Self {
+ Self {
+ graph: Default::default(),
+ vertex_labels: Default::default(),
+ edge_labels: Default::default(),
+ plugins: Default::default(),
+ plugouts: Default::default(),
+ }
+ }
+}
+
+impl<NodeLabel: GraphLabel, EdgeLabel: GraphLabel> Graph for DefaultForest<NodeLabel, EdgeLabel> {
+ type Iter<'a> = <ALGraph as Graph>::Iter<'a>
+ where
+ Self: 'a;
+
+ fn is_empty(&self) -> bool {
+ self.graph.is_empty()
+ }
+
+ fn nodes_len(&self) -> usize {
+ self.graph.nodes_len()
+ }
+
+ fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, GraphError> {
+ self.graph.children_of(node_id)
+ }
+
+ fn degree(&self, node_id: usize) -> Result<usize, GraphError> {
+ self.graph.degree(node_id)
+ }
+
+ fn is_empty_node(&self, node_id: usize) -> Result<bool, GraphError> {
+ self.graph.is_empty_node(node_id)
+ }
+
+ fn has_edge(&self, source: usize, target: usize) -> Result<bool, GraphError> {
+ self.graph.has_edge(source, target)
+ }
+
+ fn replace_by_builder(&mut self, _builder: impl graph::Builder<Result = Self>) {
+ unimplemented!()
+ }
+}
+
+impl<NodeLabel: GraphLabel, EdgeLabel: GraphLabel> ExtGraph
+ for DefaultForest<NodeLabel, EdgeLabel>
+{
+ fn extend(&mut self, edges: impl IntoIterator<Item = usize>) -> Result<usize, GraphError> {
+ self.graph.extend(edges)
+ }
+}
+
+#[derive(Debug)]
+pub struct LabelIter<'a> {
+ set_iter: Iter<'a, usize>,
+}
+
+impl<'a> Iterator for LabelIter<'a> {
+ type Item = usize;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ self.set_iter.next().copied()
+ }
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ self.set_iter.size_hint()
+ }
+}
+
+impl<'a> ExactSizeIterator for LabelIter<'a> {
+ fn len(&self) -> usize {
+ let (lower, upper) = self.size_hint();
+ // Note: This assertion is overly defensive, but it checks the
+ // invariant guaranteed by the trait.
+ debug_assert!(upper == Some(lower));
+ lower
+ }
+}
+
+impl<'a> From<Iter<'a, usize>> for LabelIter<'a> {
+ fn from(set_iter: Iter<'a, usize>) -> Self {
+ Self { set_iter }
+ }
+}
+
+impl<NodeLabel: GraphLabel, EdgeLabel: GraphLabel> Forest<NodeLabel, EdgeLabel>
+ for DefaultForest<NodeLabel, EdgeLabel>
+{
+ type PluginIter<'a> = LabelIter<'a>
+ where
+ Self: 'a;
+
+ type PlugoutIter<'a> = LabelIter<'a>
+ where
+ Self: 'a;
+
+ fn plugins(&self) -> Self::PluginIter<'_> {
+ self.plugins.iter().into()
+ }
+
+ fn plugouts(&self) -> Self::PlugoutIter<'_> {
+ self.plugouts.iter().into()
+ }
+
+ fn plug(&mut self, other: &Self) -> Result<(), Error> {
+ // PLAN: Clone the `other` forest to `self`, adjust the
+ // indices for edges, and then add edges between plugs.
+ //
+ // Since I cannot touch the underlying nodes directly, I have
+ // to add the nodes and edges individually.
+
+ todo!()
+ }
+
+ fn vertex_label(&self, node_id: usize) -> Result<Option<NodeLabel>, Error> {
+ if node_id >= self.nodes_len() {
+ return Err(Error::IndexOutOfBounds(node_id, self.nodes_len()));
+ }
+
+ Ok(self.vertex_labels.get(&node_id).copied())
+ }
+
+ fn edge_label(&self, source: usize, target: usize) -> Result<Option<EdgeLabel>, Error> {
+ if source >= self.nodes_len() {
+ return Err(Error::IndexOutOfBounds(source, self.nodes_len()));
+ }
+
+ if target >= self.nodes_len() {
+ return Err(Error::IndexOutOfBounds(target, self.nodes_len()));
+ }
+
+ Ok(self.edge_labels.get(&(source, target)).copied())
+ }
+}
diff --git a/forest/src/lib.rs b/forest/src/lib.rs
new file mode 100644
index 0000000..527a78c
--- /dev/null
+++ b/forest/src/lib.rs
@@ -0,0 +1,105 @@
+//! This file defines the expected behaviours of forests, and provides
+//! a default implementation for forests.
+//!
+//! The default forest actually implements the so-called shared packed
+//! parse forest, or SPPF. It packs sub-trees with the same parents
+//! under the same branch, and lets nodes from different branches
+//! share the same children, and hence the name.
+//!
+//! What is special here is that the forests are marked with some
+//! out-coming and in-coming plugs. These plugs are used to join
+//! different fragments of forests together.
+
+use graph::{ExtGraph, GraphLabel};
+
+use core::fmt::Display;
+
+#[derive(Debug)]
+pub enum Error {
+ IndexOutOfBounds(usize, usize),
+}
+
+impl Display for Error {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ match self {
+ Error::IndexOutOfBounds(index, bound) => {
+ write!(f, "index {index} is out of bound {bound}")
+ }
+ }
+ }
+}
+
+impl std::error::Error for Error {}
+
+/// A builder of a forest.
+pub trait ForestBuilder<NodeLabel: GraphLabel, EdgeLabel: GraphLabel> {
+ /// The type of the resulting forest.
+ type Output: Forest<NodeLabel, EdgeLabel>;
+
+ /// Construct a new builder with only one node with the given
+ /// label.
+ fn new_leaf(label: NodeLabel) -> Self;
+
+ /// Add a child to the builder the given labels for the new node
+ /// and the added edge.
+ ///
+ /// All plug-out nodes within the builder should have a new child
+ /// with the specified labels, and hence multiple children might
+ /// be added, and the plug-out nodes should be modified
+ /// accordingly.
+ fn add_children(&mut self, vertex_label: NodeLabel, edge_labe: EdgeLabel);
+
+ /// Build the forest.
+ fn build(self) -> Self::Output;
+
+ /// Build the forest from a reference.
+ fn build_ref(&self) -> Self::Output;
+}
+
+/// The expected behaviours of a forest.
+///
+/// Note that it contains a "striped down" version of the labelled
+/// graphs.
+pub trait Forest<NodeLabel: GraphLabel, EdgeLabel: GraphLabel>: ExtGraph {
+ /// Type of iterator of plug-in nodes.
+ type PluginIter<'a>: Iterator<Item = usize> + 'a
+ where
+ Self: 'a;
+
+ /// Type of iterator of plug-out nodes.
+ type PlugoutIter<'a>: Iterator<Item = usize> + 'a
+ where
+ Self: 'a;
+
+ /// Return the plug-in nodes
+ fn plugins(&self) -> Self::PluginIter<'_>;
+
+ /// Return the plug-out nodes
+ fn plugouts(&self) -> Self::PlugoutIter<'_>;
+
+ /// Plug another forest onto this forest.
+ ///
+ /// The plug-out nodes of this forest will be joined onto the
+ /// plug-in nodes of the joining forest.
+ ///
+ /// # Performance warning
+ ///
+ /// It is recommended to only call this function with a "small"
+ /// `other`, as this function might copy the whole graph
+ /// individually, node by node and edge by edge.
+ fn plug(&mut self, other: &Self) -> Result<(), Error>;
+
+ /// Return the vertex label.
+ ///
+ /// A vertex may not have labels.
+ fn vertex_label(&self, node_id: usize) -> Result<Option<NodeLabel>, Error>;
+
+ /// Return the edge label.
+ ///
+ /// An edge may have no labels. If there is no edge from the
+ /// given source to the given target, then `Ok(None)` is returned
+ /// as well.
+ fn edge_label(&self, source: usize, target: usize) -> Result<Option<EdgeLabel>, Error>;
+}
+
+pub mod default;