summaryrefslogtreecommitdiff
path: root/forest
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2023-01-11 23:47:26 +0800
committerJSDurand <mmemmew@gmail.com>2023-01-11 23:47:26 +0800
commit1a3d346f413325ed37848a6b2526e8e729269833 (patch)
treeab8812f8094d096c68aee53cf516e986cc9a273a /forest
parentf27d604d93ce583d4404e1874664e08382ea2f00 (diff)
Record left-linear expansion and forest format
Now the grammar will record the left-linear expansions when generating the nondeterministic finite automaton frmo its rules, and will record whether an edge in the nondeterministic finite automaton comes from a left-linear expansion. The latter is needed because while performing a chain-rule derivation, we do not need the left-linear expanded derivations in the "first layer". This might well have been the root cause of the bad performance of the previous version of this package. Also I have figured out how to properly generate and handle parse forests while manipulating the "chain-rule machine".
Diffstat (limited to 'forest')
-rw-r--r--forest/src/default.rs144
-rw-r--r--forest/src/design.org95
-rw-r--r--forest/src/lib.rs116
3 files changed, 146 insertions, 209 deletions
diff --git a/forest/src/default.rs b/forest/src/default.rs
index 5e438d4..d3970e9 100644
--- a/forest/src/default.rs
+++ b/forest/src/default.rs
@@ -1,141 +1,21 @@
//! This file provides a default implementation for the
//! [`Forest`][crate::Forest] trait.
+#[allow(unused_imports)]
use super::*;
+#[allow(unused_imports)]
use graph::{error::Error as GraphError, ALGraph, Graph};
+#[allow(unused_imports)]
use std::collections::{hash_set::Iter, HashMap, HashSet};
-#[derive(Debug, Clone)]
-pub struct DefaultForest<NodeLabel: GraphLabel, EdgeLabel: GraphLabel> {
- graph: ALGraph,
- vertex_labels: HashMap<usize, NodeLabel>,
- edge_labels: HashMap<(usize, usize), EdgeLabel>,
- plugins: HashSet<usize>,
- plugouts: HashSet<usize>,
-}
+// TODO: Use PLGraph instead.
-impl<NodeLabel: GraphLabel, EdgeLabel: GraphLabel> Default for DefaultForest<NodeLabel, EdgeLabel> {
- fn default() -> Self {
- Self {
- graph: Default::default(),
- vertex_labels: Default::default(),
- edge_labels: Default::default(),
- plugins: Default::default(),
- plugouts: Default::default(),
- }
- }
-}
-
-impl<NodeLabel: GraphLabel, EdgeLabel: GraphLabel> Graph for DefaultForest<NodeLabel, EdgeLabel> {
- type Iter<'a> = <ALGraph as Graph>::Iter<'a>
- where
- Self: 'a;
-
- fn is_empty(&self) -> bool {
- self.graph.is_empty()
- }
-
- fn nodes_len(&self) -> usize {
- self.graph.nodes_len()
- }
-
- fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, GraphError> {
- self.graph.children_of(node_id)
- }
-
- fn degree(&self, node_id: usize) -> Result<usize, GraphError> {
- self.graph.degree(node_id)
- }
-
- fn is_empty_node(&self, node_id: usize) -> Result<bool, GraphError> {
- self.graph.is_empty_node(node_id)
- }
-
- fn has_edge(&self, source: usize, target: usize) -> Result<bool, GraphError> {
- self.graph.has_edge(source, target)
- }
-
- fn replace_by_builder(&mut self, _builder: impl graph::Builder<Result = Self>) {
- unimplemented!()
- }
-}
-
-#[derive(Debug)]
-pub struct LabelIter<'a> {
- set_iter: Iter<'a, usize>,
-}
-
-impl<'a> Iterator for LabelIter<'a> {
- type Item = usize;
-
- fn next(&mut self) -> Option<Self::Item> {
- self.set_iter.next().copied()
- }
-
- fn size_hint(&self) -> (usize, Option<usize>) {
- self.set_iter.size_hint()
- }
-}
-
-impl<'a> ExactSizeIterator for LabelIter<'a> {
- fn len(&self) -> usize {
- self.set_iter.len()
- }
-}
-
-impl<'a> From<Iter<'a, usize>> for LabelIter<'a> {
- fn from(set_iter: Iter<'a, usize>) -> Self {
- Self { set_iter }
- }
-}
-
-impl<NodeLabel: GraphLabel, EdgeLabel: GraphLabel> Forest<NodeLabel, EdgeLabel>
- for DefaultForest<NodeLabel, EdgeLabel>
-{
- type PluginIter<'a> = LabelIter<'a>
- where
- Self: 'a;
-
- type PlugoutIter<'a> = LabelIter<'a>
- where
- Self: 'a;
-
- fn plugins(&self) -> Self::PluginIter<'_> {
- self.plugins.iter().into()
- }
-
- fn plugouts(&self) -> Self::PlugoutIter<'_> {
- self.plugouts.iter().into()
- }
-
- fn plug(&mut self, other: &Self) -> Result<(), Error> {
- // PLAN: Produce a BuilderMut, adjust the indices for edges,
- // and then add edges between plugs.
- //
- // Since I cannot touch the underlying nodes directly, I have
- // to add the nodes and edges individually.
-
- todo!()
- }
-
- fn vertex_label(&self, node_id: usize) -> Result<Option<NodeLabel>, Error> {
- if node_id >= self.nodes_len() {
- return Err(Error::IndexOutOfBounds(node_id, self.nodes_len()));
- }
-
- Ok(self.vertex_labels.get(&node_id).copied())
- }
-
- fn edge_label(&self, source: usize, target: usize) -> Result<Option<EdgeLabel>, Error> {
- if source >= self.nodes_len() {
- return Err(Error::IndexOutOfBounds(source, self.nodes_len()));
- }
-
- if target >= self.nodes_len() {
- return Err(Error::IndexOutOfBounds(target, self.nodes_len()));
- }
-
- Ok(self.edge_labels.get(&(source, target)).copied())
- }
-}
+// #[derive(Debug, Clone)]
+// pub struct DefaultForest<NodeLabel: GraphLabel, EdgeLabel: GraphLabel> {
+// graph: ALGraph,
+// vertex_labels: HashMap<usize, NodeLabel>,
+// edge_labels: HashMap<(usize, usize), EdgeLabel>,
+// plugins: HashSet<usize>,
+// plugouts: HashSet<usize>,
+// }
diff --git a/forest/src/design.org b/forest/src/design.org
index 771ca4b..09db113 100644
--- a/forest/src/design.org
+++ b/forest/src/design.org
@@ -28,7 +28,7 @@ topic sooner or later, why not deal with it now? ;P
represent this alternation. Packed nodes are not labelled. They
just serve the role of putting nodes together.
-* Some thoughts [1/3]
+* Some thoughts
We do not need to attach forest fragments to nodes of nondeterministic
finite automata: we just attach to them lists of grammar slots, which
@@ -44,23 +44,98 @@ non-terminals.
Some caveats:
-- [X] Every node in the forest needs to know its parents. This is
- needed for "reductions". That is, after we have parsed the entire
+- Every node in the forest needs to know its parents. This is needed
+ for "reductions". That is, after we have parsed the entire
expansion for a non-terminal, we need to go back to where that
non-terminal was and continue from there.
-- [ ] We need to record the expansions of those "virtual nodes". That
- is, the virtual nodes represent atomic languages such as [s]^{(t)}
- where s is a non-terminal and t is a terminal. To be more specific,
- it represents the derivative of the left-linear closure of the
+- We need to record the expansions of those "virtual nodes". That is,
+ the virtual nodes represent atomic languages such as [s]^{(t)} where s
+ is a non-terminal and t is a terminal. To be more specific, it
+ represents the derivative of the left-linear closure of the
non-terminal s with respect to the terminal t. The left-linear
closure of s is the set of all strings (consisting of terminals and
non-terminals alike) that are derivable from s alone, without
consuming any inputs, by expanding according to the grammar rules.
This means that we need to know if which non-terminals were expanded
in order to get to a state in [s]^{(t)}.
-- [ ] Each edge in the chain-rule machine needs to be labelled also
- with a position in the forest. This perfectly solves the problem of
- those "plugs"!
+- Each edge in the chain-rule machine needs to be labelled also with a
+ position in the forest. This perfectly solves the problem of those
+ "plugs"!
+* Formal Definition
+We need to figure out the definition of the forest. That it to say,
+what will the resulting forest look like for a grammar.
+Moreover, we need to know how to correspond each step in the
+chain-rule machine with the definition of the forest.
+
+After these two steps, we can easily proceed with the construction of
+the chain-rule machine.
+
+** Axioms
+
+There are two *axioms* from which the definition of the forests
+follows.
+
+1. Each node has a unique label, given (principally) by three
+ components:
+ 1) input range start
+ 2) input range end
+ 3) grammar label
+2. If different subtrees share the same node label as the root, a
+ "clone" of the root should be made, and there should be a
+ "representative" of all clones, which is treated as a normal node
+ with that label, and all clones are its children.
+
+The first axiom ensures that the size of the forest is bounded by the
+cube of \(n\), \(n\) is the length of the input sequence. And the
+second axiom specifies what to do when subtrees share a parent.
+
+Since the clones are introduced only when subtrees share a parent,
+their number cannot exceed the number of nodes of a forest without
+clones. This shows that adding clones does not affect the growth rate
+of the size of forests.
+
+A remark: the /clones/ are called /packed nodes/ in the traditional
+terminology, but the terminology of a clone makes more sense to me.
+
+** Correspondence with chain-rule machine
+
+Every edge in the chain-rule machine corresponds to an edge in the
+forest; denote this correspondence by \(f\).
+
+The chain-rule operation can be described as follows:
+
+1. Start with an edge \(e\) with children \(d_1, \ldots, d_n\) in the
+ chain-rule machine.
+2. Prepare a new forest fragment as follows.
+ 1) For every child \(g\) of \(e\) in the atomic language, if \(g\)
+ is the terminal that appears as the current input \(t\), let the
+ new fragment be defined as the node moved by \(g\), alone.
+ 2) If \(g\) is a non-terminal and its first-set contains \(t\),
+ then for every left-linearly closed child of \(g\) that is
+ labelled \(t\), denoted as \(h\), then let the fragment be
+ defined as the node moved by \(g\), with the pre-recorded
+ left-linear expansion information from \(g\) to \(h\) appended
+ as children.
+3. For \(i=1,\ldots,n\), consider the edge \(f(d_i)\) in the forest.
+ There are three cases:
+ 1) If the next edge is empty, that means we have found something
+ totally new to add to the forest, so we just add that.
+ 2) If the next edge after \(f(e)\) contains the new fragment as a
+ sub-tree already, this means the current branch has already been
+ explored before, and we have nothing else to do.
+ 3) If the next edge is non-empty and does not match the new
+ fragment, this means we have a conflict with a previously found
+ branch. We solve it simply by following the second axiom above:
+ we make the node that is the source of \(f(e)\) a clone, and add
+ the new fragment under a new clone. Note that if the node is
+ already a clone, we just make a clone under the parent of that
+ clone.
+4. Update the correspondence \(f\) by updating \(f(g)\) and \(f(h)\)
+ accordingly.
+
+In this process we always follow the first axiom by ensuring that the
+labels of the forest are unique to each node, except for those clones,
+which are only created by following the second axiom.
diff --git a/forest/src/lib.rs b/forest/src/lib.rs
index 3925bd5..c2f4988 100644
--- a/forest/src/lib.rs
+++ b/forest/src/lib.rs
@@ -1,3 +1,4 @@
+#![warn(missing_docs)]
//! This file defines the expected behaviours of forests, and provides
//! a default implementation for forests.
//!
@@ -10,12 +11,16 @@
//! out-coming and in-coming plugs. These plugs are used to join
//! different fragments of forests together.
-use graph::{Graph, GraphLabel};
+use graph::{GraphLabel, LabelGraph, ParentsGraph};
use core::fmt::Display;
+// TODO: move this to default
+
+/// The type of errors for forest operations.
#[derive(Debug)]
pub enum Error {
+ /// An index is out of bounds.
IndexOutOfBounds(usize, usize),
}
@@ -31,75 +36,52 @@ impl Display for Error {
impl std::error::Error for Error {}
-/// A builder of a forest.
-pub trait ForestBuilder<NodeLabel: GraphLabel, EdgeLabel: GraphLabel> {
- /// The type of the resulting forest.
- type Output: Forest<NodeLabel, EdgeLabel>;
-
- /// Construct a new builder with only one node with the given
- /// label.
- fn new_leaf(label: NodeLabel) -> Self;
-
- /// Add a child to the builder the given labels for the new node
- /// and the added edge.
- ///
- /// All plug-out nodes within the builder should have a new child
- /// with the specified labels, and hence multiple children might
- /// be added, and the plug-out nodes should be modified
- /// accordingly.
- fn add_children(&mut self, vertex_label: NodeLabel, edge_labe: EdgeLabel);
-
- /// Build the forest.
- fn build(self) -> Self::Output;
-
- /// Build the forest from a reference.
- fn build_ref(&self) -> Self::Output;
-}
+// /// A builder of a forest.
+// pub trait ForestBuilder<NodeLabel: GraphLabel, EdgeLabel: GraphLabel> {
+// /// The type of the resulting forest.
+// type Output: Forest<NodeLabel, EdgeLabel>;
+
+// /// Construct a new builder with only one node with the given
+// /// label.
+// fn new_leaf(label: NodeLabel) -> Self;
+
+// /// Add a child to the builder the given labels for the new node
+// /// and the added edge.
+// ///
+// /// All plug-out nodes within the builder should have a new child
+// /// with the specified labels, and hence multiple children might
+// /// be added, and the plug-out nodes should be modified
+// /// accordingly.
+// fn add_children(&mut self, vertex_label: NodeLabel, edge_labe: EdgeLabel);
+
+// /// Build the forest.
+// fn build(self) -> Self::Output;
+
+// /// Build the forest from a reference.
+// fn build_ref(&self) -> Self::Output;
+// }
+
+// FIXME: The trait should be re-designed.
/// The expected behaviours of a forest.
///
-/// Note that it contains a "striped down" version of the labelled
-/// graphs.
-pub trait Forest<NodeLabel: GraphLabel, EdgeLabel: GraphLabel>: Graph {
- /// Type of iterator of plug-in nodes.
- type PluginIter<'a>: Iterator<Item = usize> + 'a
- where
- Self: 'a;
-
- /// Type of iterator of plug-out nodes.
- type PlugoutIter<'a>: Iterator<Item = usize> + 'a
- where
- Self: 'a;
-
- /// Return the plug-in nodes
- fn plugins(&self) -> Self::PluginIter<'_>;
-
- /// Return the plug-out nodes
- fn plugouts(&self) -> Self::PlugoutIter<'_>;
-
- /// Plug another forest onto this forest.
- ///
- /// The plug-out nodes of this forest will be joined onto the
- /// plug-in nodes of the joining forest.
- ///
- /// # Performance warning
- ///
- /// It is recommended to only call this function with a "small"
- /// `other`, as this function might copy the whole graph
- /// individually, node by node and edge by edge.
- fn plug(&mut self, other: &Self) -> Result<(), Error>;
-
- /// Return the vertex label.
- ///
- /// A vertex may not have labels.
- fn vertex_label(&self, node_id: usize) -> Result<Option<NodeLabel>, Error>;
-
- /// Return the edge label.
- ///
- /// An edge may have no labels. If there is no edge from the
- /// given source to the given target, then `Ok(None)` is returned
- /// as well.
- fn edge_label(&self, source: usize, target: usize) -> Result<Option<EdgeLabel>, Error>;
+/// Note that it requires only a subset of the functionalities of
+/// labelled graphs.
+pub trait Forest<T: GraphLabel>: ParentsGraph + LabelGraph<T> {
+ /// The type of errors for operations on the forest.
+ type Error: std::error::Error + From<graph::error::Error>;
+
+ /// Detect if the fragment is a prefix of the sub-forest rooted at
+ /// `node_id`.
+ fn is_prefix<F: AsRef<Self>>(&self, node_id: usize, fragment: F) -> Result<bool, Self::Error>;
+
+ /// Extend the forest by adjoining another forest at a given node.
+ fn plant<F: AsRef<Self>>(&mut self, node_id: usize, fragment: F) -> Result<(), Self::Error>;
+
+ /// Clone a node by making a new node and making all the nodes
+ /// that previously pointed to the old node now point to the new
+ /// node, and the new node points to the old node.
+ fn clone(&mut self, node_id: usize) -> Result<(), Self::Error>;
}
pub mod default;