From f27d604d93ce583d4404e1874664e08382ea2f00 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Fri, 6 Jan 2023 23:42:28 +0800 Subject: Save before system restart. I am about to re-start my system, so I save before any crashes happen. --- forest/src/default.rs | 14 +---------- forest/src/design.org | 66 +++++++++++++++++++++++++++++++++++++++++++++++++++ forest/src/lib.rs | 4 ++-- 3 files changed, 69 insertions(+), 15 deletions(-) create mode 100644 forest/src/design.org (limited to 'forest') diff --git a/forest/src/default.rs b/forest/src/default.rs index 456413b..5e438d4 100644 --- a/forest/src/default.rs +++ b/forest/src/default.rs @@ -61,14 +61,6 @@ impl Graph for DefaultForest ExtGraph - for DefaultForest -{ - fn extend(&mut self, edges: impl IntoIterator) -> Result { - self.graph.extend(edges) - } -} - #[derive(Debug)] pub struct LabelIter<'a> { set_iter: Iter<'a, usize>, @@ -88,11 +80,7 @@ impl<'a> Iterator for LabelIter<'a> { impl<'a> ExactSizeIterator for LabelIter<'a> { fn len(&self) -> usize { - let (lower, upper) = self.size_hint(); - // Note: This assertion is overly defensive, but it checks the - // invariant guaranteed by the trait. - debug_assert!(upper == Some(lower)); - lower + self.set_iter.len() } } diff --git a/forest/src/design.org b/forest/src/design.org new file mode 100644 index 0000000..771ca4b --- /dev/null +++ b/forest/src/design.org @@ -0,0 +1,66 @@ +#+TITLE: Format of a binarised shared packed parse forests +#+AUTHOR: Durand +#+DATE: <2023-01-05 Jeu 23:55> +#+STARTUP: content + +* Introduction + +I want to design a format for shared packed parse forests. +They are needed for the chain-rule machine because the graphs need to +be labelled (indirectly) by fragments of forests so that we can +correctly and efficiently compute the semi-ring values along the +chain-rule machine operations. Moreover, what we are really +interested is the parse forests, so we will need to deal with this +topic sooner or later, why not deal with it now? ;P + +* Principles + +- The number of children of nodes is bounded by a constant, in theory. + So it is only bounded by the computer hardware (and the grammar): + this representation is closer to the grammar rules. +- Labels of nodes are grammar slots, that is positions within the + rules. +- Edges have no labels: they are simply not needed. +- We need to have two lists of "plugs" that we can plug other forests + in. For this we also need to know if a label is labelled on a node + that is a plug. This implies the use of hashmaps I guess. +- When there are alternations in the forest, we use a "packed node" to + represent this alternation. Packed nodes are not labelled. They + just serve the role of putting nodes together. + +* Some thoughts [1/3] + +We do not need to attach forest fragments to nodes of nondeterministic +finite automata: we just attach to them lists of grammar slots, which +represent a sequence of "nulling out" nullable non-terminals, until +the main non-terminal appears as the last element of the list (or even +a range of slots to be even more efficient). A normal node would have +a range with one element. + +That is it! We do not need to worry about the forests now as we would +know how to combine the forest segments when we are performing the +chain-rule operations: we know exactly when we are expanding +non-terminals. + +Some caveats: + +- [X] Every node in the forest needs to know its parents. This is + needed for "reductions". That is, after we have parsed the entire + expansion for a non-terminal, we need to go back to where that + non-terminal was and continue from there. +- [ ] We need to record the expansions of those "virtual nodes". That + is, the virtual nodes represent atomic languages such as [s]^{(t)} + where s is a non-terminal and t is a terminal. To be more specific, + it represents the derivative of the left-linear closure of the + non-terminal s with respect to the terminal t. The left-linear + closure of s is the set of all strings (consisting of terminals and + non-terminals alike) that are derivable from s alone, without + consuming any inputs, by expanding according to the grammar rules. + This means that we need to know if which non-terminals were expanded + in order to get to a state in [s]^{(t)}. +- [ ] Each edge in the chain-rule machine needs to be labelled also + with a position in the forest. This perfectly solves the problem of + those "plugs"! + + + diff --git a/forest/src/lib.rs b/forest/src/lib.rs index 527a78c..3925bd5 100644 --- a/forest/src/lib.rs +++ b/forest/src/lib.rs @@ -10,7 +10,7 @@ //! out-coming and in-coming plugs. These plugs are used to join //! different fragments of forests together. -use graph::{ExtGraph, GraphLabel}; +use graph::{Graph, GraphLabel}; use core::fmt::Display; @@ -60,7 +60,7 @@ pub trait ForestBuilder { /// /// Note that it contains a "striped down" version of the labelled /// graphs. -pub trait Forest: ExtGraph { +pub trait Forest: Graph { /// Type of iterator of plug-in nodes. type PluginIter<'a>: Iterator + 'a where -- cgit v1.2.3-18-g5258