From 18d7955b7d84c00467ede38baae53f4ce1fb6908 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Fri, 20 Jan 2023 13:48:26 +0800 Subject: chain: a prototype is added. I have an ostensibly working prototype now. Further tests are needed to make sure that the algorithm meets the time complexity requirement, though. --- chain/src/lib.rs | 194 ++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 171 insertions(+), 23 deletions(-) (limited to 'chain/src/lib.rs') diff --git a/chain/src/lib.rs b/chain/src/lib.rs index 4e21e1d..91c37f7 100644 --- a/chain/src/lib.rs +++ b/chain/src/lib.rs @@ -10,41 +10,189 @@ pub mod atom; -// TODO: Define errors. +use graph::{error::Error as GError, LabelExtGraph, Parent}; + +use forest::Error as ForestError; + +/// An edge in the Chain-Rule machine. +#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] +pub struct Edge { + /// The position in the atomic languages. + label: usize, + /// The source of the associated forest edge. + forest_source: Parent, + /// Whether or not this edge is "accepting". + accepting: bool, +} + +impl Edge { + /// Construct a new edge. + pub fn new(label: usize, forest_source: Parent, accepting: bool) -> Self { + Self { + label, + forest_source, + accepting, + } + } + + /// Return the label of the edge. + pub fn label(&self) -> usize { + self.label + } + + /// Tell whether or not the edge is accepting. + pub fn is_accepting(&self) -> bool { + self.accepting + } + + /// Return the associated forest edge of the edge. + pub fn forest_source(&self) -> Parent { + self.forest_source + } +} + +impl core::fmt::Display for Edge { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let label = self.label(); + let forest_source = self.forest_source(); + + write!( + f, + "edge label {label} with forest source {} and edge index {}", + forest_source.node(), + forest_source.edge() + ) + } +} + +/// Each derivation is a concatenation of two items, so there are two +/// layers. But some items have no children and are accepting, in +/// which case we just skip that item completely, for performance +/// reasons, and hence there could be only one layer as well. +/// +/// It might even happen that both layers have no children, in which +/// case we shall just put all previous edges here. +#[derive(Debug, Clone, Eq, PartialEq)] +pub enum TwoLayers { + /// One layer has no children. + One(Edge, usize), + // REVIEW: Maybe we do not need to store an edge in the forest: we + // only need the source of the edge? + /// Both layers have children. + /// + /// The first element is the label of the second layer, the second + /// the source of the associated forest edge of the second layer, + /// and the third is a list of edges, which are the common first + /// layers. + Two(usize, Parent, bool, Vec<(Edge, usize)>), +} + +/// The type of a collapsed iterator. +pub struct Collapsed<'a, I, C> +where + I: Iterator, + C: Chain, +{ + iter: I, + chain: &'a mut C, + stop: bool, +} + +impl<'a, I, C> Collapsed<'a, I, C> +where + I: Iterator, + C: Chain, +{ + /// Collapse an iterator. + pub fn collapse(iter: I, chain: &'a mut C) -> Self { + let stop = false; + + Self { iter, chain, stop } + } +} + +impl<'a, I, C> Iterator for Collapsed<'a, I, C> +where + I: Iterator, + C: Chain, +{ + type Item = Result<(Edge, usize), ::Error>; + + fn next(&mut self) -> Option { + if self.stop { + return None; + } + + if let Some(two_layer) = self.iter.next() { + match two_layer { + TwoLayers::One(edge, to) => Some(Ok((edge, to))), + TwoLayers::Two(label, forest_source, accepting, edges) => { + let new_index = match self.chain.extend(edges.into_iter()) { + Ok(new) => new, + Err(error) => { + // Prevent further iterations. + + return Some(Err(error.into())); + } + }; + + Some(Ok((Edge::new(label, forest_source, accepting), new_index))) + } + } + } else { + None + } + } +} /// The expected behaviours of a language which can take derivatives /// by chain rule. -pub trait Chain: Default { +pub trait Chain: LabelExtGraph { /// The implementations should choose a type to represent errors. - type Error: std::error::Error; + type Error: std::error::Error + From + From; + + /// A type of atomic languages that is chosen for this chain rule + /// machine. + type Atom: atom::Atom; /// Represents the language that is present after we parse the /// empty string, that is the initial configuration of the /// language. This may or may not be different from what /// `Default::default` gives. - fn unit() -> Self; - - /// Take the derivative by a terminal symbol. - /// - /// This takes care of the union and the prepending operations. - /// - /// # A little remark about the design - /// - /// I have thought to separate different operations (like the - /// union, the prepending, and single derivatives) and then define - /// a function to put everything together. But I think that - /// design is not convenient to use. Also, I really do not need - /// those operations other than to provide this derivative - /// operation, so why define them? And putting all things - /// together may reduce the number of bugs caused by wrong uses of - /// those component functions, and can reduce the amount of - /// documentation strings a library user needs to read, in order - /// to make use of this trait. So I ended up with this design. - fn chain(&mut self, t: usize); + fn unit(atom: Self::Atom) -> Result; /// Return true if and only if the language contains the empty /// string. - fn epsilon(&self) -> bool; + fn epsilon(&self) -> Result; + + /// Update the history + fn update_history(&mut self, new: usize); + + /// An iterator that iterates all layers that need to be merged. + type DerIter: Iterator; + + /// Take the derivative by a terminal symbol at position `POS`. + fn derive(&mut self, t: usize, pos: usize) -> Result; + + /// Take the union of all derivatives. + fn union(&mut self, der_iter: Self::DerIter) -> Result, Self::Error> { + // REVIEW: Find a way to avoid allocations. + Collapsed::<_, Self>::collapse(der_iter, self).collect() + } + + /// Use chain rule to compute the derivative with respect to a + /// terminal. + fn chain(&mut self, t: usize, pos: usize) -> Result<(), Self::Error> { + let der_iter = self.derive(t, pos)?; + + let edges = self.union(der_iter)?; + + let new_index = self.extend(edges.into_iter())?; + + self.update_history(new_index); + + Ok(()) + } } pub mod default; -- cgit v1.2.3-18-g5258