#![warn(missing_docs)] //! This package implements the core algorithm of the entire //! workspace: parsing with derivatives by means of chain rule and //! regular nulling languages. //! //! Since I shall not name my crate "core" to avoid collisions with //! the Rust's own core, I decided to name this crate after what I //! think is the essence of this algorithm, the chain-rule for //! derivatives of languages. pub mod atom; pub mod item; use graph::{error::Error as GError, LabelExtGraph}; use item::default::Error as ForestError; use item::PaVi; /// An edge in the Chain-Rule machine. #[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] pub struct Edge { /// The position in the atomic languages. label: usize, /// The source of the associated forest edge. forest_source: PaVi, /// The bottom source on which we shall perform reduction. /// /// If this equals `forest_source`, no extra reduction needs to be /// done. true_source: PaVi, /// Whether or not this edge is "accepting". accepting: bool, } impl Edge { /// Construct a new edge. pub fn new(label: usize, forest_source: PaVi, accepting: bool) -> Self { let true_source = forest_source; Self { label, forest_source, accepting, true_source, } } /// Return the label of the edge. pub fn label(&self) -> usize { self.label } /// Tell whether or not the edge is accepting. pub fn is_accepting(&self) -> bool { self.accepting } /// Return the associated forest edge of the edge. pub fn forest_source(&self) -> PaVi { self.forest_source } /// Set the associated forest edge of the edge of the edge. pub fn set_forest_source(&mut self, source: PaVi) { self.forest_source = source; } /// Set the associated bottom edge of the edge from which onwards /// we shall perform the reduction. pub fn set_true_source(&mut self, true_source: PaVi) { self.true_source = true_source; } /// Return the associated bottom edge of the edge from which /// onwards we shall perform the reduction. pub fn true_source(&self) -> PaVi { self.true_source } } impl core::fmt::Display for Edge { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { let label = self.label(); let forest_source = self.forest_source(); write!(f, "edge label {label} with item {}", forest_source) } } /// A Real Or Imaginary edge. /// /// An imaginary edge will be ignored when adding edges. The /// imaginary edges will be used to determine the acceptance of a /// node, when and only when that new node turns out to have no /// children. /// /// # Note /// /// Non, je ne suis pas un roi. Je ne sais pas qu'est-ce que vous /// parlez. #[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] pub enum Roi { /// A real edge is an ordinary edge. Re(Edge), /// The label of an imaginary edge is not important, so we only /// record its target. Im(usize), } // Some convenient conversions impl From for Roi { fn from(edge: Edge) -> Self { Self::Re(edge) } } impl From for Roi { fn from(target: usize) -> Self { Self::Im(target) } } impl Roi { /// Return the real part if it is a real edge. fn real_part(self) -> Option { if let Self::Re(edge) = self { Some(edge) } else { None } } /// Return the imaginary part if it is an imaginary edge. fn imaginary_part(self) -> Option { if let Self::Im(target) = self { Some(target) } else { None } } } /// Each derivation is a concatenation of two items, so there are two /// layers. But some items have no children and are accepting, in /// which case we just skip that item completely, for performance /// reasons, and hence there could be only one layer as well. /// /// It might even happen that both layers have no children, in which /// case we shall just put all previous edges here. #[derive(Debug, Clone, Eq, PartialEq)] pub enum TwoLayers { /// One layer has no children. One(Roi, usize), // REVIEW: Maybe we do not need to store an edge in the forest: we // only need the source of the edge? /// Both layers have children. /// /// The first element is the label of the second layer, the second /// the source of the associated forest edge of the second layer, /// and the third is a list of edges, which are the common first /// layers. Two(usize, PaVi, bool, Vec<(Roi, usize)>), } /// The type of a collapsed iterator. pub struct Collapsed<'a, I, C> where I: Iterator, C: Chain, { iter: I, chain: &'a mut C, stop: bool, } impl<'a, I, C> Collapsed<'a, I, C> where I: Iterator, C: Chain, { /// Collapse an iterator. pub fn collapse(iter: I, chain: &'a mut C) -> Self { let stop = false; Self { iter, chain, stop } } } impl<'a, I, C> Iterator for Collapsed<'a, I, C> where I: Iterator, C: Chain, { type Item = Result<(Roi, usize), ::Error>; fn next(&mut self) -> Option { if self.stop { return None; } if let Some(two_layer) = self.iter.next() { match two_layer { TwoLayers::One(edge, to) => Some(Ok((edge, to))), TwoLayers::Two(label, forest_source, accepting, edges) => { let new_index = match self.chain.extend( edges .into_iter() .filter_map(|(roi, target)| roi.real_part().map(|edge| (edge, target))), ) { Ok(new) => new, Err(error) => { // Prevent further iterations. self.stop = true; return Some(Err(error.into())); } }; Some(Ok(( Edge::new(label, forest_source, accepting).into(), new_index, ))) } } } else { None } } } /// The expected behaviours of a language which can take derivatives /// by chain rule. pub trait Chain: LabelExtGraph { /// The implementations should choose a type to represent errors. type Error: std::error::Error + From + From; /// A type of atomic languages that is chosen for this chain rule /// machine. type Atom: atom::Atom; /// Represents the language that is present after we parse the /// empty string, that is the initial configuration of the /// language. fn unit(atom: Self::Atom) -> Result; /// Produce the underlying atom, so that we can produce an initial /// empty chain from the same atom again. fn atom(&self) -> &Self::Atom; /// Return true if and only if the language contains the empty /// string. fn epsilon(&self) -> Result; /// Update the history fn update_history(&mut self, new: usize); /// Update the acceptance of a node, when and only when that node /// turns out to have no children. fn update_epsilon( &mut self, node_id: usize, edges: impl Iterator, ) -> Result<(), Self::Error>; /// An iterator that iterates all layers that need to be merged. type DerIter: Iterator; /// Take the derivative by a terminal `t` at position `pos`. fn derive(&mut self, t: usize, pos: usize, no_item: bool) -> Result; /// Take the union of all derivatives. fn union(&mut self, der_iter: Self::DerIter) -> Result, Self::Error> { // REVIEW: Think about the possibilities to avoid allocations. Collapsed::<_, Self>::collapse(der_iter, self) .collect::, Self::Error>>() .map(|mut v| { v.retain(|(_, target)| { *target == 0 || matches!(self.degree(*target), Ok(deg) if deg != 0) }); v }) } /// Use chain rule to compute the derivative with respect to a /// terminal. /// /// # Arguments /// /// The argument `t` is the terminal we computet the derivative /// with. /// /// The argument `pos` is the zero-based position within the /// input. /// /// The argument `no_item` determines whether we want the item /// derivation forest as well. fn chain(&mut self, t: usize, pos: usize, no_item: bool) -> Result<(), Self::Error> { let der_iter = self.derive(t, pos, no_item)?; let edges = self.union(der_iter)?; let new_index = self.extend( edges .iter() .filter_map(|(roi, target)| roi.real_part().map(|edge| (edge, *target))), )?; if self.is_empty_node(new_index)? { self.update_epsilon(new_index, edges.into_iter())?; } self.update_history(new_index); Ok(()) } // FIXME: I shall probably not use the trait of forests for this // purpose, but I have not figured out yet wha the correct trait // should be. /// The type of output item that will be produced by this machine. type Item: item::Forest; /// Signal to the parser that the end of the input is reached, so /// that the parser knows to generate suitable forests. /// /// This is not needed when recognizing instead of parsing. /// /// We also pass in the current position `pos` so that we have a /// little flexibility in the position of the end of input. In /// addition, this makes it easier to produce the suitable /// forests. /// /// As a reminder, `pos` should be the position of the last /// terminal, so if there are three terminals in total, the `pos` /// should be `2` , instead of `3`. /// /// Similarly, we pass in the terminal at the position `pos` to /// aid the production of the forest. fn end_of_input(&mut self, pos: usize, ter: usize) -> Result; } pub mod default;