From bdbd4b4dc21af09711c97d3f903877443199af06 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Tue, 3 Jan 2023 23:44:02 +0800 Subject: structural change: separate crates out I moved functionality that is not strictly core into separate crates, so that the whole package becomes more modular and it becomes easier to try other parsing algorithms in the future. Also I have to figure the forests out before finishing the core chain-rule algorithm, as the part about forests affects the labels of the grammars directly. From my experience writing the previous version, it is asking for trouble to change the label type dramatically at a later point: too many places need to be changed. Thus I decided to figure the rough part of forests out. Actually I only have to figure out how to attach forest fragments to edges of the underlying atomic languages, and the more complex parts of putting forests together can be left to the recorders, which is my vision of assembling semi-ring values during the chain-rule machine. It should be relatively easy to produce forest fragments from grammars since we are just trying to extract some information from the grammar, not to manipulate that information in some complicated way. We have to do some manipulations in the process, though, in order to make sure that the nulling and epsilon-removal processes do not invalidate these fragments. --- chain/src/lib.rs | 49 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 37 insertions(+), 12 deletions(-) (limited to 'chain/src/lib.rs') diff --git a/chain/src/lib.rs b/chain/src/lib.rs index 0ec4d4c..4e21e1d 100644 --- a/chain/src/lib.rs +++ b/chain/src/lib.rs @@ -1,3 +1,4 @@ +#![warn(missing_docs)] //! This package implements the core algorithm of the entire //! workspace: parsing with derivatives by means of chain rule and //! regular nulling languages. @@ -7,19 +8,43 @@ //! think is the essence of this algorithm, the chain-rule for //! derivatives of languages. 
-pub mod grammar; +pub mod atom; -pub fn add(left: usize, right: usize) -> usize { - left + right -} +// TODO: Define errors. + +/// The expected behaviours of a language which can take derivatives +/// by chain rule. +pub trait Chain: Default { + /// The implementations should choose a type to represent errors. + type Error: std::error::Error; -#[cfg(test)] -mod tests { - use super::*; + /// Represents the language that is present after we parse the + /// empty string, that is, the initial configuration of the + /// language. This may or may not be different from what + /// `Default::default` gives. + fn unit() -> Self; - #[test] - fn it_works() { - let result = add(2, 2); - assert_eq!(result, 4); - } + /// Take the derivative by a terminal symbol. + /// + /// This takes care of the union and the prepending operations. + /// + /// # A little remark about the design + /// + /// I considered separating the different operations (like the + /// union, the prepending, and single derivatives) and then defining + /// a function to put everything together. But I think that + /// design is not convenient to use. Also, I really do not need + /// those operations other than to provide this derivative + /// operation, so why define them? And putting all things + /// together may reduce the number of bugs caused by wrong uses of + /// those component functions, and can reduce the amount of + /// documentation strings a library user needs to read, in order + /// to make use of this trait. So I ended up with this design. + fn chain(&mut self, t: usize); + + /// Return true if and only if the language contains the empty + /// string. + fn epsilon(&self) -> bool; } + +pub mod default; -- cgit v1.2.3-18-g5258