From 9f1c88b863e247da3cd60d2792a7a13b18e25e53 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Wed, 14 Dec 2022 23:48:22 +0800 Subject: a temporary check point just to save things in a commit --- repcore/src/grammar.rs | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++ repcore/src/lib.rs | 6 +++++ repcore/src/plan.org | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 124 insertions(+) create mode 100644 repcore/src/grammar.rs create mode 100644 repcore/src/plan.org (limited to 'repcore') diff --git a/repcore/src/grammar.rs b/repcore/src/grammar.rs new file mode 100644 index 0000000..ee9f033 --- /dev/null +++ b/repcore/src/grammar.rs @@ -0,0 +1,59 @@ +//! This file implements the extected behaviours of grammars. + +// NOTE: We shall first start with a parser that works at the level of +// characters. The purpose is to first experiment with the workings +// and the performance of the algorithms, before optimising by using +// regular expressions to classify inputs into tokens. In other +// words, the current focus is not on the optimisations, whereas +// scanners are for optimisations only, so to speak. + +/// The type of a terminal. +/// +/// For the time being this is a wrapper around a string, but in the +/// future it may hold more information of scanners. +pub struct Terminal { + // If we want to use scanners, per chance add them as a new field + // here. + name: String, +} + +impl Terminal { + /// Create a terminal with the given name. + #[inline] + pub fn new(name: String) -> Self { + Self { name } + } + + /// Return the name of the terminal. + #[inline] + pub fn name(&self) -> &str { + &self.name + } +} + +/// The type of a non-terminal. +/// +/// This is just a wrapper around a string. +pub struct Nonterminal(String); + +impl Nonterminal { + /// Return the name of the nonterminal. + /// + /// Just to improve readability. + #[inline] + pub fn name(&self) -> &str { + &self.0 + } +} + +/// The type of a terminal or a non-terminal. +/// +/// Only an index is stored here. Actual data are stored in two other +/// arrays. +#[derive(Debug, Hash, Eq, PartialEq, Clone, Copy, Ord, PartialOrd)] +pub enum TNT { + /// Terminal variant + Ter(usize), + /// Nonterminal variant + Non(usize), +} diff --git a/repcore/src/lib.rs b/repcore/src/lib.rs index 7d12d9a..589b61c 100644 --- a/repcore/src/lib.rs +++ b/repcore/src/lib.rs @@ -1,3 +1,9 @@ +//! This package implements the core algorithm of the entire +//! workspace: parsing with derivatives by means of regular nulling +//! languages. + +pub mod grammar; + pub fn add(left: usize, right: usize) -> usize { left + right } diff --git a/repcore/src/plan.org b/repcore/src/plan.org new file mode 100644 index 0000000..13c2ee0 --- /dev/null +++ b/repcore/src/plan.org @@ -0,0 +1,59 @@ +#+TITLE: Plan of the package +#+AUTHOR: Durand +#+DATE: <2022-11-18 Ven 19:57> + +* Atomic Languages + +This describes the behaviours of atomic languages. The atomic +language consists of the null closure of any non-terminal symbol in +the grammar, and their deriavtives by terminals and non-terminal. + +* Derivative Languages + +This is the main driving device of the algorithm. Basically, the +algorithm works by taking successive derivatives, according to the +input symbol. At each step, we calculate the derivative language. In +this process, we also compute some semiring value and store in a +carrier. The end result of the algorithm is the final semiring +value. + +If one wants simply to determine if the input string belongs to the +grammar, one chooses the semiring to be the field with two elements, +the boolean field. If one wants to find how many ways are there to +derive a given input string, then one uses the semiring of natural +numbers instead. If one wants, moreover, to find all the possible +ways to derive a particular input string, then one can use the +free semiring on the set of terminals and non-terminals of the +grammar. Here the free semiring is the left-adjoint functor to the +forgetful functor from the category of semirings to the category of +sets. + +To be more specific, the free semiring on a set is given by sets of +sequences of elements in the set. The addition of the semiring is the +set union operation, and the multiplication is taking the respective +concatenations. + +** Semirings + +So we need a module to define the behaviours of semirings, and provide +some common semirings implementations. Then in the main driving force +we can freely substitute different semirings, according to the +particular needs. + +** Languages + +Then the main part is to define the behaviour of languages. This +should be easy enough, since we already have the mechanism of graphs, +nondeterministic automata, and semirings. All we need to do is to +combine them together. + +* Testing ground + +I am in a strong need to test things out. The most important one is +to visualize each step of the derivation, in a human-friendly manner. +I need this to examine whether my atomic languages are wrongly +implemented, or my atomic languages are wrongly derived, or my +understanding of the main algorithm is plain wrong. + +This is the main reason I started this rewrite of the package. + -- cgit v1.2.3-18-g5258