a temporary check point

just to save things in a commit
author: JSDurand <mmemmew@gmail.com> 2022-12-14 23:48:22 +0800
committer: JSDurand <mmemmew@gmail.com> 2022-12-14 23:48:22 +0800
commit: 9f1c88b863e247da3cd60d2792a7a13b18e25e53 (patch)
tree: d29c0e19793a88a1de6898fdfd2a376fca21378f
parent: cb7bcfad4ab0041aaf3fde3185e27ee46bb37788 (diff)
18 files changed, 940 insertions, 130 deletions
diff --git a/THANKS b/THANKS
index e69de29..9045a0b 100644
--- a/THANKS
+++ b/THANKS
@@ -0,0 +1 @@
+TODO: Thank many people here.
+\ No newline at end of file
diff --git a/configure.ac b/configure.ac
index fbf987e..fe765e6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2,8 +2,6 @@ AC_PREREQ([2.60])
 
 AC_INIT([REP], m4_esyscmd([./find-version.sh]), [durand@jsdurand.xyz])
 
-dnl patsubst(m4_esyscmd([./find-version.sh]), ` "')
-
 AC_COPYRIGHT([This package is covered by GPL v3.])
 
 AC_CONFIG_AUX_DIR([build-aux])
@@ -21,7 +19,7 @@ AS_IF([test "$CARGO" = "notfound"], [AC_MSG_ERROR([cargo is required])])
 AC_PATH_PROG([RUSTC], [rustc], [notfound])
 AS_IF([test "$RUSTC" = "notfound"], [AC_MSG_ERROR([rustc is required])])
 
-AC_CONFIG_FILES([Makefile graph/Makefile])
+AC_CONFIG_FILES([Makefile graph/Makefile nfa/Makefile])
 
 AC_OUTPUT
 
diff --git a/find-version.sh b/find-version.sh
index 4ac390e..99aa538 100755
--- a/find-version.sh
+++ b/find-version.sh
@@ -2,18 +2,24 @@
 ":"; exec emacs --quick --script "$0" -- "$@" # -*- mode: emacs-lisp; lexical-binding: t; -*-
 
 (with-temp-buffer
+  ;; the version information is stored in the Cargo.toml file.
   (insert-file-contents "Cargo.toml")
   (goto-char (point-min))
   (cond
+   ;; search for the version assignment statement
    ((search-forward "version =" nil t)
-    (re-search-forward " *" (line-end-position) t)
+    ;; skip spaces
+    (re-search-forward "[[:space:]]*" (line-end-position) t)
+    ;; check it starts with a double quote
     (cond
      ((= (char-after) 34))
      ((error "Invalid syntax at %d" (point))))
     (let ((end (line-end-position)))
+      ;; and check it ends with a double quote as well
       (cond
        ((= (char-before end) 34))
        ((error "Invalid syntax at %d" (1- end))))
+      ;; then print the bare version string out without delimiters
       (princ
        (buffer-substring-no-properties
         (1+ (point)) (1- end)))))
diff --git a/graph/Makefile.am b/graph/Makefile.am
index 776b911..623572a 100644
--- a/graph/Makefile.am
+++ b/graph/Makefile.am
@@ -1,12 +1,19 @@
-.PHONY: dev rel
+.PHONY: dev rel clean check
 
 all: dev
 
 dev:
-	@CARGO@ build
+	@echo "cargo build"
+	@@CARGO@ build
 
 rel:
-	@CARGO@ build --release
+	@echo "cargo build --release"
+	@@CARGO@ build --release
 
 clean:
-	@CARGO@ clean
+	@echo "cargo clean"
+	@@CARGO@ clean
+
+check:
+	@echo "cargo clippy"
+	@@CARGO@ clippy
diff --git a/graph/src/adlist.rs b/graph/src/adlist.rs
index c16ceb2..18ad770 100644
--- a/graph/src/adlist.rs
+++ b/graph/src/adlist.rs
@@ -102,6 +102,14 @@ impl ExtGraph for ALGraph {
     }
 }
 
+// TODO: Add a way to build a graph by its raw adjacency list representation.
+impl From<Vec<Vec<usize>>> for ALGraph {
+    fn from(adlist: Vec<Vec<usize>>) -> Self {
+        let nodes: Vec<ALNode> = adlist.iter().cloned().map(ALNode::new).collect();
+        Self { nodes }
+    }
+}
+
 #[cfg(test)]
 mod algraph_test {
     use super::*;
diff --git a/graph/src/labelled.rs b/graph/src/labelled.rs
index 1cb2461..d02e301 100644
--- a/graph/src/labelled.rs
+++ b/graph/src/labelled.rs
@@ -144,7 +144,7 @@ impl<'a> Iterator for LabelIndexIter<'a> {
 
     #[inline]
     fn next(&mut self) -> Option<Self::Item> {
-        self.iter.as_mut().map(|iterator| iterator.next()).flatten()
+        self.iter.as_mut().and_then(|iterator| iterator.next())
     }
 
     #[inline]
diff --git a/nfa/Cargo.toml b/nfa/Cargo.toml
index b1387b6..7f48760 100644
--- a/nfa/Cargo.toml
+++ b/nfa/Cargo.toml
@@ -7,6 +7,7 @@ edition = "2021"
 
 [dependencies]
 graph = { path = "../graph", optional = true }
+receme = { path = "../receme" }
 
 [features]
 default = ["default-graph"]
diff --git a/nfa/Makefile.am b/nfa/Makefile.am
new file mode 100644
index 0000000..623572a
--- /dev/null
+++ b/nfa/Makefile.am
@@ -0,0 +1,19 @@
+.PHONY: dev rel clean check
+
+all: dev
+
+dev:
+	@echo "cargo build"
+	@@CARGO@ build
+
+rel:
+	@echo "cargo build --release"
+	@@CARGO@ build --release
+
+clean:
+	@echo "cargo clean"
+	@@CARGO@ clean
+
+check:
+	@echo "cargo clippy"
+	@@CARGO@ clippy
diff --git a/nfa/src/default/mod.rs b/nfa/src/default/mod.rs
index 805540b..b9ee398 100644
--- a/nfa/src/default/mod.rs
+++ b/nfa/src/default/mod.rs
@@ -1,123 +1,10 @@
 //! This file provides a structure that implements the trait
-//! [`NFA`][super::Nfa].
-//!
-//! It is used as the default implementation.
+//! [`NFA`][super::Nfa] and another that umplements the trait
+//! [`Regex`][super::Regex].
 
-use graph::{error::Error as GError, DLGraph, Graph, GraphLabel, LabelGraph};
+pub mod nfa;
 
-use super::{error::Error, Nfa, Regex};
-
-// TODO: Store the regular expression in the NFA as well.
-//
-// The current focus of the project is to understand the growth rate
-// of the algorithm, to know whether I made a mistake in the previous
-// iteration of the implementation, or the algorithm is not as fast as
-// the author estimated, which is not quite likely, of course.
-//
-// Thus I shall establish a friendly interface that allows me to view
-// and debug the atomic languages and the languages, transparently.
-
-#[non_exhaustive]
-#[derive(Debug)]
-/// Default NFA implementation.
-pub struct DefaultNFA<T: GraphLabel> {
-    graph: DLGraph<T>,
-}
-
-impl<T: GraphLabel> Default for DefaultNFA<T> {
-    fn default() -> Self {
-        let graph = Default::default();
-        Self { graph }
-    }
-}
-
-impl<T: GraphLabel> Graph for DefaultNFA<T> {
-    type Iter<'a> = <DLGraph<T> as Graph>::Iter<'a> where T: 'a;
-
-    #[inline]
-    fn is_empty(&self) -> bool {
-        self.graph.is_empty()
-    }
-
-    #[inline]
-    fn nodes_len(&self) -> usize {
-        self.graph.nodes_len()
-    }
-
-    #[inline]
-    fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, GError> {
-        self.graph.children_of(node_id)
-    }
-
-    #[inline]
-    fn degree(&self, node_id: usize) -> Result<usize, GError> {
-        self.graph.degree(node_id)
-    }
-
-    #[inline]
-    fn is_empty_node(&self, node_id: usize) -> Result<bool, GError> {
-        self.graph.is_empty_node(node_id)
-    }
-
-    #[inline]
-    fn has_edge(&self, source: usize, target: usize) -> Result<bool, GError> {
-        self.graph.has_edge(source, target)
-    }
-}
-
-impl<T: GraphLabel> LabelGraph<T> for DefaultNFA<T> {
-    type Iter<'a> = <DLGraph<T> as LabelGraph<T>>::Iter<'a> where T: 'a;
-
-    type LabelIter<'a> = <DLGraph<T> as LabelGraph<T>>::LabelIter<'a> where T: 'a;
-
-    // TODO: Return the label from the contained regular language.
-    #[inline]
-    fn vertex_label(&self, node_id: usize) -> Result<Option<T>, GError> {
-        if self.has_node(node_id) {
-            todo!()
-        } else {
-            Err(GError::IndexOutOfBounds(node_id, self.nodes_len()))
-        }
-    }
-
-    #[inline]
-    fn edge_label(&self, source: usize, target: usize) -> Result<Vec<T>, GError> {
-        self.graph.edge_label(source, target)
-    }
-
-    #[inline]
-    fn find_children_with_label(
-        &self,
-        node_id: usize,
-        label: &T,
-    ) -> Result<<Self as LabelGraph<T>>::Iter<'_>, GError> {
-        self.graph.find_children_with_label(node_id, label)
-    }
-
-    #[inline]
-    fn labels_of(&self, node_id: usize) -> Result<Self::LabelIter<'_>, GError> {
-        self.graph.labels_of(node_id)
-    }
-}
-
-impl<T: GraphLabel> Nfa<T> for DefaultNFA<T> {
-    #[allow(unused)]
-    fn to_nfa(regex: impl Regex<T>) -> Self {
-        todo!()
-    }
-
-    fn remove_epsilon(&mut self) -> Result<(), Error> {
-        todo!()
-    }
-
-    fn remove_dead(&mut self) -> Result<(), Error> {
-        todo!()
-    }
-
-    fn nulling(&mut self) -> Result<(), Error> {
-        todo!()
-    }
-}
+pub mod regex;
 
 #[cfg(test)]
 mod default_nfa_test {}
diff --git a/nfa/src/default/nfa.rs b/nfa/src/default/nfa.rs
new file mode 100644
index 0000000..3c2bd83
--- /dev/null
+++ b/nfa/src/default/nfa.rs
@@ -0,0 +1,120 @@
+//! This file provides a default implementation of NFA.
+
+// TODO: Store the regular expression in the NFA as well.
+//
+// The current focus of the project is to understand the growth rate
+// of the algorithm, to know whether I made a mistake in the previous
+// iteration of the implementation, or the algorithm is not as fast as
+// the author estimated, which is not quite likely, of course.
+//
+// Thus I shall establish a friendly interface that allows me to view
+// and debug the atomic languages and the languages, transparently.
+
+use graph::{error::Error as GError, DLGraph, Graph, GraphLabel, LabelGraph};
+
+use crate::{error::Error, Nfa, Regex};
+
+use std::fmt::Display;
+
+#[non_exhaustive]
+#[derive(Debug)]
+/// Default NFA implementation.
+pub struct DefaultNFA<T: GraphLabel + Display> {
+    graph: DLGraph<T>,
+}
+
+impl<T: GraphLabel + Display> Default for DefaultNFA<T> {
+    fn default() -> Self {
+        Self {
+            graph: Default::default(),
+        }
+    }
+}
+
+impl<T: GraphLabel + Display> Graph for DefaultNFA<T> {
+    type Iter<'a> = <DLGraph<T> as Graph>::Iter<'a> where T: 'a;
+
+    #[inline]
+    fn is_empty(&self) -> bool {
+        self.graph.is_empty()
+    }
+
+    #[inline]
+    fn nodes_len(&self) -> usize {
+        self.graph.nodes_len()
+    }
+
+    #[inline]
+    fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, GError> {
+        self.graph.children_of(node_id)
+    }
+
+    #[inline]
+    fn degree(&self, node_id: usize) -> Result<usize, GError> {
+        self.graph.degree(node_id)
+    }
+
+    #[inline]
+    fn is_empty_node(&self, node_id: usize) -> Result<bool, GError> {
+        self.graph.is_empty_node(node_id)
+    }
+
+    #[inline]
+    fn has_edge(&self, source: usize, target: usize) -> Result<bool, GError> {
+        self.graph.has_edge(source, target)
+    }
+}
+
+impl<T: GraphLabel + Display> LabelGraph<T> for DefaultNFA<T> {
+    type Iter<'a> = <DLGraph<T> as LabelGraph<T>>::Iter<'a> where T: 'a;
+
+    type LabelIter<'a> = <DLGraph<T> as LabelGraph<T>>::LabelIter<'a> where T: 'a;
+
+    // TODO: Return the label from the contained regular language.
+    #[inline]
+    fn vertex_label(&self, node_id: usize) -> Result<Option<T>, GError> {
+        if self.has_node(node_id) {
+            todo!()
+        } else {
+            Err(GError::IndexOutOfBounds(node_id, self.nodes_len()))
+        }
+    }
+
+    #[inline]
+    fn edge_label(&self, source: usize, target: usize) -> Result<Vec<T>, GError> {
+        self.graph.edge_label(source, target)
+    }
+
+    #[inline]
+    fn find_children_with_label(
+        &self,
+        node_id: usize,
+        label: &T,
+    ) -> Result<<Self as LabelGraph<T>>::Iter<'_>, GError> {
+        self.graph.find_children_with_label(node_id, label)
+    }
+
+    #[inline]
+    fn labels_of(&self, node_id: usize) -> Result<Self::LabelIter<'_>, GError> {
+        self.graph.labels_of(node_id)
+    }
+}
+
+impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
+    #[allow(unused)]
+    fn to_nfa(regex: impl Regex<T>) -> Self {
+        todo!()
+    }
+
+    fn remove_epsilon(&mut self) -> Result<(), Error> {
+        todo!()
+    }
+
+    fn remove_dead(&mut self) -> Result<(), Error> {
+        todo!()
+    }
+
+    fn nulling(&mut self) -> Result<(), Error> {
+        todo!()
+    }
+}
diff --git a/nfa/src/default/regex.rs b/nfa/src/default/regex.rs
new file mode 100644
index 0000000..a60f801
--- /dev/null
+++ b/nfa/src/default/regex.rs
@@ -0,0 +1,575 @@
+//! This file provides a default implementation of Regex.
+
+use graph::{error::Error as GError, ALGraph, ExtGraph, Graph, GraphLabel};
+
+use crate::{desrec::DesRec, error::Error, Regex};
+
+use receme::{algebra::Algebra, catana::Cata};
+
+use std::fmt::{Debug, Display};
+
+/// The type of a node in a regular expression.
+///
+/// # Example
+///
+/// If a node has type "Kleene", this means it represents a star
+/// construct in a regular expression, and its children are the
+/// contents of the star.
+///
+/// # Note
+///
+/// There is no "concatenation" node type.  A concatenation of two
+/// nodes is represented as the two nodes being successive children in
+/// their common parent node.
+///
+/// This is possible because a regular expression has a root node.
+/// For the sake of convenience, the root node has type "Or".
+#[derive(Debug, Hash, Default, Eq, PartialEq, Clone, Copy, Ord, PartialOrd)]
+pub enum RegexType<T: GraphLabel + Display> {
+    /// A star node is a node with an arbitrary number of repetitions.
+    Kleene,
+    /// A plus node is a node with at least one repetition: a+ equals
+    /// aa*
+    Plus,
+    /// An optional node
+    Optional,
+    /// An or node means an alternation of its children.
+    Or,
+    /// A paren node represents a parenthesis.
+    Paren,
+    /// An empty node
+    #[default]
+    Empty,
+    /// A literal node
+    Lit(T),
+}
+
+/// A default implementation of regular expressions.
+#[derive(Debug)]
+pub struct DefaultRegex<T: GraphLabel + Display> {
+    /// The underlying graph is stored using adjacency lists.
+    graph: ALGraph,
+    /// The types of the underlying nodes.
+    types: Vec<RegexType<T>>,
+}
+
+impl<T: GraphLabel + Display> Default for DefaultRegex<T> {
+    fn default() -> Self {
+        Self {
+            graph: Default::default(),
+            types: Default::default(),
+        }
+    }
+}
+
+impl<T: GraphLabel + Display> DefaultRegex<T> {
+    /// Return the number of elements in this regular expression,
+    /// counting nodes.
+    pub fn len(&self) -> usize {
+        self.types.len()
+    }
+
+    /// Return true if and only if this regular expression has no
+    /// nodes.
+    pub fn is_empty(&self) -> bool {
+        self.types.is_empty()
+    }
+
+    /// Add a node as the child of an existing node or as a root.
+    pub fn add_node(
+        &mut self,
+        label: RegexType<T>,
+        parent: Option<usize>,
+    ) -> Result<(), ParseError> {
+        self.graph.extend(parent.iter().copied())?;
+        self.types.push(label);
+
+        Ok(())
+    }
+}
+
+// REVIEW: This may not be needed.
+impl<S: GraphLabel + Display, T, A> Cata<T, Vec<T>, A> for &DefaultRegex<S>
+where
+    A: Algebra<T, Vec<T>>,
+{
+    fn cata(self, mut alg: A) -> T {
+        let mut results: Vec<Option<T>> = std::iter::repeat_with(Default::default)
+            .take(self.len())
+            .collect();
+
+        for index in 0..=self.len() {
+            let algebra_result = {
+                let results_of_children: Vec<T> = self
+                    .graph
+                    .children_of(index)
+                    .unwrap()
+                    .map(|child_index| std::mem::replace(&mut results[child_index], None).unwrap())
+                    .collect();
+
+                alg(results_of_children)
+            };
+
+            // Artificially use this value to satisfy the compiler.
+            let _ = std::mem::replace(&mut results[index], Some(algebra_result));
+        }
+
+        std::mem::replace(&mut results[0], None).unwrap()
+    }
+}
+
+impl<T: GraphLabel + Display> Display for DefaultRegex<T> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        #[derive(Copy, Clone)]
+        enum StackElement {
+            Seen(usize),
+            Unseen(usize),
+        }
+
+        impl StackElement {
+            fn index(&self) -> usize {
+                match self {
+                    Seen(index) => *index,
+                    Unseen(index) => *index,
+                }
+            }
+
+            fn is_seen(&self) -> bool {
+                match self {
+                    Seen(_) => true,
+                    Unseen(_) => false,
+                }
+            }
+        }
+
+        use StackElement::{Seen, Unseen};
+
+        let mut stack: Vec<StackElement> = Vec::new();
+        let mut types = self.types.clone();
+        types.push(RegexType::Paren);
+
+        stack.push(Unseen(0));
+
+        while let Some(top) = stack.pop() {
+            let node_type = types[top.index()];
+
+            // TODO: Do not use unwrap here.
+
+            match node_type {
+                RegexType::Kleene => {
+                    if !top.is_seen() {
+                        stack.push(Seen(top.index()));
+                        stack.extend(
+                            self.graph
+                                .children_of(top.index())
+                                .unwrap()
+                                .map(Unseen)
+                                .rev(),
+                        );
+                    } else {
+                        write!(f, "*")?;
+                    }
+                }
+                RegexType::Plus => {
+                    if !top.is_seen() {
+                        stack.push(Seen(top.index()));
+                        stack.extend(
+                            self.graph
+                                .children_of(top.index())
+                                .unwrap()
+                                .map(Unseen)
+                                .rev(),
+                        );
+                    } else {
+                        write!(f, "+")?;
+                    }
+                }
+                RegexType::Optional => {
+                    if !top.is_seen() {
+                        stack.push(Seen(top.index()));
+                        stack.extend(
+                            self.graph
+                                .children_of(top.index())
+                                .unwrap()
+                                .map(Unseen)
+                                .rev(),
+                        );
+                    } else {
+                        write!(f, "?")?;
+                    }
+                }
+                RegexType::Or => {
+                    if !top.is_seen() {
+                        write!(f, "(")?;
+
+                        let len = self.len();
+
+                        stack.push(Unseen(types.len() - 1));
+
+                        for (child_index, child) in self
+                            .graph
+                            .children_of(top.index())
+                            .unwrap()
+                            .enumerate()
+                            .rev()
+                        {
+                            if child_index != len - 1 && child_index != 0 {
+                                stack.push(Unseen(child));
+                                stack.push(Seen(top.index()));
+                            } else {
+                                stack.push(Unseen(child));
+                            }
+                        }
+                    } else {
+                        write!(f, "|")?;
+                    }
+                }
+                RegexType::Paren => {
+                    write!(f, ")")?;
+                }
+                RegexType::Empty => {
+                    stack.extend(
+                        self.graph
+                            .children_of(top.index())
+                            .unwrap()
+                            .map(Unseen)
+                            .rev(),
+                    );
+                }
+                RegexType::Lit(label) => write!(f, "{label}")?,
+            }
+        }
+
+        Ok(())
+    }
+}
+
+impl<T: GraphLabel + Display> Graph for DefaultRegex<T> {
+    type Iter<'a> = <ALGraph as Graph>::Iter<'a>
+    where
+        Self: 'a;
+
+    #[inline]
+    fn is_empty(&self) -> bool {
+        self.graph.is_empty()
+    }
+
+    #[inline]
+    fn nodes_len(&self) -> usize {
+        self.graph.nodes_len()
+    }
+
+    #[inline]
+    fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, GError> {
+        self.graph.children_of(node_id)
+    }
+
+    #[inline]
+    fn degree(&self, node_id: usize) -> Result<usize, GError> {
+        self.graph.degree(node_id)
+    }
+
+    #[inline]
+    fn is_empty_node(&self, node_id: usize) -> Result<bool, GError> {
+        self.graph.is_empty_node(node_id)
+    }
+
+    #[inline]
+    fn has_edge(&self, source: usize, target: usize) -> Result<bool, GError> {
+        self.graph.has_edge(source, target)
+    }
+}
+
+impl<T: GraphLabel + Display> Regex<RegexType<T>> for DefaultRegex<T> {
+    #[inline]
+    fn vertex_label(&self, node_id: usize) -> Result<RegexType<T>, Error> {
+        self.types
+            .get(node_id)
+            .copied()
+            .ok_or(Error::UnknownNode(node_id))
+    }
+}
+
+/// An error type for holding parsing errors.
+#[derive(Debug)]
+pub enum ParseError {
+    /// Encounter an invalid state.
+    Invalid,
+    /// An error from graph operations.
+    Graph(GError),
+    /// Encounter an empty stack.
+    EmptyStack,
+    /// Encounter a non-single stack at the end.
+    NonSingleStack,
+    /// Encounter a stack whose element is out of bounds.
+    ///
+    /// The first component is the stack element, while the second the
+    /// bound.
+    InvalidStack(usize, usize),
+    /// Encounter a repetition construct without a preceding element.
+    InvalidRepetition(usize),
+    /// Invalid character
+    InvalidCharacter(char),
+}
+
+impl Display for ParseError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{self:?}")
+    }
+}
+
+impl std::error::Error for ParseError {}
+
+impl From<GError> for ParseError {
+    fn from(ge: GError) -> Self {
+        Self::Graph(ge)
+    }
+}
+
+/// The direction of parsing.
+///
+/// This means whether we want to stay at the same level of
+/// parent-child hierarchy, or to become the child, or to climb back
+/// to the last parent.
+#[derive(Debug, Copy, Clone, Default)]
+pub enum ParseDirection {
+    /// Climb back to the last parent.
+    Up,
+    /// Stay at the same level in the hierarchy.
+    #[default]
+    Right,
+    /// Become the child.
+    Down,
+}
+
+impl Display for ParseDirection {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let direction = match self {
+            ParseDirection::Up => "↑",
+            ParseDirection::Right => "→",
+            ParseDirection::Down => "↓",
+        };
+
+        write!(f, "{direction}")
+    }
+}
+
+impl<T: GraphLabel + Display + Debug> DesRec for DefaultRegex<T> {
+    type Label = RegexType<T>;
+
+    type Regex = Self;
+
+    type Error = ParseError;
+
+    type Aux = ParseDirection;
+
+    type Scanner<'a> =
+        Box<dyn FnMut(&'a str) -> Result<Option<(usize, Self::Label, Self::Aux)>, Self::Error>>;
+
+    fn parse<'a>(
+        mut input: &'a str,
+        mut scanner: Self::Scanner<'a>,
+        post_p: bool,
+    ) -> Result<Option<(DefaultRegex<T>, &'a str)>, Self::Error> {
+        use ParseDirection::*;
+        use RegexType::*;
+
+        let mut intermediate_stack: Vec<(RegexType<T>, ParseDirection)> =
+            Vec::with_capacity(input.len());
+
+        // Classifies the input into a sequence of tokens with
+        // directions.
+        while !input.is_empty() {
+            if let Some((len, label, direction)) = scanner(input)? {
+                if len == 0 {
+                    break;
+                }
+
+                input = &input[len..];
+
+                intermediate_stack.push((label, direction));
+
+                // If we encounter an opening parenthesis, we add
+                // another auxiliary instruction.
+                if matches!((label, direction), (Or, Down)) {
+                    intermediate_stack.push((Empty, Down));
+                }
+            } else {
+                break;
+            }
+        }
+
+        let inter_len = intermediate_stack.len();
+
+        let mut parents_stack: Vec<usize> = Vec::with_capacity(inter_len + 2);
+
+        parents_stack.push(0);
+        parents_stack.push(1);
+
+        let mut list_of_children: Vec<Vec<usize>> = std::iter::repeat_with(Vec::new)
+            .take(inter_len + 2)
+            .collect();
+
+        list_of_children[0].push(1);
+
+        let mut types: Vec<RegexType<T>> = vec![Or, Empty];
+
+        types.extend(intermediate_stack.iter().map(|(label, _direction)| *label));
+
+        // Converts the sequence of tokens and directions into a
+        // regular expression.
+        for (index, (label, direction)) in intermediate_stack.iter().copied().enumerate() {
+            let mut parent: usize;
+            let mut parent_children: &mut Vec<usize>;
+
+            if let Some(parent_stack_parent) = parents_stack.last().copied() {
+                parent = parent_stack_parent;
+
+                match list_of_children.get_mut(parent) {
+                    Some(stack_parent_children) => {
+                        parent_children = stack_parent_children;
+
+                        match (label, direction) {
+                            (Paren, Up) => {
+                                // a closing parenthesis does not need
+                                // to be counted as a child
+                                parents_stack.pop();
+                                // a closing parenthesis jumps out of
+                                // two levels at once
+                                parents_stack.pop();
+                            }
+                            (Empty, Up) => {
+                                // an upwards pipe
+
+                                // first add the current node to the parent of the parent
+                                parents_stack.pop();
+
+                                if let Some(parent_stack_parent) = parents_stack.last().copied() {
+                                    parent = parent_stack_parent;
+
+                                    if let Some(stack_parent_children) =
+                                        list_of_children.get_mut(parent)
+                                    {
+                                        parent_children = stack_parent_children;
+
+                                        parent_children.push(index + 2);
+                                    } else {
+                                        return Err(ParseError::InvalidStack(
+                                            parent,
+                                            inter_len + 2,
+                                        ));
+                                    }
+                                } else {
+                                    return Err(ParseError::EmptyStack);
+                                }
+
+                                // then make the current node the new parent
+                                parents_stack.push(index + 2);
+                            }
+                            (_, Up) => {
+                                parents_stack.pop();
+                                parent_children.push(index + 2);
+                            }
+                            (_, Right) => {
+                                parent_children.push(index + 2);
+                            }
+                            (_, Down) => {
+                                parents_stack.push(index + 2);
+                                parent_children.push(index + 2);
+                            }
+                        }
+                    }
+                    None => return Err(ParseError::InvalidStack(parent, inter_len)),
+                }
+            } else {
+                // There are unbalanced closing parentheses.
+                return Err(ParseError::EmptyStack);
+            }
+
+            // A special handling of repetition constructs as postfix
+            // operators: it swaps with the preceding element.
+            if post_p {
+                match label {
+                    Kleene | Plus | Optional => {
+                        // remove the current node from the parent
+                        parent_children.pop();
+
+                        if let Some(preceding) = parent_children.last().copied() {
+                            list_of_children.swap(preceding, index + 2);
+
+                            types.swap(preceding, index + 2);
+
+                            match list_of_children.get_mut(preceding) {
+                                Some(preceding_children) => {
+                                    preceding_children.push(index + 2);
+                                }
+                                None => {
+                                    return Err(ParseError::InvalidStack(preceding, inter_len + 2))
+                                }
+                            }
+                        } else {
+                            return Err(ParseError::InvalidRepetition(index));
+                        }
+                    }
+                    _ => {}
+                }
+            }
+        }
+
+        // There are unbalanced opening parentheses.
+        if parents_stack.len() != 2 {
+            return Err(ParseError::NonSingleStack);
+        }
+
+        let graph = list_of_children.into();
+
+        let result = DefaultRegex { graph, types };
+
+        Ok(Some((result, input)))
+    }
+}
+
+#[cfg(test)]
+mod test_des_rec {
+    use super::*;
+
+    fn test_scanner(
+        input: &str,
+    ) -> Result<Option<(usize, RegexType<char>, ParseDirection)>, ParseError> {
+        use ParseDirection::*;
+        use RegexType::*;
+
+        if let Some(first) = input.chars().next() {
+            match first {
+                '*' => Ok(Some((1, Kleene, Right))),
+                '+' => Ok(Some((1, Plus, Right))),
+                '?' => Ok(Some((1, Optional, Right))),
+                '|' => Ok(Some((1, Empty, Up))),
+                '(' => Ok(Some((1, Or, Down))),
+                ')' => Ok(Some((1, Paren, Up))),
+                ' '..='~' => Ok(Some((1, Lit(first), Right))),
+                _ => Err(ParseError::InvalidCharacter(first)),
+            }
+        } else {
+            Ok(None)
+        }
+    }
+
+    #[test]
+    fn test_des_rec() -> Result<(), Box<dyn std::error::Error>> {
+        let input_string = "a*b?c+|(d*| +)?".to_owned();
+
+        if let Some((regex, remain)) =
+            DefaultRegex::<char>::parse(&input_string, Box::new(test_scanner), true)?
+        {
+            println!("regex = {regex}");
+            println!("remain = {remain}");
+
+            println!("regex length = {}", regex.len());
+
+            Ok(())
+        } else {
+            unreachable!()
+        }
+    }
+}
diff --git a/nfa/src/desrec.rs b/nfa/src/desrec.rs
new file mode 100644
index 0000000..c57d313
--- /dev/null
+++ b/nfa/src/desrec.rs
@@ -0,0 +1,50 @@
+//! This file defines the expected behaviours of a recursive descent
+//! parser.
+
+use super::Regex;
+use graph::GraphLabel;
+
+/// A thin wrapper of the parse output, to simplify the function
+/// signature a little.
+pub type ParseOutput<'a, T> = Option<(T, &'a str)>;
+
+/// Types implementing this trait provide a method to be recursively
+/// parsed.
+pub trait DesRec {
+    /// The type of labels of the resulting regular expression.
+    type Label: GraphLabel;
+
+    /// The type of the resulting regular expression.
+    type Regex: Regex<Self::Label>;
+
+    /// The type of errors encountered when parsing.
+    type Error: std::error::Error;
+
+    /// Auxiliary data when parsing
+    type Aux;
+
+    /// The type of a scanner that classifies inputs into tokens.
+    ///
+    /// The return type indicates the result of classification:
+    ///
+    /// - `Err(_)`: the classification fails
+    ///
+    /// - `Ok(None)`: the classification succeeds, and the parsing
+    /// should stop here
+    ///
+    /// - `Ok(Some(_))`: the classification succeeds and the parsing
+    /// should continue.
+    type Scanner<'a>: FnMut(&'a str) -> Result<Option<(usize, Self::Label, Self::Aux)>, Self::Error>;
+
+    /// Parse a string into a regular expression with the aid of this
+    /// type.
+    ///
+    /// Accept a slice of string and return either a parsing error, or
+    /// a pair of correctly parsed regular expression and the
+    /// remaining slice.
+    fn parse<'a>(
+        input: &'a str,
+        scanner: Self::Scanner<'a>,
+        post_p: bool,
+    ) -> Result<ParseOutput<'a, Self::Regex>, Self::Error>;
+}
diff --git a/nfa/src/error.rs b/nfa/src/error.rs
index 6112878..0c6bb3c 100644
--- a/nfa/src/error.rs
+++ b/nfa/src/error.rs
@@ -5,9 +5,15 @@ use graph::error::Error as GError;
 use std::fmt::{Display, Formatter};
 
 #[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd)]
+/// The error type for NFA operations.
 pub enum Error {
+    /// An unknown node id is encountered.
     UnknownNode(usize),
+    /// Some operations are not supported by the implementations.
+    ///
+    /// Everything is a trade-off, wink wink.
     UnsupportedOperation,
+    /// This error comes from some underlying graph operation.
     Graph(GError),
 }
 
diff --git a/nfa/src/lib.rs b/nfa/src/lib.rs
index ef207cf..1e2a30c 100644
--- a/nfa/src/lib.rs
+++ b/nfa/src/lib.rs
@@ -6,7 +6,7 @@
 //! implements the Graph trait from the [`graph`] crate, and then use
 //! that external graph type as [`Graph`][graph::Graph] here.
 
-mod error;
+pub mod error;
 
 extern crate graph;
 
@@ -49,13 +49,12 @@ pub trait Regex<T: GraphLabel>: Graph + Display {
     // TODO: add functions that determine if certain "positions" in a
     // regular language satisfy some special properties, like at the
     // end of a Kleene star, or at the end of a regular language, et
-    // cetera.  These will be needed later.
+    // cetera.  These might be needed later.
 }
 
 /// The expected behvaiour of a nondeterministic finite automaton.
 ///
-/// Every NFA is a special labelled graph, so this trait extends the
-/// [`LabelGraph`][graph::LabelGraph] trait.
+/// Every NFA is a special labelled graph.
 pub trait Nfa<T: GraphLabel>: LabelGraph<T> {
     /// Remove all empty transitions from the nondeterministic finite
     /// automaton.
@@ -89,6 +88,7 @@ pub trait Nfa<T: GraphLabel>: LabelGraph<T> {
 }
 
 pub mod default;
+pub mod desrec;
 
 #[cfg(test)]
 mod nfa_tests {}
diff --git a/receme/src/functor.rs b/receme/src/functor.rs
index 95e2555..cd4dac2 100644
--- a/receme/src/functor.rs
+++ b/receme/src/functor.rs
@@ -31,6 +31,14 @@ pub trait Functor<T> {
     fn fmap<S>(self, f: impl FnMut(T) -> S) -> Self::Target<S>;
 }
 
+impl<T> Functor<T> for Vec<T> {
+    type Target<S> = Vec<S>;
+
+    fn fmap<S>(self, f: impl FnMut(T) -> S) -> Self::Target<S> {
+        self.into_iter().map(f).collect()
+    }
+}
+
 /// A functor can map over its generic type parameter.
 ///
 /// It can map from Functor(T) to Functor(S).
diff --git a/repcore/src/grammar.rs b/repcore/src/grammar.rs
new file mode 100644
index 0000000..ee9f033
--- /dev/null
+++ b/repcore/src/grammar.rs
@@ -0,0 +1,59 @@
+//! This file implements the extected behaviours of grammars.
+
+// NOTE: We shall first start with a parser that works at the level of
+// characters.  The purpose is to first experiment with the workings
+// and the performance of the algorithms, before optimising by using
+// regular expressions to classify inputs into tokens.  In other
+// words, the current focus is not on the optimisations, whereas
+// scanners are for optimisations only, so to speak.
+
+/// The type of a terminal.
+///
+/// For the time being this is a wrapper around a string, but in the
+/// future it may hold more information of scanners.
+pub struct Terminal {
+    // If we want to use scanners, per chance add them as a new field
+    // here.
+    name: String,
+}
+
+impl Terminal {
+    /// Create a terminal with the given name.
+    #[inline]
+    pub fn new(name: String) -> Self {
+        Self { name }
+    }
+
+    /// Return the name of the terminal.
+    #[inline]
+    pub fn name(&self) -> &str {
+        &self.name
+    }
+}
+
+/// The type of a non-terminal.
+///
+/// This is just a wrapper around a string.
+pub struct Nonterminal(String);
+
+impl Nonterminal {
+    /// Return the name of the nonterminal.
+    ///
+    /// Just to improve readability.
+    #[inline]
+    pub fn name(&self) -> &str {
+        &self.0
+    }
+}
+
+/// The type of a terminal or a non-terminal.
+///
+/// Only an index is stored here.  Actual data are stored in two other
+/// arrays.
+#[derive(Debug, Hash, Eq, PartialEq, Clone, Copy, Ord, PartialOrd)]
+pub enum TNT {
+    /// Terminal variant
+    Ter(usize),
+    /// Nonterminal variant
+    Non(usize),
+}
diff --git a/repcore/src/lib.rs b/repcore/src/lib.rs
index 7d12d9a..589b61c 100644
--- a/repcore/src/lib.rs
+++ b/repcore/src/lib.rs
@@ -1,3 +1,9 @@
+//! This package implements the core algorithm of the entire
+//! workspace: parsing with derivatives by means of regular nulling
+//! languages.
+
+pub mod grammar;
+
 pub fn add(left: usize, right: usize) -> usize {
     left + right
 }
diff --git a/repcore/src/plan.org b/repcore/src/plan.org
new file mode 100644
index 0000000..13c2ee0
--- /dev/null
+++ b/repcore/src/plan.org
@@ -0,0 +1,59 @@
+#+TITLE: Plan of the package
+#+AUTHOR: Durand
+#+DATE: <2022-11-18 Ven 19:57>
+
+* Atomic Languages
+
+This describes the behaviours of atomic languages.  The atomic
+language consists of the null closure of any non-terminal symbol in
+the grammar, and their deriavtives by terminals and non-terminal.
+
+* Derivative Languages
+
+This is the main driving device of the algorithm.  Basically, the
+algorithm works by taking successive derivatives, according to the
+input symbol.  At each step, we calculate the derivative language.  In
+this process, we also compute some semiring value and store in a
+carrier.  The end result of the algorithm is the final semiring
+value.
+
+If one wants simply to determine if the input string belongs to the
+grammar, one chooses the semiring to be the field with two elements,
+the boolean field.  If one wants to find how many ways are there to
+derive a given input string, then one uses the semiring of natural
+numbers instead.  If one wants, moreover, to find all the possible
+ways to derive a particular input string, then one can use the
+free semiring on the set of terminals and non-terminals of the
+grammar.  Here the free semiring is the left-adjoint functor to the
+forgetful functor from the category of semirings to the category of
+sets.
+
+To be more specific, the free semiring on a set is given by sets of
+sequences of elements in the set.  The addition of the semiring is the
+set union operation, and the multiplication is taking the respective
+concatenations.
+
+** Semirings
+
+So we need a module to define the behaviours of semirings, and provide
+some common semirings implementations.  Then in the main driving force
+we can freely substitute different semirings, according to the
+particular needs.
+
+** Languages
+
+Then the main part is to define the behaviour of languages.  This
+should be easy enough, since we already have the mechanism of graphs,
+nondeterministic automata, and semirings.  All we need to do is to
+combine them together.
+
+* Testing ground
+
+I am in a strong need to test things out.  The most important one is
+to visualize each step of the derivation, in a human-friendly manner.
+I need this to examine whether my atomic languages are wrongly
+implemented, or my atomic languages are wrongly derived, or my
+understanding of the main algorithm is plain wrong.
+
+This is the main reason I started this rewrite of the package.
+
author	JSDurand <mmemmew@gmail.com>	2022-12-14 23:48:22 +0800
committer	JSDurand <mmemmew@gmail.com>	2022-12-14 23:48:22 +0800
commit	9f1c88b863e247da3cd60d2792a7a13b18e25e53 (patch)
tree	d29c0e19793a88a1de6898fdfd2a376fca21378f
parent	cb7bcfad4ab0041aaf3fde3185e27ee46bb37788 (diff)