summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2022-12-14 23:48:22 +0800
committerJSDurand <mmemmew@gmail.com>2022-12-14 23:48:22 +0800
commit9f1c88b863e247da3cd60d2792a7a13b18e25e53 (patch)
treed29c0e19793a88a1de6898fdfd2a376fca21378f
parentcb7bcfad4ab0041aaf3fde3185e27ee46bb37788 (diff)
a temporary check point
just to save things in a commit
-rw-r--r--THANKS1
-rw-r--r--configure.ac4
-rwxr-xr-xfind-version.sh8
-rw-r--r--graph/Makefile.am15
-rw-r--r--graph/src/adlist.rs8
-rw-r--r--graph/src/labelled.rs2
-rw-r--r--nfa/Cargo.toml1
-rw-r--r--nfa/Makefile.am19
-rw-r--r--nfa/src/default/mod.rs121
-rw-r--r--nfa/src/default/nfa.rs120
-rw-r--r--nfa/src/default/regex.rs575
-rw-r--r--nfa/src/desrec.rs50
-rw-r--r--nfa/src/error.rs6
-rw-r--r--nfa/src/lib.rs8
-rw-r--r--receme/src/functor.rs8
-rw-r--r--repcore/src/grammar.rs59
-rw-r--r--repcore/src/lib.rs6
-rw-r--r--repcore/src/plan.org59
18 files changed, 940 insertions, 130 deletions
diff --git a/THANKS b/THANKS
index e69de29..9045a0b 100644
--- a/THANKS
+++ b/THANKS
@@ -0,0 +1 @@
+TODO: Thank many people here. \ No newline at end of file
diff --git a/configure.ac b/configure.ac
index fbf987e..fe765e6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2,8 +2,6 @@ AC_PREREQ([2.60])
AC_INIT([REP], m4_esyscmd([./find-version.sh]), [durand@jsdurand.xyz])
-dnl patsubst(m4_esyscmd([./find-version.sh]), ` "')
-
AC_COPYRIGHT([This package is covered by GPL v3.])
AC_CONFIG_AUX_DIR([build-aux])
@@ -21,7 +19,7 @@ AS_IF([test "$CARGO" = "notfound"], [AC_MSG_ERROR([cargo is required])])
AC_PATH_PROG([RUSTC], [rustc], [notfound])
AS_IF([test "$RUSTC" = "notfound"], [AC_MSG_ERROR([rustc is required])])
-AC_CONFIG_FILES([Makefile graph/Makefile])
+AC_CONFIG_FILES([Makefile graph/Makefile nfa/Makefile])
AC_OUTPUT
diff --git a/find-version.sh b/find-version.sh
index 4ac390e..99aa538 100755
--- a/find-version.sh
+++ b/find-version.sh
@@ -2,18 +2,24 @@
":"; exec emacs --quick --script "$0" -- "$@" # -*- mode: emacs-lisp; lexical-binding: t; -*-
(with-temp-buffer
+ ;; the version information is stored in the Cargo.toml file.
(insert-file-contents "Cargo.toml")
(goto-char (point-min))
(cond
+ ;; search for the version assignment statement
((search-forward "version =" nil t)
- (re-search-forward " *" (line-end-position) t)
+ ;; skip spaces
+ (re-search-forward "[[:space:]]*" (line-end-position) t)
+ ;; check it starts with a double quote
(cond
((= (char-after) 34))
((error "Invalid syntax at %d" (point))))
(let ((end (line-end-position)))
+ ;; and check it ends with a double quote as well
(cond
((= (char-before end) 34))
((error "Invalid syntax at %d" (1- end))))
+ ;; then print the bare version string out without delimiters
(princ
(buffer-substring-no-properties
(1+ (point)) (1- end)))))
diff --git a/graph/Makefile.am b/graph/Makefile.am
index 776b911..623572a 100644
--- a/graph/Makefile.am
+++ b/graph/Makefile.am
@@ -1,12 +1,19 @@
-.PHONY: dev rel
+.PHONY: dev rel clean check
all: dev
dev:
- @CARGO@ build
+ @echo "cargo build"
+ @@CARGO@ build
rel:
- @CARGO@ build --release
+ @echo "cargo build --release"
+ @@CARGO@ build --release
clean:
- @CARGO@ clean
+ @echo "cargo clean"
+ @@CARGO@ clean
+
+check:
+ @echo "cargo clippy"
+ @@CARGO@ clippy
diff --git a/graph/src/adlist.rs b/graph/src/adlist.rs
index c16ceb2..18ad770 100644
--- a/graph/src/adlist.rs
+++ b/graph/src/adlist.rs
@@ -102,6 +102,14 @@ impl ExtGraph for ALGraph {
}
}
+// TODO: Add a way to build a graph by its raw adjacency list representation.
+impl From<Vec<Vec<usize>>> for ALGraph {
+ fn from(adlist: Vec<Vec<usize>>) -> Self {
+ let nodes: Vec<ALNode> = adlist.iter().cloned().map(ALNode::new).collect();
+ Self { nodes }
+ }
+}
+
#[cfg(test)]
mod algraph_test {
use super::*;
diff --git a/graph/src/labelled.rs b/graph/src/labelled.rs
index 1cb2461..d02e301 100644
--- a/graph/src/labelled.rs
+++ b/graph/src/labelled.rs
@@ -144,7 +144,7 @@ impl<'a> Iterator for LabelIndexIter<'a> {
#[inline]
fn next(&mut self) -> Option<Self::Item> {
- self.iter.as_mut().map(|iterator| iterator.next()).flatten()
+ self.iter.as_mut().and_then(|iterator| iterator.next())
}
#[inline]
diff --git a/nfa/Cargo.toml b/nfa/Cargo.toml
index b1387b6..7f48760 100644
--- a/nfa/Cargo.toml
+++ b/nfa/Cargo.toml
@@ -7,6 +7,7 @@ edition = "2021"
[dependencies]
graph = { path = "../graph", optional = true }
+receme = { path = "../receme" }
[features]
default = ["default-graph"]
diff --git a/nfa/Makefile.am b/nfa/Makefile.am
new file mode 100644
index 0000000..623572a
--- /dev/null
+++ b/nfa/Makefile.am
@@ -0,0 +1,19 @@
+.PHONY: dev rel clean check
+
+all: dev
+
+dev:
+ @echo "cargo build"
+ @@CARGO@ build
+
+rel:
+ @echo "cargo build --release"
+ @@CARGO@ build --release
+
+clean:
+ @echo "cargo clean"
+ @@CARGO@ clean
+
+check:
+ @echo "cargo clippy"
+ @@CARGO@ clippy
diff --git a/nfa/src/default/mod.rs b/nfa/src/default/mod.rs
index 805540b..b9ee398 100644
--- a/nfa/src/default/mod.rs
+++ b/nfa/src/default/mod.rs
@@ -1,123 +1,10 @@
//! This file provides a structure that implements the trait
-//! [`NFA`][super::Nfa].
-//!
-//! It is used as the default implementation.
+//! [`NFA`][super::Nfa] and another that umplements the trait
+//! [`Regex`][super::Regex].
-use graph::{error::Error as GError, DLGraph, Graph, GraphLabel, LabelGraph};
+pub mod nfa;
-use super::{error::Error, Nfa, Regex};
-
-// TODO: Store the regular expression in the NFA as well.
-//
-// The current focus of the project is to understand the growth rate
-// of the algorithm, to know whether I made a mistake in the previous
-// iteration of the implementation, or the algorithm is not as fast as
-// the author estimated, which is not quite likely, of course.
-//
-// Thus I shall establish a friendly interface that allows me to view
-// and debug the atomic languages and the languages, transparently.
-
-#[non_exhaustive]
-#[derive(Debug)]
-/// Default NFA implementation.
-pub struct DefaultNFA<T: GraphLabel> {
- graph: DLGraph<T>,
-}
-
-impl<T: GraphLabel> Default for DefaultNFA<T> {
- fn default() -> Self {
- let graph = Default::default();
- Self { graph }
- }
-}
-
-impl<T: GraphLabel> Graph for DefaultNFA<T> {
- type Iter<'a> = <DLGraph<T> as Graph>::Iter<'a> where T: 'a;
-
- #[inline]
- fn is_empty(&self) -> bool {
- self.graph.is_empty()
- }
-
- #[inline]
- fn nodes_len(&self) -> usize {
- self.graph.nodes_len()
- }
-
- #[inline]
- fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, GError> {
- self.graph.children_of(node_id)
- }
-
- #[inline]
- fn degree(&self, node_id: usize) -> Result<usize, GError> {
- self.graph.degree(node_id)
- }
-
- #[inline]
- fn is_empty_node(&self, node_id: usize) -> Result<bool, GError> {
- self.graph.is_empty_node(node_id)
- }
-
- #[inline]
- fn has_edge(&self, source: usize, target: usize) -> Result<bool, GError> {
- self.graph.has_edge(source, target)
- }
-}
-
-impl<T: GraphLabel> LabelGraph<T> for DefaultNFA<T> {
- type Iter<'a> = <DLGraph<T> as LabelGraph<T>>::Iter<'a> where T: 'a;
-
- type LabelIter<'a> = <DLGraph<T> as LabelGraph<T>>::LabelIter<'a> where T: 'a;
-
- // TODO: Return the label from the contained regular language.
- #[inline]
- fn vertex_label(&self, node_id: usize) -> Result<Option<T>, GError> {
- if self.has_node(node_id) {
- todo!()
- } else {
- Err(GError::IndexOutOfBounds(node_id, self.nodes_len()))
- }
- }
-
- #[inline]
- fn edge_label(&self, source: usize, target: usize) -> Result<Vec<T>, GError> {
- self.graph.edge_label(source, target)
- }
-
- #[inline]
- fn find_children_with_label(
- &self,
- node_id: usize,
- label: &T,
- ) -> Result<<Self as LabelGraph<T>>::Iter<'_>, GError> {
- self.graph.find_children_with_label(node_id, label)
- }
-
- #[inline]
- fn labels_of(&self, node_id: usize) -> Result<Self::LabelIter<'_>, GError> {
- self.graph.labels_of(node_id)
- }
-}
-
-impl<T: GraphLabel> Nfa<T> for DefaultNFA<T> {
- #[allow(unused)]
- fn to_nfa(regex: impl Regex<T>) -> Self {
- todo!()
- }
-
- fn remove_epsilon(&mut self) -> Result<(), Error> {
- todo!()
- }
-
- fn remove_dead(&mut self) -> Result<(), Error> {
- todo!()
- }
-
- fn nulling(&mut self) -> Result<(), Error> {
- todo!()
- }
-}
+pub mod regex;
#[cfg(test)]
mod default_nfa_test {}
diff --git a/nfa/src/default/nfa.rs b/nfa/src/default/nfa.rs
new file mode 100644
index 0000000..3c2bd83
--- /dev/null
+++ b/nfa/src/default/nfa.rs
@@ -0,0 +1,120 @@
+//! This file provides a default implementation of NFA.
+
+// TODO: Store the regular expression in the NFA as well.
+//
+// The current focus of the project is to understand the growth rate
+// of the algorithm, to know whether I made a mistake in the previous
+// iteration of the implementation, or the algorithm is not as fast as
+// the author estimated, which is not quite likely, of course.
+//
+// Thus I shall establish a friendly interface that allows me to view
+// and debug the atomic languages and the languages, transparently.
+
+use graph::{error::Error as GError, DLGraph, Graph, GraphLabel, LabelGraph};
+
+use crate::{error::Error, Nfa, Regex};
+
+use std::fmt::Display;
+
+#[non_exhaustive]
+#[derive(Debug)]
+/// Default NFA implementation.
+pub struct DefaultNFA<T: GraphLabel + Display> {
+ graph: DLGraph<T>,
+}
+
+impl<T: GraphLabel + Display> Default for DefaultNFA<T> {
+ fn default() -> Self {
+ Self {
+ graph: Default::default(),
+ }
+ }
+}
+
+impl<T: GraphLabel + Display> Graph for DefaultNFA<T> {
+ type Iter<'a> = <DLGraph<T> as Graph>::Iter<'a> where T: 'a;
+
+ #[inline]
+ fn is_empty(&self) -> bool {
+ self.graph.is_empty()
+ }
+
+ #[inline]
+ fn nodes_len(&self) -> usize {
+ self.graph.nodes_len()
+ }
+
+ #[inline]
+ fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, GError> {
+ self.graph.children_of(node_id)
+ }
+
+ #[inline]
+ fn degree(&self, node_id: usize) -> Result<usize, GError> {
+ self.graph.degree(node_id)
+ }
+
+ #[inline]
+ fn is_empty_node(&self, node_id: usize) -> Result<bool, GError> {
+ self.graph.is_empty_node(node_id)
+ }
+
+ #[inline]
+ fn has_edge(&self, source: usize, target: usize) -> Result<bool, GError> {
+ self.graph.has_edge(source, target)
+ }
+}
+
+impl<T: GraphLabel + Display> LabelGraph<T> for DefaultNFA<T> {
+ type Iter<'a> = <DLGraph<T> as LabelGraph<T>>::Iter<'a> where T: 'a;
+
+ type LabelIter<'a> = <DLGraph<T> as LabelGraph<T>>::LabelIter<'a> where T: 'a;
+
+ // TODO: Return the label from the contained regular language.
+ #[inline]
+ fn vertex_label(&self, node_id: usize) -> Result<Option<T>, GError> {
+ if self.has_node(node_id) {
+ todo!()
+ } else {
+ Err(GError::IndexOutOfBounds(node_id, self.nodes_len()))
+ }
+ }
+
+ #[inline]
+ fn edge_label(&self, source: usize, target: usize) -> Result<Vec<T>, GError> {
+ self.graph.edge_label(source, target)
+ }
+
+ #[inline]
+ fn find_children_with_label(
+ &self,
+ node_id: usize,
+ label: &T,
+ ) -> Result<<Self as LabelGraph<T>>::Iter<'_>, GError> {
+ self.graph.find_children_with_label(node_id, label)
+ }
+
+ #[inline]
+ fn labels_of(&self, node_id: usize) -> Result<Self::LabelIter<'_>, GError> {
+ self.graph.labels_of(node_id)
+ }
+}
+
+impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
+ #[allow(unused)]
+ fn to_nfa(regex: impl Regex<T>) -> Self {
+ todo!()
+ }
+
+ fn remove_epsilon(&mut self) -> Result<(), Error> {
+ todo!()
+ }
+
+ fn remove_dead(&mut self) -> Result<(), Error> {
+ todo!()
+ }
+
+ fn nulling(&mut self) -> Result<(), Error> {
+ todo!()
+ }
+}
diff --git a/nfa/src/default/regex.rs b/nfa/src/default/regex.rs
new file mode 100644
index 0000000..a60f801
--- /dev/null
+++ b/nfa/src/default/regex.rs
@@ -0,0 +1,575 @@
+//! This file provides a default implementation of Regex.
+
+use graph::{error::Error as GError, ALGraph, ExtGraph, Graph, GraphLabel};
+
+use crate::{desrec::DesRec, error::Error, Regex};
+
+use receme::{algebra::Algebra, catana::Cata};
+
+use std::fmt::{Debug, Display};
+
+/// The type of a node in a regular expression.
+///
+/// # Example
+///
+/// If a node has type "Kleene", this means it represents a star
+/// construct in a regular expression, and its children are the
+/// contents of the star.
+///
+/// # Note
+///
+/// There is no "concatenation" node type. A concatenation of two
+/// nodes is represented as the two nodes being successive children in
+/// their common parent node.
+///
+/// This is possible because a regular expression has a root node.
+/// For the sake of convenience, the root node has type "Or".
+#[derive(Debug, Hash, Default, Eq, PartialEq, Clone, Copy, Ord, PartialOrd)]
+pub enum RegexType<T: GraphLabel + Display> {
+ /// A star node is a node with an arbitrary number of repetitions.
+ Kleene,
+ /// A plus node is a node with at least one repetition: a+ equals
+ /// aa*
+ Plus,
+ /// An optional node
+ Optional,
+ /// An or node means an alternation of its children.
+ Or,
+ /// A paren node represents a parenthesis.
+ Paren,
+ /// An empty node
+ #[default]
+ Empty,
+ /// A literal node
+ Lit(T),
+}
+
+/// A default implementation of regular expressions.
+#[derive(Debug)]
+pub struct DefaultRegex<T: GraphLabel + Display> {
+ /// The underlying graph is stored using adjacency lists.
+ graph: ALGraph,
+ /// The types of the underlying nodes.
+ types: Vec<RegexType<T>>,
+}
+
+impl<T: GraphLabel + Display> Default for DefaultRegex<T> {
+ fn default() -> Self {
+ Self {
+ graph: Default::default(),
+ types: Default::default(),
+ }
+ }
+}
+
+impl<T: GraphLabel + Display> DefaultRegex<T> {
+ /// Return the number of elements in this regular expression,
+ /// counting nodes.
+ pub fn len(&self) -> usize {
+ self.types.len()
+ }
+
+ /// Return true if and only if this regular expression has no
+ /// nodes.
+ pub fn is_empty(&self) -> bool {
+ self.types.is_empty()
+ }
+
+ /// Add a node as the child of an existing node or as a root.
+ pub fn add_node(
+ &mut self,
+ label: RegexType<T>,
+ parent: Option<usize>,
+ ) -> Result<(), ParseError> {
+ self.graph.extend(parent.iter().copied())?;
+ self.types.push(label);
+
+ Ok(())
+ }
+}
+
+// REVIEW: This may not be needed.
+impl<S: GraphLabel + Display, T, A> Cata<T, Vec<T>, A> for &DefaultRegex<S>
+where
+ A: Algebra<T, Vec<T>>,
+{
+ fn cata(self, mut alg: A) -> T {
+ let mut results: Vec<Option<T>> = std::iter::repeat_with(Default::default)
+ .take(self.len())
+ .collect();
+
+ for index in 0..=self.len() {
+ let algebra_result = {
+ let results_of_children: Vec<T> = self
+ .graph
+ .children_of(index)
+ .unwrap()
+ .map(|child_index| std::mem::replace(&mut results[child_index], None).unwrap())
+ .collect();
+
+ alg(results_of_children)
+ };
+
+ // Artificially use this value to satisfy the compiler.
+ let _ = std::mem::replace(&mut results[index], Some(algebra_result));
+ }
+
+ std::mem::replace(&mut results[0], None).unwrap()
+ }
+}
+
+impl<T: GraphLabel + Display> Display for DefaultRegex<T> {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ #[derive(Copy, Clone)]
+ enum StackElement {
+ Seen(usize),
+ Unseen(usize),
+ }
+
+ impl StackElement {
+ fn index(&self) -> usize {
+ match self {
+ Seen(index) => *index,
+ Unseen(index) => *index,
+ }
+ }
+
+ fn is_seen(&self) -> bool {
+ match self {
+ Seen(_) => true,
+ Unseen(_) => false,
+ }
+ }
+ }
+
+ use StackElement::{Seen, Unseen};
+
+ let mut stack: Vec<StackElement> = Vec::new();
+ let mut types = self.types.clone();
+ types.push(RegexType::Paren);
+
+ stack.push(Unseen(0));
+
+ while let Some(top) = stack.pop() {
+ let node_type = types[top.index()];
+
+ // TODO: Do not use unwrap here.
+
+ match node_type {
+ RegexType::Kleene => {
+ if !top.is_seen() {
+ stack.push(Seen(top.index()));
+ stack.extend(
+ self.graph
+ .children_of(top.index())
+ .unwrap()
+ .map(Unseen)
+ .rev(),
+ );
+ } else {
+ write!(f, "*")?;
+ }
+ }
+ RegexType::Plus => {
+ if !top.is_seen() {
+ stack.push(Seen(top.index()));
+ stack.extend(
+ self.graph
+ .children_of(top.index())
+ .unwrap()
+ .map(Unseen)
+ .rev(),
+ );
+ } else {
+ write!(f, "+")?;
+ }
+ }
+ RegexType::Optional => {
+ if !top.is_seen() {
+ stack.push(Seen(top.index()));
+ stack.extend(
+ self.graph
+ .children_of(top.index())
+ .unwrap()
+ .map(Unseen)
+ .rev(),
+ );
+ } else {
+ write!(f, "?")?;
+ }
+ }
+ RegexType::Or => {
+ if !top.is_seen() {
+ write!(f, "(")?;
+
+ let len = self.len();
+
+ stack.push(Unseen(types.len() - 1));
+
+ for (child_index, child) in self
+ .graph
+ .children_of(top.index())
+ .unwrap()
+ .enumerate()
+ .rev()
+ {
+ if child_index != len - 1 && child_index != 0 {
+ stack.push(Unseen(child));
+ stack.push(Seen(top.index()));
+ } else {
+ stack.push(Unseen(child));
+ }
+ }
+ } else {
+ write!(f, "|")?;
+ }
+ }
+ RegexType::Paren => {
+ write!(f, ")")?;
+ }
+ RegexType::Empty => {
+ stack.extend(
+ self.graph
+ .children_of(top.index())
+ .unwrap()
+ .map(Unseen)
+ .rev(),
+ );
+ }
+ RegexType::Lit(label) => write!(f, "{label}")?,
+ }
+ }
+
+ Ok(())
+ }
+}
+
+impl<T: GraphLabel + Display> Graph for DefaultRegex<T> {
+ type Iter<'a> = <ALGraph as Graph>::Iter<'a>
+ where
+ Self: 'a;
+
+ #[inline]
+ fn is_empty(&self) -> bool {
+ self.graph.is_empty()
+ }
+
+ #[inline]
+ fn nodes_len(&self) -> usize {
+ self.graph.nodes_len()
+ }
+
+ #[inline]
+ fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, GError> {
+ self.graph.children_of(node_id)
+ }
+
+ #[inline]
+ fn degree(&self, node_id: usize) -> Result<usize, GError> {
+ self.graph.degree(node_id)
+ }
+
+ #[inline]
+ fn is_empty_node(&self, node_id: usize) -> Result<bool, GError> {
+ self.graph.is_empty_node(node_id)
+ }
+
+ #[inline]
+ fn has_edge(&self, source: usize, target: usize) -> Result<bool, GError> {
+ self.graph.has_edge(source, target)
+ }
+}
+
+impl<T: GraphLabel + Display> Regex<RegexType<T>> for DefaultRegex<T> {
+ #[inline]
+ fn vertex_label(&self, node_id: usize) -> Result<RegexType<T>, Error> {
+ self.types
+ .get(node_id)
+ .copied()
+ .ok_or(Error::UnknownNode(node_id))
+ }
+}
+
+/// An error type for holding parsing errors.
+#[derive(Debug)]
+pub enum ParseError {
+ /// Encounter an invalid state.
+ Invalid,
+ /// An error from graph operations.
+ Graph(GError),
+ /// Encounter an empty stack.
+ EmptyStack,
+ /// Encounter a non-single stack at the end.
+ NonSingleStack,
+ /// Encounter a stack whose element is out of bounds.
+ ///
+ /// The first component is the stack element, while the second the
+ /// bound.
+ InvalidStack(usize, usize),
+ /// Encounter a repetition construct without a preceding element.
+ InvalidRepetition(usize),
+ /// Invalid character
+ InvalidCharacter(char),
+}
+
+impl Display for ParseError {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "{self:?}")
+ }
+}
+
+impl std::error::Error for ParseError {}
+
+impl From<GError> for ParseError {
+ fn from(ge: GError) -> Self {
+ Self::Graph(ge)
+ }
+}
+
+/// The direction of parsing.
+///
+/// This means whether we want to stay at the same level of
+/// parent-child hierarchy, or to become the child, or to climb back
+/// to the last parent.
+#[derive(Debug, Copy, Clone, Default)]
+pub enum ParseDirection {
+ /// Climb back to the last parent.
+ Up,
+ /// Stay at the same level in the hierarchy.
+ #[default]
+ Right,
+ /// Become the child.
+ Down,
+}
+
+impl Display for ParseDirection {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ let direction = match self {
+ ParseDirection::Up => "↑",
+ ParseDirection::Right => "→",
+ ParseDirection::Down => "↓",
+ };
+
+ write!(f, "{direction}")
+ }
+}
+
+impl<T: GraphLabel + Display + Debug> DesRec for DefaultRegex<T> {
+ type Label = RegexType<T>;
+
+ type Regex = Self;
+
+ type Error = ParseError;
+
+ type Aux = ParseDirection;
+
+ type Scanner<'a> =
+ Box<dyn FnMut(&'a str) -> Result<Option<(usize, Self::Label, Self::Aux)>, Self::Error>>;
+
+ fn parse<'a>(
+ mut input: &'a str,
+ mut scanner: Self::Scanner<'a>,
+ post_p: bool,
+ ) -> Result<Option<(DefaultRegex<T>, &'a str)>, Self::Error> {
+ use ParseDirection::*;
+ use RegexType::*;
+
+ let mut intermediate_stack: Vec<(RegexType<T>, ParseDirection)> =
+ Vec::with_capacity(input.len());
+
+ // Classifies the input into a sequence of tokens with
+ // directions.
+ while !input.is_empty() {
+ if let Some((len, label, direction)) = scanner(input)? {
+ if len == 0 {
+ break;
+ }
+
+ input = &input[len..];
+
+ intermediate_stack.push((label, direction));
+
+ // If we encounter an opening parenthesis, we add
+ // another auxiliary instruction.
+ if matches!((label, direction), (Or, Down)) {
+ intermediate_stack.push((Empty, Down));
+ }
+ } else {
+ break;
+ }
+ }
+
+ let inter_len = intermediate_stack.len();
+
+ let mut parents_stack: Vec<usize> = Vec::with_capacity(inter_len + 2);
+
+ parents_stack.push(0);
+ parents_stack.push(1);
+
+ let mut list_of_children: Vec<Vec<usize>> = std::iter::repeat_with(Vec::new)
+ .take(inter_len + 2)
+ .collect();
+
+ list_of_children[0].push(1);
+
+ let mut types: Vec<RegexType<T>> = vec![Or, Empty];
+
+ types.extend(intermediate_stack.iter().map(|(label, _direction)| *label));
+
+ // Converts the sequence of tokens and directions into a
+ // regular expression.
+ for (index, (label, direction)) in intermediate_stack.iter().copied().enumerate() {
+ let mut parent: usize;
+ let mut parent_children: &mut Vec<usize>;
+
+ if let Some(parent_stack_parent) = parents_stack.last().copied() {
+ parent = parent_stack_parent;
+
+ match list_of_children.get_mut(parent) {
+ Some(stack_parent_children) => {
+ parent_children = stack_parent_children;
+
+ match (label, direction) {
+ (Paren, Up) => {
+ // a closing parenthesis does not need
+ // to be counted as a child
+ parents_stack.pop();
+ // a closing parenthesis jumps out of
+ // two levels at once
+ parents_stack.pop();
+ }
+ (Empty, Up) => {
+ // an upwards pipe
+
+ // first add the current node to the parent of the parent
+ parents_stack.pop();
+
+ if let Some(parent_stack_parent) = parents_stack.last().copied() {
+ parent = parent_stack_parent;
+
+ if let Some(stack_parent_children) =
+ list_of_children.get_mut(parent)
+ {
+ parent_children = stack_parent_children;
+
+ parent_children.push(index + 2);
+ } else {
+ return Err(ParseError::InvalidStack(
+ parent,
+ inter_len + 2,
+ ));
+ }
+ } else {
+ return Err(ParseError::EmptyStack);
+ }
+
+ // then make the current node the new parent
+ parents_stack.push(index + 2);
+ }
+ (_, Up) => {
+ parents_stack.pop();
+ parent_children.push(index + 2);
+ }
+ (_, Right) => {
+ parent_children.push(index + 2);
+ }
+ (_, Down) => {
+ parents_stack.push(index + 2);
+ parent_children.push(index + 2);
+ }
+ }
+ }
+ None => return Err(ParseError::InvalidStack(parent, inter_len)),
+ }
+ } else {
+ // There are unbalanced closing parentheses.
+ return Err(ParseError::EmptyStack);
+ }
+
+ // A special handling of repetition constructs as postfix
+ // operators: it swaps with the preceding element.
+ if post_p {
+ match label {
+ Kleene | Plus | Optional => {
+ // remove the current node from the parent
+ parent_children.pop();
+
+ if let Some(preceding) = parent_children.last().copied() {
+ list_of_children.swap(preceding, index + 2);
+
+ types.swap(preceding, index + 2);
+
+ match list_of_children.get_mut(preceding) {
+ Some(preceding_children) => {
+ preceding_children.push(index + 2);
+ }
+ None => {
+ return Err(ParseError::InvalidStack(preceding, inter_len + 2))
+ }
+ }
+ } else {
+ return Err(ParseError::InvalidRepetition(index));
+ }
+ }
+ _ => {}
+ }
+ }
+ }
+
+ // There are unbalanced opening parentheses.
+ if parents_stack.len() != 2 {
+ return Err(ParseError::NonSingleStack);
+ }
+
+ let graph = list_of_children.into();
+
+ let result = DefaultRegex { graph, types };
+
+ Ok(Some((result, input)))
+ }
+}
+
+#[cfg(test)]
+mod test_des_rec {
+ use super::*;
+
+ fn test_scanner(
+ input: &str,
+ ) -> Result<Option<(usize, RegexType<char>, ParseDirection)>, ParseError> {
+ use ParseDirection::*;
+ use RegexType::*;
+
+ if let Some(first) = input.chars().next() {
+ match first {
+ '*' => Ok(Some((1, Kleene, Right))),
+ '+' => Ok(Some((1, Plus, Right))),
+ '?' => Ok(Some((1, Optional, Right))),
+ '|' => Ok(Some((1, Empty, Up))),
+ '(' => Ok(Some((1, Or, Down))),
+ ')' => Ok(Some((1, Paren, Up))),
+ ' '..='~' => Ok(Some((1, Lit(first), Right))),
+ _ => Err(ParseError::InvalidCharacter(first)),
+ }
+ } else {
+ Ok(None)
+ }
+ }
+
+ #[test]
+ fn test_des_rec() -> Result<(), Box<dyn std::error::Error>> {
+ let input_string = "a*b?c+|(d*| +)?".to_owned();
+
+ if let Some((regex, remain)) =
+ DefaultRegex::<char>::parse(&input_string, Box::new(test_scanner), true)?
+ {
+ println!("regex = {regex}");
+ println!("remain = {remain}");
+
+ println!("regex length = {}", regex.len());
+
+ Ok(())
+ } else {
+ unreachable!()
+ }
+ }
+}
diff --git a/nfa/src/desrec.rs b/nfa/src/desrec.rs
new file mode 100644
index 0000000..c57d313
--- /dev/null
+++ b/nfa/src/desrec.rs
@@ -0,0 +1,50 @@
+//! This file defines the expected behaviours of a recursive descent
+//! parser.
+
+use super::Regex;
+use graph::GraphLabel;
+
+/// A thin wrapper of the parse output, to simplify the function
+/// signature a little.
+pub type ParseOutput<'a, T> = Option<(T, &'a str)>;
+
+/// Types implementing this trait provide a method to be recursively
+/// parsed.
+pub trait DesRec {
+ /// The type of labels of the resulting regular expression.
+ type Label: GraphLabel;
+
+ /// The type of the resulting regular expression.
+ type Regex: Regex<Self::Label>;
+
+ /// The type of errors encountered when parsing.
+ type Error: std::error::Error;
+
+ /// Auxiliary data when parsing
+ type Aux;
+
+ /// The type of a scanner that classifies inputs into tokens.
+ ///
+ /// The return type indicates the result of classification:
+ ///
+ /// - `Err(_)`: the classification fails
+ ///
+ /// - `Ok(None)`: the classification succeeds, and the parsing
+ /// should stop here
+ ///
+ /// - `Ok(Some(_))`: the classification succeeds and the parsing
+ /// should continue.
+ type Scanner<'a>: FnMut(&'a str) -> Result<Option<(usize, Self::Label, Self::Aux)>, Self::Error>;
+
+ /// Parse a string into a regular expression with the aid of this
+ /// type.
+ ///
+ /// Accept a slice of string and return either a parsing error, or
+ /// a pair of correctly parsed regular expression and the
+ /// remaining slice.
+ fn parse<'a>(
+ input: &'a str,
+ scanner: Self::Scanner<'a>,
+ post_p: bool,
+ ) -> Result<ParseOutput<'a, Self::Regex>, Self::Error>;
+}
diff --git a/nfa/src/error.rs b/nfa/src/error.rs
index 6112878..0c6bb3c 100644
--- a/nfa/src/error.rs
+++ b/nfa/src/error.rs
@@ -5,9 +5,15 @@ use graph::error::Error as GError;
use std::fmt::{Display, Formatter};
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd)]
+/// The error type for NFA operations.
pub enum Error {
+ /// An unknown node id is encountered.
UnknownNode(usize),
+ /// Some operations are not supported by the implementations.
+ ///
+ /// Everything is a trade-off, wink wink.
UnsupportedOperation,
+ /// This error comes from some underlying graph operation.
Graph(GError),
}
diff --git a/nfa/src/lib.rs b/nfa/src/lib.rs
index ef207cf..1e2a30c 100644
--- a/nfa/src/lib.rs
+++ b/nfa/src/lib.rs
@@ -6,7 +6,7 @@
//! implements the Graph trait from the [`graph`] crate, and then use
//! that external graph type as [`Graph`][graph::Graph] here.
-mod error;
+pub mod error;
extern crate graph;
@@ -49,13 +49,12 @@ pub trait Regex<T: GraphLabel>: Graph + Display {
// TODO: add functions that determine if certain "positions" in a
// regular language satisfy some special properties, like at the
// end of a Kleene star, or at the end of a regular language, et
- // cetera. These will be needed later.
+ // cetera. These might be needed later.
}
/// The expected behvaiour of a nondeterministic finite automaton.
///
-/// Every NFA is a special labelled graph, so this trait extends the
-/// [`LabelGraph`][graph::LabelGraph] trait.
+/// Every NFA is a special labelled graph.
pub trait Nfa<T: GraphLabel>: LabelGraph<T> {
/// Remove all empty transitions from the nondeterministic finite
/// automaton.
@@ -89,6 +88,7 @@ pub trait Nfa<T: GraphLabel>: LabelGraph<T> {
}
pub mod default;
+pub mod desrec;
#[cfg(test)]
mod nfa_tests {}
diff --git a/receme/src/functor.rs b/receme/src/functor.rs
index 95e2555..cd4dac2 100644
--- a/receme/src/functor.rs
+++ b/receme/src/functor.rs
@@ -31,6 +31,14 @@ pub trait Functor<T> {
fn fmap<S>(self, f: impl FnMut(T) -> S) -> Self::Target<S>;
}
+impl<T> Functor<T> for Vec<T> {
+ type Target<S> = Vec<S>;
+
+ fn fmap<S>(self, f: impl FnMut(T) -> S) -> Self::Target<S> {
+ self.into_iter().map(f).collect()
+ }
+}
+
/// A functor can map over its generic type parameter.
///
/// It can map from Functor(T) to Functor(S).
diff --git a/repcore/src/grammar.rs b/repcore/src/grammar.rs
new file mode 100644
index 0000000..ee9f033
--- /dev/null
+++ b/repcore/src/grammar.rs
@@ -0,0 +1,59 @@
+//! This file implements the extected behaviours of grammars.
+
+// NOTE: We shall first start with a parser that works at the level of
+// characters. The purpose is to first experiment with the workings
+// and the performance of the algorithms, before optimising by using
+// regular expressions to classify inputs into tokens. In other
+// words, the current focus is not on the optimisations, whereas
+// scanners are for optimisations only, so to speak.
+
+/// The type of a terminal.
+///
+/// For the time being this is a wrapper around a string, but in the
+/// future it may hold more information of scanners.
+pub struct Terminal {
+ // If we want to use scanners, per chance add them as a new field
+ // here.
+ name: String,
+}
+
+impl Terminal {
+ /// Create a terminal with the given name.
+ #[inline]
+ pub fn new(name: String) -> Self {
+ Self { name }
+ }
+
+ /// Return the name of the terminal.
+ #[inline]
+ pub fn name(&self) -> &str {
+ &self.name
+ }
+}
+
+/// The type of a non-terminal.
+///
+/// This is just a wrapper around a string.
+pub struct Nonterminal(String);
+
+impl Nonterminal {
+ /// Return the name of the nonterminal.
+ ///
+ /// Just to improve readability.
+ #[inline]
+ pub fn name(&self) -> &str {
+ &self.0
+ }
+}
+
+/// The type of a terminal or a non-terminal.
+///
+/// Only an index is stored here. Actual data are stored in two other
+/// arrays.
+#[derive(Debug, Hash, Eq, PartialEq, Clone, Copy, Ord, PartialOrd)]
+pub enum TNT {
+ /// Terminal variant
+ Ter(usize),
+ /// Nonterminal variant
+ Non(usize),
+}
diff --git a/repcore/src/lib.rs b/repcore/src/lib.rs
index 7d12d9a..589b61c 100644
--- a/repcore/src/lib.rs
+++ b/repcore/src/lib.rs
@@ -1,3 +1,9 @@
+//! This package implements the core algorithm of the entire
+//! workspace: parsing with derivatives by means of regular nulling
+//! languages.
+
+pub mod grammar;
+
pub fn add(left: usize, right: usize) -> usize {
left + right
}
diff --git a/repcore/src/plan.org b/repcore/src/plan.org
new file mode 100644
index 0000000..13c2ee0
--- /dev/null
+++ b/repcore/src/plan.org
@@ -0,0 +1,59 @@
+#+TITLE: Plan of the package
+#+AUTHOR: Durand
+#+DATE: <2022-11-18 Ven 19:57>
+
+* Atomic Languages
+
+This describes the behaviours of atomic languages. The atomic
+language consists of the null closure of any non-terminal symbol in
+the grammar, and their deriavtives by terminals and non-terminal.
+
+* Derivative Languages
+
+This is the main driving device of the algorithm. Basically, the
+algorithm works by taking successive derivatives, according to the
+input symbol. At each step, we calculate the derivative language. In
+this process, we also compute some semiring value and store in a
+carrier. The end result of the algorithm is the final semiring
+value.
+
+If one wants simply to determine if the input string belongs to the
+grammar, one chooses the semiring to be the field with two elements,
+the boolean field. If one wants to find how many ways are there to
+derive a given input string, then one uses the semiring of natural
+numbers instead. If one wants, moreover, to find all the possible
+ways to derive a particular input string, then one can use the
+free semiring on the set of terminals and non-terminals of the
+grammar. Here the free semiring is the left-adjoint functor to the
+forgetful functor from the category of semirings to the category of
+sets.
+
+To be more specific, the free semiring on a set is given by sets of
+sequences of elements in the set. The addition of the semiring is the
+set union operation, and the multiplication is taking the respective
+concatenations.
+
+** Semirings
+
+So we need a module to define the behaviours of semirings, and provide
+some common semirings implementations. Then in the main driving force
+we can freely substitute different semirings, according to the
+particular needs.
+
+** Languages
+
+Then the main part is to define the behaviour of languages. This
+should be easy enough, since we already have the mechanism of graphs,
+nondeterministic automata, and semirings. All we need to do is to
+combine them together.
+
+* Testing ground
+
+I am in a strong need to test things out. The most important one is
+to visualize each step of the derivation, in a human-friendly manner.
+I need this to examine whether my atomic languages are wrongly
+implemented, or my atomic languages are wrongly derived, or my
+understanding of the main algorithm is plain wrong.
+
+This is the main reason I started this rewrite of the package.
+