summaryrefslogtreecommitdiff
path: root/nfa
diff options
context:
space:
mode:
Diffstat (limited to 'nfa')
-rw-r--r--nfa/Cargo.toml1
-rw-r--r--nfa/Makefile.am19
-rw-r--r--nfa/src/default/mod.rs121
-rw-r--r--nfa/src/default/nfa.rs120
-rw-r--r--nfa/src/default/regex.rs575
-rw-r--r--nfa/src/desrec.rs50
-rw-r--r--nfa/src/error.rs6
-rw-r--r--nfa/src/lib.rs8
8 files changed, 779 insertions, 121 deletions
diff --git a/nfa/Cargo.toml b/nfa/Cargo.toml
index b1387b6..7f48760 100644
--- a/nfa/Cargo.toml
+++ b/nfa/Cargo.toml
@@ -7,6 +7,7 @@ edition = "2021"
[dependencies]
graph = { path = "../graph", optional = true }
+receme = { path = "../receme" }
[features]
default = ["default-graph"]
diff --git a/nfa/Makefile.am b/nfa/Makefile.am
new file mode 100644
index 0000000..623572a
--- /dev/null
+++ b/nfa/Makefile.am
@@ -0,0 +1,19 @@
+.PHONY: dev rel clean check
+
+all: dev
+
+dev:
+ @echo "cargo build"
+ @@CARGO@ build
+
+rel:
+ @echo "cargo build --release"
+ @@CARGO@ build --release
+
+clean:
+ @echo "cargo clean"
+ @@CARGO@ clean
+
+check:
+ @echo "cargo clippy"
+ @@CARGO@ clippy
diff --git a/nfa/src/default/mod.rs b/nfa/src/default/mod.rs
index 805540b..b9ee398 100644
--- a/nfa/src/default/mod.rs
+++ b/nfa/src/default/mod.rs
@@ -1,123 +1,10 @@
//! This file provides a structure that implements the trait
-//! [`NFA`][super::Nfa].
-//!
-//! It is used as the default implementation.
+//! [`NFA`][super::Nfa] and another that umplements the trait
+//! [`Regex`][super::Regex].
-use graph::{error::Error as GError, DLGraph, Graph, GraphLabel, LabelGraph};
+pub mod nfa;
-use super::{error::Error, Nfa, Regex};
-
-// TODO: Store the regular expression in the NFA as well.
-//
-// The current focus of the project is to understand the growth rate
-// of the algorithm, to know whether I made a mistake in the previous
-// iteration of the implementation, or the algorithm is not as fast as
-// the author estimated, which is not quite likely, of course.
-//
-// Thus I shall establish a friendly interface that allows me to view
-// and debug the atomic languages and the languages, transparently.
-
-#[non_exhaustive]
-#[derive(Debug)]
-/// Default NFA implementation.
-pub struct DefaultNFA<T: GraphLabel> {
- graph: DLGraph<T>,
-}
-
-impl<T: GraphLabel> Default for DefaultNFA<T> {
- fn default() -> Self {
- let graph = Default::default();
- Self { graph }
- }
-}
-
-impl<T: GraphLabel> Graph for DefaultNFA<T> {
- type Iter<'a> = <DLGraph<T> as Graph>::Iter<'a> where T: 'a;
-
- #[inline]
- fn is_empty(&self) -> bool {
- self.graph.is_empty()
- }
-
- #[inline]
- fn nodes_len(&self) -> usize {
- self.graph.nodes_len()
- }
-
- #[inline]
- fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, GError> {
- self.graph.children_of(node_id)
- }
-
- #[inline]
- fn degree(&self, node_id: usize) -> Result<usize, GError> {
- self.graph.degree(node_id)
- }
-
- #[inline]
- fn is_empty_node(&self, node_id: usize) -> Result<bool, GError> {
- self.graph.is_empty_node(node_id)
- }
-
- #[inline]
- fn has_edge(&self, source: usize, target: usize) -> Result<bool, GError> {
- self.graph.has_edge(source, target)
- }
-}
-
-impl<T: GraphLabel> LabelGraph<T> for DefaultNFA<T> {
- type Iter<'a> = <DLGraph<T> as LabelGraph<T>>::Iter<'a> where T: 'a;
-
- type LabelIter<'a> = <DLGraph<T> as LabelGraph<T>>::LabelIter<'a> where T: 'a;
-
- // TODO: Return the label from the contained regular language.
- #[inline]
- fn vertex_label(&self, node_id: usize) -> Result<Option<T>, GError> {
- if self.has_node(node_id) {
- todo!()
- } else {
- Err(GError::IndexOutOfBounds(node_id, self.nodes_len()))
- }
- }
-
- #[inline]
- fn edge_label(&self, source: usize, target: usize) -> Result<Vec<T>, GError> {
- self.graph.edge_label(source, target)
- }
-
- #[inline]
- fn find_children_with_label(
- &self,
- node_id: usize,
- label: &T,
- ) -> Result<<Self as LabelGraph<T>>::Iter<'_>, GError> {
- self.graph.find_children_with_label(node_id, label)
- }
-
- #[inline]
- fn labels_of(&self, node_id: usize) -> Result<Self::LabelIter<'_>, GError> {
- self.graph.labels_of(node_id)
- }
-}
-
-impl<T: GraphLabel> Nfa<T> for DefaultNFA<T> {
- #[allow(unused)]
- fn to_nfa(regex: impl Regex<T>) -> Self {
- todo!()
- }
-
- fn remove_epsilon(&mut self) -> Result<(), Error> {
- todo!()
- }
-
- fn remove_dead(&mut self) -> Result<(), Error> {
- todo!()
- }
-
- fn nulling(&mut self) -> Result<(), Error> {
- todo!()
- }
-}
+pub mod regex;
#[cfg(test)]
mod default_nfa_test {}
diff --git a/nfa/src/default/nfa.rs b/nfa/src/default/nfa.rs
new file mode 100644
index 0000000..3c2bd83
--- /dev/null
+++ b/nfa/src/default/nfa.rs
@@ -0,0 +1,120 @@
+//! This file provides a default implementation of NFA.
+
+// TODO: Store the regular expression in the NFA as well.
+//
+// The current focus of the project is to understand the growth rate
+// of the algorithm, to know whether I made a mistake in the previous
+// iteration of the implementation, or the algorithm is not as fast as
+// the author estimated, which is not quite likely, of course.
+//
+// Thus I shall establish a friendly interface that allows me to view
+// and debug the atomic languages and the languages, transparently.
+
+use graph::{error::Error as GError, DLGraph, Graph, GraphLabel, LabelGraph};
+
+use crate::{error::Error, Nfa, Regex};
+
+use std::fmt::Display;
+
+#[non_exhaustive]
+#[derive(Debug)]
+/// Default NFA implementation.
+pub struct DefaultNFA<T: GraphLabel + Display> {
+ graph: DLGraph<T>,
+}
+
+impl<T: GraphLabel + Display> Default for DefaultNFA<T> {
+ fn default() -> Self {
+ Self {
+ graph: Default::default(),
+ }
+ }
+}
+
+impl<T: GraphLabel + Display> Graph for DefaultNFA<T> {
+ type Iter<'a> = <DLGraph<T> as Graph>::Iter<'a> where T: 'a;
+
+ #[inline]
+ fn is_empty(&self) -> bool {
+ self.graph.is_empty()
+ }
+
+ #[inline]
+ fn nodes_len(&self) -> usize {
+ self.graph.nodes_len()
+ }
+
+ #[inline]
+ fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, GError> {
+ self.graph.children_of(node_id)
+ }
+
+ #[inline]
+ fn degree(&self, node_id: usize) -> Result<usize, GError> {
+ self.graph.degree(node_id)
+ }
+
+ #[inline]
+ fn is_empty_node(&self, node_id: usize) -> Result<bool, GError> {
+ self.graph.is_empty_node(node_id)
+ }
+
+ #[inline]
+ fn has_edge(&self, source: usize, target: usize) -> Result<bool, GError> {
+ self.graph.has_edge(source, target)
+ }
+}
+
+impl<T: GraphLabel + Display> LabelGraph<T> for DefaultNFA<T> {
+ type Iter<'a> = <DLGraph<T> as LabelGraph<T>>::Iter<'a> where T: 'a;
+
+ type LabelIter<'a> = <DLGraph<T> as LabelGraph<T>>::LabelIter<'a> where T: 'a;
+
+ // TODO: Return the label from the contained regular language.
+ #[inline]
+ fn vertex_label(&self, node_id: usize) -> Result<Option<T>, GError> {
+ if self.has_node(node_id) {
+ todo!()
+ } else {
+ Err(GError::IndexOutOfBounds(node_id, self.nodes_len()))
+ }
+ }
+
+ #[inline]
+ fn edge_label(&self, source: usize, target: usize) -> Result<Vec<T>, GError> {
+ self.graph.edge_label(source, target)
+ }
+
+ #[inline]
+ fn find_children_with_label(
+ &self,
+ node_id: usize,
+ label: &T,
+ ) -> Result<<Self as LabelGraph<T>>::Iter<'_>, GError> {
+ self.graph.find_children_with_label(node_id, label)
+ }
+
+ #[inline]
+ fn labels_of(&self, node_id: usize) -> Result<Self::LabelIter<'_>, GError> {
+ self.graph.labels_of(node_id)
+ }
+}
+
+impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
+ #[allow(unused)]
+ fn to_nfa(regex: impl Regex<T>) -> Self {
+ todo!()
+ }
+
+ fn remove_epsilon(&mut self) -> Result<(), Error> {
+ todo!()
+ }
+
+ fn remove_dead(&mut self) -> Result<(), Error> {
+ todo!()
+ }
+
+ fn nulling(&mut self) -> Result<(), Error> {
+ todo!()
+ }
+}
diff --git a/nfa/src/default/regex.rs b/nfa/src/default/regex.rs
new file mode 100644
index 0000000..a60f801
--- /dev/null
+++ b/nfa/src/default/regex.rs
@@ -0,0 +1,575 @@
+//! This file provides a default implementation of Regex.
+
+use graph::{error::Error as GError, ALGraph, ExtGraph, Graph, GraphLabel};
+
+use crate::{desrec::DesRec, error::Error, Regex};
+
+use receme::{algebra::Algebra, catana::Cata};
+
+use std::fmt::{Debug, Display};
+
+/// The type of a node in a regular expression.
+///
+/// # Example
+///
+/// If a node has type "Kleene", this means it represents a star
+/// construct in a regular expression, and its children are the
+/// contents of the star.
+///
+/// # Note
+///
+/// There is no "concatenation" node type. A concatenation of two
+/// nodes is represented as the two nodes being successive children in
+/// their common parent node.
+///
+/// This is possible because a regular expression has a root node.
+/// For the sake of convenience, the root node has type "Or".
+#[derive(Debug, Hash, Default, Eq, PartialEq, Clone, Copy, Ord, PartialOrd)]
+pub enum RegexType<T: GraphLabel + Display> {
+ /// A star node is a node with an arbitrary number of repetitions.
+ Kleene,
+ /// A plus node is a node with at least one repetition: a+ equals
+ /// aa*
+ Plus,
+ /// An optional node
+ Optional,
+ /// An or node means an alternation of its children.
+ Or,
+ /// A paren node represents a parenthesis.
+ Paren,
+ /// An empty node
+ #[default]
+ Empty,
+ /// A literal node
+ Lit(T),
+}
+
+/// A default implementation of regular expressions.
+#[derive(Debug)]
+pub struct DefaultRegex<T: GraphLabel + Display> {
+ /// The underlying graph is stored using adjacency lists.
+ graph: ALGraph,
+ /// The types of the underlying nodes.
+ types: Vec<RegexType<T>>,
+}
+
+impl<T: GraphLabel + Display> Default for DefaultRegex<T> {
+ fn default() -> Self {
+ Self {
+ graph: Default::default(),
+ types: Default::default(),
+ }
+ }
+}
+
+impl<T: GraphLabel + Display> DefaultRegex<T> {
+ /// Return the number of elements in this regular expression,
+ /// counting nodes.
+ pub fn len(&self) -> usize {
+ self.types.len()
+ }
+
+ /// Return true if and only if this regular expression has no
+ /// nodes.
+ pub fn is_empty(&self) -> bool {
+ self.types.is_empty()
+ }
+
+ /// Add a node as the child of an existing node or as a root.
+ pub fn add_node(
+ &mut self,
+ label: RegexType<T>,
+ parent: Option<usize>,
+ ) -> Result<(), ParseError> {
+ self.graph.extend(parent.iter().copied())?;
+ self.types.push(label);
+
+ Ok(())
+ }
+}
+
+// REVIEW: This may not be needed.
+impl<S: GraphLabel + Display, T, A> Cata<T, Vec<T>, A> for &DefaultRegex<S>
+where
+ A: Algebra<T, Vec<T>>,
+{
+ fn cata(self, mut alg: A) -> T {
+ let mut results: Vec<Option<T>> = std::iter::repeat_with(Default::default)
+ .take(self.len())
+ .collect();
+
+ for index in 0..=self.len() {
+ let algebra_result = {
+ let results_of_children: Vec<T> = self
+ .graph
+ .children_of(index)
+ .unwrap()
+ .map(|child_index| std::mem::replace(&mut results[child_index], None).unwrap())
+ .collect();
+
+ alg(results_of_children)
+ };
+
+ // Artificially use this value to satisfy the compiler.
+ let _ = std::mem::replace(&mut results[index], Some(algebra_result));
+ }
+
+ std::mem::replace(&mut results[0], None).unwrap()
+ }
+}
+
+impl<T: GraphLabel + Display> Display for DefaultRegex<T> {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ #[derive(Copy, Clone)]
+ enum StackElement {
+ Seen(usize),
+ Unseen(usize),
+ }
+
+ impl StackElement {
+ fn index(&self) -> usize {
+ match self {
+ Seen(index) => *index,
+ Unseen(index) => *index,
+ }
+ }
+
+ fn is_seen(&self) -> bool {
+ match self {
+ Seen(_) => true,
+ Unseen(_) => false,
+ }
+ }
+ }
+
+ use StackElement::{Seen, Unseen};
+
+ let mut stack: Vec<StackElement> = Vec::new();
+ let mut types = self.types.clone();
+ types.push(RegexType::Paren);
+
+ stack.push(Unseen(0));
+
+ while let Some(top) = stack.pop() {
+ let node_type = types[top.index()];
+
+ // TODO: Do not use unwrap here.
+
+ match node_type {
+ RegexType::Kleene => {
+ if !top.is_seen() {
+ stack.push(Seen(top.index()));
+ stack.extend(
+ self.graph
+ .children_of(top.index())
+ .unwrap()
+ .map(Unseen)
+ .rev(),
+ );
+ } else {
+ write!(f, "*")?;
+ }
+ }
+ RegexType::Plus => {
+ if !top.is_seen() {
+ stack.push(Seen(top.index()));
+ stack.extend(
+ self.graph
+ .children_of(top.index())
+ .unwrap()
+ .map(Unseen)
+ .rev(),
+ );
+ } else {
+ write!(f, "+")?;
+ }
+ }
+ RegexType::Optional => {
+ if !top.is_seen() {
+ stack.push(Seen(top.index()));
+ stack.extend(
+ self.graph
+ .children_of(top.index())
+ .unwrap()
+ .map(Unseen)
+ .rev(),
+ );
+ } else {
+ write!(f, "?")?;
+ }
+ }
+ RegexType::Or => {
+ if !top.is_seen() {
+ write!(f, "(")?;
+
+ let len = self.len();
+
+ stack.push(Unseen(types.len() - 1));
+
+ for (child_index, child) in self
+ .graph
+ .children_of(top.index())
+ .unwrap()
+ .enumerate()
+ .rev()
+ {
+ if child_index != len - 1 && child_index != 0 {
+ stack.push(Unseen(child));
+ stack.push(Seen(top.index()));
+ } else {
+ stack.push(Unseen(child));
+ }
+ }
+ } else {
+ write!(f, "|")?;
+ }
+ }
+ RegexType::Paren => {
+ write!(f, ")")?;
+ }
+ RegexType::Empty => {
+ stack.extend(
+ self.graph
+ .children_of(top.index())
+ .unwrap()
+ .map(Unseen)
+ .rev(),
+ );
+ }
+ RegexType::Lit(label) => write!(f, "{label}")?,
+ }
+ }
+
+ Ok(())
+ }
+}
+
+impl<T: GraphLabel + Display> Graph for DefaultRegex<T> {
+ type Iter<'a> = <ALGraph as Graph>::Iter<'a>
+ where
+ Self: 'a;
+
+ #[inline]
+ fn is_empty(&self) -> bool {
+ self.graph.is_empty()
+ }
+
+ #[inline]
+ fn nodes_len(&self) -> usize {
+ self.graph.nodes_len()
+ }
+
+ #[inline]
+ fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, GError> {
+ self.graph.children_of(node_id)
+ }
+
+ #[inline]
+ fn degree(&self, node_id: usize) -> Result<usize, GError> {
+ self.graph.degree(node_id)
+ }
+
+ #[inline]
+ fn is_empty_node(&self, node_id: usize) -> Result<bool, GError> {
+ self.graph.is_empty_node(node_id)
+ }
+
+ #[inline]
+ fn has_edge(&self, source: usize, target: usize) -> Result<bool, GError> {
+ self.graph.has_edge(source, target)
+ }
+}
+
+impl<T: GraphLabel + Display> Regex<RegexType<T>> for DefaultRegex<T> {
+ #[inline]
+ fn vertex_label(&self, node_id: usize) -> Result<RegexType<T>, Error> {
+ self.types
+ .get(node_id)
+ .copied()
+ .ok_or(Error::UnknownNode(node_id))
+ }
+}
+
+/// An error type for holding parsing errors.
+#[derive(Debug)]
+pub enum ParseError {
+ /// Encounter an invalid state.
+ Invalid,
+ /// An error from graph operations.
+ Graph(GError),
+ /// Encounter an empty stack.
+ EmptyStack,
+ /// Encounter a non-single stack at the end.
+ NonSingleStack,
+ /// Encounter a stack whose element is out of bounds.
+ ///
+ /// The first component is the stack element, while the second the
+ /// bound.
+ InvalidStack(usize, usize),
+ /// Encounter a repetition construct without a preceding element.
+ InvalidRepetition(usize),
+ /// Invalid character
+ InvalidCharacter(char),
+}
+
+impl Display for ParseError {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "{self:?}")
+ }
+}
+
+impl std::error::Error for ParseError {}
+
+impl From<GError> for ParseError {
+ fn from(ge: GError) -> Self {
+ Self::Graph(ge)
+ }
+}
+
+/// The direction of parsing.
+///
+/// This means whether we want to stay at the same level of
+/// parent-child hierarchy, or to become the child, or to climb back
+/// to the last parent.
+#[derive(Debug, Copy, Clone, Default)]
+pub enum ParseDirection {
+ /// Climb back to the last parent.
+ Up,
+ /// Stay at the same level in the hierarchy.
+ #[default]
+ Right,
+ /// Become the child.
+ Down,
+}
+
+impl Display for ParseDirection {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ let direction = match self {
+ ParseDirection::Up => "↑",
+ ParseDirection::Right => "→",
+ ParseDirection::Down => "↓",
+ };
+
+ write!(f, "{direction}")
+ }
+}
+
+impl<T: GraphLabel + Display + Debug> DesRec for DefaultRegex<T> {
+ type Label = RegexType<T>;
+
+ type Regex = Self;
+
+ type Error = ParseError;
+
+ type Aux = ParseDirection;
+
+ type Scanner<'a> =
+ Box<dyn FnMut(&'a str) -> Result<Option<(usize, Self::Label, Self::Aux)>, Self::Error>>;
+
+ fn parse<'a>(
+ mut input: &'a str,
+ mut scanner: Self::Scanner<'a>,
+ post_p: bool,
+ ) -> Result<Option<(DefaultRegex<T>, &'a str)>, Self::Error> {
+ use ParseDirection::*;
+ use RegexType::*;
+
+ let mut intermediate_stack: Vec<(RegexType<T>, ParseDirection)> =
+ Vec::with_capacity(input.len());
+
+ // Classifies the input into a sequence of tokens with
+ // directions.
+ while !input.is_empty() {
+ if let Some((len, label, direction)) = scanner(input)? {
+ if len == 0 {
+ break;
+ }
+
+ input = &input[len..];
+
+ intermediate_stack.push((label, direction));
+
+ // If we encounter an opening parenthesis, we add
+ // another auxiliary instruction.
+ if matches!((label, direction), (Or, Down)) {
+ intermediate_stack.push((Empty, Down));
+ }
+ } else {
+ break;
+ }
+ }
+
+ let inter_len = intermediate_stack.len();
+
+ let mut parents_stack: Vec<usize> = Vec::with_capacity(inter_len + 2);
+
+ parents_stack.push(0);
+ parents_stack.push(1);
+
+ let mut list_of_children: Vec<Vec<usize>> = std::iter::repeat_with(Vec::new)
+ .take(inter_len + 2)
+ .collect();
+
+ list_of_children[0].push(1);
+
+ let mut types: Vec<RegexType<T>> = vec![Or, Empty];
+
+ types.extend(intermediate_stack.iter().map(|(label, _direction)| *label));
+
+ // Converts the sequence of tokens and directions into a
+ // regular expression.
+ for (index, (label, direction)) in intermediate_stack.iter().copied().enumerate() {
+ let mut parent: usize;
+ let mut parent_children: &mut Vec<usize>;
+
+ if let Some(parent_stack_parent) = parents_stack.last().copied() {
+ parent = parent_stack_parent;
+
+ match list_of_children.get_mut(parent) {
+ Some(stack_parent_children) => {
+ parent_children = stack_parent_children;
+
+ match (label, direction) {
+ (Paren, Up) => {
+ // a closing parenthesis does not need
+ // to be counted as a child
+ parents_stack.pop();
+ // a closing parenthesis jumps out of
+ // two levels at once
+ parents_stack.pop();
+ }
+ (Empty, Up) => {
+ // an upwards pipe
+
+ // first add the current node to the parent of the parent
+ parents_stack.pop();
+
+ if let Some(parent_stack_parent) = parents_stack.last().copied() {
+ parent = parent_stack_parent;
+
+ if let Some(stack_parent_children) =
+ list_of_children.get_mut(parent)
+ {
+ parent_children = stack_parent_children;
+
+ parent_children.push(index + 2);
+ } else {
+ return Err(ParseError::InvalidStack(
+ parent,
+ inter_len + 2,
+ ));
+ }
+ } else {
+ return Err(ParseError::EmptyStack);
+ }
+
+ // then make the current node the new parent
+ parents_stack.push(index + 2);
+ }
+ (_, Up) => {
+ parents_stack.pop();
+ parent_children.push(index + 2);
+ }
+ (_, Right) => {
+ parent_children.push(index + 2);
+ }
+ (_, Down) => {
+ parents_stack.push(index + 2);
+ parent_children.push(index + 2);
+ }
+ }
+ }
+ None => return Err(ParseError::InvalidStack(parent, inter_len)),
+ }
+ } else {
+ // There are unbalanced closing parentheses.
+ return Err(ParseError::EmptyStack);
+ }
+
+ // A special handling of repetition constructs as postfix
+ // operators: it swaps with the preceding element.
+ if post_p {
+ match label {
+ Kleene | Plus | Optional => {
+ // remove the current node from the parent
+ parent_children.pop();
+
+ if let Some(preceding) = parent_children.last().copied() {
+ list_of_children.swap(preceding, index + 2);
+
+ types.swap(preceding, index + 2);
+
+ match list_of_children.get_mut(preceding) {
+ Some(preceding_children) => {
+ preceding_children.push(index + 2);
+ }
+ None => {
+ return Err(ParseError::InvalidStack(preceding, inter_len + 2))
+ }
+ }
+ } else {
+ return Err(ParseError::InvalidRepetition(index));
+ }
+ }
+ _ => {}
+ }
+ }
+ }
+
+ // There are unbalanced opening parentheses.
+ if parents_stack.len() != 2 {
+ return Err(ParseError::NonSingleStack);
+ }
+
+ let graph = list_of_children.into();
+
+ let result = DefaultRegex { graph, types };
+
+ Ok(Some((result, input)))
+ }
+}
+
+#[cfg(test)]
+mod test_des_rec {
+ use super::*;
+
+ fn test_scanner(
+ input: &str,
+ ) -> Result<Option<(usize, RegexType<char>, ParseDirection)>, ParseError> {
+ use ParseDirection::*;
+ use RegexType::*;
+
+ if let Some(first) = input.chars().next() {
+ match first {
+ '*' => Ok(Some((1, Kleene, Right))),
+ '+' => Ok(Some((1, Plus, Right))),
+ '?' => Ok(Some((1, Optional, Right))),
+ '|' => Ok(Some((1, Empty, Up))),
+ '(' => Ok(Some((1, Or, Down))),
+ ')' => Ok(Some((1, Paren, Up))),
+ ' '..='~' => Ok(Some((1, Lit(first), Right))),
+ _ => Err(ParseError::InvalidCharacter(first)),
+ }
+ } else {
+ Ok(None)
+ }
+ }
+
+ #[test]
+ fn test_des_rec() -> Result<(), Box<dyn std::error::Error>> {
+ let input_string = "a*b?c+|(d*| +)?".to_owned();
+
+ if let Some((regex, remain)) =
+ DefaultRegex::<char>::parse(&input_string, Box::new(test_scanner), true)?
+ {
+ println!("regex = {regex}");
+ println!("remain = {remain}");
+
+ println!("regex length = {}", regex.len());
+
+ Ok(())
+ } else {
+ unreachable!()
+ }
+ }
+}
diff --git a/nfa/src/desrec.rs b/nfa/src/desrec.rs
new file mode 100644
index 0000000..c57d313
--- /dev/null
+++ b/nfa/src/desrec.rs
@@ -0,0 +1,50 @@
+//! This file defines the expected behaviours of a recursive descent
+//! parser.
+
+use super::Regex;
+use graph::GraphLabel;
+
+/// A thin wrapper of the parse output, to simplify the function
+/// signature a little.
+pub type ParseOutput<'a, T> = Option<(T, &'a str)>;
+
+/// Types implementing this trait provide a method to be recursively
+/// parsed.
+pub trait DesRec {
+ /// The type of labels of the resulting regular expression.
+ type Label: GraphLabel;
+
+ /// The type of the resulting regular expression.
+ type Regex: Regex<Self::Label>;
+
+ /// The type of errors encountered when parsing.
+ type Error: std::error::Error;
+
+ /// Auxiliary data when parsing
+ type Aux;
+
+ /// The type of a scanner that classifies inputs into tokens.
+ ///
+ /// The return type indicates the result of classification:
+ ///
+ /// - `Err(_)`: the classification fails
+ ///
+ /// - `Ok(None)`: the classification succeeds, and the parsing
+ /// should stop here
+ ///
+ /// - `Ok(Some(_))`: the classification succeeds and the parsing
+ /// should continue.
+ type Scanner<'a>: FnMut(&'a str) -> Result<Option<(usize, Self::Label, Self::Aux)>, Self::Error>;
+
+ /// Parse a string into a regular expression with the aid of this
+ /// type.
+ ///
+ /// Accept a slice of string and return either a parsing error, or
+ /// a pair of correctly parsed regular expression and the
+ /// remaining slice.
+ fn parse<'a>(
+ input: &'a str,
+ scanner: Self::Scanner<'a>,
+ post_p: bool,
+ ) -> Result<ParseOutput<'a, Self::Regex>, Self::Error>;
+}
diff --git a/nfa/src/error.rs b/nfa/src/error.rs
index 6112878..0c6bb3c 100644
--- a/nfa/src/error.rs
+++ b/nfa/src/error.rs
@@ -5,9 +5,15 @@ use graph::error::Error as GError;
use std::fmt::{Display, Formatter};
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd)]
+/// The error type for NFA operations.
pub enum Error {
+ /// An unknown node id is encountered.
UnknownNode(usize),
+ /// Some operations are not supported by the implementations.
+ ///
+ /// Everything is a trade-off, wink wink.
UnsupportedOperation,
+ /// This error comes from some underlying graph operation.
Graph(GError),
}
diff --git a/nfa/src/lib.rs b/nfa/src/lib.rs
index ef207cf..1e2a30c 100644
--- a/nfa/src/lib.rs
+++ b/nfa/src/lib.rs
@@ -6,7 +6,7 @@
//! implements the Graph trait from the [`graph`] crate, and then use
//! that external graph type as [`Graph`][graph::Graph] here.
-mod error;
+pub mod error;
extern crate graph;
@@ -49,13 +49,12 @@ pub trait Regex<T: GraphLabel>: Graph + Display {
// TODO: add functions that determine if certain "positions" in a
// regular language satisfy some special properties, like at the
// end of a Kleene star, or at the end of a regular language, et
- // cetera. These will be needed later.
+ // cetera. These might be needed later.
}
/// The expected behvaiour of a nondeterministic finite automaton.
///
-/// Every NFA is a special labelled graph, so this trait extends the
-/// [`LabelGraph`][graph::LabelGraph] trait.
+/// Every NFA is a special labelled graph.
pub trait Nfa<T: GraphLabel>: LabelGraph<T> {
/// Remove all empty transitions from the nondeterministic finite
/// automaton.
@@ -89,6 +88,7 @@ pub trait Nfa<T: GraphLabel>: LabelGraph<T> {
}
pub mod default;
+pub mod desrec;
#[cfg(test)]
mod nfa_tests {}