summaryrefslogtreecommitdiff
path: root/nfa
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2022-11-15 12:01:28 +0800
committerJSDurand <mmemmew@gmail.com>2022-11-15 12:01:28 +0800
commitcb7bcfad4ab0041aaf3fde3185e27ee46bb37788 (patch)
treea4fd99b138b72617b6c4c2b04f5d2655d0fedcc5 /nfa
Initial commit
Basic GNU standard files are added, and we now stop worrying about monadic anamorphisms. The current focus is on testing the correctness of the algorithm, so I need convenient support for manipulating, interpreting, examining, and per chance animating nondeterministic automata.
Diffstat (limited to 'nfa')
-rw-r--r--nfa/Cargo.toml13
-rw-r--r--nfa/src/default/mod.rs123
-rw-r--r--nfa/src/error.rs30
-rw-r--r--nfa/src/lib.rs94
4 files changed, 260 insertions, 0 deletions
diff --git a/nfa/Cargo.toml b/nfa/Cargo.toml
new file mode 100644
index 0000000..b1387b6
--- /dev/null
+++ b/nfa/Cargo.toml
@@ -0,0 +1,13 @@
+[package]
+name = "nfa"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+graph = { path = "../graph", optional = true }
+
+[features]
+default = ["default-graph"]
+default-graph = ["dep:graph"]
diff --git a/nfa/src/default/mod.rs b/nfa/src/default/mod.rs
new file mode 100644
index 0000000..805540b
--- /dev/null
+++ b/nfa/src/default/mod.rs
@@ -0,0 +1,123 @@
+//! This file provides a structure that implements the trait
+//! [`NFA`][super::Nfa].
+//!
+//! It is used as the default implementation.
+
+use graph::{error::Error as GError, DLGraph, Graph, GraphLabel, LabelGraph};
+
+use super::{error::Error, Nfa, Regex};
+
+// TODO: Store the regular expression in the NFA as well.
+//
+// The current focus of the project is to understand the growth rate
+// of the algorithm, to know whether I made a mistake in the previous
+// iteration of the implementation, or the algorithm is not as fast as
+// the author estimated, which is not quite likely, of course.
+//
+// Thus I shall establish a friendly interface that allows me to view
+// and debug the atomic languages and the languages, transparently.
+
+#[non_exhaustive]
+#[derive(Debug)]
+/// Default NFA implementation.
+pub struct DefaultNFA<T: GraphLabel> {
+ graph: DLGraph<T>,
+}
+
+impl<T: GraphLabel> Default for DefaultNFA<T> {
+ fn default() -> Self {
+ let graph = Default::default();
+ Self { graph }
+ }
+}
+
+impl<T: GraphLabel> Graph for DefaultNFA<T> {
+ type Iter<'a> = <DLGraph<T> as Graph>::Iter<'a> where T: 'a;
+
+ #[inline]
+ fn is_empty(&self) -> bool {
+ self.graph.is_empty()
+ }
+
+ #[inline]
+ fn nodes_len(&self) -> usize {
+ self.graph.nodes_len()
+ }
+
+ #[inline]
+ fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, GError> {
+ self.graph.children_of(node_id)
+ }
+
+ #[inline]
+ fn degree(&self, node_id: usize) -> Result<usize, GError> {
+ self.graph.degree(node_id)
+ }
+
+ #[inline]
+ fn is_empty_node(&self, node_id: usize) -> Result<bool, GError> {
+ self.graph.is_empty_node(node_id)
+ }
+
+ #[inline]
+ fn has_edge(&self, source: usize, target: usize) -> Result<bool, GError> {
+ self.graph.has_edge(source, target)
+ }
+}
+
+impl<T: GraphLabel> LabelGraph<T> for DefaultNFA<T> {
+ type Iter<'a> = <DLGraph<T> as LabelGraph<T>>::Iter<'a> where T: 'a;
+
+ type LabelIter<'a> = <DLGraph<T> as LabelGraph<T>>::LabelIter<'a> where T: 'a;
+
+ // TODO: Return the label from the contained regular language.
+ #[inline]
+ fn vertex_label(&self, node_id: usize) -> Result<Option<T>, GError> {
+ if self.has_node(node_id) {
+ todo!()
+ } else {
+ Err(GError::IndexOutOfBounds(node_id, self.nodes_len()))
+ }
+ }
+
+ #[inline]
+ fn edge_label(&self, source: usize, target: usize) -> Result<Vec<T>, GError> {
+ self.graph.edge_label(source, target)
+ }
+
+ #[inline]
+ fn find_children_with_label(
+ &self,
+ node_id: usize,
+ label: &T,
+ ) -> Result<<Self as LabelGraph<T>>::Iter<'_>, GError> {
+ self.graph.find_children_with_label(node_id, label)
+ }
+
+ #[inline]
+ fn labels_of(&self, node_id: usize) -> Result<Self::LabelIter<'_>, GError> {
+ self.graph.labels_of(node_id)
+ }
+}
+
+impl<T: GraphLabel> Nfa<T> for DefaultNFA<T> {
+ #[allow(unused)]
+ fn to_nfa(regex: impl Regex<T>) -> Self {
+ todo!()
+ }
+
+ fn remove_epsilon(&mut self) -> Result<(), Error> {
+ todo!()
+ }
+
+ fn remove_dead(&mut self) -> Result<(), Error> {
+ todo!()
+ }
+
+ fn nulling(&mut self) -> Result<(), Error> {
+ todo!()
+ }
+}
+
+#[cfg(test)]
+mod default_nfa_test {}
diff --git a/nfa/src/error.rs b/nfa/src/error.rs
new file mode 100644
index 0000000..6112878
--- /dev/null
+++ b/nfa/src/error.rs
@@ -0,0 +1,30 @@
+//! This file implements the error type for the crate.
+
+use graph::error::Error as GError;
+
+use std::fmt::{Display, Formatter};
+
+#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd)]
+pub enum Error {
+ UnknownNode(usize),
+ UnsupportedOperation,
+ Graph(GError),
+}
+
+impl Display for Error {
+ fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
+ match self {
+ Error::UnknownNode(id) => write!(f, "unknown node: {id}"),
+ Error::UnsupportedOperation => write!(f, "unsupported operation"),
+ Error::Graph(e) => write!(f, "graph error: {e}"),
+ }
+ }
+}
+
+impl std::error::Error for Error {}
+
+impl From<GError> for Error {
+ fn from(e: GError) -> Self {
+ Self::Graph(e)
+ }
+}
diff --git a/nfa/src/lib.rs b/nfa/src/lib.rs
new file mode 100644
index 0000000..ef207cf
--- /dev/null
+++ b/nfa/src/lib.rs
@@ -0,0 +1,94 @@
+#![warn(missing_docs)]
+//! This crate implements non-deterministic finite automata.
+//!
+//! By default this uses the graph from the crate [`graph`]. To use
+//! another external graph, add a module in which the external graph
+//! implements the Graph trait from the [`graph`] crate, and then use
+//! that external graph type as [`Graph`][graph::Graph] here.
+
+mod error;
+
+extern crate graph;
+
+use core::fmt::Display;
+
+use graph::{Graph, GraphLabel, LabelGraph};
+
+use error::Error;
+
+/// The expected behaviour of a regular language.
+///
+/// Nondeterministic finite automata are equivalent to regular
+/// languages. Since regular languages are easier to understand for a
+/// human being, nondeterministic finite automata include the data for
+/// the equivalent regular languages.
+pub trait Regex<T: GraphLabel>: Graph + Display {
+ /// Return the label of a vertex, or an error if the node is
+ /// invalid.
+ fn vertex_label(&self, node_id: usize) -> Result<T, Error>;
+
+ #[inline]
+ /// Return the root node of the regular language.
+ ///
+ /// Implementations can follow different conventions for the root
+ /// node, and hence this function.
+ ///
+ /// If the regular language is empty, the implementation should
+ /// return None.
+ ///
+ /// The default implementation uses the convention that the root
+ /// node is always the first node.
+ fn root(&self) -> Option<usize> {
+ if self.is_empty() {
+ None
+ } else {
+ Some(0)
+ }
+ }
+
+ // TODO: add functions that determine if certain "positions" in a
+ // regular language satisfy some special properties, like at the
+ // end of a Kleene star, or at the end of a regular language, et
+ // cetera. These will be needed later.
+}
+
+/// The expected behvaiour of a nondeterministic finite automaton.
+///
+/// Every NFA is a special labelled graph, so this trait extends the
+/// [`LabelGraph`][graph::LabelGraph] trait.
+pub trait Nfa<T: GraphLabel>: LabelGraph<T> {
+ /// Remove all empty transitions from the nondeterministic finite
+ /// automaton.
+ fn remove_epsilon(&mut self) -> Result<(), Error>;
+
+ /// Return a state-minimal NFA equivalent with the original one.
+ ///
+ /// This is not required. It is just to allow me to experiment
+ /// with NFA optimization algorithms.
+ fn minimize(&self) -> Result<Self, Error> {
+ Err(Error::UnsupportedOperation)
+ }
+
+ /// Build a nondeterministic finite automaton out of a regular
+ /// language.
+ fn to_nfa(regex: impl Regex<T>) -> Self;
+
+ /// Remove all dead states from the nondeterministic finite
+ /// automaton.
+ ///
+ /// A state is dead if there are no edges going to the state.
+ fn remove_dead(&mut self) -> Result<(), Error>;
+
+ /// For each empty transition from A to B, and for every edge from
+ /// B to C, say, add an edge from A to C.
+ ///
+ /// This is needed specifically by the algorithm to produce a set
+ /// of atomic languages that represent "null-closed" derived
+ /// languages.
+ fn nulling(&mut self) -> Result<(), Error>;
+}
+
+pub mod default;
+
+#[cfg(test)]
+mod nfa_tests {}