From cb7bcfad4ab0041aaf3fde3185e27ee46bb37788 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Tue, 15 Nov 2022 12:01:28 +0800 Subject: Initial commit Basic GNU standard files are added, and we now stop worrying about monadic anamorphisms. The current focus is on testing the correctness of the algorithm, so I need convenient support for manipulating, interpreting, examining, and per chance animating nondeterministic automata. --- nfa/src/default/mod.rs | 123 +++++++++++++++++++++++++++++++++++++++++++++++++ nfa/src/error.rs | 30 ++++++++++++ nfa/src/lib.rs | 94 +++++++++++++++++++++++++++++++++++++ 3 files changed, 247 insertions(+) create mode 100644 nfa/src/default/mod.rs create mode 100644 nfa/src/error.rs create mode 100644 nfa/src/lib.rs (limited to 'nfa/src') diff --git a/nfa/src/default/mod.rs b/nfa/src/default/mod.rs new file mode 100644 index 0000000..805540b --- /dev/null +++ b/nfa/src/default/mod.rs @@ -0,0 +1,123 @@ +//! This file provides a structure that implements the trait +//! [`NFA`][super::Nfa]. +//! +//! It is used as the default implementation. + +use graph::{error::Error as GError, DLGraph, Graph, GraphLabel, LabelGraph}; + +use super::{error::Error, Nfa, Regex}; + +// TODO: Store the regular expression in the NFA as well. +// +// The current focus of the project is to understand the growth rate +// of the algorithm, to know whether I made a mistake in the previous +// iteration of the implementation, or the algorithm is not as fast as +// the author estimated, which is not quite likely, of course. +// +// Thus I shall establish a friendly interface that allows me to view +// and debug the atomic languages and the languages, transparently. + +#[non_exhaustive] +#[derive(Debug)] +/// Default NFA implementation. +pub struct DefaultNFA { + graph: DLGraph, +} + +impl Default for DefaultNFA { + fn default() -> Self { + let graph = Default::default(); + Self { graph } + } +} + +impl Graph for DefaultNFA { + type Iter<'a> = as Graph>::Iter<'a> where T: 'a; + + #[inline] + fn is_empty(&self) -> bool { + self.graph.is_empty() + } + + #[inline] + fn nodes_len(&self) -> usize { + self.graph.nodes_len() + } + + #[inline] + fn children_of(&self, node_id: usize) -> Result, GError> { + self.graph.children_of(node_id) + } + + #[inline] + fn degree(&self, node_id: usize) -> Result { + self.graph.degree(node_id) + } + + #[inline] + fn is_empty_node(&self, node_id: usize) -> Result { + self.graph.is_empty_node(node_id) + } + + #[inline] + fn has_edge(&self, source: usize, target: usize) -> Result { + self.graph.has_edge(source, target) + } +} + +impl LabelGraph for DefaultNFA { + type Iter<'a> = as LabelGraph>::Iter<'a> where T: 'a; + + type LabelIter<'a> = as LabelGraph>::LabelIter<'a> where T: 'a; + + // TODO: Return the label from the contained regular language. + #[inline] + fn vertex_label(&self, node_id: usize) -> Result, GError> { + if self.has_node(node_id) { + todo!() + } else { + Err(GError::IndexOutOfBounds(node_id, self.nodes_len())) + } + } + + #[inline] + fn edge_label(&self, source: usize, target: usize) -> Result, GError> { + self.graph.edge_label(source, target) + } + + #[inline] + fn find_children_with_label( + &self, + node_id: usize, + label: &T, + ) -> Result<>::Iter<'_>, GError> { + self.graph.find_children_with_label(node_id, label) + } + + #[inline] + fn labels_of(&self, node_id: usize) -> Result, GError> { + self.graph.labels_of(node_id) + } +} + +impl Nfa for DefaultNFA { + #[allow(unused)] + fn to_nfa(regex: impl Regex) -> Self { + todo!() + } + + fn remove_epsilon(&mut self) -> Result<(), Error> { + todo!() + } + + fn remove_dead(&mut self) -> Result<(), Error> { + todo!() + } + + fn nulling(&mut self) -> Result<(), Error> { + todo!() + } +} + +#[cfg(test)] +mod default_nfa_test {} diff --git a/nfa/src/error.rs b/nfa/src/error.rs new file mode 100644 index 0000000..6112878 --- /dev/null +++ b/nfa/src/error.rs @@ -0,0 +1,30 @@ +//! This file implements the error type for the crate. + +use graph::error::Error as GError; + +use std::fmt::{Display, Formatter}; + +#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd)] +pub enum Error { + UnknownNode(usize), + UnsupportedOperation, + Graph(GError), +} + +impl Display for Error { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + Error::UnknownNode(id) => write!(f, "unknown node: {id}"), + Error::UnsupportedOperation => write!(f, "unsupported operation"), + Error::Graph(e) => write!(f, "graph error: {e}"), + } + } +} + +impl std::error::Error for Error {} + +impl From for Error { + fn from(e: GError) -> Self { + Self::Graph(e) + } +} diff --git a/nfa/src/lib.rs b/nfa/src/lib.rs new file mode 100644 index 0000000..ef207cf --- /dev/null +++ b/nfa/src/lib.rs @@ -0,0 +1,94 @@ +#![warn(missing_docs)] +//! This crate implements non-deterministic finite automata. +//! +//! By default this uses the graph from the crate [`graph`]. To use +//! another external graph, add a module in which the external graph +//! implements the Graph trait from the [`graph`] crate, and then use +//! that external graph type as [`Graph`][graph::Graph] here. + +mod error; + +extern crate graph; + +use core::fmt::Display; + +use graph::{Graph, GraphLabel, LabelGraph}; + +use error::Error; + +/// The expected behaviour of a regular language. +/// +/// Nondeterministic finite automata are equivalent to regular +/// languages. Since regular languages are easier to understand for a +/// human being, nondeterministic finite automata include the data for +/// the equivalent regular languages. +pub trait Regex: Graph + Display { + /// Return the label of a vertex, or an error if the node is + /// invalid. + fn vertex_label(&self, node_id: usize) -> Result; + + #[inline] + /// Return the root node of the regular language. + /// + /// Implementations can follow different conventions for the root + /// node, and hence this function. + /// + /// If the regular language is empty, the implementation should + /// return None. + /// + /// The default implementation uses the convention that the root + /// node is always the first node. + fn root(&self) -> Option { + if self.is_empty() { + None + } else { + Some(0) + } + } + + // TODO: add functions that determine if certain "positions" in a + // regular language satisfy some special properties, like at the + // end of a Kleene star, or at the end of a regular language, et + // cetera. These will be needed later. +} + +/// The expected behvaiour of a nondeterministic finite automaton. +/// +/// Every NFA is a special labelled graph, so this trait extends the +/// [`LabelGraph`][graph::LabelGraph] trait. +pub trait Nfa: LabelGraph { + /// Remove all empty transitions from the nondeterministic finite + /// automaton. + fn remove_epsilon(&mut self) -> Result<(), Error>; + + /// Return a state-minimal NFA equivalent with the original one. + /// + /// This is not required. It is just to allow me to experiment + /// with NFA optimization algorithms. + fn minimize(&self) -> Result { + Err(Error::UnsupportedOperation) + } + + /// Build a nondeterministic finite automaton out of a regular + /// language. + fn to_nfa(regex: impl Regex) -> Self; + + /// Remove all dead states from the nondeterministic finite + /// automaton. + /// + /// A state is dead if there are no edges going to the state. + fn remove_dead(&mut self) -> Result<(), Error>; + + /// For each empty transition from A to B, and for every edge from + /// B to C, say, add an edge from A to C. + /// + /// This is needed specifically by the algorithm to produce a set + /// of atomic languages that represent "null-closed" derived + /// languages. + fn nulling(&mut self) -> Result<(), Error>; +} + +pub mod default; + +#[cfg(test)] +mod nfa_tests {} -- cgit v1.2.3-18-g5258