#![warn(missing_docs)]
//! This crate implements non-deterministic finite automata.
//!
//! By default this uses the graph from the crate [`graph`]. To use
//! another external graph, add a module in which the external graph
//! implements the Graph trait from the [`graph`] crate, and then use
//! that external graph type as [`Graph`][graph::Graph] here.

pub mod error;

extern crate graph;

use core::fmt::Display;

use std::ops::{Deref, DerefMut};

use graph::{Graph, GraphLabel, LabelExtGraph};

use error::Error;

pub use desrec::DesRec;

use default::regex::RegexType;

/// The expected behaviour of a regular language.
///
/// Nondeterministic finite automata are equivalent to regular
/// languages. Since regular languages are easier to understand for a
/// human being, nondeterministic finite automata include the data for
/// the equivalent regular languages.
pub trait Regex<T: GraphLabel>: Graph + Display + Clone {
    /// Return the label of a vertex, or an error if the node is
    /// invalid.
    fn vertex_label(&self, node_id: usize) -> Result<T, Error>;

    #[inline]
    /// Return the root node of the regular language.
    ///
    /// Implementations can follow different conventions for the root
    /// node, and hence this function.
    ///
    /// If the regular language is empty, the implementation should
    /// return `None`.
    ///
    /// The default implementation uses the convention that the root
    /// node is always the first node.
    fn root(&self) -> Option<usize> {
        if self.is_empty() {
            None
        } else {
            Some(0)
        }
    }

    // TODO: Add functions that determine if certain "positions" in a
    // regular language satisfy some special properties, like at the
    // end of a Kleene star, or at the end of a regular language, et
    // cetera. These might be needed later.
}

/// Since `Option<T>` does not inherit the `Display` implementation
/// from `T`, we wrap it to provide an automatic implementation of
/// `Display`.
#[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Hash)]
pub struct DOption<T>(Option<T>);

impl<T> Default for DOption<T> {
    fn default() -> Self {
        Self(None)
    }
}

impl<T> Deref for DOption<T> {
    type Target = Option<T>;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl<T> DerefMut for DOption<T> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}

impl<T: Display> Display for DOption<T> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self.deref() {
            Some(value) => Display::fmt(value, f),
            None => write!(f, "ε"),
        }
    }
}

/// Substitute or Carry
///
/// This enumeration indicates whether a label from a regular
/// expression should be substituted by another regular expression or
/// carried around in the resulting nondeterministic finite automaton,
/// in the process of the [`to_nfa`][Nfa::to_nfa] function.
///
/// # Transform labels
///
/// The label that is returned to be carried can be different from the
/// original label, as a way to transform the labels.
///
/// # Remark on the abbreviation
///
/// It happens "by chance" that this abbreviation coincides with the
/// abbreviation of "system on chip". Since I know nothing about this
/// topic, this is just a meaningless pun.
#[derive(Debug, Copy, Clone)]
pub enum SoC<T> {
    /// To be substituted by another regular expression.
    Sub(usize),
    /// To carry around this label.
    Carry(T),
}
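// The two helper types above are easiest to see in a small example.
// The following test module is an illustrative sketch, not part of
// the crate's API: it only assumes the `DOption` and `SoC`
// definitions above, and the module and function names are made up
// for the example.
#[cfg(test)]
mod helper_examples {
    use super::*;

    // `DOption` delegates to the inner value's `Display` when the
    // value is present, and prints "ε" when it is absent.
    #[test]
    fn doption_displays_epsilon_for_none() {
        let empty: DOption<usize> = Default::default();
        assert_eq!(empty.to_string(), "ε");
        assert_eq!(DOption(Some(42usize)).to_string(), "42");
    }

    // A toy closure in the shape expected by the `sub_pred` parameter
    // of `to_nfa` below: substitute the label 0 by the regular
    // expression with index 0, and carry every other label unchanged.
    #[test]
    fn sub_pred_carries_or_substitutes() {
        let sub_pred = |label: usize| -> Result<SoC<usize>, Error> {
            if label == 0 {
                Ok(SoC::Sub(0))
            } else {
                Ok(SoC::Carry(label))
            }
        };

        assert!(matches!(sub_pred(0), Ok(SoC::Sub(0))));
        assert!(matches!(sub_pred(7), Ok(SoC::Carry(7))));
    }
}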
/// The expected behaviour of a nondeterministic finite automaton.
///
/// Every NFA is a special labelled graph.
pub trait Nfa<T: GraphLabel>: LabelExtGraph<T> {
    // TODO: This trait should have a type for the labels.

    /// Remove all empty transitions from the nondeterministic finite
    /// automaton.
    fn remove_epsilon<F>(&mut self, f: F) -> Result<(), Error>
    where
        F: Fn(T) -> bool;

    /// Return a state-minimal NFA equivalent to the original one.
    ///
    /// This is not required. It is just to allow me to experiment
    /// with NFA optimization algorithms.
    fn minimize(&self) -> Result<Box<Self>, Error> {
        Err(Error::UnsupportedOperation)
    }

    /// Check every node and edge with a given predicate.
    ///
    /// This should also verify that every node and edge has correct
    /// indices, so that we can safely use `unwrap` later. A
    /// consequence is that, if one only wants to check the validity
    /// of nodes and edges, one can pass a function that always
    /// returns `true`.
    #[inline]
    fn labels_satisfy(&self, f: impl Fn(T) -> bool) -> Result<bool, Error> {
        let nodes_len = self.nodes_len();
        let mut result = true;

        for node in self.nodes() {
            for (label, children_iter) in self.labels_of(node)? {
                for child in children_iter {
                    if child >= nodes_len {
                        dbg!(node, label);
                        return Err(graph::error::Error::IndexOutOfBounds(child, nodes_len).into());
                    }
                }

                // NOTE: Avoid executing `f` if `result` is already
                // false. But still proceed in verifying that nodes
                // and edges are correct: the correctness of nodes and
                // edges is more important than the function execution
                // results, as the former directly affects the safety
                // of many algorithms.
                if result && !f(*label) {
                    dbg!(node, label);
                    result = false;
                }
            }
        }

        Ok(result)
    }

    /// When we convert a regular expression to a nondeterministic
    /// finite automaton, the label should be optional, so we use a
    /// different type for the result type.
    type FromRegex<S: GraphLabel + Default>;

    /// Build a nondeterministic finite automaton out of a set
    /// `regexps` of regular expressions.
    ///
    /// The `sub_pred` function indicates, for each label, whether to
    /// carry it around or to substitute it by another regular
    /// language in the given set.
    ///
    /// The `default` parameter specifies the label of a default edge
    /// that goes from one node to another, neither of which is
    /// associated with the given regular languages.
    ///
    /// This function should perform Thompson's construction,
    /// basically.
    fn to_nfa(
        regexps: &[impl Regex<RegexType<T>>],
        // TODO: The transformation should produce more information.
        sub_pred: impl Fn(T) -> Result<SoC<T>, Error>,
        default: Option<T>,
    ) -> Result<Self::FromRegex<DOption<T>>, Error>;

    /// Remove all dead states from the nondeterministic finite
    /// automaton.
    ///
    /// A state is dead if there are no edges going to the state, and
    /// if it is not reserved.
    ///
    /// # Note
    ///
    /// Actually an implementation is allowed to just remove all edges
    /// out of the dead nodes. Then these nodes are considered gone
    /// from the graph, and we don't need to re-index the existing
    /// edges. We can call this "a poor man's removal of nodes".
    fn remove_dead(&mut self, reserve: impl Fn(usize) -> bool) -> Result<(), Error>;

    /// For each edge from A to B whose label is considered nullable
    /// by a function `f`, and for every edge from B to C, add an edge
    /// from A to C.
    ///
    /// This is needed specifically by the algorithm to produce a set
    /// of atomic languages that represent "null-closed" derived
    /// languages.
    fn nulling(&mut self, f: impl Fn(T) -> bool) -> Result<(), Error>;
}

pub mod default;

pub mod desrec;
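// A minimal sketch of how a caller might chain the clean-up methods
// of the `Nfa` trait once a concrete implementation (for instance one
// from the `default` module) is available. The module and function
// names and the order of the calls are illustrative assumptions, not
// an API or algorithm defined by this crate.
#[cfg(test)]
mod usage_sketch {
    #![allow(dead_code)]

    use super::*;

    // One plausible order: first add the edges implied by nullable
    // labels, then remove the empty transitions themselves, and
    // finally drop the dead states (those with no incoming edges),
    // reserving only the root node 0.
    fn cleanup<T, N>(
        nfa: &mut N,
        is_nullable: impl Fn(T) -> bool,
        is_empty: impl Fn(T) -> bool,
    ) -> Result<(), Error>
    where
        T: GraphLabel,
        N: Nfa<T>,
    {
        nfa.nulling(is_nullable)?;
        nfa.remove_epsilon(is_empty)?;
        nfa.remove_dead(|node| node == 0)
    }
}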