#![warn(missing_docs)] //! This crate implements non-deterministic finite automata. //! //! By default this uses the graph from the crate [`graph`]. To use //! another external graph, add a module in which the external graph //! implements the Graph trait from the [`graph`] crate, and then use //! that external graph type as [`Graph`][graph::Graph] here. pub mod error; extern crate graph; use core::fmt::Display; use std::ops::{Deref, DerefMut}; use graph::{Graph, GraphLabel, LabelExtGraph}; use error::Error; pub use desrec::DesRec; use default::regex::RegexType; /// The expected behaviour of a regular language. /// /// Nondeterministic finite automata are equivalent to regular /// languages. Since regular languages are easier to understand for a /// human being, nondeterministic finite automata include the data for /// the equivalent regular languages. pub trait Regex: Graph + Display + Clone { /// Return the label of a vertex, or an error if the node is /// invalid. fn vertex_label(&self, node_id: usize) -> Result; #[inline] /// Return the root node of the regular language. /// /// Implementations can follow different conventions for the root /// node, and hence this function. /// /// If the regular language is empty, the implementation should /// return None. /// /// The default implementation uses the convention that the root /// node is always the first node. fn root(&self) -> Option { if self.is_empty() { None } else { Some(0) } } // TODO: Add functions that determine if certain "positions" in a // regular language satisfy some special properties, like at the // end of a Kleene star, or at the end of a regular language, et // cetera. These might be needed later. } /// Since `Option` does not inherit the `Display` from `T`, we wrap /// it to provide an automatic implementation of `Display`. /// /// # Convert to `Option` /// /// One can dereference a `DOption` to obtain an `Option`. #[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Hash)] pub struct DOption(pub Option); impl Default for DOption { fn default() -> Self { Self(None) } } impl Deref for DOption { type Target = Option; fn deref(&self) -> &Self::Target { &self.0 } } impl DerefMut for DOption { fn deref_mut(&mut self) -> &mut Self::Target { &mut self.0 } } impl Display for DOption { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self.deref() { Some(value) => Display::fmt(value, f), None => write!(f, "ε"), } } } /// Substitute or Carry /// /// This enumeration indicates whether a label from a regular /// expression should be substituted by another regular expression, or /// to be carried around in the resulting nondeterministic finite /// automaton, in the process of the [`to_nfa`][Nfa::to_nfa] function. /// /// # Transform labels /// /// The label that is returned to be carried can be different from the /// original label, as a way to transform the labels. /// /// # Remark on the abbreviation /// /// It happens "by chance" that this abbreviation coincides with the /// abbreviation of "system on chip". Since I know nothing about this /// topic, this is just a meaningless pun. #[derive(Debug, Copy, Clone)] pub enum SoC { /// To be substituted by another regular expression. Sub(usize), /// To carry around this label. Carry(T), } /// This type records some information that is obtained from the /// process of converting a regular expression to a nondeterministic /// finite automaton. #[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Hash, Default)] pub struct NfaLabel { /// A terminal or a non-terminal. value: T, /// The corresponding position in the rules. moved: usize, /// Whether this comes from left-linear expansion. left_p: bool, } impl NfaLabel { /// Construct a new label. #[inline] pub fn new(value: T, moved: usize, left_p: bool) -> Self { Self { value, moved, left_p, } } /// Retrieve the value from the label. #[inline] pub fn get_value(&self) -> T { self.value } /// Retrieve the moved position from the label. #[inline] pub fn get_moved(&self) -> usize { self.moved } /// Retrieve whether or not the label comes from left-linear /// expansions. #[inline] pub fn get_left_p(&self) -> bool { self.left_p } } impl Display for NfaLabel { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "edge {} at {}{}", self.value, self.moved, { if self.left_p { ", by left" } else { "" } }) } } /// A convenient alias of the information of two edges. /// /// If the tuple is (a, b, c, la, lb), then the first edge goes from a /// to b, is labelled la, and the second edge goes from b to c, and is /// labelled by lb. #[derive(Debug, Clone, Copy)] pub struct TwoEdges(usize, usize, usize, T, T); impl TwoEdges { /// Extract the first edge. pub fn first_edge(&self) -> (usize, usize, T) { (self.0, self.1, self.3) } /// Extract the second edge. pub fn second_edge(&self) -> (usize, usize, T) { (self.1, self.2, self.4) } } /// The type of nondeterministic finite automata that is obtained from /// a regular expression, via the method [`to_nfa`][Nfa::to_nfa]. pub type LabelType = NfaLabel>; /// The expected behvaiour of a nondeterministic finite automaton. /// /// Every NFA is a special labelled graph. pub trait Nfa: LabelExtGraph { /// When we convert a regular expression to a nondeterministic /// finite automaton, the label should be optional, so we use a /// different type for the result type. type FromRegex; // FIXME: This should not be needed. /// Remove all empty transitions from the nondeterministic finite /// automaton. #[inline] fn remove_epsilon(&mut self, _f: F) -> Result<(), Error> where F: FnMut(T) -> bool, { // self.closure(f, true, |two_edges| two_edges.second_edge().2) unimplemented!("deprecated") } /// Return a state-minimal NFA equivalent with the original one. /// /// This is not required. It is just to allow me to experiment /// with NFA optimization algorithms. fn minimize(&self) -> Result, Error> { Err(Error::UnsupportedOperation) } /// Check every node or edge by a given predicate. /// /// This should also verify that every node and edge has correct /// indices, so that we can safely use `unwrap` later. A /// consequence is that, if one only wants to check the validity /// of nodes and edges, one can pass a function that always /// returns `true`. #[inline] fn labels_satisfy(&self, f: impl Fn(T) -> bool) -> Result { let nodes_len = self.nodes_len(); let mut result = true; for node in self.nodes() { for (label, children_iter) in self.labels_of(node)? { for child in children_iter { if child >= nodes_len { dbg!(node, label); return Err(graph::error::Error::IndexOutOfBounds(child, nodes_len).into()); } } // NOTE: Avoid executing `f` if `result` is already // false. But still proceed in verifying that nodes // and edges are correct: the correctness of nodes and // edges is more important than the function execution // results, as the former directly affects the safety // of many algorithms. if result && !f(*label) { dbg!(node, label); result = false; } } } Ok(result) } /// Build a nondeterministic finite automaton out of a set /// `regexps` of regular expressions. /// /// The `sub_pred` is a predicate function that returns an /// indication whether to carry the value around or to substitute /// by another regular language in the given set. /// /// The `default` parameter specifies the label of a default edge, /// that goes from a node to another, both of which are not /// associated with the given regular languages. /// /// This function should perform Thompson's construction, /// basically. fn to_nfa( regexps: &[impl Regex>], sub_pred: impl Fn(T) -> Result, Error>, default: Option, ) -> Result>, Error>; /// Remove all dead states from the nondeterministic finite /// automaton. /// /// A state is dead if there are no edges going to the state, and /// if it is not reserved. /// /// # Note /// /// Actually an implementation is allowed to just remove all edges /// out of the dead nodes. Then these nodes are considered gone /// from the graph, and we don't need to re-index the existing /// edges. We can call this "a poor man's removal of nodes". fn remove_dead(&mut self, reserve: impl FnMut(usize) -> bool) -> Result<(), Error>; // FIXME: This should not be needed. /// For each edge from A to B whose edge is considered nullable by /// a function `f`, and for every edge from B to C, add an edge /// from A to C. /// /// This is needed specifically by the algorithm to produce a set /// of atomic languages that represent "null-closed" derived /// languages. #[inline] fn nulling(&mut self, _f: impl FnMut(T) -> bool) -> Result<(), Error> { // self.closure(f, false, |two_edges| two_edges.second_edge().2) unimplemented!("deprecated") } /// Return the *closure* of the nondeterministic finite automaton. /// /// # Definition /// /// The closure of a nondeterministic finite automaton N is /// defined as the unique *minimal* nondeterministic finite /// automaton N+ that can be obtained by adjoining some edges to N /// such that if there are edges a -> b and b -> c, and if the /// edge a -> b is deemed as *nullable* by some function, then /// there is an edge a -> c, where the minimality is the /// minimality of the set of edges: if there is another such /// nondeterministic finite automaton M satisfying the above /// property, then the set of edges of N+ is a subset of the set /// of edges of M. /// /// # Remove edges afterwards /// /// If `remove_after_p` is true, remove all those nullable edges. /// /// # Transformation of labels /// /// We can apply a transformer to labels, to be able to combine /// labels in non-trivial ways. If one just wants the *new* label /// that is the label of the edge from b to c in the above /// definition, one can use the function that sends `two_edges` to /// `two_edges.second_edge().2`. /// /// # Error /// /// The function should emit errors if the edges of the /// nondeterministic finite automaton point to invalid nodes. fn closure( &mut self, predicate: impl FnMut(T) -> bool, remove_after_p: bool, transform: impl FnMut(TwoEdges) -> T, remove_predicate: impl FnMut(T) -> bool, ) -> Result<(), Error>; } pub mod default; pub mod desrec;