1 files changed, 142 insertions, 13 deletions
diff --git a/nfa/src/lib.rs b/nfa/src/lib.rs
index 31bd69a..c1906e1 100644
--- a/nfa/src/lib.rs
+++ b/nfa/src/lib.rs
@@ -60,8 +60,12 @@ pub trait Regex<T: GraphLabel>: Graph + Display + Clone {
 
 /// Since `Option<T>` does not inherit the `Display` from `T`, we wrap
 /// it to provide an automatic implementation of `Display`.
+///
+/// # Convert to `Option`
+///
+/// One can dereference a `DOption` to obtain an `Option`.
 #[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Hash)]
-pub struct DOption<T>(Option<T>);
+pub struct DOption<T>(pub Option<T>);
 
 impl<T> Default for DOption<T> {
     fn default() -> Self {
@@ -117,17 +121,104 @@ pub enum SoC<T> {
     Carry(T),
 }
 
+/// This type records some information that is obtained from the
+/// process of converting a regular expression to a nondeterministic
+/// finite automaton.
+#[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Hash, Default)]
+pub struct NfaLabel<T: GraphLabel> {
+    /// A terminal or a non-terminal.
+    value: T,
+    /// The corresponding position in the rules.
+    moved: usize,
+    /// Whether this comes from left-linear expansion.
+    left_p: bool,
+}
+
+impl<T: GraphLabel> NfaLabel<T> {
+    /// Construct a new label.
+    #[inline]
+    pub fn new(value: T, moved: usize, left_p: bool) -> Self {
+        Self {
+            value,
+            moved,
+            left_p,
+        }
+    }
+
+    /// Retrieve the value from the label.
+    #[inline]
+    pub fn get_value(&self) -> T {
+        self.value
+    }
+    /// Retrieve the moved position from the label.
+    #[inline]
+    pub fn get_moved(&self) -> usize {
+        self.moved
+    }
+    /// Retrieve whether or not the label comes from left-linear
+    /// expansions.
+    #[inline]
+    pub fn get_left_p(&self) -> bool {
+        self.left_p
+    }
+}
+
+impl<T: GraphLabel> Display for NfaLabel<T> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "edge {} at {}{}", self.value, self.moved, {
+            if self.left_p {
+                ", by left"
+            } else {
+                ""
+            }
+        })
+    }
+}
+
+/// A convenient alias of the information of two edges.
+///
+/// If the tuple is (a, b, c, la, lb), then the first edge goes from a
+/// to b, is labelled la, and the second edge goes from b to c, and is
+/// labelled by lb.
+#[derive(Debug, Clone, Copy)]
+pub struct TwoEdges<T: Copy>(usize, usize, usize, T, T);
+
+impl<T: Copy> TwoEdges<T> {
+    /// Extract the first edge.
+    pub fn first_edge(&self) -> (usize, usize, T) {
+        (self.0, self.1, self.3)
+    }
+
+    /// Extract the second edge.
+    pub fn second_edge(&self) -> (usize, usize, T) {
+        (self.1, self.2, self.4)
+    }
+}
+
+/// The type of nondeterministic finite automata that is obtained from
+/// a regular expression, via the method [`to_nfa`][Nfa::to_nfa].
+pub type LabelType<T> = NfaLabel<DOption<T>>;
+
 /// The expected behvaiour of a nondeterministic finite automaton.
 ///
 /// Every NFA is a special labelled graph.
 pub trait Nfa<T: GraphLabel>: LabelExtGraph<T> {
-    // TODO: This trait should have a type for the labels.
+    /// When we convert a regular expression to a nondeterministic
+    /// finite automaton, the label should be optional, so we use a
+    /// different type for the result type.
+    type FromRegex<S: GraphLabel + Display + Default>;
 
+    // FIXME: This should not be needed.
     /// Remove all empty transitions from the nondeterministic finite
     /// automaton.
-    fn remove_epsilon<F>(&mut self, f: F) -> Result<(), Error>
+    #[inline]
+    fn remove_epsilon<F>(&mut self, _f: F) -> Result<(), Error>
     where
-        F: Fn(T) -> bool;
+        F: FnMut(T) -> bool,
+    {
+        // self.closure(f, true, |two_edges| two_edges.second_edge().2)
+        unimplemented!("deprecated")
+    }
 
     /// Return a state-minimal NFA equivalent with the original one.
     ///
@@ -174,11 +265,6 @@ pub trait Nfa<T: GraphLabel>: LabelExtGraph<T> {
         Ok(result)
     }
 
-    /// When we convert a regular expression to a nondeterministic
-    /// finite automaton, the label should be optional, so we use a
-    /// different type for the result type.
-    type FromRegex<S: GraphLabel + Display + Default>;
-
     /// Build a nondeterministic finite automaton out of a set
     /// `regexps` of regular expressions.
     ///
@@ -194,10 +280,9 @@ pub trait Nfa<T: GraphLabel>: LabelExtGraph<T> {
     /// basically.
     fn to_nfa(
         regexps: &[impl Regex<RegexType<T>>],
-        // TODO: The transformation should produce more information.
         sub_pred: impl Fn(T) -> Result<SoC<T>, Error>,
         default: Option<T>,
-    ) -> Result<Self::FromRegex<DOption<T>>, Error>;
+    ) -> Result<Self::FromRegex<LabelType<T>>, Error>;
 
     /// Remove all dead states from the nondeterministic finite
     /// automaton.
@@ -211,8 +296,9 @@ pub trait Nfa<T: GraphLabel>: LabelExtGraph<T> {
     /// out of the dead nodes.  Then these nodes are considered gone
     /// from the graph, and we don't need to re-index the existing
     /// edges.  We can call this "a poor man's removal of nodes".
-    fn remove_dead(&mut self, reserve: impl Fn(usize) -> bool) -> Result<(), Error>;
+    fn remove_dead(&mut self, reserve: impl FnMut(usize) -> bool) -> Result<(), Error>;
 
+    // FIXME: This should not be needed.
     /// For each edge from A to B whose edge is considered nullable by
     /// a function `f`, and for every edge from B to C, add an edge
     /// from A to C.
@@ -220,7 +306,50 @@ pub trait Nfa<T: GraphLabel>: LabelExtGraph<T> {
     /// This is needed specifically by the algorithm to produce a set
     /// of atomic languages that represent "null-closed" derived
     /// languages.
-    fn nulling(&mut self, f: impl Fn(T) -> bool) -> Result<(), Error>;
+    #[inline]
+    fn nulling(&mut self, _f: impl FnMut(T) -> bool) -> Result<(), Error> {
+        // self.closure(f, false, |two_edges| two_edges.second_edge().2)
+        unimplemented!("deprecated")
+    }
+
+    /// Return the *closure* of the nondeterministic finite automaton.
+    ///
+    /// # Definition
+    ///
+    /// The closure of a nondeterministic finite automaton N is
+    /// defined as the unique *minimal* nondeterministic finite
+    /// automaton N+ that can be obtained by adjoining some edges to N
+    /// such that if there are edges a -> b and b -> c, and if the
+    /// edge a -> b is deemed as *nullable* by some function, then
+    /// there is an edge a -> c, where the minimality is the
+    /// minimality of the set of edges: if there is another such
+    /// nondeterministic finite automaton M satisfying the above
+    /// property, then the set of edges of N+ is a subset of the set
+    /// of edges of M.
+    ///
+    /// # Remove edges afterwards
+    ///
+    /// If `remove_after_p` is true, remove all those nullable edges.
+    ///
+    /// # Transformation of labels
+    ///
+    /// We can apply a transformer to labels, to be able to combine
+    /// labels in non-trivial ways.  If one just wants the *new* label
+    /// that is the label of the edge from b to c in the above
+    /// definition, one can use the function that sends `two_edges` to
+    /// `two_edges.second_edge().2`.
+    ///
+    /// # Error
+    ///
+    /// The function should emit errors if the edges of the
+    /// nondeterministic finite automaton point to invalid nodes.
+    fn closure(
+        &mut self,
+        predicate: impl FnMut(T) -> bool,
+        remove_after_p: bool,
+        transform: impl FnMut(TwoEdges<T>) -> T,
+        remove_predicate: impl FnMut(T) -> bool,
+    ) -> Result<(), Error>;
 }
 
 pub mod default;