1 files changed, 72 insertions, 19 deletions
diff --git a/nfa/src/lib.rs b/nfa/src/lib.rs
index b1d62b3..31bd69a 100644
--- a/nfa/src/lib.rs
+++ b/nfa/src/lib.rs
@@ -14,7 +14,7 @@ use core::fmt::Display;
 
 use std::ops::{Deref, DerefMut};
 
-use graph::{Graph, GraphLabel, LabelGraph};
+use graph::{Graph, GraphLabel, LabelExtGraph};
 
 use error::Error;
 
@@ -52,14 +52,14 @@ pub trait Regex<T: GraphLabel>: Graph + Display + Clone {
         }
     }
 
-    // TODO: add functions that determine if certain "positions" in a
+    // TODO: Add functions that determine if certain "positions" in a
     // regular language satisfy some special properties, like at the
     // end of a Kleene star, or at the end of a regular language, et
     // cetera.  These might be needed later.
 }
 
-/// Since Option<T> does not inherit the Display from T, we wrap it to
-/// provide an automatic implementation of Display.
+/// Since `Option<T>` does not inherit the `Display` from `T`, we wrap
+/// it to provide an automatic implementation of `Display`.
 #[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Hash)]
 pub struct DOption<T>(Option<T>);
 
@@ -120,42 +120,98 @@ pub enum SoC<T> {
 /// The expected behvaiour of a nondeterministic finite automaton.
 ///
 /// Every NFA is a special labelled graph.
-pub trait Nfa<T: GraphLabel>: LabelGraph<T> {
+pub trait Nfa<T: GraphLabel>: LabelExtGraph<T> {
+    // TODO: This trait should have a type for the labels.
+
     /// Remove all empty transitions from the nondeterministic finite
     /// automaton.
-    fn remove_epsilon(&mut self) -> Result<(), Error>;
+    fn remove_epsilon<F>(&mut self, f: F) -> Result<(), Error>
+    where
+        F: Fn(T) -> bool;
 
     /// Return a state-minimal NFA equivalent with the original one.
     ///
     /// This is not required.  It is just to allow me to experiment
     /// with NFA optimization algorithms.
-    fn minimize(&self) -> Result<Self, Error> {
+    fn minimize(&self) -> Result<Box<Self>, Error> {
         Err(Error::UnsupportedOperation)
     }
 
+    /// Check every node or edge by a given predicate.
+    ///
+    /// This should also verify that every node and edge has correct
+    /// indices, so that we can safely use `unwrap` later.  A
+    /// consequence is that, if one only wants to check the validity
+    /// of nodes and edges, one can pass a function that always
+    /// returns `true`.
+    #[inline]
+    fn labels_satisfy(&self, f: impl Fn(T) -> bool) -> Result<bool, Error> {
+        let nodes_len = self.nodes_len();
+        let mut result = true;
+
+        for node in self.nodes() {
+            for (label, children_iter) in self.labels_of(node)? {
+                for child in children_iter {
+                    if child >= nodes_len {
+                        dbg!(node, label);
+                        return Err(graph::error::Error::IndexOutOfBounds(child, nodes_len).into());
+                    }
+                }
+
+                // NOTE: Avoid executing `f` if `result` is already
+                // false.  But still proceed in verifying that nodes
+                // and edges are correct: the correctness of nodes and
+                // edges is more important than the function execution
+                // results, as the former directly affects the safety
+                // of many algorithms.
+                if result && !f(*label) {
+                    dbg!(node, label);
+                    result = false;
+                }
+            }
+        }
+
+        Ok(result)
+    }
+
     /// When we convert a regular expression to a nondeterministic
     /// finite automaton, the label should be optional, so we use a
     /// different type for the result type.
     type FromRegex<S: GraphLabel + Display + Default>;
 
-    /// Build a nondeterministic finite automaton out of a set of
-    /// regular expressions.
+    /// Build a nondeterministic finite automaton out of a set
+    /// `regexps` of regular expressions.
+    ///
+    /// The `sub_pred` is a predicate function that returns an
+    /// indication whether to carry the value around or to substitute
+    /// by another regular language in the given set.
+    ///
+    /// The `default` parameter specifies the label of a default edge,
+    /// that goes from a node to another, both of which are not
+    /// associated with the given regular languages.
     ///
-    /// The SUB_PRED is a predicate function that returns an optional
-    /// index for a label.  If it returns some index, then this means
-    /// we should substitute the index-th regular expression in the
-    /// set, whereever that label occurs in the set of regular
-    /// expressions.
+    /// This function should perform Thompson's construction,
+    /// basically.
     fn to_nfa(
         regexps: &[impl Regex<RegexType<T>>],
+        // TODO: The transformation should produce more information.
         sub_pred: impl Fn(T) -> Result<SoC<T>, Error>,
+        default: Option<T>,
     ) -> Result<Self::FromRegex<DOption<T>>, Error>;
 
     /// Remove all dead states from the nondeterministic finite
     /// automaton.
     ///
-    /// A state is dead if there are no edges going to the state.
-    fn remove_dead(&mut self) -> Result<(), Error>;
+    /// A state is dead if there are no edges going to the state, and
+    /// if it is not reserved.
+    ///
+    /// # Note
+    ///
+    /// Actually an implementation is allowed to just remove all edges
+    /// out of the dead nodes.  Then these nodes are considered gone
+    /// from the graph, and we don't need to re-index the existing
+    /// edges.  We can call this "a poor man's removal of nodes".
+    fn remove_dead(&mut self, reserve: impl Fn(usize) -> bool) -> Result<(), Error>;
 
     /// For each edge from A to B whose edge is considered nullable by
     /// a function `f`, and for every edge from B to C, add an edge
@@ -169,6 +225,3 @@ pub trait Nfa<T: GraphLabel>: LabelGraph<T> {
 
 pub mod default;
 pub mod desrec;
-
-#[cfg(test)]
-mod nfa_tests {}