5 files changed, 231 insertions, 60 deletions
diff --git a/nfa/src/default/mod.rs b/nfa/src/default/mod.rs
index b9ee398..115bea5 100644
--- a/nfa/src/default/mod.rs
+++ b/nfa/src/default/mod.rs
@@ -1,5 +1,5 @@
 //! This file provides a structure that implements the trait
-//! [`NFA`][super::Nfa] and another that umplements the trait
+//! [`NFA`][super::Nfa] and another that implements the trait
 //! [`Regex`][super::Regex].
 
 pub mod nfa;
diff --git a/nfa/src/default/nfa.rs b/nfa/src/default/nfa.rs
index 229ba4d..e642218 100644
--- a/nfa/src/default/nfa.rs
+++ b/nfa/src/default/nfa.rs
@@ -1,17 +1,8 @@
 //! This file provides a default implementation of NFA.
 
-// TODO: The current focus of the project is to understand the growth
-// rate of the algorithm, to know whether I made a mistake in the
-// previous iteration of the implementation, or the algorithm is not
-// as fast as the author estimated, which is not quite likely, of
-// course.
-//
-// Thus I shall establish a friendly interface that allows me to view
-// and debug the atomic languages and the languages, transparently.
-
 use graph::{
     builder::Builder, error::Error as GError, labelled::DLGBuilder, DLGraph, Graph, GraphLabel,
-    LabelGraph,
+    LabelExtGraph, LabelGraph,
 };
 
 use crate::{default::regex::RegexType, error::Error, DOption, Nfa, Regex, SoC};
@@ -32,6 +23,11 @@ impl<T: GraphLabel + Display> Default for DefaultNFA<T> {
     }
 }
 
+// Some boiler-plate delegation implementations.
+//
+// I deliberately avoid using [`Deref`][std::ops::Deref] here, as I do
+// not want to dereference an NFA to a Graph.
+
 impl<T: GraphLabel + Display> Graph for DefaultNFA<T> {
     type Iter<'a> = <DLGraph<T> as Graph>::Iter<'a> where T: 'a;
 
@@ -81,6 +77,11 @@ impl<T: GraphLabel + Display> LabelGraph<T> for DefaultNFA<T> {
 
     type LabelIter<'a> = <DLGraph<T> as LabelGraph<T>>::LabelIter<'a> where T: 'a;
 
+    type EdgeLabelIter<'a> = <DLGraph<T> as LabelGraph<T>>::EdgeLabelIter<'a>
+    where
+        Self: 'a,
+        T: 'a;
+
     #[inline]
     fn vertex_label(&self, node_id: usize) -> Result<Option<T>, GError> {
         if self.has_node(node_id) {
@@ -91,7 +92,7 @@ impl<T: GraphLabel + Display> LabelGraph<T> for DefaultNFA<T> {
     }
 
     #[inline]
-    fn edge_label(&self, source: usize, target: usize) -> Result<Vec<T>, GError> {
+    fn edge_label(&self, source: usize, target: usize) -> Result<Self::EdgeLabelIter<'_>, GError> {
         self.graph.edge_label(source, target)
     }
 
@@ -115,12 +116,24 @@ impl<T: GraphLabel + Display> LabelGraph<T> for DefaultNFA<T> {
     }
 }
 
+impl<T: GraphLabel + Display> LabelExtGraph<T> for DefaultNFA<T> {
+    #[inline]
+    fn extend(&mut self, edges: impl IntoIterator<Item = (T, usize)>) -> Result<usize, GError> {
+        self.graph.extend(edges)
+    }
+}
+
+// TODO: Define a type for the labels of DefaultNFA.
+
 impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
     type FromRegex<S: GraphLabel + Display + Default> = DefaultNFA<S>;
 
+    // Reminder: This is an adopted version of Thompson's
+    // construction.
     fn to_nfa(
         regexps: &[impl Regex<RegexType<T>>],
         sub_pred: impl Fn(T) -> Result<SoC<T>, Error>,
+        default: Option<T>,
     ) -> Result<Self::FromRegex<DOption<T>>, Error> {
         let total_regexps_len: usize = regexps.iter().map(Graph::nodes_len).sum();
 
@@ -128,7 +141,7 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
             return Ok(Default::default());
         }
 
-        // We reserve two more rooms for later uses.
+        // We reserve two more rooms for the default edge.
         let nfa_len = total_regexps_len * 2 + 2;
 
         let mut builder: DLGBuilder<DOption<T>> = Builder::with_capacity(nfa_len);
@@ -141,20 +154,18 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
             builder.add_vertex();
         }
 
+        // add a default edge
+        builder.add_edge(nfa_len - 2, nfa_len - 1, DOption(default))?;
+
         let accumulators: Vec<usize> = {
-            let mut result = Vec::with_capacity(regexps.len() + 1);
+            let mut result = Vec::with_capacity(regexps.len());
 
             result.push(0);
 
-            let mut accumulator = 0;
-
-            for regexp in regexps.iter() {
-                accumulator += regexp.nodes_len() * 2;
-                result.push(accumulator);
+            for regexp in regexps.iter().take(regexps.len() - 1) {
+                result.push(result.last().unwrap() + regexp.nodes_len() * 2);
             }
 
-            result.pop();
-
             result
         };
 
@@ -313,9 +324,8 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
                             SoC::Sub(sub_non) => {
                                 // a non-terminal
 
-                                let sub_offset = get_offset_safe!(sub_non);
-                                let sub_nfa_start = sub_offset + 2 * sub_non;
-                                let sub_nfa_end = sub_offset + 2 * sub_non + 1;
+                                let sub_nfa_start = get_offset_safe!(sub_non);
+                                let sub_nfa_end = sub_nfa_start + 1;
 
                                 builder.add_edge(nfa_start, sub_nfa_start, empty_label)?;
                                 builder.add_edge(sub_nfa_end, nfa_end, empty_label)?;
@@ -336,14 +346,102 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
         Ok(DefaultNFA { graph })
     }
 
-    fn remove_epsilon(&mut self) -> Result<(), Error> {
-        todo!()
+    fn remove_epsilon<F: Fn(T) -> bool>(&mut self, f: F) -> Result<(), Error> {
+        let mut builder = self.graph.builder_ref();
+
+        let mut updated = true;
+
+        let nodes_len = self.nodes_len();
+
+        while updated {
+            updated = false;
+
+            let mut to_add: Vec<(usize, usize, T)> = Vec::with_capacity(nodes_len);
+
+            for source in builder.nodes() {
+                for (label, target_iter) in builder.labels_of(source)? {
+                    if f(*label) {
+                        // empty label found
+                        for target in target_iter {
+                            for (label, children_iter) in builder.labels_of(target)? {
+                                for child in children_iter {
+                                    if !builder.has_edge_label(source, label, child)? {
+                                        updated = true;
+
+                                        to_add.push((source, child, *label));
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+
+            for (source, target, label) in to_add.into_iter() {
+                builder.add_edge(source, target, label)?;
+            }
+        }
+
+        // Then remove all epsilon edges
+
+        let sources_and_targets: Vec<_> = builder.edges().collect();
+
+        for (source, target) in sources_and_targets.into_iter() {
+            builder.remove_edge(source, target, &f)?;
+        }
+
+        self.graph = builder.build();
+
+        Ok(())
     }
 
-    fn remove_dead(&mut self) -> Result<(), Error> {
-        todo!()
+    fn remove_dead(&mut self, reserve: impl Fn(usize) -> bool) -> Result<(), Error> {
+        let mut builder = self.graph.builder_ref();
+
+        let mut changed = true;
+
+        // Use a counting vector to avoid calculating which nodes are
+        // dead at each iteration.
+        let mut target_counts: Vec<usize> =
+            std::iter::repeat(0).take(self.graph.nodes_len()).collect();
+
+        for (_, target) in self.graph.edges() {
+            *target_counts
+                .get_mut(target)
+                .ok_or(Error::UnknownNode(target))? += 1;
+        }
+
+        while changed {
+            changed = false;
+
+            for node in self.nodes() {
+                let deadp = !matches!(target_counts.get(node).copied(), Some(n) if n > 0);
+
+                if deadp && !reserve(node) {
+                    let to_remove: Vec<usize> = builder.children_of(node)?.collect();
+
+                    if !to_remove.is_empty() {
+                        changed = true;
+
+                        for target in to_remove {
+                            builder.remove_edge(node, target, |_| true)?;
+                            *target_counts
+                                .get_mut(target)
+                                .ok_or(Error::UnknownNode(target))? -= 1;
+                        }
+                    }
+                }
+            }
+        }
+
+        self.graph = builder.build();
+
+        Ok(())
     }
 
+    // REVIEW: Combine nulling and remove_epsilon into the same
+    // method, or not.
+
     fn nulling(&mut self, f: impl Fn(T) -> bool) -> Result<(), Error> {
         let mut updated = true;
         let mut builder = self.graph.builder_ref();
@@ -396,7 +494,6 @@ impl<T: GraphLabel + Display + Debug> Display for DefaultNFA<T> {
 
 #[cfg(test)]
 mod test_to_nfa {
-    #![allow(unused_imports)]
     use super::*;
     use crate::SoC;
 
@@ -446,8 +543,11 @@ mod test_to_nfa {
 
         println!("regex = {regex}");
 
-        let nfa: DefaultNFA<DOption<char>> =
-            DefaultNFA::to_nfa(&[regex], |label| Ok(SoC::Carry(label)))?;
+        let nfa: DefaultNFA<DOption<char>> = DefaultNFA::to_nfa(
+            &[regex],
+            |label| Ok(SoC::Carry(label)),
+            Some(char::default()),
+        )?;
 
         nfa.print_viz("nfa.gv")?;
 
diff --git a/nfa/src/default/regex.rs b/nfa/src/default/regex.rs
index 2670e32..c8ad370 100644
--- a/nfa/src/default/regex.rs
+++ b/nfa/src/default/regex.rs
@@ -260,9 +260,7 @@ impl<T: GraphLabel> DefaultRegex<T> {
         stack.push(Unseen(0));
 
         while let Some(top) = stack.pop() {
-            let node_type = types[top.index()];
-
-            // TODO: Do not use unwrap here.
+            let node_type = types.get(top.index()).unwrap();
 
             match node_type {
                 RegexType::Kleene => {
@@ -350,8 +348,12 @@ impl<T: GraphLabel> DefaultRegex<T> {
                             .map(Unseen)
                             .rev(),
                     );
+
+                    if self.graph.is_empty_node(top.index()).unwrap() {
+                        write!(result, "ε")?;
+                    }
                 }
-                RegexType::Lit(label) => write!(result, "{}", f(label))?,
+                RegexType::Lit(label) => write!(result, "{}", f(*label))?,
             }
         }
 
@@ -452,7 +454,15 @@ impl Display for ParseError {
     }
 }
 
-impl std::error::Error for ParseError {}
+impl std::error::Error for ParseError {
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        if let ParseError::Graph(gerr) = self {
+            Some(gerr)
+        } else {
+            None
+        }
+    }
+}
 
 impl From<GError> for ParseError {
     fn from(ge: GError) -> Self {
diff --git a/nfa/src/error.rs b/nfa/src/error.rs
index ad75077..7160555 100644
--- a/nfa/src/error.rs
+++ b/nfa/src/error.rs
@@ -1,6 +1,6 @@
 //! This file implements the error type for the crate.
 
-use graph::error::Error as GError;
+use graph::error::Error as GraphError;
 
 use core::fmt::{Display, Formatter};
 
@@ -17,7 +17,7 @@ pub enum Error {
     /// There is no root in the underlying regular expression.
     NoRoot,
     /// This error comes from some underlying graph operation.
-    Graph(GError),
+    Graph(GraphError),
     /// A cycle is found when constructing regular expressions.
     Cycle,
 }
@@ -34,10 +34,18 @@ impl Display for Error {
     }
 }
 
-impl std::error::Error for Error {}
+impl std::error::Error for Error {
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        if let Self::Graph(gerr) = self {
+            Some(gerr)
+        } else {
+            None
+        }
+    }
+}
 
-impl From<GError> for Error {
-    fn from(e: GError) -> Self {
+impl From<GraphError> for Error {
+    fn from(e: GraphError) -> Self {
         Self::Graph(e)
     }
 }
diff --git a/nfa/src/lib.rs b/nfa/src/lib.rs
index b1d62b3..31bd69a 100644
--- a/nfa/src/lib.rs
+++ b/nfa/src/lib.rs
@@ -14,7 +14,7 @@ use core::fmt::Display;
 
 use std::ops::{Deref, DerefMut};
 
-use graph::{Graph, GraphLabel, LabelGraph};
+use graph::{Graph, GraphLabel, LabelExtGraph};
 
 use error::Error;
 
@@ -52,14 +52,14 @@ pub trait Regex<T: GraphLabel>: Graph + Display + Clone {
         }
     }
 
-    // TODO: add functions that determine if certain "positions" in a
+    // TODO: Add functions that determine if certain "positions" in a
     // regular language satisfy some special properties, like at the
     // end of a Kleene star, or at the end of a regular language, et
     // cetera.  These might be needed later.
 }
 
-/// Since Option<T> does not inherit the Display from T, we wrap it to
-/// provide an automatic implementation of Display.
+/// Since `Option<T>` does not inherit the `Display` from `T`, we wrap
+/// it to provide an automatic implementation of `Display`.
 #[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Hash)]
 pub struct DOption<T>(Option<T>);
 
@@ -120,42 +120,98 @@ pub enum SoC<T> {
 /// The expected behvaiour of a nondeterministic finite automaton.
 ///
 /// Every NFA is a special labelled graph.
-pub trait Nfa<T: GraphLabel>: LabelGraph<T> {
+pub trait Nfa<T: GraphLabel>: LabelExtGraph<T> {
+    // TODO: This trait should have a type for the labels.
+
     /// Remove all empty transitions from the nondeterministic finite
     /// automaton.
-    fn remove_epsilon(&mut self) -> Result<(), Error>;
+    fn remove_epsilon<F>(&mut self, f: F) -> Result<(), Error>
+    where
+        F: Fn(T) -> bool;
 
     /// Return a state-minimal NFA equivalent with the original one.
     ///
     /// This is not required.  It is just to allow me to experiment
     /// with NFA optimization algorithms.
-    fn minimize(&self) -> Result<Self, Error> {
+    fn minimize(&self) -> Result<Box<Self>, Error> {
         Err(Error::UnsupportedOperation)
     }
 
+    /// Check every node or edge by a given predicate.
+    ///
+    /// This should also verify that every node and edge has correct
+    /// indices, so that we can safely use `unwrap` later.  A
+    /// consequence is that, if one only wants to check the validity
+    /// of nodes and edges, one can pass a function that always
+    /// returns `true`.
+    #[inline]
+    fn labels_satisfy(&self, f: impl Fn(T) -> bool) -> Result<bool, Error> {
+        let nodes_len = self.nodes_len();
+        let mut result = true;
+
+        for node in self.nodes() {
+            for (label, children_iter) in self.labels_of(node)? {
+                for child in children_iter {
+                    if child >= nodes_len {
+                        dbg!(node, label);
+                        return Err(graph::error::Error::IndexOutOfBounds(child, nodes_len).into());
+                    }
+                }
+
+                // NOTE: Avoid executing `f` if `result` is already
+                // false.  But still proceed in verifying that nodes
+                // and edges are correct: the correctness of nodes and
+                // edges is more important than the function execution
+                // results, as the former directly affects the safety
+                // of many algorithms.
+                if result && !f(*label) {
+                    dbg!(node, label);
+                    result = false;
+                }
+            }
+        }
+
+        Ok(result)
+    }
+
     /// When we convert a regular expression to a nondeterministic
     /// finite automaton, the label should be optional, so we use a
     /// different type for the result type.
     type FromRegex<S: GraphLabel + Display + Default>;
 
-    /// Build a nondeterministic finite automaton out of a set of
-    /// regular expressions.
+    /// Build a nondeterministic finite automaton out of a set
+    /// `regexps` of regular expressions.
+    ///
+    /// The `sub_pred` is a predicate function that returns an
+    /// indication whether to carry the value around or to substitute
+    /// by another regular language in the given set.
+    ///
+    /// The `default` parameter specifies the label of a default edge,
+    /// that goes from a node to another, both of which are not
+    /// associated with the given regular languages.
     ///
-    /// The SUB_PRED is a predicate function that returns an optional
-    /// index for a label.  If it returns some index, then this means
-    /// we should substitute the index-th regular expression in the
-    /// set, whereever that label occurs in the set of regular
-    /// expressions.
+    /// This function should perform Thompson's construction,
+    /// basically.
     fn to_nfa(
         regexps: &[impl Regex<RegexType<T>>],
+        // TODO: The transformation should produce more information.
         sub_pred: impl Fn(T) -> Result<SoC<T>, Error>,
+        default: Option<T>,
     ) -> Result<Self::FromRegex<DOption<T>>, Error>;
 
     /// Remove all dead states from the nondeterministic finite
     /// automaton.
     ///
-    /// A state is dead if there are no edges going to the state.
-    fn remove_dead(&mut self) -> Result<(), Error>;
+    /// A state is dead if there are no edges going to the state, and
+    /// if it is not reserved.
+    ///
+    /// # Note
+    ///
+    /// Actually an implementation is allowed to just remove all edges
+    /// out of the dead nodes.  Then these nodes are considered gone
+    /// from the graph, and we don't need to re-index the existing
+    /// edges.  We can call this "a poor man's removal of nodes".
+    fn remove_dead(&mut self, reserve: impl Fn(usize) -> bool) -> Result<(), Error>;
 
     /// For each edge from A to B whose edge is considered nullable by
     /// a function `f`, and for every edge from B to C, add an edge
@@ -169,6 +225,3 @@ pub trait Nfa<T: GraphLabel>: LabelGraph<T> {
 
 pub mod default;
 pub mod desrec;
-
-#[cfg(test)]
-mod nfa_tests {}