1 files changed, 130 insertions, 30 deletions
diff --git a/nfa/src/default/nfa.rs b/nfa/src/default/nfa.rs
index 229ba4d..e642218 100644
--- a/nfa/src/default/nfa.rs
+++ b/nfa/src/default/nfa.rs
@@ -1,17 +1,8 @@
 //! This file provides a default implementation of NFA.
 
-// TODO: The current focus of the project is to understand the growth
-// rate of the algorithm, to know whether I made a mistake in the
-// previous iteration of the implementation, or the algorithm is not
-// as fast as the author estimated, which is not quite likely, of
-// course.
-//
-// Thus I shall establish a friendly interface that allows me to view
-// and debug the atomic languages and the languages, transparently.
-
 use graph::{
     builder::Builder, error::Error as GError, labelled::DLGBuilder, DLGraph, Graph, GraphLabel,
-    LabelGraph,
+    LabelExtGraph, LabelGraph,
 };
 
 use crate::{default::regex::RegexType, error::Error, DOption, Nfa, Regex, SoC};
@@ -32,6 +23,11 @@ impl<T: GraphLabel + Display> Default for DefaultNFA<T> {
     }
 }
 
+// Some boiler-plate delegation implementations.
+//
+// I deliberately avoid using [`Deref`][std::ops::Deref] here, as I do
+// not want to dereference an NFA to a Graph.
+
 impl<T: GraphLabel + Display> Graph for DefaultNFA<T> {
     type Iter<'a> = <DLGraph<T> as Graph>::Iter<'a> where T: 'a;
 
@@ -81,6 +77,11 @@ impl<T: GraphLabel + Display> LabelGraph<T> for DefaultNFA<T> {
 
     type LabelIter<'a> = <DLGraph<T> as LabelGraph<T>>::LabelIter<'a> where T: 'a;
 
+    type EdgeLabelIter<'a> = <DLGraph<T> as LabelGraph<T>>::EdgeLabelIter<'a>
+    where
+        Self: 'a,
+        T: 'a;
+
     #[inline]
     fn vertex_label(&self, node_id: usize) -> Result<Option<T>, GError> {
         if self.has_node(node_id) {
@@ -91,7 +92,7 @@ impl<T: GraphLabel + Display> LabelGraph<T> for DefaultNFA<T> {
     }
 
     #[inline]
-    fn edge_label(&self, source: usize, target: usize) -> Result<Vec<T>, GError> {
+    fn edge_label(&self, source: usize, target: usize) -> Result<Self::EdgeLabelIter<'_>, GError> {
         self.graph.edge_label(source, target)
     }
 
@@ -115,12 +116,24 @@ impl<T: GraphLabel + Display> LabelGraph<T> for DefaultNFA<T> {
     }
 }
 
+impl<T: GraphLabel + Display> LabelExtGraph<T> for DefaultNFA<T> {
+    #[inline]
+    fn extend(&mut self, edges: impl IntoIterator<Item = (T, usize)>) -> Result<usize, GError> {
+        self.graph.extend(edges)
+    }
+}
+
+// TODO: Define a type for the labels of DefaultNFA.
+
 impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
     type FromRegex<S: GraphLabel + Display + Default> = DefaultNFA<S>;
 
+    // Reminder: This is an adopted version of Thompson's
+    // construction.
     fn to_nfa(
         regexps: &[impl Regex<RegexType<T>>],
         sub_pred: impl Fn(T) -> Result<SoC<T>, Error>,
+        default: Option<T>,
     ) -> Result<Self::FromRegex<DOption<T>>, Error> {
         let total_regexps_len: usize = regexps.iter().map(Graph::nodes_len).sum();
 
@@ -128,7 +141,7 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
             return Ok(Default::default());
         }
 
-        // We reserve two more rooms for later uses.
+        // We reserve two more rooms for the default edge.
         let nfa_len = total_regexps_len * 2 + 2;
 
         let mut builder: DLGBuilder<DOption<T>> = Builder::with_capacity(nfa_len);
@@ -141,20 +154,18 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
             builder.add_vertex();
         }
 
+        // add a default edge
+        builder.add_edge(nfa_len - 2, nfa_len - 1, DOption(default))?;
+
         let accumulators: Vec<usize> = {
-            let mut result = Vec::with_capacity(regexps.len() + 1);
+            let mut result = Vec::with_capacity(regexps.len());
 
             result.push(0);
 
-            let mut accumulator = 0;
-
-            for regexp in regexps.iter() {
-                accumulator += regexp.nodes_len() * 2;
-                result.push(accumulator);
+            for regexp in regexps.iter().take(regexps.len() - 1) {
+                result.push(result.last().unwrap() + regexp.nodes_len() * 2);
             }
 
-            result.pop();
-
             result
         };
 
@@ -313,9 +324,8 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
                             SoC::Sub(sub_non) => {
                                 // a non-terminal
 
-                                let sub_offset = get_offset_safe!(sub_non);
-                                let sub_nfa_start = sub_offset + 2 * sub_non;
-                                let sub_nfa_end = sub_offset + 2 * sub_non + 1;
+                                let sub_nfa_start = get_offset_safe!(sub_non);
+                                let sub_nfa_end = sub_nfa_start + 1;
 
                                 builder.add_edge(nfa_start, sub_nfa_start, empty_label)?;
                                 builder.add_edge(sub_nfa_end, nfa_end, empty_label)?;
@@ -336,14 +346,102 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
         Ok(DefaultNFA { graph })
     }
 
-    fn remove_epsilon(&mut self) -> Result<(), Error> {
-        todo!()
+    fn remove_epsilon<F: Fn(T) -> bool>(&mut self, f: F) -> Result<(), Error> {
+        let mut builder = self.graph.builder_ref();
+
+        let mut updated = true;
+
+        let nodes_len = self.nodes_len();
+
+        while updated {
+            updated = false;
+
+            let mut to_add: Vec<(usize, usize, T)> = Vec::with_capacity(nodes_len);
+
+            for source in builder.nodes() {
+                for (label, target_iter) in builder.labels_of(source)? {
+                    if f(*label) {
+                        // empty label found
+                        for target in target_iter {
+                            for (label, children_iter) in builder.labels_of(target)? {
+                                for child in children_iter {
+                                    if !builder.has_edge_label(source, label, child)? {
+                                        updated = true;
+
+                                        to_add.push((source, child, *label));
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+
+            for (source, target, label) in to_add.into_iter() {
+                builder.add_edge(source, target, label)?;
+            }
+        }
+
+        // Then remove all epsilon edges
+
+        let sources_and_targets: Vec<_> = builder.edges().collect();
+
+        for (source, target) in sources_and_targets.into_iter() {
+            builder.remove_edge(source, target, &f)?;
+        }
+
+        self.graph = builder.build();
+
+        Ok(())
     }
 
-    fn remove_dead(&mut self) -> Result<(), Error> {
-        todo!()
+    fn remove_dead(&mut self, reserve: impl Fn(usize) -> bool) -> Result<(), Error> {
+        let mut builder = self.graph.builder_ref();
+
+        let mut changed = true;
+
+        // Use a counting vector to avoid calculating which nodes are
+        // dead at each iteration.
+        let mut target_counts: Vec<usize> =
+            std::iter::repeat(0).take(self.graph.nodes_len()).collect();
+
+        for (_, target) in self.graph.edges() {
+            *target_counts
+                .get_mut(target)
+                .ok_or(Error::UnknownNode(target))? += 1;
+        }
+
+        while changed {
+            changed = false;
+
+            for node in self.nodes() {
+                let deadp = !matches!(target_counts.get(node).copied(), Some(n) if n > 0);
+
+                if deadp && !reserve(node) {
+                    let to_remove: Vec<usize> = builder.children_of(node)?.collect();
+
+                    if !to_remove.is_empty() {
+                        changed = true;
+
+                        for target in to_remove {
+                            builder.remove_edge(node, target, |_| true)?;
+                            *target_counts
+                                .get_mut(target)
+                                .ok_or(Error::UnknownNode(target))? -= 1;
+                        }
+                    }
+                }
+            }
+        }
+
+        self.graph = builder.build();
+
+        Ok(())
     }
 
+    // REVIEW: Combine nulling and remove_epsilon into the same
+    // method, or not.
+
     fn nulling(&mut self, f: impl Fn(T) -> bool) -> Result<(), Error> {
         let mut updated = true;
         let mut builder = self.graph.builder_ref();
@@ -396,7 +494,6 @@ impl<T: GraphLabel + Display + Debug> Display for DefaultNFA<T> {
 
 #[cfg(test)]
 mod test_to_nfa {
-    #![allow(unused_imports)]
     use super::*;
     use crate::SoC;
 
@@ -446,8 +543,11 @@ mod test_to_nfa {
 
         println!("regex = {regex}");
 
-        let nfa: DefaultNFA<DOption<char>> =
-            DefaultNFA::to_nfa(&[regex], |label| Ok(SoC::Carry(label)))?;
+        let nfa: DefaultNFA<DOption<char>> = DefaultNFA::to_nfa(
+            &[regex],
+            |label| Ok(SoC::Carry(label)),
+            Some(char::default()),
+        )?;
 
         nfa.print_viz("nfa.gv")?;