summaryrefslogtreecommitdiff
path: root/nfa/src/default/nfa.rs
diff options
context:
space:
mode:
Diffstat (limited to 'nfa/src/default/nfa.rs')
-rw-r--r--nfa/src/default/nfa.rs160
1 files changed, 130 insertions, 30 deletions
diff --git a/nfa/src/default/nfa.rs b/nfa/src/default/nfa.rs
index 229ba4d..e642218 100644
--- a/nfa/src/default/nfa.rs
+++ b/nfa/src/default/nfa.rs
@@ -1,17 +1,8 @@
//! This file provides a default implementation of NFA.
-// TODO: The current focus of the project is to understand the growth
-// rate of the algorithm, to know whether I made a mistake in the
-// previous iteration of the implementation, or the algorithm is not
-// as fast as the author estimated, which is not quite likely, of
-// course.
-//
-// Thus I shall establish a friendly interface that allows me to view
-// and debug the atomic languages and the languages, transparently.
-
use graph::{
builder::Builder, error::Error as GError, labelled::DLGBuilder, DLGraph, Graph, GraphLabel,
- LabelGraph,
+ LabelExtGraph, LabelGraph,
};
use crate::{default::regex::RegexType, error::Error, DOption, Nfa, Regex, SoC};
@@ -32,6 +23,11 @@ impl<T: GraphLabel + Display> Default for DefaultNFA<T> {
}
}
+// Some boiler-plate delegation implementations.
+//
+// I deliberately avoid using [`Deref`][std::ops::Deref] here, as I do
+// not want to dereference an NFA to a Graph.
+
impl<T: GraphLabel + Display> Graph for DefaultNFA<T> {
type Iter<'a> = <DLGraph<T> as Graph>::Iter<'a> where T: 'a;
@@ -81,6 +77,11 @@ impl<T: GraphLabel + Display> LabelGraph<T> for DefaultNFA<T> {
type LabelIter<'a> = <DLGraph<T> as LabelGraph<T>>::LabelIter<'a> where T: 'a;
+ type EdgeLabelIter<'a> = <DLGraph<T> as LabelGraph<T>>::EdgeLabelIter<'a>
+ where
+ Self: 'a,
+ T: 'a;
+
#[inline]
fn vertex_label(&self, node_id: usize) -> Result<Option<T>, GError> {
if self.has_node(node_id) {
@@ -91,7 +92,7 @@ impl<T: GraphLabel + Display> LabelGraph<T> for DefaultNFA<T> {
}
#[inline]
- fn edge_label(&self, source: usize, target: usize) -> Result<Vec<T>, GError> {
+ fn edge_label(&self, source: usize, target: usize) -> Result<Self::EdgeLabelIter<'_>, GError> {
self.graph.edge_label(source, target)
}
@@ -115,12 +116,24 @@ impl<T: GraphLabel + Display> LabelGraph<T> for DefaultNFA<T> {
}
}
+impl<T: GraphLabel + Display> LabelExtGraph<T> for DefaultNFA<T> {
+ #[inline]
+ fn extend(&mut self, edges: impl IntoIterator<Item = (T, usize)>) -> Result<usize, GError> {
+ self.graph.extend(edges)
+ }
+}
+
+// TODO: Define a type for the labels of DefaultNFA.
+
impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
type FromRegex<S: GraphLabel + Display + Default> = DefaultNFA<S>;
+ // Reminder: This is an adopted version of Thompson's
+ // construction.
fn to_nfa(
regexps: &[impl Regex<RegexType<T>>],
sub_pred: impl Fn(T) -> Result<SoC<T>, Error>,
+ default: Option<T>,
) -> Result<Self::FromRegex<DOption<T>>, Error> {
let total_regexps_len: usize = regexps.iter().map(Graph::nodes_len).sum();
@@ -128,7 +141,7 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
return Ok(Default::default());
}
- // We reserve two more rooms for later uses.
+ // We reserve two more rooms for the default edge.
let nfa_len = total_regexps_len * 2 + 2;
let mut builder: DLGBuilder<DOption<T>> = Builder::with_capacity(nfa_len);
@@ -141,20 +154,18 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
builder.add_vertex();
}
+ // add a default edge
+ builder.add_edge(nfa_len - 2, nfa_len - 1, DOption(default))?;
+
let accumulators: Vec<usize> = {
- let mut result = Vec::with_capacity(regexps.len() + 1);
+ let mut result = Vec::with_capacity(regexps.len());
result.push(0);
- let mut accumulator = 0;
-
- for regexp in regexps.iter() {
- accumulator += regexp.nodes_len() * 2;
- result.push(accumulator);
+ for regexp in regexps.iter().take(regexps.len() - 1) {
+ result.push(result.last().unwrap() + regexp.nodes_len() * 2);
}
- result.pop();
-
result
};
@@ -313,9 +324,8 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
SoC::Sub(sub_non) => {
// a non-terminal
- let sub_offset = get_offset_safe!(sub_non);
- let sub_nfa_start = sub_offset + 2 * sub_non;
- let sub_nfa_end = sub_offset + 2 * sub_non + 1;
+ let sub_nfa_start = get_offset_safe!(sub_non);
+ let sub_nfa_end = sub_nfa_start + 1;
builder.add_edge(nfa_start, sub_nfa_start, empty_label)?;
builder.add_edge(sub_nfa_end, nfa_end, empty_label)?;
@@ -336,14 +346,102 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
Ok(DefaultNFA { graph })
}
- fn remove_epsilon(&mut self) -> Result<(), Error> {
- todo!()
+ fn remove_epsilon<F: Fn(T) -> bool>(&mut self, f: F) -> Result<(), Error> {
+ let mut builder = self.graph.builder_ref();
+
+ let mut updated = true;
+
+ let nodes_len = self.nodes_len();
+
+ while updated {
+ updated = false;
+
+ let mut to_add: Vec<(usize, usize, T)> = Vec::with_capacity(nodes_len);
+
+ for source in builder.nodes() {
+ for (label, target_iter) in builder.labels_of(source)? {
+ if f(*label) {
+ // empty label found
+ for target in target_iter {
+ for (label, children_iter) in builder.labels_of(target)? {
+ for child in children_iter {
+ if !builder.has_edge_label(source, label, child)? {
+ updated = true;
+
+ to_add.push((source, child, *label));
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ for (source, target, label) in to_add.into_iter() {
+ builder.add_edge(source, target, label)?;
+ }
+ }
+
+ // Then remove all epsilon edges
+
+ let sources_and_targets: Vec<_> = builder.edges().collect();
+
+ for (source, target) in sources_and_targets.into_iter() {
+ builder.remove_edge(source, target, &f)?;
+ }
+
+ self.graph = builder.build();
+
+ Ok(())
}
- fn remove_dead(&mut self) -> Result<(), Error> {
- todo!()
+ fn remove_dead(&mut self, reserve: impl Fn(usize) -> bool) -> Result<(), Error> {
+ let mut builder = self.graph.builder_ref();
+
+ let mut changed = true;
+
+ // Use a counting vector to avoid calculating which nodes are
+ // dead at each iteration.
+ let mut target_counts: Vec<usize> =
+ std::iter::repeat(0).take(self.graph.nodes_len()).collect();
+
+ for (_, target) in self.graph.edges() {
+ *target_counts
+ .get_mut(target)
+ .ok_or(Error::UnknownNode(target))? += 1;
+ }
+
+ while changed {
+ changed = false;
+
+ for node in self.nodes() {
+ let deadp = !matches!(target_counts.get(node).copied(), Some(n) if n > 0);
+
+ if deadp && !reserve(node) {
+ let to_remove: Vec<usize> = builder.children_of(node)?.collect();
+
+ if !to_remove.is_empty() {
+ changed = true;
+
+ for target in to_remove {
+ builder.remove_edge(node, target, |_| true)?;
+ *target_counts
+ .get_mut(target)
+ .ok_or(Error::UnknownNode(target))? -= 1;
+ }
+ }
+ }
+ }
+ }
+
+ self.graph = builder.build();
+
+ Ok(())
}
+ // REVIEW: Combine nulling and remove_epsilon into the same
+ // method, or not.
+
fn nulling(&mut self, f: impl Fn(T) -> bool) -> Result<(), Error> {
let mut updated = true;
let mut builder = self.graph.builder_ref();
@@ -396,7 +494,6 @@ impl<T: GraphLabel + Display + Debug> Display for DefaultNFA<T> {
#[cfg(test)]
mod test_to_nfa {
- #![allow(unused_imports)]
use super::*;
use crate::SoC;
@@ -446,8 +543,11 @@ mod test_to_nfa {
println!("regex = {regex}");
- let nfa: DefaultNFA<DOption<char>> =
- DefaultNFA::to_nfa(&[regex], |label| Ok(SoC::Carry(label)))?;
+ let nfa: DefaultNFA<DOption<char>> = DefaultNFA::to_nfa(
+ &[regex],
+ |label| Ok(SoC::Carry(label)),
+ Some(char::default()),
+ )?;
nfa.print_viz("nfa.gv")?;