summaryrefslogtreecommitdiff
path: root/nfa/src/default
diff options
context:
space:
mode:
Diffstat (limited to 'nfa/src/default')
-rw-r--r--nfa/src/default/mod.rs2
-rw-r--r--nfa/src/default/nfa.rs160
-rw-r--r--nfa/src/default/regex.rs20
3 files changed, 146 insertions, 36 deletions
diff --git a/nfa/src/default/mod.rs b/nfa/src/default/mod.rs
index b9ee398..115bea5 100644
--- a/nfa/src/default/mod.rs
+++ b/nfa/src/default/mod.rs
@@ -1,5 +1,5 @@
//! This file provides a structure that implements the trait
-//! [`NFA`][super::Nfa] and another that umplements the trait
+//! [`NFA`][super::Nfa] and another that implements the trait
//! [`Regex`][super::Regex].
pub mod nfa;
diff --git a/nfa/src/default/nfa.rs b/nfa/src/default/nfa.rs
index 229ba4d..e642218 100644
--- a/nfa/src/default/nfa.rs
+++ b/nfa/src/default/nfa.rs
@@ -1,17 +1,8 @@
//! This file provides a default implementation of NFA.
-// TODO: The current focus of the project is to understand the growth
-// rate of the algorithm, to know whether I made a mistake in the
-// previous iteration of the implementation, or the algorithm is not
-// as fast as the author estimated, which is not quite likely, of
-// course.
-//
-// Thus I shall establish a friendly interface that allows me to view
-// and debug the atomic languages and the languages, transparently.
-
use graph::{
builder::Builder, error::Error as GError, labelled::DLGBuilder, DLGraph, Graph, GraphLabel,
- LabelGraph,
+ LabelExtGraph, LabelGraph,
};
use crate::{default::regex::RegexType, error::Error, DOption, Nfa, Regex, SoC};
@@ -32,6 +23,11 @@ impl<T: GraphLabel + Display> Default for DefaultNFA<T> {
}
}
+// Some boiler-plate delegation implementations.
+//
+// I deliberately avoid using [`Deref`][std::ops::Deref] here, as I do
+// not want to dereference an NFA to a Graph.
+
impl<T: GraphLabel + Display> Graph for DefaultNFA<T> {
type Iter<'a> = <DLGraph<T> as Graph>::Iter<'a> where T: 'a;
@@ -81,6 +77,11 @@ impl<T: GraphLabel + Display> LabelGraph<T> for DefaultNFA<T> {
type LabelIter<'a> = <DLGraph<T> as LabelGraph<T>>::LabelIter<'a> where T: 'a;
+ type EdgeLabelIter<'a> = <DLGraph<T> as LabelGraph<T>>::EdgeLabelIter<'a>
+ where
+ Self: 'a,
+ T: 'a;
+
#[inline]
fn vertex_label(&self, node_id: usize) -> Result<Option<T>, GError> {
if self.has_node(node_id) {
@@ -91,7 +92,7 @@ impl<T: GraphLabel + Display> LabelGraph<T> for DefaultNFA<T> {
}
#[inline]
- fn edge_label(&self, source: usize, target: usize) -> Result<Vec<T>, GError> {
+ fn edge_label(&self, source: usize, target: usize) -> Result<Self::EdgeLabelIter<'_>, GError> {
self.graph.edge_label(source, target)
}
@@ -115,12 +116,24 @@ impl<T: GraphLabel + Display> LabelGraph<T> for DefaultNFA<T> {
}
}
+impl<T: GraphLabel + Display> LabelExtGraph<T> for DefaultNFA<T> {
+ #[inline]
+ fn extend(&mut self, edges: impl IntoIterator<Item = (T, usize)>) -> Result<usize, GError> {
+ self.graph.extend(edges)
+ }
+}
+
+// TODO: Define a type for the labels of DefaultNFA.
+
impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
type FromRegex<S: GraphLabel + Display + Default> = DefaultNFA<S>;
+ // Reminder: This is an adopted version of Thompson's
+ // construction.
fn to_nfa(
regexps: &[impl Regex<RegexType<T>>],
sub_pred: impl Fn(T) -> Result<SoC<T>, Error>,
+ default: Option<T>,
) -> Result<Self::FromRegex<DOption<T>>, Error> {
let total_regexps_len: usize = regexps.iter().map(Graph::nodes_len).sum();
@@ -128,7 +141,7 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
return Ok(Default::default());
}
- // We reserve two more rooms for later uses.
+ // We reserve two more rooms for the default edge.
let nfa_len = total_regexps_len * 2 + 2;
let mut builder: DLGBuilder<DOption<T>> = Builder::with_capacity(nfa_len);
@@ -141,20 +154,18 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
builder.add_vertex();
}
+ // add a default edge
+ builder.add_edge(nfa_len - 2, nfa_len - 1, DOption(default))?;
+
let accumulators: Vec<usize> = {
- let mut result = Vec::with_capacity(regexps.len() + 1);
+ let mut result = Vec::with_capacity(regexps.len());
result.push(0);
- let mut accumulator = 0;
-
- for regexp in regexps.iter() {
- accumulator += regexp.nodes_len() * 2;
- result.push(accumulator);
+ for regexp in regexps.iter().take(regexps.len() - 1) {
+ result.push(result.last().unwrap() + regexp.nodes_len() * 2);
}
- result.pop();
-
result
};
@@ -313,9 +324,8 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
SoC::Sub(sub_non) => {
// a non-terminal
- let sub_offset = get_offset_safe!(sub_non);
- let sub_nfa_start = sub_offset + 2 * sub_non;
- let sub_nfa_end = sub_offset + 2 * sub_non + 1;
+ let sub_nfa_start = get_offset_safe!(sub_non);
+ let sub_nfa_end = sub_nfa_start + 1;
builder.add_edge(nfa_start, sub_nfa_start, empty_label)?;
builder.add_edge(sub_nfa_end, nfa_end, empty_label)?;
@@ -336,14 +346,102 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
Ok(DefaultNFA { graph })
}
- fn remove_epsilon(&mut self) -> Result<(), Error> {
- todo!()
+ fn remove_epsilon<F: Fn(T) -> bool>(&mut self, f: F) -> Result<(), Error> {
+ let mut builder = self.graph.builder_ref();
+
+ let mut updated = true;
+
+ let nodes_len = self.nodes_len();
+
+ while updated {
+ updated = false;
+
+ let mut to_add: Vec<(usize, usize, T)> = Vec::with_capacity(nodes_len);
+
+ for source in builder.nodes() {
+ for (label, target_iter) in builder.labels_of(source)? {
+ if f(*label) {
+ // empty label found
+ for target in target_iter {
+ for (label, children_iter) in builder.labels_of(target)? {
+ for child in children_iter {
+ if !builder.has_edge_label(source, label, child)? {
+ updated = true;
+
+ to_add.push((source, child, *label));
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ for (source, target, label) in to_add.into_iter() {
+ builder.add_edge(source, target, label)?;
+ }
+ }
+
+ // Then remove all epsilon edges
+
+ let sources_and_targets: Vec<_> = builder.edges().collect();
+
+ for (source, target) in sources_and_targets.into_iter() {
+ builder.remove_edge(source, target, &f)?;
+ }
+
+ self.graph = builder.build();
+
+ Ok(())
}
- fn remove_dead(&mut self) -> Result<(), Error> {
- todo!()
+ fn remove_dead(&mut self, reserve: impl Fn(usize) -> bool) -> Result<(), Error> {
+ let mut builder = self.graph.builder_ref();
+
+ let mut changed = true;
+
+ // Use a counting vector to avoid calculating which nodes are
+ // dead at each iteration.
+ let mut target_counts: Vec<usize> =
+ std::iter::repeat(0).take(self.graph.nodes_len()).collect();
+
+ for (_, target) in self.graph.edges() {
+ *target_counts
+ .get_mut(target)
+ .ok_or(Error::UnknownNode(target))? += 1;
+ }
+
+ while changed {
+ changed = false;
+
+ for node in self.nodes() {
+ let deadp = !matches!(target_counts.get(node).copied(), Some(n) if n > 0);
+
+ if deadp && !reserve(node) {
+ let to_remove: Vec<usize> = builder.children_of(node)?.collect();
+
+ if !to_remove.is_empty() {
+ changed = true;
+
+ for target in to_remove {
+ builder.remove_edge(node, target, |_| true)?;
+ *target_counts
+ .get_mut(target)
+ .ok_or(Error::UnknownNode(target))? -= 1;
+ }
+ }
+ }
+ }
+ }
+
+ self.graph = builder.build();
+
+ Ok(())
}
+ // REVIEW: Combine nulling and remove_epsilon into the same
+ // method, or not.
+
fn nulling(&mut self, f: impl Fn(T) -> bool) -> Result<(), Error> {
let mut updated = true;
let mut builder = self.graph.builder_ref();
@@ -396,7 +494,6 @@ impl<T: GraphLabel + Display + Debug> Display for DefaultNFA<T> {
#[cfg(test)]
mod test_to_nfa {
- #![allow(unused_imports)]
use super::*;
use crate::SoC;
@@ -446,8 +543,11 @@ mod test_to_nfa {
println!("regex = {regex}");
- let nfa: DefaultNFA<DOption<char>> =
- DefaultNFA::to_nfa(&[regex], |label| Ok(SoC::Carry(label)))?;
+ let nfa: DefaultNFA<DOption<char>> = DefaultNFA::to_nfa(
+ &[regex],
+ |label| Ok(SoC::Carry(label)),
+ Some(char::default()),
+ )?;
nfa.print_viz("nfa.gv")?;
diff --git a/nfa/src/default/regex.rs b/nfa/src/default/regex.rs
index 2670e32..c8ad370 100644
--- a/nfa/src/default/regex.rs
+++ b/nfa/src/default/regex.rs
@@ -260,9 +260,7 @@ impl<T: GraphLabel> DefaultRegex<T> {
stack.push(Unseen(0));
while let Some(top) = stack.pop() {
- let node_type = types[top.index()];
-
- // TODO: Do not use unwrap here.
+ let node_type = types.get(top.index()).unwrap();
match node_type {
RegexType::Kleene => {
@@ -350,8 +348,12 @@ impl<T: GraphLabel> DefaultRegex<T> {
.map(Unseen)
.rev(),
);
+
+ if self.graph.is_empty_node(top.index()).unwrap() {
+ write!(result, "ε")?;
+ }
}
- RegexType::Lit(label) => write!(result, "{}", f(label))?,
+ RegexType::Lit(label) => write!(result, "{}", f(*label))?,
}
}
@@ -452,7 +454,15 @@ impl Display for ParseError {
}
}
-impl std::error::Error for ParseError {}
+impl std::error::Error for ParseError {
+ fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+ if let ParseError::Graph(gerr) = self {
+ Some(gerr)
+ } else {
+ None
+ }
+ }
+}
impl From<GError> for ParseError {
fn from(ge: GError) -> Self {