diff options
Diffstat (limited to 'nfa/src/default/nfa.rs')
-rw-r--r-- | nfa/src/default/nfa.rs | 160 |
1 files changed, 130 insertions, 30 deletions
diff --git a/nfa/src/default/nfa.rs b/nfa/src/default/nfa.rs index 229ba4d..e642218 100644 --- a/nfa/src/default/nfa.rs +++ b/nfa/src/default/nfa.rs @@ -1,17 +1,8 @@ //! This file provides a default implementation of NFA. -// TODO: The current focus of the project is to understand the growth -// rate of the algorithm, to know whether I made a mistake in the -// previous iteration of the implementation, or the algorithm is not -// as fast as the author estimated, which is not quite likely, of -// course. -// -// Thus I shall establish a friendly interface that allows me to view -// and debug the atomic languages and the languages, transparently. - use graph::{ builder::Builder, error::Error as GError, labelled::DLGBuilder, DLGraph, Graph, GraphLabel, - LabelGraph, + LabelExtGraph, LabelGraph, }; use crate::{default::regex::RegexType, error::Error, DOption, Nfa, Regex, SoC}; @@ -32,6 +23,11 @@ impl<T: GraphLabel + Display> Default for DefaultNFA<T> { } } +// Some boiler-plate delegation implementations. +// +// I deliberately avoid using [`Deref`][std::ops::Deref] here, as I do +// not want to dereference an NFA to a Graph. + impl<T: GraphLabel + Display> Graph for DefaultNFA<T> { type Iter<'a> = <DLGraph<T> as Graph>::Iter<'a> where T: 'a; @@ -81,6 +77,11 @@ impl<T: GraphLabel + Display> LabelGraph<T> for DefaultNFA<T> { type LabelIter<'a> = <DLGraph<T> as LabelGraph<T>>::LabelIter<'a> where T: 'a; + type EdgeLabelIter<'a> = <DLGraph<T> as LabelGraph<T>>::EdgeLabelIter<'a> + where + Self: 'a, + T: 'a; + #[inline] fn vertex_label(&self, node_id: usize) -> Result<Option<T>, GError> { if self.has_node(node_id) { @@ -91,7 +92,7 @@ impl<T: GraphLabel + Display> LabelGraph<T> for DefaultNFA<T> { } #[inline] - fn edge_label(&self, source: usize, target: usize) -> Result<Vec<T>, GError> { + fn edge_label(&self, source: usize, target: usize) -> Result<Self::EdgeLabelIter<'_>, GError> { self.graph.edge_label(source, target) } @@ -115,12 +116,24 @@ impl<T: GraphLabel + Display> LabelGraph<T> for DefaultNFA<T> { } } +impl<T: GraphLabel + Display> LabelExtGraph<T> for DefaultNFA<T> { + #[inline] + fn extend(&mut self, edges: impl IntoIterator<Item = (T, usize)>) -> Result<usize, GError> { + self.graph.extend(edges) + } +} + +// TODO: Define a type for the labels of DefaultNFA. + impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> { type FromRegex<S: GraphLabel + Display + Default> = DefaultNFA<S>; + // Reminder: This is an adopted version of Thompson's + // construction. fn to_nfa( regexps: &[impl Regex<RegexType<T>>], sub_pred: impl Fn(T) -> Result<SoC<T>, Error>, + default: Option<T>, ) -> Result<Self::FromRegex<DOption<T>>, Error> { let total_regexps_len: usize = regexps.iter().map(Graph::nodes_len).sum(); @@ -128,7 +141,7 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> { return Ok(Default::default()); } - // We reserve two more rooms for later uses. + // We reserve two more rooms for the default edge. let nfa_len = total_regexps_len * 2 + 2; let mut builder: DLGBuilder<DOption<T>> = Builder::with_capacity(nfa_len); @@ -141,20 +154,18 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> { builder.add_vertex(); } + // add a default edge + builder.add_edge(nfa_len - 2, nfa_len - 1, DOption(default))?; + let accumulators: Vec<usize> = { - let mut result = Vec::with_capacity(regexps.len() + 1); + let mut result = Vec::with_capacity(regexps.len()); result.push(0); - let mut accumulator = 0; - - for regexp in regexps.iter() { - accumulator += regexp.nodes_len() * 2; - result.push(accumulator); + for regexp in regexps.iter().take(regexps.len() - 1) { + result.push(result.last().unwrap() + regexp.nodes_len() * 2); } - result.pop(); - result }; @@ -313,9 +324,8 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> { SoC::Sub(sub_non) => { // a non-terminal - let sub_offset = get_offset_safe!(sub_non); - let sub_nfa_start = sub_offset + 2 * sub_non; - let sub_nfa_end = sub_offset + 2 * sub_non + 1; + let sub_nfa_start = get_offset_safe!(sub_non); + let sub_nfa_end = sub_nfa_start + 1; builder.add_edge(nfa_start, sub_nfa_start, empty_label)?; builder.add_edge(sub_nfa_end, nfa_end, empty_label)?; @@ -336,14 +346,102 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> { Ok(DefaultNFA { graph }) } - fn remove_epsilon(&mut self) -> Result<(), Error> { - todo!() + fn remove_epsilon<F: Fn(T) -> bool>(&mut self, f: F) -> Result<(), Error> { + let mut builder = self.graph.builder_ref(); + + let mut updated = true; + + let nodes_len = self.nodes_len(); + + while updated { + updated = false; + + let mut to_add: Vec<(usize, usize, T)> = Vec::with_capacity(nodes_len); + + for source in builder.nodes() { + for (label, target_iter) in builder.labels_of(source)? { + if f(*label) { + // empty label found + for target in target_iter { + for (label, children_iter) in builder.labels_of(target)? { + for child in children_iter { + if !builder.has_edge_label(source, label, child)? { + updated = true; + + to_add.push((source, child, *label)); + } + } + } + } + } + } + } + + for (source, target, label) in to_add.into_iter() { + builder.add_edge(source, target, label)?; + } + } + + // Then remove all epsilon edges + + let sources_and_targets: Vec<_> = builder.edges().collect(); + + for (source, target) in sources_and_targets.into_iter() { + builder.remove_edge(source, target, &f)?; + } + + self.graph = builder.build(); + + Ok(()) } - fn remove_dead(&mut self) -> Result<(), Error> { - todo!() + fn remove_dead(&mut self, reserve: impl Fn(usize) -> bool) -> Result<(), Error> { + let mut builder = self.graph.builder_ref(); + + let mut changed = true; + + // Use a counting vector to avoid calculating which nodes are + // dead at each iteration. + let mut target_counts: Vec<usize> = + std::iter::repeat(0).take(self.graph.nodes_len()).collect(); + + for (_, target) in self.graph.edges() { + *target_counts + .get_mut(target) + .ok_or(Error::UnknownNode(target))? += 1; + } + + while changed { + changed = false; + + for node in self.nodes() { + let deadp = !matches!(target_counts.get(node).copied(), Some(n) if n > 0); + + if deadp && !reserve(node) { + let to_remove: Vec<usize> = builder.children_of(node)?.collect(); + + if !to_remove.is_empty() { + changed = true; + + for target in to_remove { + builder.remove_edge(node, target, |_| true)?; + *target_counts + .get_mut(target) + .ok_or(Error::UnknownNode(target))? -= 1; + } + } + } + } + } + + self.graph = builder.build(); + + Ok(()) } + // REVIEW: Combine nulling and remove_epsilon into the same + // method, or not. + fn nulling(&mut self, f: impl Fn(T) -> bool) -> Result<(), Error> { let mut updated = true; let mut builder = self.graph.builder_ref(); @@ -396,7 +494,6 @@ impl<T: GraphLabel + Display + Debug> Display for DefaultNFA<T> { #[cfg(test)] mod test_to_nfa { - #![allow(unused_imports)] use super::*; use crate::SoC; @@ -446,8 +543,11 @@ mod test_to_nfa { println!("regex = {regex}"); - let nfa: DefaultNFA<DOption<char>> = - DefaultNFA::to_nfa(&[regex], |label| Ok(SoC::Carry(label)))?; + let nfa: DefaultNFA<DOption<char>> = DefaultNFA::to_nfa( + &[regex], + |label| Ok(SoC::Carry(label)), + Some(char::default()), + )?; nfa.print_viz("nfa.gv")?; |