diff options
Diffstat (limited to 'nfa/src')
-rw-r--r-- | nfa/src/default/mod.rs | 2 | ||||
-rw-r--r-- | nfa/src/default/nfa.rs | 160 | ||||
-rw-r--r-- | nfa/src/default/regex.rs | 20 | ||||
-rw-r--r-- | nfa/src/error.rs | 18 | ||||
-rw-r--r-- | nfa/src/lib.rs | 91 |
5 files changed, 231 insertions, 60 deletions
diff --git a/nfa/src/default/mod.rs b/nfa/src/default/mod.rs index b9ee398..115bea5 100644 --- a/nfa/src/default/mod.rs +++ b/nfa/src/default/mod.rs @@ -1,5 +1,5 @@ //! This file provides a structure that implements the trait -//! [`NFA`][super::Nfa] and another that umplements the trait +//! [`NFA`][super::Nfa] and another that implements the trait //! [`Regex`][super::Regex]. pub mod nfa; diff --git a/nfa/src/default/nfa.rs b/nfa/src/default/nfa.rs index 229ba4d..e642218 100644 --- a/nfa/src/default/nfa.rs +++ b/nfa/src/default/nfa.rs @@ -1,17 +1,8 @@ //! This file provides a default implementation of NFA. -// TODO: The current focus of the project is to understand the growth -// rate of the algorithm, to know whether I made a mistake in the -// previous iteration of the implementation, or the algorithm is not -// as fast as the author estimated, which is not quite likely, of -// course. -// -// Thus I shall establish a friendly interface that allows me to view -// and debug the atomic languages and the languages, transparently. - use graph::{ builder::Builder, error::Error as GError, labelled::DLGBuilder, DLGraph, Graph, GraphLabel, - LabelGraph, + LabelExtGraph, LabelGraph, }; use crate::{default::regex::RegexType, error::Error, DOption, Nfa, Regex, SoC}; @@ -32,6 +23,11 @@ impl<T: GraphLabel + Display> Default for DefaultNFA<T> { } } +// Some boiler-plate delegation implementations. +// +// I deliberately avoid using [`Deref`][std::ops::Deref] here, as I do +// not want to dereference an NFA to a Graph. + impl<T: GraphLabel + Display> Graph for DefaultNFA<T> { type Iter<'a> = <DLGraph<T> as Graph>::Iter<'a> where T: 'a; @@ -81,6 +77,11 @@ impl<T: GraphLabel + Display> LabelGraph<T> for DefaultNFA<T> { type LabelIter<'a> = <DLGraph<T> as LabelGraph<T>>::LabelIter<'a> where T: 'a; + type EdgeLabelIter<'a> = <DLGraph<T> as LabelGraph<T>>::EdgeLabelIter<'a> + where + Self: 'a, + T: 'a; + #[inline] fn vertex_label(&self, node_id: usize) -> Result<Option<T>, GError> { if self.has_node(node_id) { @@ -91,7 +92,7 @@ impl<T: GraphLabel + Display> LabelGraph<T> for DefaultNFA<T> { } #[inline] - fn edge_label(&self, source: usize, target: usize) -> Result<Vec<T>, GError> { + fn edge_label(&self, source: usize, target: usize) -> Result<Self::EdgeLabelIter<'_>, GError> { self.graph.edge_label(source, target) } @@ -115,12 +116,24 @@ impl<T: GraphLabel + Display> LabelGraph<T> for DefaultNFA<T> { } } +impl<T: GraphLabel + Display> LabelExtGraph<T> for DefaultNFA<T> { + #[inline] + fn extend(&mut self, edges: impl IntoIterator<Item = (T, usize)>) -> Result<usize, GError> { + self.graph.extend(edges) + } +} + +// TODO: Define a type for the labels of DefaultNFA. + impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> { type FromRegex<S: GraphLabel + Display + Default> = DefaultNFA<S>; + // Reminder: This is an adopted version of Thompson's + // construction. fn to_nfa( regexps: &[impl Regex<RegexType<T>>], sub_pred: impl Fn(T) -> Result<SoC<T>, Error>, + default: Option<T>, ) -> Result<Self::FromRegex<DOption<T>>, Error> { let total_regexps_len: usize = regexps.iter().map(Graph::nodes_len).sum(); @@ -128,7 +141,7 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> { return Ok(Default::default()); } - // We reserve two more rooms for later uses. + // We reserve two more rooms for the default edge. let nfa_len = total_regexps_len * 2 + 2; let mut builder: DLGBuilder<DOption<T>> = Builder::with_capacity(nfa_len); @@ -141,20 +154,18 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> { builder.add_vertex(); } + // add a default edge + builder.add_edge(nfa_len - 2, nfa_len - 1, DOption(default))?; + let accumulators: Vec<usize> = { - let mut result = Vec::with_capacity(regexps.len() + 1); + let mut result = Vec::with_capacity(regexps.len()); result.push(0); - let mut accumulator = 0; - - for regexp in regexps.iter() { - accumulator += regexp.nodes_len() * 2; - result.push(accumulator); + for regexp in regexps.iter().take(regexps.len() - 1) { + result.push(result.last().unwrap() + regexp.nodes_len() * 2); } - result.pop(); - result }; @@ -313,9 +324,8 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> { SoC::Sub(sub_non) => { // a non-terminal - let sub_offset = get_offset_safe!(sub_non); - let sub_nfa_start = sub_offset + 2 * sub_non; - let sub_nfa_end = sub_offset + 2 * sub_non + 1; + let sub_nfa_start = get_offset_safe!(sub_non); + let sub_nfa_end = sub_nfa_start + 1; builder.add_edge(nfa_start, sub_nfa_start, empty_label)?; builder.add_edge(sub_nfa_end, nfa_end, empty_label)?; @@ -336,14 +346,102 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> { Ok(DefaultNFA { graph }) } - fn remove_epsilon(&mut self) -> Result<(), Error> { - todo!() + fn remove_epsilon<F: Fn(T) -> bool>(&mut self, f: F) -> Result<(), Error> { + let mut builder = self.graph.builder_ref(); + + let mut updated = true; + + let nodes_len = self.nodes_len(); + + while updated { + updated = false; + + let mut to_add: Vec<(usize, usize, T)> = Vec::with_capacity(nodes_len); + + for source in builder.nodes() { + for (label, target_iter) in builder.labels_of(source)? { + if f(*label) { + // empty label found + for target in target_iter { + for (label, children_iter) in builder.labels_of(target)? { + for child in children_iter { + if !builder.has_edge_label(source, label, child)? { + updated = true; + + to_add.push((source, child, *label)); + } + } + } + } + } + } + } + + for (source, target, label) in to_add.into_iter() { + builder.add_edge(source, target, label)?; + } + } + + // Then remove all epsilon edges + + let sources_and_targets: Vec<_> = builder.edges().collect(); + + for (source, target) in sources_and_targets.into_iter() { + builder.remove_edge(source, target, &f)?; + } + + self.graph = builder.build(); + + Ok(()) } - fn remove_dead(&mut self) -> Result<(), Error> { - todo!() + fn remove_dead(&mut self, reserve: impl Fn(usize) -> bool) -> Result<(), Error> { + let mut builder = self.graph.builder_ref(); + + let mut changed = true; + + // Use a counting vector to avoid calculating which nodes are + // dead at each iteration. + let mut target_counts: Vec<usize> = + std::iter::repeat(0).take(self.graph.nodes_len()).collect(); + + for (_, target) in self.graph.edges() { + *target_counts + .get_mut(target) + .ok_or(Error::UnknownNode(target))? += 1; + } + + while changed { + changed = false; + + for node in self.nodes() { + let deadp = !matches!(target_counts.get(node).copied(), Some(n) if n > 0); + + if deadp && !reserve(node) { + let to_remove: Vec<usize> = builder.children_of(node)?.collect(); + + if !to_remove.is_empty() { + changed = true; + + for target in to_remove { + builder.remove_edge(node, target, |_| true)?; + *target_counts + .get_mut(target) + .ok_or(Error::UnknownNode(target))? -= 1; + } + } + } + } + } + + self.graph = builder.build(); + + Ok(()) } + // REVIEW: Combine nulling and remove_epsilon into the same + // method, or not. + fn nulling(&mut self, f: impl Fn(T) -> bool) -> Result<(), Error> { let mut updated = true; let mut builder = self.graph.builder_ref(); @@ -396,7 +494,6 @@ impl<T: GraphLabel + Display + Debug> Display for DefaultNFA<T> { #[cfg(test)] mod test_to_nfa { - #![allow(unused_imports)] use super::*; use crate::SoC; @@ -446,8 +543,11 @@ mod test_to_nfa { println!("regex = {regex}"); - let nfa: DefaultNFA<DOption<char>> = - DefaultNFA::to_nfa(&[regex], |label| Ok(SoC::Carry(label)))?; + let nfa: DefaultNFA<DOption<char>> = DefaultNFA::to_nfa( + &[regex], + |label| Ok(SoC::Carry(label)), + Some(char::default()), + )?; nfa.print_viz("nfa.gv")?; diff --git a/nfa/src/default/regex.rs b/nfa/src/default/regex.rs index 2670e32..c8ad370 100644 --- a/nfa/src/default/regex.rs +++ b/nfa/src/default/regex.rs @@ -260,9 +260,7 @@ impl<T: GraphLabel> DefaultRegex<T> { stack.push(Unseen(0)); while let Some(top) = stack.pop() { - let node_type = types[top.index()]; - - // TODO: Do not use unwrap here. + let node_type = types.get(top.index()).unwrap(); match node_type { RegexType::Kleene => { @@ -350,8 +348,12 @@ impl<T: GraphLabel> DefaultRegex<T> { .map(Unseen) .rev(), ); + + if self.graph.is_empty_node(top.index()).unwrap() { + write!(result, "ε")?; + } } - RegexType::Lit(label) => write!(result, "{}", f(label))?, + RegexType::Lit(label) => write!(result, "{}", f(*label))?, } } @@ -452,7 +454,15 @@ impl Display for ParseError { } } -impl std::error::Error for ParseError {} +impl std::error::Error for ParseError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + if let ParseError::Graph(gerr) = self { + Some(gerr) + } else { + None + } + } +} impl From<GError> for ParseError { fn from(ge: GError) -> Self { diff --git a/nfa/src/error.rs b/nfa/src/error.rs index ad75077..7160555 100644 --- a/nfa/src/error.rs +++ b/nfa/src/error.rs @@ -1,6 +1,6 @@ //! This file implements the error type for the crate. -use graph::error::Error as GError; +use graph::error::Error as GraphError; use core::fmt::{Display, Formatter}; @@ -17,7 +17,7 @@ pub enum Error { /// There is no root in the underlying regular expression. NoRoot, /// This error comes from some underlying graph operation. - Graph(GError), + Graph(GraphError), /// A cycle is found when constructing regular expressions. Cycle, } @@ -34,10 +34,18 @@ impl Display for Error { } } -impl std::error::Error for Error {} +impl std::error::Error for Error { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + if let Self::Graph(gerr) = self { + Some(gerr) + } else { + None + } + } +} -impl From<GError> for Error { - fn from(e: GError) -> Self { +impl From<GraphError> for Error { + fn from(e: GraphError) -> Self { Self::Graph(e) } } diff --git a/nfa/src/lib.rs b/nfa/src/lib.rs index b1d62b3..31bd69a 100644 --- a/nfa/src/lib.rs +++ b/nfa/src/lib.rs @@ -14,7 +14,7 @@ use core::fmt::Display; use std::ops::{Deref, DerefMut}; -use graph::{Graph, GraphLabel, LabelGraph}; +use graph::{Graph, GraphLabel, LabelExtGraph}; use error::Error; @@ -52,14 +52,14 @@ pub trait Regex<T: GraphLabel>: Graph + Display + Clone { } } - // TODO: add functions that determine if certain "positions" in a + // TODO: Add functions that determine if certain "positions" in a // regular language satisfy some special properties, like at the // end of a Kleene star, or at the end of a regular language, et // cetera. These might be needed later. } -/// Since Option<T> does not inherit the Display from T, we wrap it to -/// provide an automatic implementation of Display. +/// Since `Option<T>` does not inherit the `Display` from `T`, we wrap +/// it to provide an automatic implementation of `Display`. #[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Hash)] pub struct DOption<T>(Option<T>); @@ -120,42 +120,98 @@ pub enum SoC<T> { /// The expected behvaiour of a nondeterministic finite automaton. /// /// Every NFA is a special labelled graph. -pub trait Nfa<T: GraphLabel>: LabelGraph<T> { +pub trait Nfa<T: GraphLabel>: LabelExtGraph<T> { + // TODO: This trait should have a type for the labels. + /// Remove all empty transitions from the nondeterministic finite /// automaton. - fn remove_epsilon(&mut self) -> Result<(), Error>; + fn remove_epsilon<F>(&mut self, f: F) -> Result<(), Error> + where + F: Fn(T) -> bool; /// Return a state-minimal NFA equivalent with the original one. /// /// This is not required. It is just to allow me to experiment /// with NFA optimization algorithms. - fn minimize(&self) -> Result<Self, Error> { + fn minimize(&self) -> Result<Box<Self>, Error> { Err(Error::UnsupportedOperation) } + /// Check every node or edge by a given predicate. + /// + /// This should also verify that every node and edge has correct + /// indices, so that we can safely use `unwrap` later. A + /// consequence is that, if one only wants to check the validity + /// of nodes and edges, one can pass a function that always + /// returns `true`. + #[inline] + fn labels_satisfy(&self, f: impl Fn(T) -> bool) -> Result<bool, Error> { + let nodes_len = self.nodes_len(); + let mut result = true; + + for node in self.nodes() { + for (label, children_iter) in self.labels_of(node)? { + for child in children_iter { + if child >= nodes_len { + dbg!(node, label); + return Err(graph::error::Error::IndexOutOfBounds(child, nodes_len).into()); + } + } + + // NOTE: Avoid executing `f` if `result` is already + // false. But still proceed in verifying that nodes + // and edges are correct: the correctness of nodes and + // edges is more important than the function execution + // results, as the former directly affects the safety + // of many algorithms. + if result && !f(*label) { + dbg!(node, label); + result = false; + } + } + } + + Ok(result) + } + /// When we convert a regular expression to a nondeterministic /// finite automaton, the label should be optional, so we use a /// different type for the result type. type FromRegex<S: GraphLabel + Display + Default>; - /// Build a nondeterministic finite automaton out of a set of - /// regular expressions. + /// Build a nondeterministic finite automaton out of a set + /// `regexps` of regular expressions. + /// + /// The `sub_pred` is a predicate function that returns an + /// indication whether to carry the value around or to substitute + /// by another regular language in the given set. + /// + /// The `default` parameter specifies the label of a default edge, + /// that goes from a node to another, both of which are not + /// associated with the given regular languages. /// - /// The SUB_PRED is a predicate function that returns an optional - /// index for a label. If it returns some index, then this means - /// we should substitute the index-th regular expression in the - /// set, whereever that label occurs in the set of regular - /// expressions. + /// This function should perform Thompson's construction, + /// basically. fn to_nfa( regexps: &[impl Regex<RegexType<T>>], + // TODO: The transformation should produce more information. sub_pred: impl Fn(T) -> Result<SoC<T>, Error>, + default: Option<T>, ) -> Result<Self::FromRegex<DOption<T>>, Error>; /// Remove all dead states from the nondeterministic finite /// automaton. /// - /// A state is dead if there are no edges going to the state. - fn remove_dead(&mut self) -> Result<(), Error>; + /// A state is dead if there are no edges going to the state, and + /// if it is not reserved. + /// + /// # Note + /// + /// Actually an implementation is allowed to just remove all edges + /// out of the dead nodes. Then these nodes are considered gone + /// from the graph, and we don't need to re-index the existing + /// edges. We can call this "a poor man's removal of nodes". + fn remove_dead(&mut self, reserve: impl Fn(usize) -> bool) -> Result<(), Error>; /// For each edge from A to B whose edge is considered nullable by /// a function `f`, and for every edge from B to C, add an edge @@ -169,6 +225,3 @@ pub trait Nfa<T: GraphLabel>: LabelGraph<T> { pub mod default; pub mod desrec; - -#[cfg(test)] -mod nfa_tests {} |