summaryrefslogtreecommitdiff
path: root/nfa
diff options
context:
space:
mode:
Diffstat (limited to 'nfa')
-rw-r--r--nfa/src/default/mod.rs2
-rw-r--r--nfa/src/default/nfa.rs160
-rw-r--r--nfa/src/default/regex.rs20
-rw-r--r--nfa/src/error.rs18
-rw-r--r--nfa/src/lib.rs91
5 files changed, 231 insertions, 60 deletions
diff --git a/nfa/src/default/mod.rs b/nfa/src/default/mod.rs
index b9ee398..115bea5 100644
--- a/nfa/src/default/mod.rs
+++ b/nfa/src/default/mod.rs
@@ -1,5 +1,5 @@
//! This file provides a structure that implements the trait
-//! [`NFA`][super::Nfa] and another that umplements the trait
+//! [`NFA`][super::Nfa] and another that implements the trait
//! [`Regex`][super::Regex].
pub mod nfa;
diff --git a/nfa/src/default/nfa.rs b/nfa/src/default/nfa.rs
index 229ba4d..e642218 100644
--- a/nfa/src/default/nfa.rs
+++ b/nfa/src/default/nfa.rs
@@ -1,17 +1,8 @@
//! This file provides a default implementation of NFA.
-// TODO: The current focus of the project is to understand the growth
-// rate of the algorithm, to know whether I made a mistake in the
-// previous iteration of the implementation, or the algorithm is not
-// as fast as the author estimated, which is not quite likely, of
-// course.
-//
-// Thus I shall establish a friendly interface that allows me to view
-// and debug the atomic languages and the languages, transparently.
-
use graph::{
builder::Builder, error::Error as GError, labelled::DLGBuilder, DLGraph, Graph, GraphLabel,
- LabelGraph,
+ LabelExtGraph, LabelGraph,
};
use crate::{default::regex::RegexType, error::Error, DOption, Nfa, Regex, SoC};
@@ -32,6 +23,11 @@ impl<T: GraphLabel + Display> Default for DefaultNFA<T> {
}
}
+// Some boiler-plate delegation implementations.
+//
+// I deliberately avoid using [`Deref`][std::ops::Deref] here, as I do
+// not want to dereference an NFA to a Graph.
+
impl<T: GraphLabel + Display> Graph for DefaultNFA<T> {
type Iter<'a> = <DLGraph<T> as Graph>::Iter<'a> where T: 'a;
@@ -81,6 +77,11 @@ impl<T: GraphLabel + Display> LabelGraph<T> for DefaultNFA<T> {
type LabelIter<'a> = <DLGraph<T> as LabelGraph<T>>::LabelIter<'a> where T: 'a;
+ type EdgeLabelIter<'a> = <DLGraph<T> as LabelGraph<T>>::EdgeLabelIter<'a>
+ where
+ Self: 'a,
+ T: 'a;
+
#[inline]
fn vertex_label(&self, node_id: usize) -> Result<Option<T>, GError> {
if self.has_node(node_id) {
@@ -91,7 +92,7 @@ impl<T: GraphLabel + Display> LabelGraph<T> for DefaultNFA<T> {
}
#[inline]
- fn edge_label(&self, source: usize, target: usize) -> Result<Vec<T>, GError> {
+ fn edge_label(&self, source: usize, target: usize) -> Result<Self::EdgeLabelIter<'_>, GError> {
self.graph.edge_label(source, target)
}
@@ -115,12 +116,24 @@ impl<T: GraphLabel + Display> LabelGraph<T> for DefaultNFA<T> {
}
}
+impl<T: GraphLabel + Display> LabelExtGraph<T> for DefaultNFA<T> {
+ #[inline]
+ fn extend(&mut self, edges: impl IntoIterator<Item = (T, usize)>) -> Result<usize, GError> {
+ self.graph.extend(edges)
+ }
+}
+
+// TODO: Define a type for the labels of DefaultNFA.
+
impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
type FromRegex<S: GraphLabel + Display + Default> = DefaultNFA<S>;
+ // Reminder: This is an adopted version of Thompson's
+ // construction.
fn to_nfa(
regexps: &[impl Regex<RegexType<T>>],
sub_pred: impl Fn(T) -> Result<SoC<T>, Error>,
+ default: Option<T>,
) -> Result<Self::FromRegex<DOption<T>>, Error> {
let total_regexps_len: usize = regexps.iter().map(Graph::nodes_len).sum();
@@ -128,7 +141,7 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
return Ok(Default::default());
}
- // We reserve two more rooms for later uses.
+ // We reserve two more rooms for the default edge.
let nfa_len = total_regexps_len * 2 + 2;
let mut builder: DLGBuilder<DOption<T>> = Builder::with_capacity(nfa_len);
@@ -141,20 +154,18 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
builder.add_vertex();
}
+ // add a default edge
+ builder.add_edge(nfa_len - 2, nfa_len - 1, DOption(default))?;
+
let accumulators: Vec<usize> = {
- let mut result = Vec::with_capacity(regexps.len() + 1);
+ let mut result = Vec::with_capacity(regexps.len());
result.push(0);
- let mut accumulator = 0;
-
- for regexp in regexps.iter() {
- accumulator += regexp.nodes_len() * 2;
- result.push(accumulator);
+ for regexp in regexps.iter().take(regexps.len() - 1) {
+ result.push(result.last().unwrap() + regexp.nodes_len() * 2);
}
- result.pop();
-
result
};
@@ -313,9 +324,8 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
SoC::Sub(sub_non) => {
// a non-terminal
- let sub_offset = get_offset_safe!(sub_non);
- let sub_nfa_start = sub_offset + 2 * sub_non;
- let sub_nfa_end = sub_offset + 2 * sub_non + 1;
+ let sub_nfa_start = get_offset_safe!(sub_non);
+ let sub_nfa_end = sub_nfa_start + 1;
builder.add_edge(nfa_start, sub_nfa_start, empty_label)?;
builder.add_edge(sub_nfa_end, nfa_end, empty_label)?;
@@ -336,14 +346,102 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
Ok(DefaultNFA { graph })
}
- fn remove_epsilon(&mut self) -> Result<(), Error> {
- todo!()
+ fn remove_epsilon<F: Fn(T) -> bool>(&mut self, f: F) -> Result<(), Error> {
+ let mut builder = self.graph.builder_ref();
+
+ let mut updated = true;
+
+ let nodes_len = self.nodes_len();
+
+ while updated {
+ updated = false;
+
+ let mut to_add: Vec<(usize, usize, T)> = Vec::with_capacity(nodes_len);
+
+ for source in builder.nodes() {
+ for (label, target_iter) in builder.labels_of(source)? {
+ if f(*label) {
+ // empty label found
+ for target in target_iter {
+ for (label, children_iter) in builder.labels_of(target)? {
+ for child in children_iter {
+ if !builder.has_edge_label(source, label, child)? {
+ updated = true;
+
+ to_add.push((source, child, *label));
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ for (source, target, label) in to_add.into_iter() {
+ builder.add_edge(source, target, label)?;
+ }
+ }
+
+ // Then remove all epsilon edges
+
+ let sources_and_targets: Vec<_> = builder.edges().collect();
+
+ for (source, target) in sources_and_targets.into_iter() {
+ builder.remove_edge(source, target, &f)?;
+ }
+
+ self.graph = builder.build();
+
+ Ok(())
}
- fn remove_dead(&mut self) -> Result<(), Error> {
- todo!()
+ fn remove_dead(&mut self, reserve: impl Fn(usize) -> bool) -> Result<(), Error> {
+ let mut builder = self.graph.builder_ref();
+
+ let mut changed = true;
+
+ // Use a counting vector to avoid calculating which nodes are
+ // dead at each iteration.
+ let mut target_counts: Vec<usize> =
+ std::iter::repeat(0).take(self.graph.nodes_len()).collect();
+
+ for (_, target) in self.graph.edges() {
+ *target_counts
+ .get_mut(target)
+ .ok_or(Error::UnknownNode(target))? += 1;
+ }
+
+ while changed {
+ changed = false;
+
+ for node in self.nodes() {
+ let deadp = !matches!(target_counts.get(node).copied(), Some(n) if n > 0);
+
+ if deadp && !reserve(node) {
+ let to_remove: Vec<usize> = builder.children_of(node)?.collect();
+
+ if !to_remove.is_empty() {
+ changed = true;
+
+ for target in to_remove {
+ builder.remove_edge(node, target, |_| true)?;
+ *target_counts
+ .get_mut(target)
+ .ok_or(Error::UnknownNode(target))? -= 1;
+ }
+ }
+ }
+ }
+ }
+
+ self.graph = builder.build();
+
+ Ok(())
}
+ // REVIEW: Combine nulling and remove_epsilon into the same
+ // method, or not.
+
fn nulling(&mut self, f: impl Fn(T) -> bool) -> Result<(), Error> {
let mut updated = true;
let mut builder = self.graph.builder_ref();
@@ -396,7 +494,6 @@ impl<T: GraphLabel + Display + Debug> Display for DefaultNFA<T> {
#[cfg(test)]
mod test_to_nfa {
- #![allow(unused_imports)]
use super::*;
use crate::SoC;
@@ -446,8 +543,11 @@ mod test_to_nfa {
println!("regex = {regex}");
- let nfa: DefaultNFA<DOption<char>> =
- DefaultNFA::to_nfa(&[regex], |label| Ok(SoC::Carry(label)))?;
+ let nfa: DefaultNFA<DOption<char>> = DefaultNFA::to_nfa(
+ &[regex],
+ |label| Ok(SoC::Carry(label)),
+ Some(char::default()),
+ )?;
nfa.print_viz("nfa.gv")?;
diff --git a/nfa/src/default/regex.rs b/nfa/src/default/regex.rs
index 2670e32..c8ad370 100644
--- a/nfa/src/default/regex.rs
+++ b/nfa/src/default/regex.rs
@@ -260,9 +260,7 @@ impl<T: GraphLabel> DefaultRegex<T> {
stack.push(Unseen(0));
while let Some(top) = stack.pop() {
- let node_type = types[top.index()];
-
- // TODO: Do not use unwrap here.
+ let node_type = types.get(top.index()).unwrap();
match node_type {
RegexType::Kleene => {
@@ -350,8 +348,12 @@ impl<T: GraphLabel> DefaultRegex<T> {
.map(Unseen)
.rev(),
);
+
+ if self.graph.is_empty_node(top.index()).unwrap() {
+ write!(result, "ε")?;
+ }
}
- RegexType::Lit(label) => write!(result, "{}", f(label))?,
+ RegexType::Lit(label) => write!(result, "{}", f(*label))?,
}
}
@@ -452,7 +454,15 @@ impl Display for ParseError {
}
}
-impl std::error::Error for ParseError {}
+impl std::error::Error for ParseError {
+ fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+ if let ParseError::Graph(gerr) = self {
+ Some(gerr)
+ } else {
+ None
+ }
+ }
+}
impl From<GError> for ParseError {
fn from(ge: GError) -> Self {
diff --git a/nfa/src/error.rs b/nfa/src/error.rs
index ad75077..7160555 100644
--- a/nfa/src/error.rs
+++ b/nfa/src/error.rs
@@ -1,6 +1,6 @@
//! This file implements the error type for the crate.
-use graph::error::Error as GError;
+use graph::error::Error as GraphError;
use core::fmt::{Display, Formatter};
@@ -17,7 +17,7 @@ pub enum Error {
/// There is no root in the underlying regular expression.
NoRoot,
/// This error comes from some underlying graph operation.
- Graph(GError),
+ Graph(GraphError),
/// A cycle is found when constructing regular expressions.
Cycle,
}
@@ -34,10 +34,18 @@ impl Display for Error {
}
}
-impl std::error::Error for Error {}
+impl std::error::Error for Error {
+ fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+ if let Self::Graph(gerr) = self {
+ Some(gerr)
+ } else {
+ None
+ }
+ }
+}
-impl From<GError> for Error {
- fn from(e: GError) -> Self {
+impl From<GraphError> for Error {
+ fn from(e: GraphError) -> Self {
Self::Graph(e)
}
}
diff --git a/nfa/src/lib.rs b/nfa/src/lib.rs
index b1d62b3..31bd69a 100644
--- a/nfa/src/lib.rs
+++ b/nfa/src/lib.rs
@@ -14,7 +14,7 @@ use core::fmt::Display;
use std::ops::{Deref, DerefMut};
-use graph::{Graph, GraphLabel, LabelGraph};
+use graph::{Graph, GraphLabel, LabelExtGraph};
use error::Error;
@@ -52,14 +52,14 @@ pub trait Regex<T: GraphLabel>: Graph + Display + Clone {
}
}
- // TODO: add functions that determine if certain "positions" in a
+ // TODO: Add functions that determine if certain "positions" in a
// regular language satisfy some special properties, like at the
// end of a Kleene star, or at the end of a regular language, et
// cetera. These might be needed later.
}
-/// Since Option<T> does not inherit the Display from T, we wrap it to
-/// provide an automatic implementation of Display.
+/// Since `Option<T>` does not inherit the `Display` from `T`, we wrap
+/// it to provide an automatic implementation of `Display`.
#[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Hash)]
pub struct DOption<T>(Option<T>);
@@ -120,42 +120,98 @@ pub enum SoC<T> {
/// The expected behvaiour of a nondeterministic finite automaton.
///
/// Every NFA is a special labelled graph.
-pub trait Nfa<T: GraphLabel>: LabelGraph<T> {
+pub trait Nfa<T: GraphLabel>: LabelExtGraph<T> {
+ // TODO: This trait should have a type for the labels.
+
/// Remove all empty transitions from the nondeterministic finite
/// automaton.
- fn remove_epsilon(&mut self) -> Result<(), Error>;
+ fn remove_epsilon<F>(&mut self, f: F) -> Result<(), Error>
+ where
+ F: Fn(T) -> bool;
/// Return a state-minimal NFA equivalent with the original one.
///
/// This is not required. It is just to allow me to experiment
/// with NFA optimization algorithms.
- fn minimize(&self) -> Result<Self, Error> {
+ fn minimize(&self) -> Result<Box<Self>, Error> {
Err(Error::UnsupportedOperation)
}
+ /// Check every node or edge by a given predicate.
+ ///
+ /// This should also verify that every node and edge has correct
+ /// indices, so that we can safely use `unwrap` later. A
+ /// consequence is that, if one only wants to check the validity
+ /// of nodes and edges, one can pass a function that always
+ /// returns `true`.
+ #[inline]
+ fn labels_satisfy(&self, f: impl Fn(T) -> bool) -> Result<bool, Error> {
+ let nodes_len = self.nodes_len();
+ let mut result = true;
+
+ for node in self.nodes() {
+ for (label, children_iter) in self.labels_of(node)? {
+ for child in children_iter {
+ if child >= nodes_len {
+ dbg!(node, label);
+ return Err(graph::error::Error::IndexOutOfBounds(child, nodes_len).into());
+ }
+ }
+
+ // NOTE: Avoid executing `f` if `result` is already
+ // false. But still proceed in verifying that nodes
+ // and edges are correct: the correctness of nodes and
+ // edges is more important than the function execution
+ // results, as the former directly affects the safety
+ // of many algorithms.
+ if result && !f(*label) {
+ dbg!(node, label);
+ result = false;
+ }
+ }
+ }
+
+ Ok(result)
+ }
+
/// When we convert a regular expression to a nondeterministic
/// finite automaton, the label should be optional, so we use a
/// different type for the result type.
type FromRegex<S: GraphLabel + Display + Default>;
- /// Build a nondeterministic finite automaton out of a set of
- /// regular expressions.
+ /// Build a nondeterministic finite automaton out of a set
+ /// `regexps` of regular expressions.
+ ///
+ /// The `sub_pred` is a predicate function that returns an
+ /// indication whether to carry the value around or to substitute
+ /// by another regular language in the given set.
+ ///
+ /// The `default` parameter specifies the label of a default edge,
+ /// that goes from a node to another, both of which are not
+ /// associated with the given regular languages.
///
- /// The SUB_PRED is a predicate function that returns an optional
- /// index for a label. If it returns some index, then this means
- /// we should substitute the index-th regular expression in the
- /// set, whereever that label occurs in the set of regular
- /// expressions.
+ /// This function should perform Thompson's construction,
+ /// basically.
fn to_nfa(
regexps: &[impl Regex<RegexType<T>>],
+ // TODO: The transformation should produce more information.
sub_pred: impl Fn(T) -> Result<SoC<T>, Error>,
+ default: Option<T>,
) -> Result<Self::FromRegex<DOption<T>>, Error>;
/// Remove all dead states from the nondeterministic finite
/// automaton.
///
- /// A state is dead if there are no edges going to the state.
- fn remove_dead(&mut self) -> Result<(), Error>;
+ /// A state is dead if there are no edges going to the state, and
+ /// if it is not reserved.
+ ///
+ /// # Note
+ ///
+ /// Actually an implementation is allowed to just remove all edges
+ /// out of the dead nodes. Then these nodes are considered gone
+ /// from the graph, and we don't need to re-index the existing
+ /// edges. We can call this "a poor man's removal of nodes".
+ fn remove_dead(&mut self, reserve: impl Fn(usize) -> bool) -> Result<(), Error>;
/// For each edge from A to B whose edge is considered nullable by
/// a function `f`, and for every edge from B to C, add an edge
@@ -169,6 +225,3 @@ pub trait Nfa<T: GraphLabel>: LabelGraph<T> {
pub mod default;
pub mod desrec;
-
-#[cfg(test)]
-mod nfa_tests {}