summaryrefslogtreecommitdiff
path: root/nfa/src/lib.rs
diff options
context:
space:
mode:
Diffstat (limited to 'nfa/src/lib.rs')
-rw-r--r--nfa/src/lib.rs155
1 files changed, 142 insertions, 13 deletions
diff --git a/nfa/src/lib.rs b/nfa/src/lib.rs
index 31bd69a..c1906e1 100644
--- a/nfa/src/lib.rs
+++ b/nfa/src/lib.rs
@@ -60,8 +60,12 @@ pub trait Regex<T: GraphLabel>: Graph + Display + Clone {
/// Since `Option<T>` does not inherit the `Display` from `T`, we wrap
/// it to provide an automatic implementation of `Display`.
+///
+/// # Convert to `Option`
+///
+/// One can dereference a `DOption` to obtain an `Option`.
#[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Hash)]
-pub struct DOption<T>(Option<T>);
+pub struct DOption<T>(pub Option<T>);
impl<T> Default for DOption<T> {
fn default() -> Self {
@@ -117,17 +121,104 @@ pub enum SoC<T> {
Carry(T),
}
+/// This type records some information that is obtained from the
+/// process of converting a regular expression to a nondeterministic
+/// finite automaton.
+#[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Hash, Default)]
+pub struct NfaLabel<T: GraphLabel> {
+ /// A terminal or a non-terminal.
+ value: T,
+ /// The corresponding position in the rules.
+ moved: usize,
+ /// Whether this comes from left-linear expansion.
+ left_p: bool,
+}
+
+impl<T: GraphLabel> NfaLabel<T> {
+ /// Construct a new label.
+ #[inline]
+ pub fn new(value: T, moved: usize, left_p: bool) -> Self {
+ Self {
+ value,
+ moved,
+ left_p,
+ }
+ }
+
+ /// Retrieve the value from the label.
+ #[inline]
+ pub fn get_value(&self) -> T {
+ self.value
+ }
+ /// Retrieve the moved position from the label.
+ #[inline]
+ pub fn get_moved(&self) -> usize {
+ self.moved
+ }
+ /// Retrieve whether or not the label comes from left-linear
+ /// expansions.
+ #[inline]
+ pub fn get_left_p(&self) -> bool {
+ self.left_p
+ }
+}
+
+impl<T: GraphLabel> Display for NfaLabel<T> {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "edge {} at {}{}", self.value, self.moved, {
+ if self.left_p {
+ ", by left"
+ } else {
+ ""
+ }
+ })
+ }
+}
+
+/// A convenient alias of the information of two edges.
+///
+/// If the tuple is (a, b, c, la, lb), then the first edge goes from a
+/// to b, is labelled la, and the second edge goes from b to c, and is
+/// labelled by lb.
+#[derive(Debug, Clone, Copy)]
+pub struct TwoEdges<T: Copy>(usize, usize, usize, T, T);
+
+impl<T: Copy> TwoEdges<T> {
+ /// Extract the first edge.
+ pub fn first_edge(&self) -> (usize, usize, T) {
+ (self.0, self.1, self.3)
+ }
+
+ /// Extract the second edge.
+ pub fn second_edge(&self) -> (usize, usize, T) {
+ (self.1, self.2, self.4)
+ }
+}
+
+/// The type of nondeterministic finite automata that is obtained from
+/// a regular expression, via the method [`to_nfa`][Nfa::to_nfa].
+pub type LabelType<T> = NfaLabel<DOption<T>>;
+
/// The expected behvaiour of a nondeterministic finite automaton.
///
/// Every NFA is a special labelled graph.
pub trait Nfa<T: GraphLabel>: LabelExtGraph<T> {
- // TODO: This trait should have a type for the labels.
+ /// When we convert a regular expression to a nondeterministic
+ /// finite automaton, the label should be optional, so we use a
+ /// different type for the result type.
+ type FromRegex<S: GraphLabel + Display + Default>;
+ // FIXME: This should not be needed.
/// Remove all empty transitions from the nondeterministic finite
/// automaton.
- fn remove_epsilon<F>(&mut self, f: F) -> Result<(), Error>
+ #[inline]
+ fn remove_epsilon<F>(&mut self, _f: F) -> Result<(), Error>
where
- F: Fn(T) -> bool;
+ F: FnMut(T) -> bool,
+ {
+ // self.closure(f, true, |two_edges| two_edges.second_edge().2)
+ unimplemented!("deprecated")
+ }
/// Return a state-minimal NFA equivalent with the original one.
///
@@ -174,11 +265,6 @@ pub trait Nfa<T: GraphLabel>: LabelExtGraph<T> {
Ok(result)
}
- /// When we convert a regular expression to a nondeterministic
- /// finite automaton, the label should be optional, so we use a
- /// different type for the result type.
- type FromRegex<S: GraphLabel + Display + Default>;
-
/// Build a nondeterministic finite automaton out of a set
/// `regexps` of regular expressions.
///
@@ -194,10 +280,9 @@ pub trait Nfa<T: GraphLabel>: LabelExtGraph<T> {
/// basically.
fn to_nfa(
regexps: &[impl Regex<RegexType<T>>],
- // TODO: The transformation should produce more information.
sub_pred: impl Fn(T) -> Result<SoC<T>, Error>,
default: Option<T>,
- ) -> Result<Self::FromRegex<DOption<T>>, Error>;
+ ) -> Result<Self::FromRegex<LabelType<T>>, Error>;
/// Remove all dead states from the nondeterministic finite
/// automaton.
@@ -211,8 +296,9 @@ pub trait Nfa<T: GraphLabel>: LabelExtGraph<T> {
/// out of the dead nodes. Then these nodes are considered gone
/// from the graph, and we don't need to re-index the existing
/// edges. We can call this "a poor man's removal of nodes".
- fn remove_dead(&mut self, reserve: impl Fn(usize) -> bool) -> Result<(), Error>;
+ fn remove_dead(&mut self, reserve: impl FnMut(usize) -> bool) -> Result<(), Error>;
+ // FIXME: This should not be needed.
/// For each edge from A to B whose edge is considered nullable by
/// a function `f`, and for every edge from B to C, add an edge
/// from A to C.
@@ -220,7 +306,50 @@ pub trait Nfa<T: GraphLabel>: LabelExtGraph<T> {
/// This is needed specifically by the algorithm to produce a set
/// of atomic languages that represent "null-closed" derived
/// languages.
- fn nulling(&mut self, f: impl Fn(T) -> bool) -> Result<(), Error>;
+ #[inline]
+ fn nulling(&mut self, _f: impl FnMut(T) -> bool) -> Result<(), Error> {
+ // self.closure(f, false, |two_edges| two_edges.second_edge().2)
+ unimplemented!("deprecated")
+ }
+
+ /// Return the *closure* of the nondeterministic finite automaton.
+ ///
+ /// # Definition
+ ///
+ /// The closure of a nondeterministic finite automaton N is
+ /// defined as the unique *minimal* nondeterministic finite
+ /// automaton N+ that can be obtained by adjoining some edges to N
+ /// such that if there are edges a -> b and b -> c, and if the
+ /// edge a -> b is deemed as *nullable* by some function, then
+ /// there is an edge a -> c, where the minimality is the
+ /// minimality of the set of edges: if there is another such
+ /// nondeterministic finite automaton M satisfying the above
+ /// property, then the set of edges of N+ is a subset of the set
+ /// of edges of M.
+ ///
+ /// # Remove edges afterwards
+ ///
+ /// If `remove_after_p` is true, remove all those nullable edges.
+ ///
+ /// # Transformation of labels
+ ///
+ /// We can apply a transformer to labels, to be able to combine
+ /// labels in non-trivial ways. If one just wants the *new* label
+ /// that is the label of the edge from b to c in the above
+ /// definition, one can use the function that sends `two_edges` to
+ /// `two_edges.second_edge().2`.
+ ///
+ /// # Error
+ ///
+ /// The function should emit errors if the edges of the
+ /// nondeterministic finite automaton point to invalid nodes.
+ fn closure(
+ &mut self,
+ predicate: impl FnMut(T) -> bool,
+ remove_after_p: bool,
+ transform: impl FnMut(TwoEdges<T>) -> T,
+ remove_predicate: impl FnMut(T) -> bool,
+ ) -> Result<(), Error>;
}
pub mod default;