diff options
Diffstat (limited to 'nfa/src/lib.rs')
-rw-r--r-- | nfa/src/lib.rs | 155 |
1 files changed, 142 insertions, 13 deletions
diff --git a/nfa/src/lib.rs b/nfa/src/lib.rs index 31bd69a..c1906e1 100644 --- a/nfa/src/lib.rs +++ b/nfa/src/lib.rs @@ -60,8 +60,12 @@ pub trait Regex<T: GraphLabel>: Graph + Display + Clone { /// Since `Option<T>` does not inherit the `Display` from `T`, we wrap /// it to provide an automatic implementation of `Display`. +/// +/// # Convert to `Option` +/// +/// One can dereference a `DOption` to obtain an `Option`. #[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Hash)] -pub struct DOption<T>(Option<T>); +pub struct DOption<T>(pub Option<T>); impl<T> Default for DOption<T> { fn default() -> Self { @@ -117,17 +121,104 @@ pub enum SoC<T> { Carry(T), } +/// This type records some information that is obtained from the +/// process of converting a regular expression to a nondeterministic +/// finite automaton. +#[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Hash, Default)] +pub struct NfaLabel<T: GraphLabel> { + /// A terminal or a non-terminal. + value: T, + /// The corresponding position in the rules. + moved: usize, + /// Whether this comes from left-linear expansion. + left_p: bool, +} + +impl<T: GraphLabel> NfaLabel<T> { + /// Construct a new label. + #[inline] + pub fn new(value: T, moved: usize, left_p: bool) -> Self { + Self { + value, + moved, + left_p, + } + } + + /// Retrieve the value from the label. + #[inline] + pub fn get_value(&self) -> T { + self.value + } + /// Retrieve the moved position from the label. + #[inline] + pub fn get_moved(&self) -> usize { + self.moved + } + /// Retrieve whether or not the label comes from left-linear + /// expansions. + #[inline] + pub fn get_left_p(&self) -> bool { + self.left_p + } +} + +impl<T: GraphLabel> Display for NfaLabel<T> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "edge {} at {}{}", self.value, self.moved, { + if self.left_p { + ", by left" + } else { + "" + } + }) + } +} + +/// A convenient alias of the information of two edges. +/// +/// If the tuple is (a, b, c, la, lb), then the first edge goes from a +/// to b, is labelled la, and the second edge goes from b to c, and is +/// labelled by lb. +#[derive(Debug, Clone, Copy)] +pub struct TwoEdges<T: Copy>(usize, usize, usize, T, T); + +impl<T: Copy> TwoEdges<T> { + /// Extract the first edge. + pub fn first_edge(&self) -> (usize, usize, T) { + (self.0, self.1, self.3) + } + + /// Extract the second edge. + pub fn second_edge(&self) -> (usize, usize, T) { + (self.1, self.2, self.4) + } +} + +/// The type of nondeterministic finite automata that is obtained from +/// a regular expression, via the method [`to_nfa`][Nfa::to_nfa]. +pub type LabelType<T> = NfaLabel<DOption<T>>; + /// The expected behvaiour of a nondeterministic finite automaton. /// /// Every NFA is a special labelled graph. pub trait Nfa<T: GraphLabel>: LabelExtGraph<T> { - // TODO: This trait should have a type for the labels. + /// When we convert a regular expression to a nondeterministic + /// finite automaton, the label should be optional, so we use a + /// different type for the result type. + type FromRegex<S: GraphLabel + Display + Default>; + // FIXME: This should not be needed. /// Remove all empty transitions from the nondeterministic finite /// automaton. - fn remove_epsilon<F>(&mut self, f: F) -> Result<(), Error> + #[inline] + fn remove_epsilon<F>(&mut self, _f: F) -> Result<(), Error> where - F: Fn(T) -> bool; + F: FnMut(T) -> bool, + { + // self.closure(f, true, |two_edges| two_edges.second_edge().2) + unimplemented!("deprecated") + } /// Return a state-minimal NFA equivalent with the original one. /// @@ -174,11 +265,6 @@ pub trait Nfa<T: GraphLabel>: LabelExtGraph<T> { Ok(result) } - /// When we convert a regular expression to a nondeterministic - /// finite automaton, the label should be optional, so we use a - /// different type for the result type. - type FromRegex<S: GraphLabel + Display + Default>; - /// Build a nondeterministic finite automaton out of a set /// `regexps` of regular expressions. /// @@ -194,10 +280,9 @@ pub trait Nfa<T: GraphLabel>: LabelExtGraph<T> { /// basically. fn to_nfa( regexps: &[impl Regex<RegexType<T>>], - // TODO: The transformation should produce more information. sub_pred: impl Fn(T) -> Result<SoC<T>, Error>, default: Option<T>, - ) -> Result<Self::FromRegex<DOption<T>>, Error>; + ) -> Result<Self::FromRegex<LabelType<T>>, Error>; /// Remove all dead states from the nondeterministic finite /// automaton. @@ -211,8 +296,9 @@ pub trait Nfa<T: GraphLabel>: LabelExtGraph<T> { /// out of the dead nodes. Then these nodes are considered gone /// from the graph, and we don't need to re-index the existing /// edges. We can call this "a poor man's removal of nodes". - fn remove_dead(&mut self, reserve: impl Fn(usize) -> bool) -> Result<(), Error>; + fn remove_dead(&mut self, reserve: impl FnMut(usize) -> bool) -> Result<(), Error>; + // FIXME: This should not be needed. /// For each edge from A to B whose edge is considered nullable by /// a function `f`, and for every edge from B to C, add an edge /// from A to C. @@ -220,7 +306,50 @@ pub trait Nfa<T: GraphLabel>: LabelExtGraph<T> { /// This is needed specifically by the algorithm to produce a set /// of atomic languages that represent "null-closed" derived /// languages. - fn nulling(&mut self, f: impl Fn(T) -> bool) -> Result<(), Error>; + #[inline] + fn nulling(&mut self, _f: impl FnMut(T) -> bool) -> Result<(), Error> { + // self.closure(f, false, |two_edges| two_edges.second_edge().2) + unimplemented!("deprecated") + } + + /// Return the *closure* of the nondeterministic finite automaton. + /// + /// # Definition + /// + /// The closure of a nondeterministic finite automaton N is + /// defined as the unique *minimal* nondeterministic finite + /// automaton N+ that can be obtained by adjoining some edges to N + /// such that if there are edges a -> b and b -> c, and if the + /// edge a -> b is deemed as *nullable* by some function, then + /// there is an edge a -> c, where the minimality is the + /// minimality of the set of edges: if there is another such + /// nondeterministic finite automaton M satisfying the above + /// property, then the set of edges of N+ is a subset of the set + /// of edges of M. + /// + /// # Remove edges afterwards + /// + /// If `remove_after_p` is true, remove all those nullable edges. + /// + /// # Transformation of labels + /// + /// We can apply a transformer to labels, to be able to combine + /// labels in non-trivial ways. If one just wants the *new* label + /// that is the label of the edge from b to c in the above + /// definition, one can use the function that sends `two_edges` to + /// `two_edges.second_edge().2`. + /// + /// # Error + /// + /// The function should emit errors if the edges of the + /// nondeterministic finite automaton point to invalid nodes. + fn closure( + &mut self, + predicate: impl FnMut(T) -> bool, + remove_after_p: bool, + transform: impl FnMut(TwoEdges<T>) -> T, + remove_predicate: impl FnMut(T) -> bool, + ) -> Result<(), Error>; } pub mod default; |