summaryrefslogtreecommitdiff
path: root/nfa/src/default/nfa.rs
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2023-01-11 23:47:26 +0800
committerJSDurand <mmemmew@gmail.com>2023-01-11 23:47:26 +0800
commit1a3d346f413325ed37848a6b2526e8e729269833 (patch)
treeab8812f8094d096c68aee53cf516e986cc9a273a /nfa/src/default/nfa.rs
parentf27d604d93ce583d4404e1874664e08382ea2f00 (diff)
Record left-linear expansion and forest format
Now the grammar will record the left-linear expansions when generating the nondeterministic finite automaton frmo its rules, and will record whether an edge in the nondeterministic finite automaton comes from a left-linear expansion. The latter is needed because while performing a chain-rule derivation, we do not need the left-linear expanded derivations in the "first layer". This might well have been the root cause of the bad performance of the previous version of this package. Also I have figured out how to properly generate and handle parse forests while manipulating the "chain-rule machine".
Diffstat (limited to 'nfa/src/default/nfa.rs')
-rw-r--r--nfa/src/default/nfa.rs210
1 files changed, 103 insertions, 107 deletions
diff --git a/nfa/src/default/nfa.rs b/nfa/src/default/nfa.rs
index e642218..a23c056 100644
--- a/nfa/src/default/nfa.rs
+++ b/nfa/src/default/nfa.rs
@@ -5,7 +5,10 @@ use graph::{
LabelExtGraph, LabelGraph,
};
-use crate::{default::regex::RegexType, error::Error, DOption, Nfa, Regex, SoC};
+use crate::{
+ default::regex::RegexType, error::Error, DOption, LabelType, Nfa, NfaLabel, Regex, SoC,
+ TwoEdges,
+};
use core::fmt::{Debug, Display};
@@ -123,8 +126,6 @@ impl<T: GraphLabel + Display> LabelExtGraph<T> for DefaultNFA<T> {
}
}
-// TODO: Define a type for the labels of DefaultNFA.
-
impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
type FromRegex<S: GraphLabel + Display + Default> = DefaultNFA<S>;
@@ -134,7 +135,7 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
regexps: &[impl Regex<RegexType<T>>],
sub_pred: impl Fn(T) -> Result<SoC<T>, Error>,
default: Option<T>,
- ) -> Result<Self::FromRegex<DOption<T>>, Error> {
+ ) -> Result<Self::FromRegex<LabelType<T>>, Error> {
let total_regexps_len: usize = regexps.iter().map(Graph::nodes_len).sum();
if total_regexps_len == 0 {
@@ -144,18 +145,16 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
// We reserve two more rooms for the default edge.
let nfa_len = total_regexps_len * 2 + 2;
- let mut builder: DLGBuilder<DOption<T>> = Builder::with_capacity(nfa_len);
-
- // Since DOption<T> implements Copy when T does, we can use a
- // variable to hold the empty label to avoid repetitions.
- let empty_label: DOption<T> = Default::default();
+ let mut builder: DLGBuilder<LabelType<T>> = Builder::with_capacity(nfa_len);
for _ in 0..nfa_len {
builder.add_vertex();
}
+ let default = LabelType::new(DOption(default), total_regexps_len, false);
+
// add a default edge
- builder.add_edge(nfa_len - 2, nfa_len - 1, DOption(default))?;
+ builder.add_edge(nfa_len - 2, nfa_len - 1, default)?;
let accumulators: Vec<usize> = {
let mut result = Vec::with_capacity(regexps.len());
@@ -206,6 +205,10 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
stack.push(root);
while let Some(top_index) = stack.pop() {
+ let false_label = NfaLabel::new(DOption(None), (offset >> 1) + top_index, false);
+
+ let true_label = NfaLabel::new(DOption(None), (offset >> 1) + top_index, true);
+
let top_label = regex.vertex_label(top_index)?;
let nfa_start = offset + 2 * top_index;
@@ -213,7 +216,7 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
match top_label {
RegexType::Kleene => {
- builder.add_edge(nfa_start, nfa_end, empty_label)?;
+ builder.add_edge(nfa_start, nfa_end, false_label)?;
let mut source = nfa_start;
@@ -224,14 +227,14 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
let child_start = offset + 2 * child;
let child_end = offset + 2 * child + 1;
- builder.add_edge(source, child_start, empty_label)?;
+ builder.add_edge(source, child_start, false_label)?;
source = child_end;
}
- builder.add_edge(source, nfa_end, empty_label)?;
+ builder.add_edge(source, nfa_end, false_label)?;
- builder.add_edge(nfa_end, nfa_start, empty_label)?;
+ builder.add_edge(nfa_end, nfa_start, false_label)?;
}
}
RegexType::Plus => {
@@ -244,20 +247,20 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
let child_start = offset + 2 * child;
let child_end = offset + 2 * child + 1;
- builder.add_edge(source, child_start, empty_label)?;
+ builder.add_edge(source, child_start, false_label)?;
source = child_end;
}
- builder.add_edge(source, nfa_end, empty_label)?;
+ builder.add_edge(source, nfa_end, false_label)?;
- builder.add_edge(nfa_end, nfa_start, empty_label)?;
+ builder.add_edge(nfa_end, nfa_start, false_label)?;
} else {
- builder.add_edge(nfa_start, nfa_end, empty_label)?;
+ builder.add_edge(nfa_start, nfa_end, false_label)?;
}
}
RegexType::Optional => {
- builder.add_edge(nfa_start, nfa_end, empty_label)?;
+ builder.add_edge(nfa_start, nfa_end, false_label)?;
let mut source = nfa_start;
@@ -268,12 +271,12 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
let child_start = offset + 2 * child;
let child_end = offset + 2 * child + 1;
- builder.add_edge(source, child_start, empty_label)?;
+ builder.add_edge(source, child_start, false_label)?;
source = child_end;
}
- builder.add_edge(source, nfa_end, empty_label)?;
+ builder.add_edge(source, nfa_end, false_label)?;
}
}
RegexType::Or => {
@@ -284,11 +287,11 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
let child_start = offset + 2 * child;
let child_end = offset + 2 * child + 1;
- builder.add_edge(nfa_start, child_start, empty_label)?;
- builder.add_edge(child_end, nfa_end, empty_label)?;
+ builder.add_edge(nfa_start, child_start, false_label)?;
+ builder.add_edge(child_end, nfa_end, false_label)?;
}
} else {
- builder.add_edge(nfa_start, nfa_end, empty_label)?;
+ builder.add_edge(nfa_start, nfa_end, false_label)?;
}
}
RegexType::Paren => {
@@ -305,14 +308,14 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
let child_start = offset + 2 * child;
let child_end = offset + 2 * child + 1;
- builder.add_edge(source, child_start, empty_label)?;
+ builder.add_edge(source, child_start, false_label)?;
source = child_end;
}
- builder.add_edge(source, nfa_end, empty_label)?;
+ builder.add_edge(source, nfa_end, false_label)?;
} else {
- builder.add_edge(nfa_start, nfa_end, empty_label)?;
+ builder.add_edge(nfa_start, nfa_end, false_label)?;
}
}
RegexType::Lit(value) => {
@@ -327,13 +330,34 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
let sub_nfa_start = get_offset_safe!(sub_non);
let sub_nfa_end = sub_nfa_start + 1;
- builder.add_edge(nfa_start, sub_nfa_start, empty_label)?;
- builder.add_edge(sub_nfa_end, nfa_end, empty_label)?;
+ // We also need a label for the
+ // original label and non-left-linear
+ // expansion.
+ builder.add_edge(
+ nfa_start,
+ nfa_end,
+ NfaLabel::new(
+ DOption(Some(value)),
+ (offset >> 1) + top_index,
+ false,
+ ),
+ )?;
+
+ builder.add_edge(nfa_start, sub_nfa_start, true_label)?;
+ builder.add_edge(sub_nfa_end, nfa_end, false_label)?;
}
SoC::Carry(new_value) => {
// a terminal
- builder.add_edge(nfa_start, nfa_end, DOption(Some(new_value)))?;
+ builder.add_edge(
+ nfa_start,
+ nfa_end,
+ NfaLabel::new(
+ DOption(Some(new_value)),
+ (offset >> 1) + top_index,
+ false,
+ ),
+ )?;
}
}
}
@@ -346,56 +370,7 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
Ok(DefaultNFA { graph })
}
- fn remove_epsilon<F: Fn(T) -> bool>(&mut self, f: F) -> Result<(), Error> {
- let mut builder = self.graph.builder_ref();
-
- let mut updated = true;
-
- let nodes_len = self.nodes_len();
-
- while updated {
- updated = false;
-
- let mut to_add: Vec<(usize, usize, T)> = Vec::with_capacity(nodes_len);
-
- for source in builder.nodes() {
- for (label, target_iter) in builder.labels_of(source)? {
- if f(*label) {
- // empty label found
- for target in target_iter {
- for (label, children_iter) in builder.labels_of(target)? {
- for child in children_iter {
- if !builder.has_edge_label(source, label, child)? {
- updated = true;
-
- to_add.push((source, child, *label));
- }
- }
- }
- }
- }
- }
- }
-
- for (source, target, label) in to_add.into_iter() {
- builder.add_edge(source, target, label)?;
- }
- }
-
- // Then remove all epsilon edges
-
- let sources_and_targets: Vec<_> = builder.edges().collect();
-
- for (source, target) in sources_and_targets.into_iter() {
- builder.remove_edge(source, target, &f)?;
- }
-
- self.graph = builder.build();
-
- Ok(())
- }
-
- fn remove_dead(&mut self, reserve: impl Fn(usize) -> bool) -> Result<(), Error> {
+ fn remove_dead(&mut self, mut reserve: impl FnMut(usize) -> bool) -> Result<(), Error> {
let mut builder = self.graph.builder_ref();
let mut changed = true;
@@ -439,48 +414,69 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
Ok(())
}
- // REVIEW: Combine nulling and remove_epsilon into the same
- // method, or not.
+ fn closure(
+ &mut self,
+ mut predicate: impl FnMut(T) -> bool,
+ remove_after_p: bool,
+ mut transform: impl FnMut(TwoEdges<T>) -> T,
+ mut remove_predicate: impl FnMut(T) -> bool,
+ ) -> Result<(), Error> {
+ let mut builder = self.graph.builder_ref();
- fn nulling(&mut self, f: impl Fn(T) -> bool) -> Result<(), Error> {
let mut updated = true;
- let mut builder = self.graph.builder_ref();
+
+ let nodes_len = self.nodes_len();
while updated {
updated = false;
- let mut nullable = false;
-
- let mut to_add = Vec::new();
-
- for (source, target) in builder.edges() {
- for label in builder.edge_label(source, target)? {
- if f(label) {
- nullable = true;
+ // Just a rough estimate
+ let mut to_add: Vec<(usize, usize, T)> = Vec::with_capacity(nodes_len);
- break;
- }
- }
+ // REVIEW: I do not like nested if statements, but I do
+ // not know how to do this otherwise.
- if nullable {
- for (label, child_iter) in builder.labels_of(target)? {
- for child in child_iter {
- if !builder.has_edge_label(source, label, child)? {
- updated = true;
+ for source in builder.nodes() {
+ for (first_label, target_iter) in builder.labels_of(source)? {
+ if predicate(*first_label) {
+ // a null label found
+ for target in target_iter {
+ for (second_label, children_iter) in builder.labels_of(target)? {
+ for child in children_iter {
+ let new_label = transform(TwoEdges(
+ source,
+ target,
+ child,
+ *first_label,
+ *second_label,
+ ));
+
+ if !builder.has_edge_label(source, &new_label, child)? {
+ updated = true;
- to_add.push((source, child, *label));
+ to_add.push((source, child, new_label));
+ }
+ }
}
}
}
}
}
- for (source, child, label) in to_add {
- builder.add_edge(source, child, label)?;
+ for (source, target, label) in to_add.into_iter() {
+ builder.add_edge(source, target, label)?;
+ }
+ }
+
+ // Then remove all nullable edges if demanded
+
+ if remove_after_p {
+ for (source, target) in builder.edges().collect::<Vec<_>>().into_iter() {
+ builder.remove_edge(source, target, &mut remove_predicate)?;
}
}
- self.graph.replace_by_builder(builder);
+ self.graph = builder.build();
Ok(())
}
@@ -532,7 +528,7 @@ mod test_to_nfa {
Ok(
DefaultRegParser::<char>::parse(&parser, &input_string, Box::new(test_scanner), true)?
- .unwrap_or(Default::default())
+ .unwrap_or_default()
.0,
)
}
@@ -543,13 +539,13 @@ mod test_to_nfa {
println!("regex = {regex}");
- let nfa: DefaultNFA<DOption<char>> = DefaultNFA::to_nfa(
+ let _nfa: DefaultNFA<_> = DefaultNFA::to_nfa(
&[regex],
|label| Ok(SoC::Carry(label)),
Some(char::default()),
)?;
- nfa.print_viz("nfa.gv")?;
+ // nfa.print_viz("nfa.gv")?;
Ok(())
}