Record left-linear expansion and forest format

Now the grammar will record the left-linear expansions when generating the nondeterministic finite automaton frmo its rules, and will record whether an edge in the nondeterministic finite automaton comes from a left-linear expansion. The latter is needed because while performing a chain-rule derivation, we do not need the left-linear expanded derivations in the "first layer". This might well have been the root cause of the bad performance of the previous version of this package. Also I have figured out how to properly generate and handle parse forests while manipulating the "chain-rule machine".
author: JSDurand <mmemmew@gmail.com> 2023-01-11 23:47:26 +0800
committer: JSDurand <mmemmew@gmail.com> 2023-01-11 23:47:26 +0800
commit: 1a3d346f413325ed37848a6b2526e8e729269833 (patch)
tree: ab8812f8094d096c68aee53cf516e986cc9a273a /grammar/src/tests
parent: f27d604d93ce583d4404e1874664e08382ea2f00 (diff)
1 files changed, 26 insertions, 17 deletions
diff --git a/grammar/src/tests/test_grammar_left_closure.rs b/grammar/src/tests/test_grammar_left_closure.rs
index 0bc9f4d..003c211 100644
--- a/grammar/src/tests/test_grammar_left_closure.rs
+++ b/grammar/src/tests/test_grammar_left_closure.rs
@@ -1,5 +1,6 @@
 use crate::test_grammar_helper::*;
 use crate::*;
+use graph::LabelGraph;
 use nfa::Nfa;
 use std::{
     collections::HashSet,
@@ -124,7 +125,7 @@ fn test_remove_epsilon() -> Result<(), Box<dyn std::error::Error>> {
 
     nfa.print_viz("nfa_orig.gv")?;
 
-    nfa.remove_epsilon(|label| label.is_none())?;
+    nfa.remove_epsilon(|label| label.get_value().is_none())?;
 
     nfa.print_viz("nfa_no_epsilon.gv")?;
 
@@ -174,7 +175,7 @@ fn test_remove_dead() -> Result<(), Box<dyn std::error::Error>> {
 
     nfa.print_viz("nfa_orig.gv")?;
 
-    nfa.remove_epsilon(|label| label.is_none())?;
+    nfa.remove_epsilon(|label| label.get_value().is_none())?;
 
     let accumulators: HashSet<usize> = accumulators.into_iter().collect();
 
@@ -192,7 +193,8 @@ fn test_remove_dead() -> Result<(), Box<dyn std::error::Error>> {
 #[test]
 #[ignore]
 fn test_nulling() -> Result<(), Box<dyn std::error::Error>> {
-    let mut grammar = new_left_recursive_grammar()?;
+    // TODO: Test more grammars.
+    let mut grammar = new_right_recursive_grammar()?;
     let closure = new_closure_regex(&mut grammar)?;
 
     let mut lock = stdout().lock();
@@ -244,24 +246,31 @@ fn test_nulling() -> Result<(), Box<dyn std::error::Error>> {
 
     let mut nfa = grammar.left_closure_to_nfa(&closure)?;
 
-    nfa.nulling(|label| {
-        if let Some(label) = *label {
-            match label {
-                TNT::Ter(_) => false,
-                // Panics if a non-terminal references an invalid node
-                // here.
-                TNT::Non(n) => grammar.is_nullable(n).unwrap(),
-            }
-        } else {
-            true
-        }
-    })?;
-
     let grammar_reserve_nodes = |node| accumulators.contains(&node);
 
     writeln!(lock, "accumulators are {accumulators:?}")?;
 
-    nfa.remove_epsilon(|label| label.is_none())?;
+    let nullables: HashSet<usize> = (0..grammar.non_num())
+        .filter(|n| matches!(grammar.is_nullable(*n), Ok(true)))
+        .collect();
+
+    nfa.closure(
+        |label| {
+            if let Some(label) = *label.get_value() {
+                matches!(label, TNT::Non(n) if nullables.contains(&n))
+            } else {
+                true
+            }
+        },
+        true,
+        |two_edges| grammar.transform_label_null_epsilon(two_edges),
+        |label| label.get_value().is_none(),
+    )?;
+
+    for (label, child_iter) in nfa.labels_of(18)? {
+        writeln!(lock, "{label}: {child_iter:?}")?;
+    }
+
     nfa.remove_dead(grammar_reserve_nodes)?;
 
     writeln!(lock, "Printing nfa to nfa.gv")?;
author	JSDurand <mmemmew@gmail.com>	2023-01-11 23:47:26 +0800
committer	JSDurand <mmemmew@gmail.com>	2023-01-11 23:47:26 +0800
commit	1a3d346f413325ed37848a6b2526e8e729269833 (patch)
tree	ab8812f8094d096c68aee53cf516e986cc9a273a /grammar/src/tests
parent	f27d604d93ce583d4404e1874664e08382ea2f00 (diff)