From a80db17473ff09cc72acba2c1975101e6dbedf39 Mon Sep 17 00:00:00 2001
From: JSDurand <mmemmew@gmail.com>
Date: Sun, 18 Jun 2023 15:03:34 +0800
Subject: fixed the bugs of node duplications and left-open nodes

There were two main issues in the previous version.

One is that there are lots of duplications of nodes when manipulating
the forest.  This does not mean that labels repeat: by the use of the
data type this cannot happen.  What happened is that there were cloned
nodes whose children are exactly equal.  In this case there is no need
to clone that node in the first place.  This is now fixed by checking
carefully before cloning, so that we do not clone unnecessary nodes.

The other issue, which is perhaps more important, is that there are
nodes which are not closed.  This means that when there should be a
reuction of grammar rules, the forest does not mark the corresponding
node as already reduced.  The incorrect forests thus caused is hard to
fix: I tried several different approaches to fix it afterwards, but
all to no avail.  I also tried to record enough information to fix
these nodes during the manipulations.  It turned out that recording
nodes is a dead end, as I cannot properly syncronize the information
in the forest and the information in the chain-rule machine.  Any
inconsistencies will result in incorrect operations later on.

The approach I finally adapt is to perform every possible reduction at
each step.  This might lead to some more nodes than what we need.  But
those are technically expected to be there after all, and it is easy
to filter them out, so it is fine, from my point of view at the
moment.

Therefore, what remains is to filter those nodes out and connect it to
the holy Emacs.  :D
---
 nfa/src/default/nfa.rs | 32 +++++++++++---------------------
 1 file changed, 11 insertions(+), 21 deletions(-)

(limited to 'nfa')
diff --git a/nfa/src/default/nfa.rs b/nfa/src/default/nfa.rs
index d2fe88e..e38a1a9 100644
--- a/nfa/src/default/nfa.rs
+++ b/nfa/src/default/nfa.rs
@@ -20,7 +20,7 @@ pub struct DefaultNFA<T: GraphLabel> {
     graph: DLGraph<T>,
 }
 
-impl<T: GraphLabel + Display> Default for DefaultNFA<T> {
+impl<T: GraphLabel> Default for DefaultNFA<T> {
     fn default() -> Self {
         Self {
             graph: Default::default(),
@@ -85,10 +85,9 @@ impl<T: GraphLabel> LabelExtGraph<T> for DefaultNFA<T> {
 }
 
 impl<T: GraphLabel> Nfa<T> for DefaultNFA<T> {
-    type FromRegex<S: GraphLabel + Display + Default> = DefaultNFA<S>;
+    type FromRegex<S: GraphLabel + Default> = DefaultNFA<S>;
 
-    // Reminder: This is an adopted version of Thompson's
-    // construction.
+    // NOTE: This is an adopted version of Thompson's construction.
     fn to_nfa(
         regexps: &[impl Regex<RegexType<T>>],
         sub_pred: impl Fn(T) -> Result<SoC<T>, Error>,
@@ -107,10 +106,6 @@ impl<T: GraphLabel> Nfa<T> for DefaultNFA<T> {
 
         builder.add_vertices(nfa_len);
 
-        // for _ in 0..nfa_len {
-        //     builder.add_vertex();
-        // }
-
         let default = LabelType::new(DOption(default), total_regexps_len, false);
 
         // add a default edge
@@ -122,7 +117,7 @@ impl<T: GraphLabel> Nfa<T> for DefaultNFA<T> {
             result.push(0);
 
             for regexp in regexps.iter().take(regexps.len() - 1) {
-                result.push(result.last().unwrap() + regexp.nodes_len() * 2);
+                result.push(*result.last().unwrap() + regexp.nodes_len() * 2);
             }
 
             result
@@ -137,14 +132,14 @@ impl<T: GraphLabel> Nfa<T> for DefaultNFA<T> {
             }
         );
 
-        /// Get offset from `accumulators` without checking bounds.
-        macro_rules! get_offset_unsafe (
-            ($num:expr) => {
-                { unsafe { *accumulators.get_unchecked($num) } }
-            }
-        );
+        // /// Get offset from `accumulators` without checking bounds.
+        // macro_rules! get_offset_unsafe (
+        //     ($num:expr) => {
+        //         { unsafe { *accumulators.get_unchecked($num) } }
+        //     }
+        // );
 
-        for (index, regex) in regexps.iter().enumerate() {
+        for (regex, offset) in regexps.iter().zip(accumulators.iter().copied()) {
             let root = if let Some(root) = regex.root() {
                 root
             } else {
@@ -155,11 +150,6 @@ impl<T: GraphLabel> Nfa<T> for DefaultNFA<T> {
 
             let regex_len = regex.nodes_len();
 
-            // It is safe here to call `get_offset_unsafe`, as `index`
-            // is guaranteed to be strictly less than the length of
-            // `accumulators` by an assertion above.
-            let offset = get_offset_unsafe!(index);
-
             let mut stack: Vec<usize> = Vec::with_capacity(regex_len);
 
             stack.push(root);
-- 
cgit v1.2.3-18-g5258