Record left-linear expansion and forest format

Now the grammar will record the left-linear expansions when generating the nondeterministic finite automaton frmo its rules, and will record whether an edge in the nondeterministic finite automaton comes from a left-linear expansion. The latter is needed because while performing a chain-rule derivation, we do not need the left-linear expanded derivations in the "first layer". This might well have been the root cause of the bad performance of the previous version of this package. Also I have figured out how to properly generate and handle parse forests while manipulating the "chain-rule machine".
author: JSDurand <mmemmew@gmail.com> 2023-01-11 23:47:26 +0800
committer: JSDurand <mmemmew@gmail.com> 2023-01-11 23:47:26 +0800
commit: 1a3d346f413325ed37848a6b2526e8e729269833 (patch)
tree: ab8812f8094d096c68aee53cf516e986cc9a273a /grammar/src
parent: f27d604d93ce583d4404e1874664e08382ea2f00 (diff)
5 files changed, 977 insertions, 748 deletions
diff --git a/grammar/src/first_set.rs b/grammar/src/first_set.rs
new file mode 100644
index 0000000..2a5ee3c
--- /dev/null
+++ b/grammar/src/first_set.rs
@@ -0,0 +1,451 @@
+//! This file implements a function for computing the set of first
+//! terminals for a grammar.
+
+use super::*;
+
+impl Grammar {
+    /// Compute the set of terminals that can appear as the first
+    /// terminal in some left-linear derivation of a non-terminal, for
+    /// every non-terminal.
+    ///
+    /// This is an algorithm that computes the transitive closure,
+    /// which is a common approach for this task.  But perhaps there
+    /// are more efficient approaches?
+    ///
+    /// Also the function computes the set of "reachable nodes" in the
+    /// process, and records the information in the `first_nodes`
+    /// attribute.
+    pub fn compute_firsts(&mut self) -> Result<(), Error> {
+        match self.state {
+            GrammarState::Initial => {
+                self.state = GrammarState::AfterComputeFirst;
+            }
+            _ => {
+                // This has been called already.
+                return Ok(());
+            }
+        }
+
+        let mut updated = true;
+
+        let non_len = self.non_num();
+
+        use StackElement::{Seen, Unseen};
+
+        while updated {
+            updated = false;
+
+            for (n, regex) in self.rules.iter().map(|rule| &rule.regex).enumerate() {
+                let root = if let Some(root) = regex.root() {
+                    root
+                } else {
+                    if !self.is_nullable(n)? {
+                        updated = true;
+
+                        self.firsts.get_mut(n).unwrap().insert(None);
+
+                        // The default construction of a grammar
+                        // reserves some space for each vector, so
+                        // explicitly setting this can reduce some
+                        // minor memory overhead.
+                        let pointer = self.first_nodes.get_mut(n).unwrap();
+
+                        pointer.clear();
+                        pointer.shrink_to_fit();
+                    }
+
+                    continue;
+                };
+
+                let regex_len = regex.len();
+
+                let mut stack: Vec<StackElement> = Vec::with_capacity(regex_len);
+
+                stack.push(Unseen(root));
+
+                let mut children_sets_stack: Vec<HashSet<Option<usize>>> =
+                    Vec::with_capacity(regex_len);
+
+                let mut children_nodes_stack: Vec<HashSet<usize>> = Vec::with_capacity(regex_len);
+
+                while let Some(top) = stack.pop() {
+                    let top_index = top.index();
+                    let is_seen = top.is_seen();
+
+                    match regex
+                        .vertex_label(top_index)
+                        .map_err(|_| Error::IndexOutOfBounds(top_index, regex_len))?
+                    {
+                        RegexType::Kleene => {
+                            if !is_seen {
+                                stack.push(Seen(top_index));
+
+                                for child in regex.children_of(top_index).unwrap() {
+                                    stack.push(Unseen(child));
+                                }
+                            } else {
+                                let degree = regex.degree(top_index).unwrap();
+                                let children_stack_len = children_sets_stack.len();
+                                let children_nodes_len = children_nodes_stack.len();
+
+                                assert!(
+                                    children_stack_len >= degree,
+                                    "not enough stack elements for {top_index}"
+                                );
+
+                                assert!(
+                                    children_nodes_len >= degree,
+                                    "not enough stack elements for {top_index}"
+                                );
+
+                                let mut this_set = HashSet::new();
+
+                                this_set.insert(None);
+
+                                let mut this_nodes = HashSet::new();
+
+                                this_nodes.insert(top_index);
+
+                                if degree == 0 {
+                                    children_sets_stack.push(this_set);
+                                    children_nodes_stack.push(this_nodes);
+
+                                    continue;
+                                }
+
+                                let mut stop = false;
+
+                                for (child_set, child_nodes) in children_sets_stack
+                                    .drain((children_stack_len - degree)..)
+                                    .zip(
+                                        children_nodes_stack.drain((children_nodes_len - degree)..),
+                                    )
+                                {
+                                    if stop {
+                                        break;
+                                    }
+
+                                    if !child_set.contains(&None) {
+                                        stop = true;
+                                    }
+
+                                    this_set.extend(child_set);
+                                    this_nodes.extend(child_nodes);
+                                }
+
+                                children_sets_stack.push(this_set);
+                                children_nodes_stack.push(this_nodes);
+                            }
+                        }
+                        RegexType::Plus => {
+                            if !is_seen {
+                                stack.push(Seen(top_index));
+
+                                for child in regex.children_of(top_index).unwrap() {
+                                    stack.push(Unseen(child));
+                                }
+                            } else {
+                                let degree = regex.degree(top_index).unwrap();
+                                let children_stack_len = children_sets_stack.len();
+                                let children_nodes_len = children_nodes_stack.len();
+
+                                assert!(
+                                    children_stack_len >= degree,
+                                    "not enough stack elements for {top_index}"
+                                );
+
+                                assert!(
+                                    children_nodes_len >= degree,
+                                    "not enough stack elements for {top_index}"
+                                );
+
+                                let mut this_set = HashSet::new();
+
+                                let mut this_nodes = HashSet::new();
+
+                                this_nodes.insert(top_index);
+
+                                if degree == 0 {
+                                    this_set.insert(None);
+
+                                    children_sets_stack.push(this_set);
+                                    children_nodes_stack.push(this_nodes);
+
+                                    continue;
+                                }
+
+                                let mut stop = false;
+
+                                for (child_set, child_nodes) in children_sets_stack
+                                    .drain((children_stack_len - degree)..)
+                                    .zip(
+                                        children_nodes_stack.drain((children_nodes_len - degree)..),
+                                    )
+                                {
+                                    if stop {
+                                        break;
+                                    }
+
+                                    if !child_set.contains(&None) {
+                                        stop = true;
+                                    }
+
+                                    this_set.extend(child_set);
+                                    this_nodes.extend(child_nodes);
+                                }
+
+                                if stop {
+                                    this_set.remove(&None);
+                                }
+
+                                children_sets_stack.push(this_set);
+                                children_nodes_stack.push(this_nodes);
+                            }
+                        }
+                        RegexType::Optional => {
+                            if !is_seen {
+                                stack.push(Seen(top_index));
+
+                                for child in regex.children_of(top_index).unwrap() {
+                                    stack.push(Unseen(child));
+                                }
+                            } else {
+                                let degree = regex.degree(top_index).unwrap();
+                                let children_stack_len = children_sets_stack.len();
+                                let children_nodes_len = children_nodes_stack.len();
+
+                                assert!(
+                                    children_stack_len >= degree,
+                                    "not enough stack elements for {top_index}"
+                                );
+
+                                assert!(
+                                    children_nodes_len >= degree,
+                                    "not enough stack elements for {top_index}"
+                                );
+
+                                let mut this_set = HashSet::new();
+
+                                this_set.insert(None);
+
+                                let mut this_nodes = HashSet::new();
+
+                                this_nodes.insert(top_index);
+
+                                if degree == 0 {
+                                    children_sets_stack.push(this_set);
+                                    children_nodes_stack.push(this_nodes);
+
+                                    continue;
+                                }
+
+                                let mut stop = false;
+
+                                for (child_set, child_nodes) in children_sets_stack
+                                    .drain((children_stack_len - degree)..)
+                                    .zip(
+                                        children_nodes_stack.drain((children_nodes_len - degree)..),
+                                    )
+                                {
+                                    if stop {
+                                        break;
+                                    }
+
+                                    if !child_set.contains(&None) {
+                                        stop = true;
+                                    }
+
+                                    this_set.extend(child_set.iter().copied());
+                                    this_nodes.extend(child_nodes.iter().copied());
+                                }
+
+                                children_sets_stack.push(this_set);
+                                children_nodes_stack.push(this_nodes);
+                            }
+                        }
+                        RegexType::Or => {
+                            if !is_seen {
+                                stack.push(Seen(top_index));
+
+                                for child in regex.children_of(top_index).unwrap() {
+                                    stack.push(Unseen(child));
+                                }
+                            } else {
+                                let degree = regex.degree(top_index).unwrap();
+                                let children_stack_len = children_sets_stack.len();
+                                let children_nodes_len = children_nodes_stack.len();
+
+                                assert!(
+                                    children_stack_len >= degree,
+                                    "not enough stack elements for {top_index}"
+                                );
+
+                                assert!(
+                                    children_nodes_len >= degree,
+                                    "not enough stack elements for {top_index}"
+                                );
+
+                                let mut this_set = HashSet::new();
+
+                                let mut this_nodes = HashSet::new();
+
+                                this_nodes.insert(top_index);
+
+                                if degree == 0 {
+                                    this_set.insert(None);
+
+                                    children_sets_stack.push(this_set);
+                                    children_nodes_stack.push(this_nodes);
+
+                                    continue;
+                                }
+
+                                for (child_set, child_nodes) in children_sets_stack
+                                    .drain((children_stack_len - degree)..)
+                                    .zip(
+                                        children_nodes_stack.drain((children_nodes_len - degree)..),
+                                    )
+                                {
+                                    this_set.extend(child_set.iter().copied());
+                                    this_nodes.extend(child_nodes.iter().copied());
+                                }
+
+                                children_sets_stack.push(this_set);
+                                children_nodes_stack.push(this_nodes);
+                            }
+                        }
+                        RegexType::Paren => {
+                            // Only for printing purposes
+                            let mut this_set = HashSet::new();
+
+                            this_set.insert(None);
+
+                            children_sets_stack.push(this_set);
+
+                            let mut this_nodes = HashSet::new();
+
+                            this_nodes.insert(top_index);
+
+                            children_nodes_stack.push(this_nodes);
+                        }
+                        RegexType::Empty => {
+                            if !is_seen {
+                                stack.push(Seen(top_index));
+
+                                for child in regex.children_of(top_index).unwrap().rev() {
+                                    stack.push(Unseen(child));
+                                }
+                            } else {
+                                let degree = regex.degree(top_index).unwrap();
+                                let children_stack_len = children_sets_stack.len();
+                                let children_nodes_len = children_nodes_stack.len();
+
+                                assert!(
+                                    children_stack_len >= degree,
+                                    "not enough stack elements for {top_index}"
+                                );
+
+                                assert!(
+                                    children_nodes_len >= degree,
+                                    "not enough stack elements for {top_index}"
+                                );
+
+                                let mut this_set = HashSet::new();
+
+                                let mut this_nodes = HashSet::new();
+
+                                this_nodes.insert(top_index);
+
+                                if degree == 0 {
+                                    this_set.insert(None);
+
+                                    children_sets_stack.push(this_set);
+                                    children_nodes_stack.push(this_nodes);
+
+                                    continue;
+                                }
+
+                                let mut stop = false;
+
+                                for (child_set, child_nodes) in children_sets_stack
+                                    .drain((children_stack_len - degree)..)
+                                    .zip(
+                                        children_nodes_stack.drain((children_nodes_len - degree)..),
+                                    )
+                                {
+                                    if stop {
+                                        break;
+                                    }
+
+                                    if !child_set.contains(&None) {
+                                        stop = true;
+                                    }
+
+                                    this_set.extend(child_set.iter().copied());
+                                    this_nodes.extend(child_nodes.iter().copied());
+                                }
+
+                                if stop {
+                                    this_set.remove(&None);
+                                }
+
+                                children_sets_stack.push(this_set);
+                                children_nodes_stack.push(this_nodes);
+                            }
+                        }
+                        RegexType::Lit(tnt) => {
+                            match tnt {
+                                TNT::Ter(t) => {
+                                    let mut this_set = HashSet::with_capacity(1);
+
+                                    this_set.insert(Some(t));
+
+                                    children_sets_stack.push(this_set);
+                                }
+                                TNT::Non(non) => {
+                                    let this_set = self
+                                        .firsts
+                                        .get(non)
+                                        .ok_or(Error::IndexOutOfBounds(non, non_len))?
+                                        .clone();
+
+                                    children_sets_stack.push(this_set);
+                                }
+                            }
+
+                            let mut this_nodes = HashSet::with_capacity(1);
+                            this_nodes.insert(top_index);
+
+                            children_nodes_stack.push(this_nodes);
+                        }
+                    }
+                }
+
+                assert_eq!(
+                    children_sets_stack.len(),
+                    1,
+                    "Too many elements left at the end"
+                );
+
+                assert_eq!(
+                    children_nodes_stack.len(),
+                    1,
+                    "Too many elements left at the end"
+                );
+
+                for first in children_sets_stack.pop().unwrap().into_iter() {
+                    if !self.firsts.get(n).unwrap().contains(&first) {
+                        updated = true;
+
+                        self.firsts.get_mut(n).unwrap().insert(first);
+                    }
+                }
+
+                *self.first_nodes.get_mut(n).unwrap() =
+                    children_nodes_stack.pop().unwrap().into_iter().collect();
+            }
+        }
+
+        Ok(())
+    }
+}
diff --git a/grammar/src/left_closure.rs b/grammar/src/left_closure.rs
new file mode 100644
index 0000000..1630881
--- /dev/null
+++ b/grammar/src/left_closure.rs
@@ -0,0 +1,324 @@
+//! This file implements some functions to compute the regular
+//! language of left-linear closure of a grammar.
+
+use super::*;
+
+use nfa::LabelType;
+
+impl Grammar {
+    /// Return the regular language of the left-linear closures of
+    /// non-terminals in the grammar.
+    ///
+    /// The resulting vector is guaranteed to be of the same length as
+    /// the number of non-terminals.
+    ///
+    /// The resulting regular language is not "self-contained".  That
+    /// is to say, its terminals indices are packed indices and are
+    /// meaningless without the interpretation of the grammar.  They
+    /// should be converted to a nondeterministic finite automaton and
+    /// then to its null closure later on.
+    pub fn left_closure(&mut self) -> Result<Vec<DefaultRegex<TNT>>, Error> {
+        match self.state {
+            GrammarState::Initial => {
+                return Err(Error::WrongState(
+                    self.state,
+                    GrammarState::AfterComputeFirst,
+                ))
+            }
+            GrammarState::AfterLeftClosure
+            | GrammarState::AfterNFA
+            | GrammarState::AfterComputeFirst => {}
+        }
+
+        let non_len = self.non_num();
+
+        let mut result = Vec::with_capacity(non_len);
+
+        for (n, rule) in self.rules.iter().enumerate() {
+            let regex = &rule.regex;
+
+            let regex_root = if let Some(root) = regex.root() {
+                root
+            } else {
+                result.push(Default::default());
+
+                continue;
+            };
+
+            let regex_len = regex.len();
+
+            /// A convenient macro to retrieve the children from the
+            /// original regular expression with error propagation.
+            macro_rules! children {
+                ($n:expr) => {
+                    regex
+                        .children_of($n)
+                        .map_err(|_| Error::IndexOutOfBounds($n, regex_len))?
+                };
+            }
+
+            /// A convenient macro to retrieve the label from the
+            /// original regular expression with error propagation.
+            macro_rules! label {
+                ($n:expr) => {
+                    regex
+                        .vertex_label($n)
+                        .map_err(|_| Error::IndexOutOfBounds($n, regex_len))?
+                };
+            }
+
+            let parents = regex.parents_array().map_err(|e| match e {
+                nfa::error::Error::UnknownNode(n) => Error::IndexOutOfBounds(n, regex_len),
+                nfa::error::Error::Cycle => Error::BuildFail(n, ParseError::Cycle),
+                _ => unreachable!(),
+            })?;
+
+            use RegexType::*;
+            use TNT::*;
+
+            let mut local_result: Vec<RegexType<TNT>> = Vec::with_capacity(regex_len * 2);
+            let mut builder = ALGBuilder::default();
+
+            /// Perform a depth-first copy
+            macro_rules! df_copy {
+                ($parent:expr, $index:expr) => {
+                    match label!($index) {
+                        Kleene | Plus | Optional | Or | Paren | Empty => {
+                            let mut stack = vec![($parent, $index)];
+
+                            while let Some((top_parent, top_index)) = stack.pop() {
+                                let label = label!(top_index);
+                                let label = match label {
+                                    Lit(top_tnt) => Lit(Ter(self.pack_tnt(top_tnt).unwrap())),
+                                    _ => label,
+                                };
+
+                                local_result.push(label);
+
+                                let new = builder.add_vertex();
+
+                                builder.add_edge(top_parent, new, ()).unwrap();
+
+                                stack.extend(children!(top_index).map(|child| (new, child)));
+                            }
+                        }
+                        Lit(remain_tnt) => {
+                            local_result.push(Lit(Ter(self.pack_tnt(remain_tnt).unwrap())));
+                            let new = builder.add_vertex();
+                            builder.add_edge($parent, new, ()).unwrap();
+                        }
+                    }
+                };
+            }
+
+            local_result.push(Or);
+            builder.add_vertex();
+
+            local_result.push(Lit(Ter(self.pack_tnt(Non(n)).unwrap())));
+            let non_lit_index = builder.add_vertex();
+
+            builder.add_edge(0, non_lit_index, ()).unwrap();
+
+            // If this non-terminal is nullable, add an empty variant.
+            if self.is_nullable(n)? {
+                local_result.push(Empty);
+                let empty_index = builder.add_vertex();
+                builder.add_edge(0, empty_index, ()).unwrap();
+            }
+
+            for first_node in self.first_nodes_of(n)?.copied() {
+                assert!(first_node < parents.len());
+
+                let tnt = match label!(first_node) {
+                    Lit(tnt) => Lit(tnt),
+                    _ => continue,
+                };
+
+                let mut parents_chain = {
+                    let mut result = Vec::new();
+                    let mut stack = Vec::with_capacity(parents.len());
+
+                    stack.push(first_node);
+
+                    while let Some(top) = stack.pop() {
+                        assert!(top < parents.len());
+                        if let Some(parent) = parents.get(top).copied().unwrap() {
+                            result.push(parent);
+                            stack.push(parent.0);
+                        }
+                    }
+
+                    result.reverse();
+
+                    result
+                };
+
+                if let Some((first, _)) = parents_chain.first() {
+                    assert_eq!(*first, regex_root);
+                } else {
+                    local_result.push(tnt);
+                    let lit_index = builder.add_vertex();
+                    builder.add_edge(0, lit_index, ()).unwrap();
+
+                    continue;
+                }
+
+                // A different, "more local", root.
+                let mut local_root: usize;
+
+                // Handle the direct parent
+                let (parent_node, parent_edge_index) = parents_chain.pop().unwrap();
+
+                match label!(parent_node) {
+                    Kleene | Plus => {
+                        // TODO: If parent_edge_index is 0, make a
+                        // Plus node instead.
+                        local_result.extend([Empty, tnt]);
+
+                        local_root = builder.add_vertex();
+                        let lit_index = builder.add_vertex();
+                        builder.add_edge(local_root, lit_index, ()).unwrap();
+
+                        let iterator = children!(parent_node);
+
+                        for index in iterator.clone().skip(parent_edge_index + 1) {
+                            df_copy!(local_root, index);
+                        }
+
+                        local_result.push(Kleene);
+                        let new_parent = builder.add_vertex();
+                        builder.add_edge(local_root, new_parent, ()).unwrap();
+
+                        for index in iterator {
+                            df_copy!(new_parent, index);
+                        }
+                    }
+
+                    Or => {
+                        local_result.push(tnt);
+                        local_root = builder.add_vertex();
+                    }
+                    Optional | Empty => {
+                        // If this path is taken, it should not be
+                        // optional.
+                        local_result.extend([Empty, tnt]);
+                        local_root = builder.add_vertex();
+                        let lit_index = builder.add_vertex();
+                        builder.add_edge(local_root, lit_index, ()).unwrap();
+
+                        for index in children!(parent_node).skip(parent_edge_index + 1) {
+                            df_copy!(local_root, index);
+                        }
+                    }
+                    Paren | Lit(_) => unreachable!(),
+                }
+
+                // Handle successive parents
+
+                for (node, edge_index) in parents_chain.into_iter() {
+                    let node_type = label!(node);
+
+                    match node_type {
+                        Kleene | Plus => {
+                            // TODO: If edge_index is 0, then just
+                            // make this a Plus node.
+
+                            local_result.push(Empty);
+                            let new_index = builder.add_vertex();
+                            builder.add_edge(new_index, local_root, ()).unwrap();
+
+                            local_root = new_index;
+
+                            let iterator = children!(node);
+
+                            for index in iterator.clone().skip(edge_index + 1) {
+                                df_copy!(local_root, index);
+                            }
+
+                            local_result.push(Kleene);
+                            let new_parent = builder.add_vertex();
+                            builder.add_edge(local_root, new_parent, ()).unwrap();
+
+                            for index in iterator {
+                                df_copy!(new_parent, index);
+                            }
+                        }
+                        RegexType::Or => {}
+                        RegexType::Optional | RegexType::Empty => {
+                            local_result.push(Empty);
+                            let new_index = builder.add_vertex();
+                            builder.add_edge(new_index, local_root, ()).unwrap();
+                            local_root = new_index;
+
+                            for index in children!(node).skip(edge_index + 1) {
+                                df_copy!(local_root, index);
+                            }
+                        }
+                        RegexType::Paren | RegexType::Lit(_) => unreachable!(),
+                    }
+                }
+
+                builder.add_edge(0, local_root, ()).unwrap();
+            }
+
+            local_result.shrink_to_fit();
+
+            let graph = builder.build();
+
+            assert_eq!(graph.nodes_len(), local_result.len());
+
+            result.push(
+                DefaultRegex::new(graph, local_result)
+                    .map_err(|_| Error::BuildFail(n, ParseError::Cycle))?,
+            );
+        }
+
+        assert_eq!(result.len(), non_len);
+
+        self.accumulators = {
+            let mut acc_result = Vec::with_capacity(result.len() + 1);
+            acc_result.push(0);
+
+            for rule in result.iter() {
+                acc_result.push(rule.len() + *acc_result.last().unwrap());
+            }
+
+            acc_result
+        };
+
+        Ok(result)
+    }
+
+    /// Convert the regular language of left-linear closures to its
+    /// equivalent nondeterministic finite automaton.
+    ///
+    /// In the generation of the left-linear closure, the terminals
+    /// and non-terminals are packed into an unsigned integer.  We
+    /// unpack them in converting to nondeterministic finite
+    /// automaton.
+    ///
+    /// The resulting nondeterministic finite automaton should be
+    /// transformed to its null-closure for use in our algorithm.
+    pub fn left_closure_to_nfa(
+        &self,
+        closure: &[DefaultRegex<TNT>],
+    ) -> Result<DefaultNFA<LabelType<TNT>>, Error> {
+        let label_transform = |tnt| match tnt {
+            TNT::Ter(t) => {
+                let new_tnt = self.unpack_tnt(t).map_err(|e| match e {
+                    Error::IndexOutOfBounds(index, bound) => {
+                        graph::error::Error::IndexOutOfBounds(index, bound)
+                    }
+                    _ => unreachable!(),
+                })?;
+
+                Ok(SoC::Carry(new_tnt))
+            }
+            TNT::Non(n) => Ok(SoC::Sub(n)),
+        };
+
+        let nfa = DefaultNFA::to_nfa(closure, label_transform, Some(TNT::Non(0)))?;
+
+        Ok(nfa)
+    }
+}
diff --git a/grammar/src/lib.rs b/grammar/src/lib.rs
index 4e544c9..627ae6f 100644
--- a/grammar/src/lib.rs
+++ b/grammar/src/lib.rs
@@ -15,12 +15,15 @@ use nfa::{
         nfa::DefaultNFA,
         regex::{DefaultRegex, ParseError, RegexType},
     },
-    DOption, Nfa, Regex, SoC,
+    LabelType, Nfa, NfaLabel, Regex, SoC, TwoEdges,
 };
 
 use graph::{adlist::ALGBuilder, builder::Builder, Graph};
 
-use std::{collections::HashSet, fmt::Display};
+use std::{
+    collections::{HashMap, HashSet},
+    fmt::Display,
+};
 
 /// The type of a terminal.
 ///
@@ -88,6 +91,9 @@ impl Display for TNT {
 #[derive(Debug, Copy, Clone)]
 #[non_exhaustive]
 pub enum Error {
+    /// The operation requires the grammar to be after a certain
+    /// state, but the grammar is not after that state yet.
+    WrongState(GrammarState, GrammarState),
     /// The first component is the index, and the second the bound.
     IndexOutOfBounds(usize, usize),
     /// Fail to build the N-th regular expression, due to the
@@ -112,6 +118,9 @@ impl Display for Error {
                 "Failed to build the {n}-th regular expression due to error: {pe}"
             ),
             Error::NFAFail(nfae) => write!(f, "failed to build NFA because of {nfae}"),
+            Error::WrongState(current, threshold) => {
+                write!(f, "require state {threshold}, but in state {current}")
+            }
         }
     }
 }
@@ -146,6 +155,36 @@ impl Rule {
     }
 }
 
+/// The state of Grammar.
+///
+/// This is used to ensure that the grammar preparation methods are
+/// called in the correct order.
+#[derive(Debug, Copy, Clone, Default)]
+pub enum GrammarState {
+    /// Just initialized
+    #[default]
+    Initial,
+    /// compute_firsts has been called
+    AfterComputeFirst,
+    /// left_closure has been called.
+    AfterLeftClosure,
+    /// left_closure_to_nfa has been called.
+    AfterNFA,
+}
+
+impl Display for GrammarState {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        use GrammarState::*;
+
+        match self {
+            Initial => write!(f, "initial"),
+            AfterComputeFirst => write!(f, "after computation of first set"),
+            AfterLeftClosure => write!(f, "after computation of closure"),
+            AfterNFA => write!(f, "after computation of NFA"),
+        }
+    }
+}
+
 /// The type of a grammar.
 #[derive(Debug, Clone, Default)]
 pub struct Grammar {
@@ -158,6 +197,8 @@ pub struct Grammar {
     /// The length of the list must match that of the list of
     /// non-terminals.
     rules: Vec<Rule>,
+    /// The list of successive sums of lengths of rules.
+    accumulators: Vec<usize>,
     // The following two attributes are empty until we call
     // `compute_firsts` on the grammar.
     /// The list of sets of "first terminals".
@@ -169,9 +210,18 @@ pub struct Grammar {
     ///
     /// The length must match that of the list of non-terminals.
     first_nodes: Vec<Vec<usize>>,
-    // The following attribute is empty until we call `left_closure`
-    // on the grammar.
-    left_closure_branches: HashSet<usize>,
+    // The following attribute is empty until we call `closure` on the
+    // NFA with `transform_label_null_epsilon` as the transformer.
+    /// A hash map that maps a tuple `(pos1, pos2)` of positions
+    /// `pos1` and `pos2` in the rules to a vector of rule positions.
+    /// This vector means that in order to expand from `pos1` to
+    /// `pos`, it is necessary to expand according to the positions in
+    /// the vector, so we need to add all these expansions into the
+    /// parse forest later.
+    expansion_map: HashMap<(usize, usize), Vec<usize>>,
+    /// The state of the grammar, which tells us what information has
+    /// been computed for this grammar.
+    state: GrammarState,
 }
 
 /// A private type to aid the recursive looping of rergular
@@ -216,7 +266,14 @@ impl Grammar {
             .map(|rule| Vec::with_capacity(rule.len()))
             .collect();
 
-        let left_closure_branches = HashSet::default();
+        let state = Default::default();
+
+        let expansion_map = Default::default();
+
+        // NOTE: We cannot calculate accumulators here, as we want the
+        // accumulators of the regular expression of the left-closure,
+        // not of the original one.
+        let accumulators = Vec::new();
 
         Self {
             ter,
@@ -224,7 +281,9 @@ impl Grammar {
             rules,
             firsts,
             first_nodes,
-            left_closure_branches,
+            state,
+            expansion_map,
+            accumulators,
         }
     }
 
@@ -258,7 +317,29 @@ impl Grammar {
     /// Return the total length of all rules.
     #[inline]
     pub fn total(&self) -> usize {
-        self.rules.iter().map(Rule::len).sum()
+        self.accumulators.last().copied().unwrap_or(0)
+    }
+
+    /// Query if a position is the starting position of a
+    /// non-terminal.  If it is, return the non-terminal, else return
+    /// `None` .
+    #[inline]
+    pub fn is_nt_start_in_nfa_p(&self, pos: usize) -> Option<usize> {
+        for (index, accumulator) in self.accumulators.iter().copied().enumerate() {
+            let shifted_accumulator = accumulator << 1;
+
+            // NOTE: Clippy suggests to call `cmp`, but it seems
+            // compiler might not yet be smart enough to inline that
+            // call, so I just silence clippy here.
+            #[allow(clippy::comparison_chain)]
+            if pos == shifted_accumulator {
+                return Some(index);
+            } else if pos < shifted_accumulator {
+                break;
+            }
+        }
+
+        None
     }
 
     /// Return the number of terminals.
@@ -353,754 +434,119 @@ impl Grammar {
             .contains(&None))
     }
 
-    /// For a NON_TERMINAL, return an iterator that goes over the
-    /// nodes that are reachable from the non-terminal through an
-    /// empty transition of the nondeterministic finite automaton.
+    /// Query the expansion information from the position `pos1` to
+    /// the position `pos2` .
     #[inline]
-    pub fn first_nodes_of(&self, non_terminal: usize) -> Result<std::slice::Iter<usize>, Error> {
-        Ok(self
-            .first_nodes
-            .get(non_terminal)
-            .ok_or(Error::IndexOutOfBounds(non_terminal, self.non.len()))?
-            .iter())
-    }
-
-    /// Return a hash set that contains all nodes in the set of
-    /// left-closure regular languages that are added because of the
-    /// left-linear expansion.
-    pub fn left_closure_branches(&self) -> &HashSet<usize> {
-        &self.left_closure_branches
-    }
-
-    /// Compute the set of terminals that can appear as the first
-    /// terminal in some left-linear derivation of a non-terminal, for
-    /// every non-terminal.
-    ///
-    /// This is an algorithm that computes the transitive closure,
-    /// which is a common approach for this task.  But perhaps there
-    /// are more efficient approaches?
-    ///
-    /// Also the function computes the set of "reachable nodes" in the
-    /// process, and records the information in the `first_nodes`
-    /// attribute.
-    pub fn compute_firsts(&mut self) -> Result<(), Error> {
-        let mut updated = true;
-
-        let non_len = self.non_num();
-
-        use StackElement::{Seen, Unseen};
-
-        while updated {
-            updated = false;
-
-            for (n, regex) in self.rules.iter().map(|rule| &rule.regex).enumerate() {
-                let root = if let Some(root) = regex.root() {
-                    root
-                } else {
-                    if !self.is_nullable(n)? {
-                        updated = true;
-
-                        self.firsts.get_mut(n).unwrap().insert(None);
-
-                        // The default construction of a grammar
-                        // reserves some space for each vector, so
-                        // explicitly setting this can reduce some
-                        // minor memory overhead.
-                        let pointer = self.first_nodes.get_mut(n).unwrap();
-
-                        pointer.clear();
-                        pointer.shrink_to_fit();
-                    }
-
-                    continue;
-                };
-
-                let regex_len = regex.len();
-
-                let mut stack: Vec<StackElement> = Vec::with_capacity(regex_len);
-
-                stack.push(Unseen(root));
-
-                let mut children_sets_stack: Vec<HashSet<Option<usize>>> =
-                    Vec::with_capacity(regex_len);
-
-                let mut children_nodes_stack: Vec<HashSet<usize>> = Vec::with_capacity(regex_len);
-
-                while let Some(top) = stack.pop() {
-                    let top_index = top.index();
-                    let is_seen = top.is_seen();
-
-                    match regex
-                        .vertex_label(top_index)
-                        .map_err(|_| Error::IndexOutOfBounds(top_index, regex_len))?
-                    {
-                        RegexType::Kleene => {
-                            if !is_seen {
-                                stack.push(Seen(top_index));
-
-                                for child in regex.children_of(top_index).unwrap() {
-                                    stack.push(Unseen(child));
-                                }
-                            } else {
-                                let degree = regex.degree(top_index).unwrap();
-                                let children_stack_len = children_sets_stack.len();
-                                let children_nodes_len = children_nodes_stack.len();
-
-                                assert!(
-                                    children_stack_len >= degree,
-                                    "not enough stack elements for {top_index}"
-                                );
-
-                                assert!(
-                                    children_nodes_len >= degree,
-                                    "not enough stack elements for {top_index}"
-                                );
-
-                                let mut this_set = HashSet::new();
-
-                                this_set.insert(None);
-
-                                let mut this_nodes = HashSet::new();
-
-                                this_nodes.insert(top_index);
-
-                                if degree == 0 {
-                                    children_sets_stack.push(this_set);
-                                    children_nodes_stack.push(this_nodes);
-
-                                    continue;
-                                }
-
-                                let mut stop = false;
-
-                                for (child_set, child_nodes) in children_sets_stack
-                                    .drain((children_stack_len - degree)..)
-                                    .zip(
-                                        children_nodes_stack.drain((children_nodes_len - degree)..),
-                                    )
-                                {
-                                    if stop {
-                                        break;
-                                    }
-
-                                    if !child_set.contains(&None) {
-                                        stop = true;
-                                    }
-
-                                    this_set.extend(child_set);
-                                    this_nodes.extend(child_nodes);
-                                }
-
-                                children_sets_stack.push(this_set);
-                                children_nodes_stack.push(this_nodes);
-                            }
-                        }
-                        RegexType::Plus => {
-                            if !is_seen {
-                                stack.push(Seen(top_index));
-
-                                for child in regex.children_of(top_index).unwrap() {
-                                    stack.push(Unseen(child));
-                                }
-                            } else {
-                                let degree = regex.degree(top_index).unwrap();
-                                let children_stack_len = children_sets_stack.len();
-                                let children_nodes_len = children_nodes_stack.len();
-
-                                assert!(
-                                    children_stack_len >= degree,
-                                    "not enough stack elements for {top_index}"
-                                );
-
-                                assert!(
-                                    children_nodes_len >= degree,
-                                    "not enough stack elements for {top_index}"
-                                );
-
-                                let mut this_set = HashSet::new();
-
-                                let mut this_nodes = HashSet::new();
-
-                                this_nodes.insert(top_index);
-
-                                if degree == 0 {
-                                    this_set.insert(None);
-
-                                    children_sets_stack.push(this_set);
-                                    children_nodes_stack.push(this_nodes);
-
-                                    continue;
-                                }
-
-                                let mut stop = false;
-
-                                for (child_set, child_nodes) in children_sets_stack
-                                    .drain((children_stack_len - degree)..)
-                                    .zip(
-                                        children_nodes_stack.drain((children_nodes_len - degree)..),
-                                    )
-                                {
-                                    if stop {
-                                        break;
-                                    }
-
-                                    if !child_set.contains(&None) {
-                                        stop = true;
-                                    }
-
-                                    this_set.extend(child_set);
-                                    this_nodes.extend(child_nodes);
-                                }
-
-                                if stop {
-                                    this_set.remove(&None);
-                                }
-
-                                children_sets_stack.push(this_set);
-                                children_nodes_stack.push(this_nodes);
-                            }
-                        }
-                        RegexType::Optional => {
-                            if !is_seen {
-                                stack.push(Seen(top_index));
-
-                                for child in regex.children_of(top_index).unwrap() {
-                                    stack.push(Unseen(child));
-                                }
-                            } else {
-                                let degree = regex.degree(top_index).unwrap();
-                                let children_stack_len = children_sets_stack.len();
-                                let children_nodes_len = children_nodes_stack.len();
-
-                                assert!(
-                                    children_stack_len >= degree,
-                                    "not enough stack elements for {top_index}"
-                                );
-
-                                assert!(
-                                    children_nodes_len >= degree,
-                                    "not enough stack elements for {top_index}"
-                                );
-
-                                let mut this_set = HashSet::new();
-
-                                this_set.insert(None);
-
-                                let mut this_nodes = HashSet::new();
-
-                                this_nodes.insert(top_index);
-
-                                if degree == 0 {
-                                    children_sets_stack.push(this_set);
-                                    children_nodes_stack.push(this_nodes);
-
-                                    continue;
-                                }
-
-                                let mut stop = false;
-
-                                for (child_set, child_nodes) in children_sets_stack
-                                    .drain((children_stack_len - degree)..)
-                                    .zip(
-                                        children_nodes_stack.drain((children_nodes_len - degree)..),
-                                    )
-                                {
-                                    if stop {
-                                        break;
-                                    }
-
-                                    if !child_set.contains(&None) {
-                                        stop = true;
-                                    }
-
-                                    this_set.extend(child_set.iter().copied());
-                                    this_nodes.extend(child_nodes.iter().copied());
-                                }
-
-                                children_sets_stack.push(this_set);
-                                children_nodes_stack.push(this_nodes);
-                            }
-                        }
-                        RegexType::Or => {
-                            if !is_seen {
-                                stack.push(Seen(top_index));
-
-                                for child in regex.children_of(top_index).unwrap() {
-                                    stack.push(Unseen(child));
-                                }
-                            } else {
-                                let degree = regex.degree(top_index).unwrap();
-                                let children_stack_len = children_sets_stack.len();
-                                let children_nodes_len = children_nodes_stack.len();
-
-                                assert!(
-                                    children_stack_len >= degree,
-                                    "not enough stack elements for {top_index}"
-                                );
-
-                                assert!(
-                                    children_nodes_len >= degree,
-                                    "not enough stack elements for {top_index}"
-                                );
-
-                                let mut this_set = HashSet::new();
-
-                                let mut this_nodes = HashSet::new();
-
-                                this_nodes.insert(top_index);
-
-                                if degree == 0 {
-                                    this_set.insert(None);
-
-                                    children_sets_stack.push(this_set);
-                                    children_nodes_stack.push(this_nodes);
-
-                                    continue;
-                                }
-
-                                for (child_set, child_nodes) in children_sets_stack
-                                    .drain((children_stack_len - degree)..)
-                                    .zip(
-                                        children_nodes_stack.drain((children_nodes_len - degree)..),
-                                    )
-                                {
-                                    this_set.extend(child_set.iter().copied());
-                                    this_nodes.extend(child_nodes.iter().copied());
-                                }
-
-                                children_sets_stack.push(this_set);
-                                children_nodes_stack.push(this_nodes);
-                            }
-                        }
-                        RegexType::Paren => {
-                            // Only for printing purposes
-                            let mut this_set = HashSet::new();
-
-                            this_set.insert(None);
-
-                            children_sets_stack.push(this_set);
-
-                            let mut this_nodes = HashSet::new();
-
-                            this_nodes.insert(top_index);
-
-                            children_nodes_stack.push(this_nodes);
-                        }
-                        RegexType::Empty => {
-                            if !is_seen {
-                                stack.push(Seen(top_index));
-
-                                for child in regex.children_of(top_index).unwrap().rev() {
-                                    stack.push(Unseen(child));
-                                }
-                            } else {
-                                let degree = regex.degree(top_index).unwrap();
-                                let children_stack_len = children_sets_stack.len();
-                                let children_nodes_len = children_nodes_stack.len();
-
-                                assert!(
-                                    children_stack_len >= degree,
-                                    "not enough stack elements for {top_index}"
-                                );
-
-                                assert!(
-                                    children_nodes_len >= degree,
-                                    "not enough stack elements for {top_index}"
-                                );
-
-                                let mut this_set = HashSet::new();
-
-                                let mut this_nodes = HashSet::new();
-
-                                this_nodes.insert(top_index);
-
-                                if degree == 0 {
-                                    this_set.insert(None);
-
-                                    children_sets_stack.push(this_set);
-                                    children_nodes_stack.push(this_nodes);
-
-                                    continue;
-                                }
-
-                                let mut stop = false;
-
-                                for (child_set, child_nodes) in children_sets_stack
-                                    .drain((children_stack_len - degree)..)
-                                    .zip(
-                                        children_nodes_stack.drain((children_nodes_len - degree)..),
-                                    )
-                                {
-                                    if stop {
-                                        break;
-                                    }
-
-                                    if !child_set.contains(&None) {
-                                        stop = true;
-                                    }
-
-                                    this_set.extend(child_set.iter().copied());
-                                    this_nodes.extend(child_nodes.iter().copied());
-                                }
-
-                                if stop {
-                                    this_set.remove(&None);
-                                }
-
-                                children_sets_stack.push(this_set);
-                                children_nodes_stack.push(this_nodes);
-                            }
-                        }
-                        RegexType::Lit(tnt) => {
-                            match tnt {
-                                TNT::Ter(t) => {
-                                    let mut this_set = HashSet::with_capacity(1);
-
-                                    this_set.insert(Some(t));
-
-                                    children_sets_stack.push(this_set);
-                                }
-                                TNT::Non(non) => {
-                                    let this_set = self
-                                        .firsts
-                                        .get(non)
-                                        .ok_or(Error::IndexOutOfBounds(non, non_len))?
-                                        .clone();
-
-                                    children_sets_stack.push(this_set);
-                                }
-                            }
-
-                            let mut this_nodes = HashSet::with_capacity(1);
-                            this_nodes.insert(top_index);
-
-                            children_nodes_stack.push(this_nodes);
-                        }
-                    }
-                }
-
-                assert_eq!(
-                    children_sets_stack.len(),
-                    1,
-                    "Too many elements left at the end"
-                );
-
-                assert_eq!(
-                    children_nodes_stack.len(),
-                    1,
-                    "Too many elements left at the end"
-                );
-
-                for first in children_sets_stack.pop().unwrap().into_iter() {
-                    if !self.firsts.get(n).unwrap().contains(&first) {
-                        updated = true;
+    pub fn query_expansion(&self, pos1: usize, pos2: usize) -> Result<Option<&[usize]>, Error> {
+        if pos1 >= self.total() {
+            return Err(Error::IndexOutOfBounds(pos1, self.total()));
+        }
 
-                        self.firsts.get_mut(n).unwrap().insert(first);
-                    }
-                }
+        if pos2 >= self.total() {
+            return Err(Error::IndexOutOfBounds(pos2, self.total()));
+        }
 
-                *self.first_nodes.get_mut(n).unwrap() =
-                    children_nodes_stack.pop().unwrap().into_iter().collect();
+        match self.state {
+            GrammarState::AfterLeftClosure => {}
+            _ => {
+                return Err(Error::WrongState(
+                    self.state,
+                    GrammarState::AfterLeftClosure,
+                ));
             }
         }
 
-        Ok(())
+        Ok(self.expansion_map.get(&(pos1, pos2)).map(|v| &v[..]))
     }
 
-    /// Return the regular language of the left-linear closures of
-    /// non-terminals in the grammar.
-    ///
-    /// The resulting vector is guaranteed to be of the same length as
-    /// the number of non-terminals.
-    ///
-    /// The resulting regular language is not "self-contained".  That
-    /// is to say, its terminals indices are packed indices and are
-    /// meaningless without the interpretation of the grammar.  They
-    /// should be converted to a nondeterministic finite automaton and
-    /// then to its null closure later on.
-    pub fn left_closure(&self) -> Result<Vec<DefaultRegex<TNT>>, Error> {
-        let non_len = self.non_num();
-
-        let mut result = Vec::with_capacity(non_len);
-
-        for (n, rule) in self.rules.iter().enumerate() {
-            let regex = &rule.regex;
-
-            let regex_root = if let Some(root) = regex.root() {
-                root
-            } else {
-                result.push(Default::default());
-
-                continue;
-            };
-
-            let regex_len = regex.len();
-
-            /// A convenient macro to retrieve the children from the
-            /// original regular expression with error propagation.
-            macro_rules! children {
-                ($n:expr) => {
-                    regex
-                        .children_of($n)
-                        .map_err(|_| Error::IndexOutOfBounds($n, regex_len))?
-                };
-            }
-
-            /// A convenient macro to retrieve the label from the
-            /// original regular expression with error propagation.
-            macro_rules! label {
-                ($n:expr) => {
-                    regex
-                        .vertex_label($n)
-                        .map_err(|_| Error::IndexOutOfBounds($n, regex_len))?
-                };
-            }
-
-            let parents = regex.parents_array().map_err(|e| match e {
-                nfa::error::Error::UnknownNode(n) => Error::IndexOutOfBounds(n, regex_len),
-                nfa::error::Error::Cycle => Error::BuildFail(n, ParseError::Cycle),
-                _ => unreachable!(),
-            })?;
-
-            use RegexType::*;
-            use TNT::*;
-
-            let mut local_result: Vec<RegexType<TNT>> = Vec::with_capacity(regex_len * 2);
-            let mut builder = ALGBuilder::default();
-
-            /// Perform a depth-first copy
-            macro_rules! df_copy {
-                ($parent:expr, $index:expr) => {
-                    match label!($index) {
-                        Kleene | Plus | Optional | Or | Paren | Empty => {
-                            let mut stack = vec![($parent, $index)];
-
-                            while let Some((top_parent, top_index)) = stack.pop() {
-                                let label = label!(top_index);
-                                let label = match label {
-                                    Lit(top_tnt) => Lit(Ter(self.pack_tnt(top_tnt).unwrap())),
-                                    _ => label,
-                                };
-
-                                local_result.push(label);
-
-                                let new = builder.add_vertex();
-
-                                builder.add_edge(top_parent, new, ()).unwrap();
-
-                                stack.extend(children!(top_index).map(|child| (new, child)));
-                            }
-                        }
-                        Lit(remain_tnt) => {
-                            local_result.push(Lit(Ter(self.pack_tnt(remain_tnt).unwrap())));
-                            let new = builder.add_vertex();
-                            builder.add_edge($parent, new, ()).unwrap();
-                        }
-                    }
-                };
-            }
-
-            local_result.push(Or);
-            builder.add_vertex();
-
-            local_result.push(Lit(Ter(self.pack_tnt(Non(n)).unwrap())));
-            let non_lit_index = builder.add_vertex();
-
-            builder.add_edge(0, non_lit_index, ()).unwrap();
-
-            // If this non-terminal is nullable, add an empty variant.
-            if self.is_nullable(n)? {
-                local_result.push(Empty);
-                let empty_index = builder.add_vertex();
-                builder.add_edge(0, empty_index, ()).unwrap();
+    // REVIEW: Do we have a better way to record expansion information
+    // than to compute the transitive closure?
+
+    /// A transformer of labels to be fed into
+    /// [`closure`][nfa::default::nfa::DefaultNFA::closure], with the
+    /// predicate that returns true if and only if the label of the
+    /// first edge is either empty or a nullable non-terminal.
+    pub fn transform_label_null_epsilon(
+        &mut self,
+        two_edges: TwoEdges<LabelType<TNT>>,
+    ) -> LabelType<TNT> {
+        let (first_source, first_target, first_label) = two_edges.first_edge();
+        let (second_source, second_target, second_label) = two_edges.second_edge();
+
+        #[cfg(debug_assertions)]
+        {
+            assert_eq!(first_target, second_source);
+
+            if let Some(tnt) = *first_label.get_value() {
+                assert!(matches!(tnt, TNT::Non(n) if matches!(self.is_nullable(n), Ok(true))));
             }
+        }
 
-            for first_node in self.first_nodes_of(n)?.copied() {
-                assert!(first_node < parents.len());
-
-                let tnt = match label!(first_node) {
-                    Lit(tnt) => Lit(tnt),
-                    _ => continue,
-                };
-
-                let mut parents_chain = {
-                    let mut result = Vec::new();
-                    let mut stack = Vec::with_capacity(parents.len());
-
-                    stack.push(first_node);
-
-                    while let Some(top) = stack.pop() {
-                        assert!(top < parents.len());
-                        if let Some(parent) = parents.get(top).copied().unwrap() {
-                            result.push(parent);
-                            stack.push(parent.0);
-                        }
-                    }
+        // Compute if this is from left-linear expansion: it is so if
+        // and only if one if either the edges comes from left-linear
+        // expansion or we are moving across a non-terminal expansion,
+        // that is to say, the source of the second edge is the
+        // starting edge of a non-terminal.
 
-                    result.reverse();
+        let mut left_p = first_label.get_left_p() || second_label.get_left_p();
 
-                    result
-                };
+        // Record left-linear expansion information.
 
-                if parents_chain.is_empty() {
-                    local_result.push(tnt);
-                    let lit_index = builder.add_vertex();
-                    builder.add_edge(0, lit_index, ()).unwrap();
+        if let Some(second_nt) = self.is_nt_start_in_nfa_p(second_source) {
+            left_p = true;
 
-                    continue;
-                }
+            if !self
+                .expansion_map
+                .contains_key(&(first_source, second_target))
+            {
+                let original_expansion = self.expansion_map.get(&(second_source, second_target));
 
-                assert_eq!(parents_chain.first().unwrap().0, regex_root);
-
-                // A different, "more local", root.
-                let mut local_root: usize;
-
-                // Handle the direct parent
-                let (parent_node, parent_edge_index) = parents_chain.pop().unwrap();
-
-                match label!(parent_node) {
-                    Kleene | Plus => {
-                        // TODO: If parent_edge_index is 0, make a
-                        // Plus node instead.
-                        local_result.extend([Empty, tnt]);
-
-                        local_root = builder.add_vertex();
-                        let lit_index = builder.add_vertex();
-                        builder.add_edge(local_root, lit_index, ()).unwrap();
-
-                        let iterator = children!(parent_node);
-
-                        for index in iterator.clone().skip(parent_edge_index + 1) {
-                            df_copy!(local_root, index);
-                        }
-
-                        local_result.push(Kleene);
-                        let new_parent = builder.add_vertex();
-                        builder.add_edge(local_root, new_parent, ()).unwrap();
-
-                        for index in iterator {
-                            df_copy!(new_parent, index);
-                        }
-                    }
-
-                    Or => {
-                        local_result.push(tnt);
-                        local_root = builder.add_vertex();
-                    }
-                    Optional | Empty => {
-                        // If this path is taken, it should not be
-                        // optional.
-                        local_result.extend([Empty, tnt]);
-                        local_root = builder.add_vertex();
-                        let lit_index = builder.add_vertex();
-                        builder.add_edge(local_root, lit_index, ()).unwrap();
-
-                        for index in children!(parent_node).skip(parent_edge_index + 1) {
-                            df_copy!(local_root, index);
-                        }
-                    }
-                    Paren | Lit(_) => unreachable!(),
-                }
+                self.expansion_map.insert(
+                    (first_source, second_target),
+                    if let Some(original_expansion) = original_expansion {
+                        let mut result = original_expansion.clone();
+                        result.push(second_nt);
 
-                // Handle successive parents
-
-                for (node, edge_index) in parents_chain.into_iter() {
-                    let node_type = label!(node);
-
-                    match node_type {
-                        Kleene | Plus => {
-                            // TODO: If edge_index is 0, then just
-                            // make this a Plus node.
-
-                            local_result.push(Empty);
-                            let new_index = builder.add_vertex();
-                            builder.add_edge(new_index, local_root, ()).unwrap();
-
-                            local_root = new_index;
-
-                            let iterator = children!(node);
-
-                            for index in iterator.clone().skip(edge_index + 1) {
-                                df_copy!(local_root, index);
-                            }
-
-                            local_result.push(Kleene);
-                            let new_parent = builder.add_vertex();
-                            builder.add_edge(local_root, new_parent, ()).unwrap();
-
-                            for index in iterator {
-                                df_copy!(new_parent, index);
-                            }
-                        }
-                        RegexType::Or => {}
-                        RegexType::Optional | RegexType::Empty => {
-                            local_result.push(Empty);
-                            let new_index = builder.add_vertex();
-                            builder.add_edge(new_index, local_root, ()).unwrap();
-                            local_root = new_index;
-
-                            for index in children!(node).skip(edge_index + 1) {
-                                df_copy!(local_root, index);
-                            }
-                        }
-                        RegexType::Paren | RegexType::Lit(_) => unreachable!(),
-                    }
-                }
-
-                builder.add_edge(0, local_root, ()).unwrap();
+                        result
+                    } else {
+                        vec![second_nt]
+                    },
+                );
             }
-
-            local_result.shrink_to_fit();
-
-            let graph = builder.build();
-
-            assert_eq!(graph.nodes_len(), local_result.len());
-
-            result.push(
-                DefaultRegex::new(graph, local_result)
-                    .map_err(|_| Error::BuildFail(n, ParseError::Cycle))?,
-            );
         }
 
-        assert_eq!(result.len(), non_len);
-
-        Ok(result)
+        NfaLabel::new(second_label.get_value(), second_label.get_moved(), left_p)
     }
 
-    /// Convert the regular language of left-linear closures to its
-    /// equivalent nondeterministic finite automaton.
-    ///
-    /// In the generation of the left-linear closure, the terminals
-    /// and non-terminals are packed into an unsigned integer.  We
-    /// unpack them in converting to nondeterministic finite
-    /// automaton.
-    ///
-    /// The resulting nondeterministic finite automaton should be
-    /// transformed to its null-closure for use in our algorithm.
-    pub fn left_closure_to_nfa(
-        &self,
-        closure: &[DefaultRegex<TNT>],
-    ) -> Result<DefaultNFA<DOption<TNT>>, Error> {
-        let label_transform = |tnt| match tnt {
-            TNT::Ter(t) => {
-                let new_tnt = self.unpack_tnt(t).map_err(|e| match e {
-                    Error::IndexOutOfBounds(index, bound) => {
-                        graph::error::Error::IndexOutOfBounds(index, bound)
-                    }
-                    _ => unreachable!(),
-                })?;
-
-                Ok(SoC::Carry(new_tnt))
+    /// For a NON_TERMINAL, return an iterator that goes over the
+    /// nodes that are reachable from the non-terminal through an
+    /// empty transition of the nondeterministic finite automaton.
+    #[inline]
+    pub fn first_nodes_of(&self, non_terminal: usize) -> Result<std::slice::Iter<usize>, Error> {
+        match self.state {
+            GrammarState::Initial => {
+                return Err(Error::WrongState(
+                    self.state,
+                    GrammarState::AfterComputeFirst,
+                ));
             }
-            TNT::Non(n) => Ok(SoC::Sub(n)),
-        };
-
-        let nfa = DefaultNFA::to_nfa(closure, label_transform, Some(TNT::Non(0)))?;
+            GrammarState::AfterComputeFirst
+            | GrammarState::AfterLeftClosure
+            | GrammarState::AfterNFA => {}
+        }
 
-        Ok(nfa)
+        Ok(self
+            .first_nodes
+            .get(non_terminal)
+            .ok_or(Error::IndexOutOfBounds(non_terminal, self.non.len()))?
+            .iter())
     }
 }
 
+pub mod first_set;
+
+pub mod left_closure;
+
 impl Display for Grammar {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         assert_eq!(self.non.len(), self.rules.len());
diff --git a/grammar/src/test_grammar_helper.rs b/grammar/src/test_grammar_helper.rs
index c236952..89f9844 100644
--- a/grammar/src/test_grammar_helper.rs
+++ b/grammar/src/test_grammar_helper.rs
@@ -275,7 +275,6 @@ pub fn new_paren_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
 }
 
 /// Return a left recursive grammar.
-#[allow(dead_code)]
 pub fn new_left_recursive_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
     let ter = vec![Terminal::new("B".to_owned()), Terminal::new("C".to_owned())];
     let non = vec![
diff --git a/grammar/src/tests/test_grammar_left_closure.rs b/grammar/src/tests/test_grammar_left_closure.rs
index 0bc9f4d..003c211 100644
--- a/grammar/src/tests/test_grammar_left_closure.rs
+++ b/grammar/src/tests/test_grammar_left_closure.rs
@@ -1,5 +1,6 @@
 use crate::test_grammar_helper::*;
 use crate::*;
+use graph::LabelGraph;
 use nfa::Nfa;
 use std::{
     collections::HashSet,
@@ -124,7 +125,7 @@ fn test_remove_epsilon() -> Result<(), Box<dyn std::error::Error>> {
 
     nfa.print_viz("nfa_orig.gv")?;
 
-    nfa.remove_epsilon(|label| label.is_none())?;
+    nfa.remove_epsilon(|label| label.get_value().is_none())?;
 
     nfa.print_viz("nfa_no_epsilon.gv")?;
 
@@ -174,7 +175,7 @@ fn test_remove_dead() -> Result<(), Box<dyn std::error::Error>> {
 
     nfa.print_viz("nfa_orig.gv")?;
 
-    nfa.remove_epsilon(|label| label.is_none())?;
+    nfa.remove_epsilon(|label| label.get_value().is_none())?;
 
     let accumulators: HashSet<usize> = accumulators.into_iter().collect();
 
@@ -192,7 +193,8 @@ fn test_remove_dead() -> Result<(), Box<dyn std::error::Error>> {
 #[test]
 #[ignore]
 fn test_nulling() -> Result<(), Box<dyn std::error::Error>> {
-    let mut grammar = new_left_recursive_grammar()?;
+    // TODO: Test more grammars.
+    let mut grammar = new_right_recursive_grammar()?;
     let closure = new_closure_regex(&mut grammar)?;
 
     let mut lock = stdout().lock();
@@ -244,24 +246,31 @@ fn test_nulling() -> Result<(), Box<dyn std::error::Error>> {
 
     let mut nfa = grammar.left_closure_to_nfa(&closure)?;
 
-    nfa.nulling(|label| {
-        if let Some(label) = *label {
-            match label {
-                TNT::Ter(_) => false,
-                // Panics if a non-terminal references an invalid node
-                // here.
-                TNT::Non(n) => grammar.is_nullable(n).unwrap(),
-            }
-        } else {
-            true
-        }
-    })?;
-
     let grammar_reserve_nodes = |node| accumulators.contains(&node);
 
     writeln!(lock, "accumulators are {accumulators:?}")?;
 
-    nfa.remove_epsilon(|label| label.is_none())?;
+    let nullables: HashSet<usize> = (0..grammar.non_num())
+        .filter(|n| matches!(grammar.is_nullable(*n), Ok(true)))
+        .collect();
+
+    nfa.closure(
+        |label| {
+            if let Some(label) = *label.get_value() {
+                matches!(label, TNT::Non(n) if nullables.contains(&n))
+            } else {
+                true
+            }
+        },
+        true,
+        |two_edges| grammar.transform_label_null_epsilon(two_edges),
+        |label| label.get_value().is_none(),
+    )?;
+
+    for (label, child_iter) in nfa.labels_of(18)? {
+        writeln!(lock, "{label}: {child_iter:?}")?;
+    }
+
     nfa.remove_dead(grammar_reserve_nodes)?;
 
     writeln!(lock, "Printing nfa to nfa.gv")?;
author	JSDurand <mmemmew@gmail.com>	2023-01-11 23:47:26 +0800
committer	JSDurand <mmemmew@gmail.com>	2023-01-11 23:47:26 +0800
commit	1a3d346f413325ed37848a6b2526e8e729269833 (patch)
tree	ab8812f8094d096c68aee53cf516e986cc9a273a /grammar/src
parent	f27d604d93ce583d4404e1874664e08382ea2f00 (diff)