#![warn(missing_docs)]
//! This file implements the expected behaviours of grammars.

// NOTE: We shall first start with a parser that works at the level of
// characters.  The purpose is to first experiment with the workings
// and the performance of the algorithms, before optimising by using
// regular expressions to classify inputs into tokens.  In other
// words, the current focus is not on the optimisations, whereas
// scanners are for optimisations only, so to speak.

#![allow(unused_imports)]

use nfa::{
    default::{
        nfa::DefaultNFA,
        regex::{DefaultRegParser, DefaultRegex, ParseDirection, ParseError, RegexType},
    },
    DOption, DesRec, Nfa, Regex, SoC,
};

use graph::{adlist::ALGBuilder, builder::Builder, Graph};

use std::{
    collections::HashSet,
    fmt::{Display, Write},
};

/// The type of a terminal.
///
/// For the time being this is a wrapper around a string, but in the
/// future it may hold more information about scanners.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Terminal {
    // If we want to use scanners, perhaps add them as a new field
    // here.
    name: String,
}

impl Terminal {
    /// Create a terminal with the given name.
    #[inline]
    pub fn new(name: String) -> Self {
        Self { name }
    }

    /// Return the name of the terminal.
    #[inline]
    pub fn name(&self) -> &str {
        &self.name
    }
}

/// The type of a non-terminal.
///
/// This is just a wrapper around a string.
#[derive(Debug, Clone)]
pub struct Nonterminal(String);

impl Nonterminal {
    /// Return the name of the nonterminal.
    ///
    /// Just to improve readability.
    #[inline]
    pub fn name(&self) -> &str {
        &self.0
    }
}

/// The type of a terminal or a non-terminal.
///
/// Only an index is stored here.  The actual data are stored in two
/// other arrays.
#[derive(Debug, Hash, Eq, PartialEq, Clone, Copy, Ord, PartialOrd)]
pub enum TNT {
    /// Terminal variant
    Ter(usize),
    /// Nonterminal variant
    Non(usize),
}

impl Display for TNT {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            TNT::Ter(t) => write!(f, "T({t})"),
            TNT::Non(n) => write!(f, "N({n})"),
        }
    }
}

/// Errors related to grammar operations.
#[derive(Debug, Copy, Clone)]
#[non_exhaustive]
pub enum Error {
    /// The first component is the index, and the second the bound.
    IndexOutOfBounds(usize, usize),
    /// Failed to build the N-th regular expression, due to the
    /// `ParseError`.
    BuildFail(usize, ParseError),
    /// Failed to build the NFA.
    NFAFail(nfa::error::Error),
}

impl From<nfa::error::Error> for Error {
    fn from(nfae: nfa::error::Error) -> Self {
        Self::NFAFail(nfae)
    }
}

impl Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Error::IndexOutOfBounds(i, b) => write!(f, "index {i} out of bound {b}"),
            Error::BuildFail(n, pe) => write!(
                f,
                "failed to build the {n}-th regular expression due to error: {pe}"
            ),
            Error::NFAFail(nfae) => write!(f, "failed to build NFA because of {nfae}"),
        }
    }
}

impl std::error::Error for Error {}

/// A rule is a regular expression of terminals or non-terminals.
#[derive(Debug, Clone)]
pub struct Rule {
    regex: DefaultRegex<TNT>,
}

impl Rule {
    /// Return true if and only if the rule is empty.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.regex.is_empty()
    }

    /// Return the length of the rule.
    #[inline]
    pub fn len(&self) -> usize {
        self.regex.len()
    }
}

/// The type of a grammar.
#[derive(Debug, Clone, Default)]
pub struct Grammar {
    // The list of terminals.
    ter: Vec<Terminal>,
    // The list of non-terminals.
    non: Vec<Nonterminal>,
    // The list of rules, one per non-terminal.
    rules: Vec<Rule>,
    // The computed FIRST sets; `None` records that the non-terminal
    // is nullable.
    firsts: Vec<HashSet<Option<usize>>>,
    // For each non-terminal, the nodes of its rule that are reachable
    // from the root through an empty transition of the
    // nondeterministic finite automaton.
    first_nodes: Vec<Vec<usize>>,
}
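// An illustrative sketch of the index-based representation described
// above: a `TNT` prints only its index, while the names are resolved
// through the wrapper types stored in the grammar's arrays.
#[cfg(test)]
mod test_tnt_representation {
    use super::*;

    #[test]
    fn display_shows_indices_only() {
        assert_eq!(format!("{}", TNT::Ter(0)), "T(0)");
        assert_eq!(format!("{}", TNT::Non(1)), "N(1)");

        // The names live in the wrapper types instead.
        assert_eq!(Terminal::new("a".to_owned()).name(), "a");
        assert_eq!(Nonterminal("start".to_owned()).name(), "start");
    }
}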
/// A private type to aid the recursive looping of regular
/// expressions.
#[derive(Copy, Clone)]
enum StackElement {
    Seen(usize),
    Unseen(usize),
}

impl StackElement {
    fn index(self) -> usize {
        match self {
            Self::Seen(index) => index,
            Self::Unseen(index) => index,
        }
    }

    fn is_seen(self) -> bool {
        matches!(self, Self::Seen(_))
    }
}
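// A small sketch of the convention used by `compute_firsts` below: a
// node is first pushed as `Unseen`, in order to schedule its
// children, and is pushed back as `Seen`, so that it is processed
// again after all of its children have been handled.
#[cfg(test)]
mod test_stack_element {
    use super::StackElement::{Seen, Unseen};

    #[test]
    fn seen_and_unseen_share_the_index() {
        assert_eq!(Seen(3).index(), 3);
        assert_eq!(Unseen(3).index(), 3);
        assert!(Seen(0).is_seen());
        assert!(!Unseen(0).is_seen());
    }
}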
impl Grammar {
    /// Construct a grammar from a vector of terminals, a vector of
    /// non-terminals, and a vector of rules for the non-terminals.
    ///
    /// # Panics
    ///
    /// The function panics if the length of `non` is not equal to
    /// that of `rules`.
    pub fn new(ter: Vec<Terminal>, non: Vec<Nonterminal>, rules: Vec<Rule>) -> Self {
        assert_eq!(non.len(), rules.len());

        // One more room is reserved for the `None` value.
        let firsts = std::iter::repeat_with(|| HashSet::with_capacity(ter.len() + 1))
            .take(non.len())
            .collect();

        let first_nodes = rules
            .iter()
            .map(|rule| Vec::with_capacity(rule.len()))
            .collect();

        Self {
            ter,
            non,
            rules,
            firsts,
            first_nodes,
        }
    }

    /// Return the name of a terminal or a non-terminal.
    pub fn name_of_tnt(&self, tnt: TNT) -> Result<String, Error> {
        match tnt {
            TNT::Ter(t) => Ok(format!(
                "T{}",
                self.ter
                    .get(t)
                    .ok_or(Error::IndexOutOfBounds(t, self.ter.len()))?
                    .name()
            )),
            TNT::Non(n) => Ok(format!(
                "N{}",
                self.non
                    .get(n)
                    .ok_or(Error::IndexOutOfBounds(n, self.non.len()))?
                    .name()
            )),
        }
    }

    /// Return true if and only if there are no non-terminals in the
    /// grammar.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.non.is_empty()
    }

    /// Return the total length of all rules.
    #[inline]
    pub fn total(&self) -> usize {
        self.rules.iter().map(Rule::len).sum()
    }

    /// Return the number of terminals.
    #[inline]
    pub fn ter_num(&self) -> usize {
        self.ter.len()
    }

    /// Return the number of non-terminals.
    #[inline]
    pub fn non_num(&self) -> usize {
        self.non.len()
    }

    /// Convert a non-terminal `N` to `N + TER_NUM`, so that we use a
    /// single number to represent terminals and non-terminals.
    ///
    /// # Bounds
    ///
    /// If a terminal index is greater than or equal to the number of
    /// terminals, then this signals an error; mutatis mutandis for
    /// non-terminals.
    ///
    /// # Related
    ///
    /// The inverse function is [`unpack_tnt`][Grammar::unpack_tnt].
    #[inline]
    pub fn pack_tnt(&self, tnt: TNT) -> Result<usize, Error> {
        let ter_num = self.ter.len();
        let non_num = self.non.len();

        match tnt {
            TNT::Ter(t) => {
                if t >= ter_num {
                    Err(Error::IndexOutOfBounds(t, ter_num))
                } else {
                    Ok(t)
                }
            }
            TNT::Non(n) => {
                if n >= non_num {
                    Err(Error::IndexOutOfBounds(n, non_num))
                } else {
                    Ok(n + ter_num)
                }
            }
        }
    }

    /// Convert a single number to either a terminal or a
    /// non-terminal.
    ///
    /// # Bounds
    ///
    /// If the number is greater than or equal to the sum of the
    /// numbers of terminals and of non-terminals, then this signals
    /// an error.
    ///
    /// # Related
    ///
    /// This is the inverse of [`pack_tnt`][Grammar::pack_tnt].
    #[inline]
    pub fn unpack_tnt(&self, flat: usize) -> Result<TNT, Error> {
        let ter_num = self.ter.len();
        let non_num = self.non.len();

        if flat < ter_num {
            Ok(TNT::Ter(flat))
        } else if flat < ter_num + non_num {
            Ok(TNT::Non(flat - ter_num))
        } else {
            Err(Error::IndexOutOfBounds(flat, ter_num + non_num))
        }
    }

    /// Return true if and only if the non-terminal is nullable.
    #[inline]
    pub fn is_nullable(&self, non_terminal: usize) -> Result<bool, Error> {
        Ok(self
            .firsts
            .get(non_terminal)
            .ok_or(Error::IndexOutOfBounds(non_terminal, self.non.len()))?
            .contains(&None))
    }

    /// For a NON_TERMINAL, return an iterator that goes over the
    /// nodes that are reachable from the non-terminal through an
    /// empty transition of the nondeterministic finite automaton.
    #[inline]
    pub fn first_nodes_of(
        &self,
        non_terminal: usize,
    ) -> Result<std::slice::Iter<'_, usize>, Error> {
        Ok(self
            .first_nodes
            .get(non_terminal)
            .ok_or(Error::IndexOutOfBounds(non_terminal, self.non.len()))?
            .iter())
    }

    /// Compute, for every non-terminal, the set of terminals that can
    /// appear as the first terminal in some left-linear derivation of
    /// that non-terminal.
    ///
    /// This is a fixed-point iteration that computes the transitive
    /// closure, which is a common approach for this task.  But
    /// perhaps there are more efficient approaches?
    ///
    /// The function also computes the set of "reachable nodes" in the
    /// process, and records the information in the `first_nodes`
    /// attribute.
    pub fn compute_firsts(&mut self) -> Result<(), Error> {
        let mut updated = true;

        let non_len = self.non_num();

        use StackElement::{Seen, Unseen};

        while updated {
            updated = false;

            for (n, regex) in self.rules.iter().map(|rule| &rule.regex).enumerate() {
                let root = if let Some(root) = regex.root() {
                    root
                } else {
                    if !self.is_nullable(n)? {
                        updated = true;

                        self.firsts.get_mut(n).unwrap().insert(None);

                        // The default construction of a grammar
                        // reserves some space for each vector, so
                        // explicitly setting this can reduce some
                        // minor memory overhead.
                        let pointer = self.first_nodes.get_mut(n).unwrap();
                        pointer.clear();
                        pointer.shrink_to_fit();
                    }

                    continue;
                };

                let regex_len = regex.len();

                let mut stack: Vec<StackElement> = Vec::with_capacity(regex_len);

                stack.push(Unseen(root));

                let mut children_sets_stack: Vec<HashSet<Option<usize>>> =
                    Vec::with_capacity(regex_len);

                let mut children_nodes_stack: Vec<HashSet<usize>> =
                    Vec::with_capacity(regex_len);

                while let Some(top) = stack.pop() {
                    let top_index = top.index();
                    let is_seen = top.is_seen();

                    match regex
                        .vertex_label(top_index)
                        .map_err(|_| Error::IndexOutOfBounds(top_index, regex_len))?
                    {
                        RegexType::Kleene => {
                            if !is_seen {
                                stack.push(Seen(top_index));

                                for child in regex.children_of(top_index).unwrap() {
                                    stack.push(Unseen(child));
                                }
                            } else {
                                let degree = regex.degree(top_index).unwrap();
                                let children_stack_len = children_sets_stack.len();
                                let children_nodes_len = children_nodes_stack.len();

                                assert!(
                                    children_stack_len >= degree,
                                    "not enough stack elements for {top_index}"
                                );

                                assert!(
                                    children_nodes_len >= degree,
                                    "not enough stack elements for {top_index}"
                                );

                                let mut this_set = HashSet::new();

                                this_set.insert(None);

                                let mut this_nodes = HashSet::new();

                                this_nodes.insert(top_index);

                                if degree == 0 {
                                    children_sets_stack.push(this_set);
                                    children_nodes_stack.push(this_nodes);
                                    continue;
                                }

                                let mut stop = false;

                                for (child_set, child_nodes) in children_sets_stack
                                    .drain((children_stack_len - degree)..)
                                    .zip(
                                        children_nodes_stack
                                            .drain((children_nodes_len - degree)..),
                                    )
                                {
                                    if stop {
                                        break;
                                    }

                                    if !child_set.contains(&None) {
                                        stop = true;
                                    }

                                    this_set.extend(child_set);
                                    this_nodes.extend(child_nodes);
                                }

                                children_sets_stack.push(this_set);
                                children_nodes_stack.push(this_nodes);
                            }
                        }
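                        // The `Plus` arm mirrors the `Kleene` arm,
                        // with one difference: a `Kleene` node can
                        // always match the empty string, so `None` is
                        // inserted unconditionally above, whereas a
                        // `Plus` node is nullable only if its chain
                        // of children is, so `None` is removed again
                        // when the merge stops at a non-nullable
                        // child.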
                        RegexType::Plus => {
                            if !is_seen {
                                stack.push(Seen(top_index));

                                for child in regex.children_of(top_index).unwrap() {
                                    stack.push(Unseen(child));
                                }
                            } else {
                                let degree = regex.degree(top_index).unwrap();
                                let children_stack_len = children_sets_stack.len();
                                let children_nodes_len = children_nodes_stack.len();

                                assert!(
                                    children_stack_len >= degree,
                                    "not enough stack elements for {top_index}"
                                );

                                assert!(
                                    children_nodes_len >= degree,
                                    "not enough stack elements for {top_index}"
                                );

                                let mut this_set = HashSet::new();
                                let mut this_nodes = HashSet::new();

                                this_nodes.insert(top_index);

                                if degree == 0 {
                                    this_set.insert(None);
                                    children_sets_stack.push(this_set);
                                    children_nodes_stack.push(this_nodes);
                                    continue;
                                }

                                let mut stop = false;

                                for (child_set, child_nodes) in children_sets_stack
                                    .drain((children_stack_len - degree)..)
                                    .zip(
                                        children_nodes_stack
                                            .drain((children_nodes_len - degree)..),
                                    )
                                {
                                    if stop {
                                        break;
                                    }

                                    if !child_set.contains(&None) {
                                        stop = true;
                                    }

                                    this_set.extend(child_set);
                                    this_nodes.extend(child_nodes);
                                }

                                if stop {
                                    this_set.remove(&None);
                                }

                                children_sets_stack.push(this_set);
                                children_nodes_stack.push(this_nodes);
                            }
                        }
                        RegexType::Optional => {
                            if !is_seen {
                                stack.push(Seen(top_index));

                                for child in regex.children_of(top_index).unwrap() {
                                    stack.push(Unseen(child));
                                }
                            } else {
                                let degree = regex.degree(top_index).unwrap();
                                let children_stack_len = children_sets_stack.len();
                                let children_nodes_len = children_nodes_stack.len();

                                assert!(
                                    children_stack_len >= degree,
                                    "not enough stack elements for {top_index}"
                                );

                                assert!(
                                    children_nodes_len >= degree,
                                    "not enough stack elements for {top_index}"
                                );

                                let mut this_set = HashSet::new();

                                this_set.insert(None);

                                let mut this_nodes = HashSet::new();

                                this_nodes.insert(top_index);

                                if degree == 0 {
                                    children_sets_stack.push(this_set);
                                    children_nodes_stack.push(this_nodes);
                                    continue;
                                }

                                let mut stop = false;

                                for (child_set, child_nodes) in children_sets_stack
                                    .drain((children_stack_len - degree)..)
                                    .zip(
                                        children_nodes_stack
                                            .drain((children_nodes_len - degree)..),
                                    )
                                {
                                    if stop {
                                        break;
                                    }

                                    if !child_set.contains(&None) {
                                        stop = true;
                                    }

                                    this_set.extend(child_set.iter().copied());
                                    this_nodes.extend(child_nodes.iter().copied());
                                }

                                children_sets_stack.push(this_set);
                                children_nodes_stack.push(this_nodes);
                            }
                        }
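                        // Unlike the sequential merges above, the
                        // `Or` arm below simply takes the union of
                        // the children's sets: any branch of an
                        // alternation can begin the derivation, so
                        // there is no early stop at a non-nullable
                        // child.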
                        RegexType::Or => {
                            if !is_seen {
                                stack.push(Seen(top_index));

                                for child in regex.children_of(top_index).unwrap() {
                                    stack.push(Unseen(child));
                                }
                            } else {
                                let degree = regex.degree(top_index).unwrap();
                                let children_stack_len = children_sets_stack.len();
                                let children_nodes_len = children_nodes_stack.len();

                                assert!(
                                    children_stack_len >= degree,
                                    "not enough stack elements for {top_index}"
                                );

                                assert!(
                                    children_nodes_len >= degree,
                                    "not enough stack elements for {top_index}"
                                );

                                let mut this_set = HashSet::new();
                                let mut this_nodes = HashSet::new();

                                this_nodes.insert(top_index);

                                if degree == 0 {
                                    this_set.insert(None);
                                    children_sets_stack.push(this_set);
                                    children_nodes_stack.push(this_nodes);
                                    continue;
                                }

                                for (child_set, child_nodes) in children_sets_stack
                                    .drain((children_stack_len - degree)..)
                                    .zip(
                                        children_nodes_stack
                                            .drain((children_nodes_len - degree)..),
                                    )
                                {
                                    this_set.extend(child_set.iter().copied());
                                    this_nodes.extend(child_nodes.iter().copied());
                                }

                                children_sets_stack.push(this_set);
                                children_nodes_stack.push(this_nodes);
                            }
                        }
                        RegexType::Paren => {
                            // Only for printing purposes.
                            let mut this_set = HashSet::new();

                            this_set.insert(None);

                            children_sets_stack.push(this_set);

                            let mut this_nodes = HashSet::new();

                            this_nodes.insert(top_index);

                            children_nodes_stack.push(this_nodes);
                        }
                        RegexType::Empty => {
                            if !is_seen {
                                stack.push(Seen(top_index));

                                // Push the children in reverse order,
                                // so that they are popped in the
                                // original order.
                                for child in regex.children_of(top_index).unwrap().rev() {
                                    stack.push(Unseen(child));
                                }
                            } else {
                                let degree = regex.degree(top_index).unwrap();
                                let children_stack_len = children_sets_stack.len();
                                let children_nodes_len = children_nodes_stack.len();

                                assert!(
                                    children_stack_len >= degree,
                                    "not enough stack elements for {top_index}"
                                );

                                assert!(
                                    children_nodes_len >= degree,
                                    "not enough stack elements for {top_index}"
                                );

                                let mut this_set = HashSet::new();
                                let mut this_nodes = HashSet::new();

                                this_nodes.insert(top_index);

                                if degree == 0 {
                                    this_set.insert(None);
                                    children_sets_stack.push(this_set);
                                    children_nodes_stack.push(this_nodes);
                                    continue;
                                }

                                let mut stop = false;

                                for (child_set, child_nodes) in children_sets_stack
                                    .drain((children_stack_len - degree)..)
                                    .zip(
                                        children_nodes_stack
                                            .drain((children_nodes_len - degree)..),
                                    )
                                {
                                    if stop {
                                        break;
                                    }

                                    if !child_set.contains(&None) {
                                        stop = true;
                                    }

                                    this_set.extend(child_set.iter().copied());
                                    this_nodes.extend(child_nodes.iter().copied());
                                }

                                if stop {
                                    this_set.remove(&None);
                                }

                                children_sets_stack.push(this_set);
                                children_nodes_stack.push(this_nodes);
                            }
                        }
                        RegexType::Lit(tnt) => {
                            match tnt {
                                TNT::Ter(t) => {
                                    let mut this_set = HashSet::with_capacity(1);

                                    this_set.insert(Some(t));

                                    children_sets_stack.push(this_set);
                                }
                                TNT::Non(non) => {
                                    let this_set = self
                                        .firsts
                                        .get(non)
                                        .ok_or(Error::IndexOutOfBounds(non, non_len))?
                                        .clone();

                                    children_sets_stack.push(this_set);
                                }
                            }

                            let mut this_nodes = HashSet::with_capacity(1);

                            this_nodes.insert(top_index);

                            children_nodes_stack.push(this_nodes);
                        }
                    }
                }

                assert_eq!(
                    children_sets_stack.len(),
                    1,
                    "Too many elements left at the end"
                );

                assert_eq!(
                    children_nodes_stack.len(),
                    1,
                    "Too many elements left at the end"
                );

                for first in children_sets_stack.pop().unwrap().into_iter() {
                    if !self.firsts.get(n).unwrap().contains(&first) {
                        updated = true;

                        self.firsts.get_mut(n).unwrap().insert(first);
                    }
                }

                *self.first_nodes.get_mut(n).unwrap() =
                    children_nodes_stack.pop().unwrap().into_iter().collect();
            }
        }

        Ok(())
    }
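    // As an illustration, matching the example grammar in the tests
    // below, for the rules
    //
    //     start: Ta*Tb+Nend+
    //     end:   Nstart?Nend*
    //
    // the iteration stabilises with FIRST(start) = {a, b}: the `Ta*`
    // prefix may be skipped but `Tb+` may not, so `start` is not
    // nullable.  For `end`, both parts are nullable, so FIRST(end) =
    // {a, b, None}, where `None` records nullability.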
    /// Return the regular language of the left-linear closures of
    /// non-terminals in the grammar.
    ///
    /// The resulting vector is guaranteed to be of the same length as
    /// the number of non-terminals.
    ///
    /// The resulting regular language is not "self-contained".  That
    /// is to say, its terminal indices are packed indices, and are
    /// meaningless without the interpretation of the grammar.  They
    /// should be converted to a nondeterministic finite automaton and
    /// then to its null closure later on.
    pub fn left_closure(&self) -> Result<Vec<DefaultRegex<TNT>>, Error> {
        let non_len = self.non_num();

        let mut result = Vec::with_capacity(non_len);

        for (n, rule) in self.rules.iter().enumerate() {
            let regex = &rule.regex;

            let regex_root = if let Some(root) = regex.root() {
                root
            } else {
                result.push(Default::default());
                continue;
            };

            let regex_len = regex.len();

            /// A convenient macro to retrieve the children from the
            /// original regular expression with error propagation.
            macro_rules! children {
                ($n:expr) => {
                    regex
                        .children_of($n)
                        .map_err(|_| Error::IndexOutOfBounds($n, regex_len))?
                };
            }

            /// A convenient macro to retrieve the label from the
            /// original regular expression with error propagation.
            macro_rules! label {
                ($n:expr) => {
                    regex
                        .vertex_label($n)
                        .map_err(|_| Error::IndexOutOfBounds($n, regex_len))?
                };
            }

            let parents = regex.parents_array().map_err(|e| match e {
                nfa::error::Error::UnknownNode(n) => Error::IndexOutOfBounds(n, regex_len),
                nfa::error::Error::Cycle => Error::BuildFail(n, ParseError::Cycle),
                _ => unreachable!(),
            })?;

            use RegexType::*;
            use TNT::*;

            let mut local_result: Vec<RegexType<TNT>> = Vec::with_capacity(regex_len * 2);

            let mut builder = ALGBuilder::default();

            /// Perform a depth-first copy.
            macro_rules! df_copy {
                ($parent:expr, $index:expr) => {
                    match label!($index) {
                        Kleene | Plus | Optional | Or | Paren | Empty => {
                            let mut stack = vec![($parent, $index)];

                            while let Some((top_parent, top_index)) = stack.pop() {
                                let label = label!(top_index);
                                let label = match label {
                                    Lit(top_tnt) => Lit(Ter(self.pack_tnt(top_tnt).unwrap())),
                                    _ => label,
                                };

                                local_result.push(label);

                                let new = builder.add_vertex();

                                builder.add_edge(top_parent, new, ()).unwrap();

                                stack.extend(children!(top_index).map(|child| (new, child)));
                            }
                        }
                        Lit(remain_tnt) => {
                            local_result.push(Lit(Ter(self.pack_tnt(remain_tnt).unwrap())));

                            let new = builder.add_vertex();

                            builder.add_edge($parent, new, ()).unwrap();
                        }
                    }
                };
            }

            local_result.push(Or);
            builder.add_vertex();

            local_result.push(Lit(Ter(self.pack_tnt(Non(n)).unwrap())));

            let non_lit_index = builder.add_vertex();

            builder.add_edge(0, non_lit_index, ()).unwrap();

            for first_node in self.first_nodes_of(n)?.copied() {
                assert!(first_node < parents.len());

                let tnt = match label!(first_node) {
                    Lit(tnt) => tnt,
                    _ => continue,
                };

                let mut parents_chain = {
                    let mut result = Vec::new();
                    let mut stack = Vec::with_capacity(parents.len());

                    stack.push(first_node);

                    while let Some(top) = stack.pop() {
                        assert!(top < parents.len());

                        if let Some(parent) = parents.get(top).copied().unwrap() {
                            result.push(parent);
                            stack.push(parent.0);
                        }
                    }

                    result.reverse();

                    result
                };

                if parents_chain.is_empty() {
                    local_result.push(Lit(tnt));

                    let lit_index = builder.add_vertex();

                    builder.add_edge(0, lit_index, ()).unwrap();

                    continue;
                }

                assert_eq!(parents_chain.first().unwrap().0, regex_root);

                // A different, "more local", root.
                let mut root: usize;

                // Handle the direct parent.
                let (parent_node, parent_edge_index) = parents_chain.pop().unwrap();

                match label!(parent_node) {
                    Kleene | Plus => {
                        local_result.extend([Empty, Lit(tnt)]);

                        root = builder.add_vertex();

                        let lit_index = builder.add_vertex();

                        builder.add_edge(root, lit_index, ()).unwrap();

                        let iterator = children!(parent_node);

                        for index in iterator.clone().skip(parent_edge_index + 1) {
                            df_copy!(root, index);
                        }

                        local_result.push(Kleene);

                        let new_parent = builder.add_vertex();

                        builder.add_edge(root, new_parent, ()).unwrap();

                        for index in iterator {
                            df_copy!(new_parent, index);
                        }
                    }
                    Or => {
                        local_result.push(Lit(tnt));

                        root = builder.add_vertex();
                    }
                    Optional | Empty => {
                        // If this path is taken, the node should not
                        // be optional.
                        local_result.extend([Empty, Lit(tnt)]);

                        root = builder.add_vertex();

                        let lit_index = builder.add_vertex();

                        builder.add_edge(root, lit_index, ()).unwrap();

                        for index in children!(parent_node).skip(parent_edge_index + 1) {
                            df_copy!(root, index);
                        }
                    }
                    Paren | Lit(_) => unreachable!(),
                }
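                // Each remaining ancestor in the chain contributes
                // the siblings to the right of the path just taken;
                // for a repetition node the whole construct may
                // repeat afterwards, hence the extra `Kleene` copy
                // below.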
                // Handle successive parents.
                for (node, edge_index) in parents_chain.into_iter() {
                    let node_type = label!(node);

                    match node_type {
                        Kleene | Plus => {
                            local_result.push(Empty);

                            let new_index = builder.add_vertex();

                            builder.add_edge(new_index, root, ()).unwrap();

                            root = new_index;

                            let iterator = children!(node);

                            for index in iterator.clone().skip(edge_index + 1) {
                                df_copy!(root, index);
                            }

                            local_result.push(Kleene);

                            let new_parent = builder.add_vertex();

                            builder.add_edge(root, new_parent, ()).unwrap();

                            for index in iterator {
                                df_copy!(new_parent, index);
                            }
                        }
                        RegexType::Or => {}
                        RegexType::Optional | RegexType::Empty => {
                            local_result.push(Empty);

                            let new_index = builder.add_vertex();

                            builder.add_edge(new_index, root, ()).unwrap();

                            root = new_index;

                            for index in children!(node).skip(edge_index + 1) {
                                df_copy!(root, index);
                            }
                        }
                        RegexType::Paren | RegexType::Lit(_) => unreachable!(),
                    }
                }

                builder.add_edge(0, root, ()).unwrap();
            }

            local_result.shrink_to_fit();

            let graph = builder.build();

            assert_eq!(graph.nodes_len(), local_result.len());

            result.push(
                DefaultRegex::new(graph, local_result)
                    .map_err(|_| Error::BuildFail(n, ParseError::Cycle))?,
            );
        }

        assert_eq!(result.len(), non_len);

        Ok(result)
    }

    /// Convert the regular language of left-linear closures to its
    /// equivalent nondeterministic finite automaton.
    ///
    /// In the generation of the left-linear closure, the terminals
    /// and non-terminals are packed into an unsigned integer.  We
    /// unpack them when converting to the nondeterministic finite
    /// automaton.
    ///
    /// The resulting nondeterministic finite automaton should be
    /// transformed to its null closure for use in our algorithm.
    pub fn left_closure_to_nfa(
        &self,
        closure: &[DefaultRegex<TNT>],
    ) -> Result<DefaultNFA<DOption<TNT>>, Error> {
        let label_transform = |tnt| match tnt {
            TNT::Ter(t) => {
                let new_tnt = self.unpack_tnt(t).map_err(|e| match e {
                    Error::IndexOutOfBounds(index, bound) => {
                        graph::error::Error::IndexOutOfBounds(index, bound)
                    }
                    _ => unreachable!(),
                })?;

                Ok(SoC::Carry(new_tnt))
            }
            TNT::Non(n) => Ok(SoC::Sub(n)),
        };

        DefaultNFA::to_nfa(closure, label_transform).map_err(Into::into)
    }
}

impl Display for Grammar {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        assert_eq!(self.non.len(), self.rules.len());

        for (nt, rule) in self.non.iter().zip(self.rules.iter()) {
            write!(f, "{}: ", nt.name())?;

            writeln!(
                f,
                "{}",
                rule.regex.to_string_with(|tnt| format!(
                    "({})",
                    self.name_of_tnt(tnt)
                        .unwrap_or_else(|_| format!("Unknown {tnt:?}"))
                ))?
            )?;
        }

        Ok(())
    }
}
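// A minimal sketch of the packing scheme used throughout this file.
// It relies on `DefaultRegex` implementing `Default` (an empty
// rule), just as `left_closure` above does.
#[cfg(test)]
mod test_pack_unpack {
    use super::*;

    #[test]
    fn pack_and_unpack_roundtrip() -> Result<(), Error> {
        let grammar = Grammar::new(
            vec![Terminal::new("a".to_owned())],
            vec![Nonterminal("s".to_owned())],
            vec![Rule {
                regex: Default::default(),
            }],
        );

        // With one terminal, the non-terminal 0 packs to 0 + 1 = 1.
        assert_eq!(grammar.pack_tnt(TNT::Ter(0))?, 0);
        assert_eq!(grammar.pack_tnt(TNT::Non(0))?, 1);

        // Unpacking inverts the packing.
        assert_eq!(grammar.unpack_tnt(0)?, TNT::Ter(0));
        assert_eq!(grammar.unpack_tnt(1)?, TNT::Non(0));

        // Indices at or above ter_num + non_num are rejected.
        assert!(grammar.unpack_tnt(2).is_err());

        Ok(())
    }
}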
#[cfg(test)]
mod test_grammar_helper {
    use super::*;
    use nfa::default::regex::{DefaultRegParser, ParseDirection, ParseError, RegexType};
    use std::fmt::Write;

    // Construct a grammar to test.
    pub fn new_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
        let ter = vec![Terminal::new("a".to_owned()), Terminal::new("b".to_owned())];
        let non = vec![
            Nonterminal("start".to_owned()),
            Nonterminal("end".to_owned()),
        ];

        /// A function to scan the inputs.
        fn scan_tnt(
            parser: &DefaultRegParser<TNT>,
            input: &str,
        ) -> Result<Option<(usize, RegexType<TNT>, ParseDirection)>, ParseError> {
            use ParseDirection::*;
            use RegexType::*;
            use TNT::*;

            let mut chars = input.chars();

            if let Some(first) = chars.next() {
                match first {
                    '*' => Ok(Some((1, Kleene, Right))),
                    '+' => Ok(Some((1, Plus, Right))),
                    '?' => Ok(Some((1, Optional, Right))),
                    '|' => Ok(Some((1, Empty, Up))),
                    '(' => Ok(Some((1, Or, Down))),
                    ')' => Ok(Some((1, Paren, Up))),
                    'T' => {
                        let mut name = String::new();
                        let mut len = 1;

                        while let Some(c) = chars.next() {
                            if ('a'..='z').contains(&c) {
                                len += 1;
                                write!(name, "{c}")
                                    .map_err(|_| ParseError::InvalidCharacter(c))?;
                            } else {
                                break;
                            }
                        }

                        if let Some(t) = parser.query(&name, true) {
                            Ok(Some((len, Lit(Ter(t)), Right)))
                        } else {
                            Err(ParseError::InvalidCharacter(first))
                        }
                    }
                    'N' => {
                        let mut name = String::new();
                        let mut len = 1;

                        while let Some(c) = chars.next() {
                            if ('a'..='z').contains(&c) {
                                len += 1;
                                write!(name, "{c}")
                                    .map_err(|_| ParseError::InvalidCharacter(c))?;
                            } else {
                                break;
                            }
                        }

                        if let Some(n) = parser.query(&name, false) {
                            Ok(Some((len, Lit(Non(n)), Right)))
                        } else {
                            Err(ParseError::InvalidCharacter(first))
                        }
                    }
                    _ => Err(ParseError::InvalidCharacter(first)),
                }
            } else {
                Ok(None)
            }
        }

        let mut regex_parser: DefaultRegParser<TNT> = Default::default();

        regex_parser.add_tnt("a", true);
        regex_parser.add_tnt("b", true);
        regex_parser.add_tnt("start", false);
        regex_parser.add_tnt("end", false);

        let regex_parser = regex_parser;

        let rule1 = Rule {
            regex: regex_parser
                .parse("Ta*Tb+Nend+", Box::new(scan_tnt), true)?
                .ok_or(ParseError::Invalid)?
                .0,
        };

        let rule2 = Rule {
            regex: regex_parser
                .parse("Nstart?Nend*", Box::new(scan_tnt), true)?
                .ok_or(ParseError::Invalid)?
                .0,
        };

        let rules = vec![rule1, rule2];

        Ok(Grammar::new(ter, non, rules))
    }
}

#[cfg(test)]
mod test_grammar_display {
    use super::test_grammar_helper::new_grammar;

    #[test]
    fn test_display() -> Result<(), Box<dyn std::error::Error>> {
        println!("{}", new_grammar()?);

        Ok(())
    }
}

#[cfg(test)]
mod test_grammar_firsts {
    use super::test_grammar_helper::new_grammar;
    use super::*;

    #[test]
    fn test_firsts() -> Result<(), Box<dyn std::error::Error>> {
        let mut grammar = new_grammar()?;

        grammar.compute_firsts()?;

        println!("grammar firsts: {:?}", grammar.firsts);
        println!("grammar first nodes: {:?}", grammar.first_nodes);

        Ok(())
    }
}

#[cfg(test)]
mod test_grammar_left_closure {
    use super::test_grammar_helper::new_grammar;
    use super::*;

    pub fn new_closure_regex(
        grammar: &mut Grammar,
    ) -> Result<Vec<DefaultRegex<TNT>>, Box<dyn std::error::Error>> {
        grammar.compute_firsts()?;

        println!("grammar firsts: {:?}", grammar.firsts);
        println!("grammar first nodes: {:?}", grammar.first_nodes);
        println!("grammar:");
        println!("{grammar}");

        grammar.left_closure().map_err(Into::into)
    }

    #[test]
    fn test_regex() -> Result<(), Box<dyn std::error::Error>> {
        let mut grammar = new_grammar()?;
        let vec_of_regexps = new_closure_regex(&mut grammar)?;

        for regex in &vec_of_regexps {
            println!("regex: {}", regex.to_string_with(|tnt| format!("{tnt}"))?);
            // println!("regex: {regex:?}",);
            println!("regex len = {}", regex.nodes_len());
        }

        Ok(())
    }

    #[test]
    fn test_nfa() -> Result<(), Box<dyn std::error::Error>> {
        let mut grammar = new_grammar()?;
        let closure = new_closure_regex(&mut grammar)?;
        let _nfa = grammar.left_closure_to_nfa(&closure)?;

        // TODO: Print the NFA out.

        Ok(())
    }
}
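// A small check of nullability on the example grammar above: `start`
// must derive at least one `b` (the `Tb+` part), while `end` can
// derive the empty string.  This sketch depends on the shape of
// `new_grammar` staying as it is.
#[cfg(test)]
mod test_grammar_nullable {
    use super::test_grammar_helper::new_grammar;

    #[test]
    fn test_nullable() -> Result<(), Box<dyn std::error::Error>> {
        let mut grammar = new_grammar()?;

        grammar.compute_firsts()?;

        // Index 0 is `start`, index 1 is `end`.
        assert!(!grammar.is_nullable(0)?);
        assert!(grammar.is_nullable(1)?);

        Ok(())
    }
}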