Finished the Emacs binding.

Now the binding part is finished. What remains is a bug encountered when planting a fragment to the forest which intersects a packed node, which would lead to invalid forests. This will also cause problem when planting a packed fragment, but until now my testing grammars do not produce packed fragments, so this problem is not encountered yet. I am still figuring out efficient ways to solve this problem.
author: JSDurand <mmemmew@gmail.com> 2023-07-08 12:30:21 +0800
committer: JSDurand <mmemmew@gmail.com> 2023-07-08 12:31:13 +0800
commit: 9a317e56f8a6126583f7d0c431bf878d9b1fe7b1 (patch)
tree: 7bb6004196b38446a5ab0cb3a0ab642d35f113e9 /grammar
parent: 691f969eb104fa3d4c2a1667693fd0382eb9d6b5 (diff)
5 files changed, 2952 insertions, 39 deletions
diff --git a/grammar/src/abnf.rs b/grammar/src/abnf.rs
deleted file mode 100644
index eb56819..0000000
--- a/grammar/src/abnf.rs
+++ /dev/null
@@ -1,37 +0,0 @@
-//! This file implements the function to read a grammar from a string,
-//! in the [augmented Backus Naur
-//! form](https://en.wikipedia.org/wiki/Augmented_Backus–Naur_form).
-//! See [RFC5234](https://www.rfc-editor.org/rfc/rfc5234) for its
-//! specifications.
-//!
-//! In fact, the rules of ABNF are considered to be too strict.  For
-//! example, only ASCII characters are allowed in the format, even
-//! within comments.  I think any character should be allowed within
-//! comments, so the implementation here does that.
-
-use super::*;
-
-/// The type of errors encountered during parsing a grammar in the
-/// ABNF format.  See the function [`abnf_to_grammar`] for the parsing
-/// function.
-#[derive(Debug, Clone, Copy)]
-pub enum Error {
-    /// Something invalid has occurred.
-    Invalid,
-}
-
-impl core::fmt::Display for Error {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
-        match self {
-            Error::Invalid => write!(f, "invalid"),
-        }
-    }
-}
-
-#[allow(unused)]
-/// This function converts a string to a grammar.
-pub fn abnf_to_grammar(str: impl AsRef<str>) -> Result<Grammar, Error> {
-    let str = str.as_ref();
-
-    todo!()
-}
diff --git a/grammar/src/abnf/boolean_fns.rs b/grammar/src/abnf/boolean_fns.rs
new file mode 100644
index 0000000..08f317c
--- /dev/null
+++ b/grammar/src/abnf/boolean_fns.rs
@@ -0,0 +1,42 @@
+//! This file collects some functions that return a boolean value from
+//! a character.
+
+pub(super) fn is_wsp(c: char) -> bool {
+    c == ' ' || c == '\t'
+}
+
+pub(crate) fn is_v_char(c: char) -> bool {
+    ('!'..='~').contains(&c)
+}
+
+pub(super) fn is_wsp_or_v_char(c: char) -> bool {
+    is_wsp(c) || is_v_char(c)
+}
+
+pub(super) fn is_not_newline(c: char) -> bool {
+    c != '\n' && c != '\r'
+}
+
+pub(super) fn is_alpha(c: char) -> bool {
+    c.is_ascii_alphabetic()
+}
+
+pub(super) fn is_decimal_digit(c: char) -> bool {
+    c.is_ascii_digit()
+}
+
+pub(super) fn is_binary_digit(c: char) -> bool {
+    c == '0' || c == '1'
+}
+
+pub(super) fn is_hexadecimal_digit(c: char) -> bool {
+    is_decimal_digit(c) || ('A'..='F').contains(&c) || ('a'..='f').contains(&c)
+}
+
+pub(super) fn is_alpha_or_digit_or_hyphen(c: char) -> bool {
+    is_alpha(c) || is_decimal_digit(c) || c == '-'
+}
+
+pub(super) fn is_char_value(c: char) -> bool {
+    (' '..='~').contains(&c) && c != '"'
+}
diff --git a/grammar/src/abnf/mod.rs b/grammar/src/abnf/mod.rs
new file mode 100644
index 0000000..f825bef
--- /dev/null
+++ b/grammar/src/abnf/mod.rs
@@ -0,0 +1,2536 @@
+//! This file implements the function to read a grammar from a string,
+//! in the [augmented Backus Naur
+//! form](https://en.wikipedia.org/wiki/Augmented_Backus–Naur_form).
+//! See [RFC5234](https://www.rfc-editor.org/rfc/rfc5234) for its
+//! specifications.
+//!
+//! In fact, the rules of ABNF are considered to be too strict.  For
+//! example, only ASCII characters are allowed in the format, even
+//! within comments.  I think any character should be allowed within
+//! comments, so the implementation here does that.
+
+#![allow(dead_code)]
+
+use super::*;
+
+/// The type of errors encountered during parsing a grammar in the
+/// ABNF format.
+#[derive(Debug)]
+#[non_exhaustive]
+pub enum Error {
+    /// A rule name without rule body.
+    HangingRuleName(usize),
+    /// The minimum is greater than maximum?
+    MinGreaterThanMax(usize),
+    /// A digit is expected, but is not encountered.
+    InvalidDigit(usize),
+    /// A character that is not allowed.
+    InvalidCharacter(usize),
+    /// A numeric value is not completed.
+    HangingNumeric(usize),
+    /// A repetition is hanging.
+    HangingRepetition(usize),
+    /// A double quote is hanging.
+    HangingDQuote(usize),
+    /// A percentage sign is hanging.
+    HangingPercent(usize),
+    /// An equal sign is hanging.
+    HangingEqualsSign(usize),
+    /// An operation is unsupported.
+    UnsupportedOperation,
+    /// Encounters an unknown node when constructing the regular
+    /// expression.
+    RegexUnknownNode(usize),
+    /// An index is out of bounds when constructing the regular
+    /// expression.
+    RegexIndexOutOfBounds(usize, usize),
+    /// A node is duplicated when constructing the regular expression.
+    RegexDuplicatedNode(usize),
+    /// An edge is duplicated when constructing the regular
+    /// expression.
+    RegexDuplicatedEdge(usize, usize),
+    /// The constructed regular expression graph has no root.
+    RegexNoRoot,
+    /// The constructed regular expression graph has cycles.
+    RegexCycle,
+    /// The stack illegally becomes empty when constructing a regular
+    /// expression.
+    RegexEmptyStack(usize),
+    /// The action stack illegally becomes empty when constructing a
+    /// regular expression.
+    RegexEmptyActionStack(usize),
+    /// An unbalanced parenthesis is encountered when constructing a
+    /// regular expression.
+    RegexUnbalancedParen(usize),
+    /// An invalid repetition specification is encountered when
+    /// constructing a regular expression.
+    RegexInvalidRepeat(usize),
+    /// An invalid comment character is encountered.
+    RegexInvalidComment(usize),
+    /// An invalid numeric value is encoutnered.
+    RegexInvalidNumeric(usize),
+    /// A rule should have an equal sign.
+    MissingEqualSign(usize),
+    /// An unimplemented feature is required.
+    Unimplemented(usize),
+    /// Input-Output error.
+    IOError(std::io::Error),
+    /// The grammar is empty.
+    EmptyGrammar,
+    /// Found an appending rule associated to a non-existent rule.
+    AppendToNothing(usize),
+    /// Found a rule declaration associated to an existing rule.
+    CreateAgain(usize),
+    /// This terminal is not found.
+    UnknownTerminal(String),
+    /// This non-terminal is not found.
+    UnknownNonterminal(String),
+    /// Something invalid has occurred.
+    Invalid,
+}
+
+impl std::error::Error for Error {}
+
+impl From<std::io::Error> for Error {
+    fn from(e: std::io::Error) -> Self {
+        Self::IOError(e)
+    }
+}
+
+impl From<graph::error::Error> for Error {
+    fn from(value: graph::error::Error) -> Self {
+        match value {
+            graph::error::Error::IndexOutOfBounds(index, bound) => {
+                Error::RegexIndexOutOfBounds(index, bound)
+            }
+            graph::error::Error::DuplicatedNode(n) => Error::RegexDuplicatedNode(n),
+            graph::error::Error::DuplicatedEdge(from, to) => Error::RegexDuplicatedEdge(from, to),
+            _ => Error::Invalid,
+        }
+    }
+}
+
+impl std::fmt::Display for Error {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Error::HangingRuleName(pos) => write!(f, "a rule name without body at {pos}"),
+            Error::MinGreaterThanMax(pos) => write!(
+                f,
+                "minimum should not be strictly greater than the maximum at {pos}."
+            ),
+            Error::InvalidDigit(pos) => {
+                write!(f, "a digit is expected at {pos}, but is not found.")
+            }
+            Error::InvalidCharacter(pos) => write!(f, "the character at {pos} is not allowed"),
+            Error::HangingNumeric(pos) => write!(f, "a numeric value is hanging at {pos}"),
+            Error::HangingRepetition(pos) => write!(f, "a repetition is hanging at {pos}"),
+            Error::HangingDQuote(pos) => write!(f, "a double quote is hanging at {pos}"),
+            Error::HangingPercent(pos) => write!(f, "a percentage sign is hanging at {pos}"),
+            Error::HangingEqualsSign(pos) => write!(f, "an equal sign is hanging at {pos}"),
+            Error::UnsupportedOperation => write!(f, "an unsupported operation is encountered"),
+            Error::RegexUnknownNode(n) => write!(f, "an unknown node {n} is encountered \
+                                                     when constructing the regular expression."),
+            Error::RegexIndexOutOfBounds(index, bound) => write!(f, "an index {index} is out of \
+                                                                     bound {bound} when constructing the regular expression."),
+            Error::RegexDuplicatedNode(n) => write!(f, "a node {n} is duplicated \
+                                                        when constructing the regular expression."),
+            Error::RegexDuplicatedEdge(from, to) => write!(f, "an edge from {from} to {to} is duplicated \
+                                                               when constructing the regular expression."),
+            Error::RegexNoRoot => write!(f, "the constructed regular expression has no root."),
+            Error::RegexCycle => write!(f, "the constructed regular expression has cycles."),
+            Error::RegexEmptyStack(pos) => write!(f, "the stack illegally becomes empty at {pos} \
+                                                when constructing a regular expression."),
+            Error::RegexEmptyActionStack(pos) => write!(f, "the action stack illegally becomes empty at {pos} \
+                                                when constructing a regular expression."),
+            Error::RegexUnbalancedParen(pos) => write!(f, "An unbalanced parenthesis is encountered at {pos} \
+                                                           when constructing a regular expression."),
+            Error::RegexInvalidRepeat(pos) => write!(f, "An invalid repetition specification is encountered \
+                                                         at {pos} when constructing a regular expression."),
+            Error::RegexInvalidComment(pos) => write!(f, "An invalid character is encountered in a comment \
+                                                          at {pos}"),
+            Error::RegexInvalidNumeric(pos) => write!(f, "An invalid numeric value is encoutnered at {pos}."),
+            Error::MissingEqualSign(pos) => write!(f, "A rule is missing an equal sign at {pos}"),
+            Error::Unimplemented(pos) => write!(f, "An unimplemented feature is required at {pos}."),
+            Error::IOError(e) => write!(f, "An IO error: {e}"),
+            Error::EmptyGrammar => write!(f, "the grammar is empty"),
+            Error::AppendToNothing(pos) => write!(f, "the rule at {pos} is appending to a non-existing rule."),
+            Error::CreateAgain(pos) => write!(f, "the rule at {pos} is creating a duplicated rule."),
+            Error::UnknownTerminal(name) => write!(f, "the terminal {name} is not found."),
+            Error::UnknownNonterminal(name) => write!(f, "the non-terminal {name} is not found."),
+            Error::Invalid => write!(f, "invalid"),
+        }
+    }
+}
+
+/// Return the number of bytes taken by the characters for which the
+/// function `f` returns `true`.
+fn skip_by(s: &str, f: impl Fn(char) -> bool) -> usize {
+    for (index, c) in s.char_indices() {
+        if !f(c) {
+            return index;
+        }
+    }
+
+    s.len()
+}
+
+mod boolean_fns;
+
+pub(crate) use boolean_fns::*;
+
+fn skip_c_nl(s: &str) -> Option<usize> {
+    if s.is_empty() {
+        return Some(0);
+    }
+
+    let s_len = s.len();
+
+    match s.chars().next().unwrap() {
+        ';' => {
+            let after_not_newline = skip_by(&s[1..], is_not_newline);
+
+            if 1 + after_not_newline >= s_len {
+                Some(s_len)
+            } else {
+                let s = &s[1 + after_not_newline..];
+
+                match s.chars().next().unwrap() {
+                    '\n' | '\r' => Some(after_not_newline + 2),
+                    _ => {
+                        // We skipped anything not newline, so this
+                        // character must be newline.
+                        unreachable!();
+                    }
+                }
+            }
+        }
+        '\n' | '\r' => Some(1),
+        _ => None,
+    }
+}
+
+fn skip_c_wsp(s: &str) -> usize {
+    let mut currently_skipped = 0usize;
+    let mut s = s;
+
+    let mut continue_skipping = true;
+
+    'skipping_loop: while continue_skipping {
+        continue_skipping = false;
+
+        let skip_wsp = skip_by(s, is_wsp);
+
+        if skip_wsp > 0 {
+            if skip_wsp >= s.len() {
+                return currently_skipped + s.len();
+            }
+
+            continue_skipping = true;
+            s = &s[skip_wsp..];
+            currently_skipped += skip_wsp;
+
+            continue 'skipping_loop;
+        }
+
+        if let Some(skip_c_nl_size) = skip_c_nl(s) {
+            if skip_c_nl_size >= s.len() {
+                return currently_skipped + s.len();
+            }
+
+            continue_skipping = true;
+
+            s = &s[skip_c_nl_size..];
+            currently_skipped += skip_c_nl_size;
+
+            let skip_wsp_after_c_nl = skip_by(s, is_wsp);
+
+            s = &s[skip_wsp_after_c_nl..];
+            currently_skipped += skip_wsp_after_c_nl;
+        }
+    }
+
+    currently_skipped
+}
+
+#[derive(Debug, Eq, PartialEq)]
+enum RepetitionSpec {
+    Star,
+    Equal(usize),
+    Min(usize),
+    Max(usize),
+    MinMax(usize, usize),
+}
+
+#[derive(Debug)]
+enum RepetitionAction {
+    /// Ignore the following element.
+    Ignore,
+    /// The following is put in a star.
+    Star,
+    /// The following is put in an optional construct.
+    Optional,
+    /// The following is put in a plus construct.
+    Plus,
+    /// Copy the following some number of times.
+    Copy(usize),
+    /// Copy the following some number of times, and then add a plus
+    /// of that same element.
+    CopyPlus(usize),
+    /// Copy the following some number of times, and then add another
+    /// number of *nested* optional construts of that same element.
+    ///
+    /// # Why nested?
+    ///
+    /// The optional constructs are nested here as a sequence of
+    /// optional constructs is exponentially more ambiguous than the
+    /// nested equivalent: "a?a?a?" is more ambiguous than
+    /// "(a(a(a)?)?)?".  For example, the input "a" can have three
+    /// derivations according to "a?a?a?", but has only one derivation
+    /// according to "(a(a(a)?)?)?".
+    CopyOptional(usize, usize),
+}
+
+impl TryFrom<Option<RepetitionSpec>> for RepetitionAction {
+    type Error = Error;
+
+    fn try_from(spec: Option<RepetitionSpec>) -> Result<Self, Self::Error> {
+        if let Some(r) = spec {
+            match r {
+                RepetitionSpec::Star => Ok(Self::Star),
+                RepetitionSpec::Equal(0) => Ok(Self::Ignore),
+                RepetitionSpec::Equal(n) => Ok(Self::Copy(n)),
+                RepetitionSpec::Min(0) => Ok(Self::Star),
+                RepetitionSpec::Min(1) => Ok(Self::Plus),
+                RepetitionSpec::Min(n) => Ok(Self::CopyPlus(n - 1)),
+                RepetitionSpec::Max(0) => Ok(Self::Ignore),
+                RepetitionSpec::Max(1) => Ok(Self::Optional),
+                RepetitionSpec::Max(n) => Ok(Self::CopyOptional(0, n)),
+                RepetitionSpec::MinMax(0, 0) => Ok(Self::Ignore),
+                RepetitionSpec::MinMax(0, 1) => Ok(Self::Optional),
+                RepetitionSpec::MinMax(n, m) if n > m => Err(Error::MinGreaterThanMax(0)),
+                RepetitionSpec::MinMax(n, m) if n == m => Ok(Self::Copy(n)),
+                RepetitionSpec::MinMax(n, m) => Ok(Self::CopyOptional(n, m - n)),
+            }
+        } else {
+            Ok(Self::Copy(1))
+        }
+    }
+}
+
+fn parse_repetition_spec(s: &str) -> Option<(usize, RepetitionSpec)> {
+    let mut s = s;
+
+    let digit_size = skip_by(s, is_decimal_digit);
+
+    let mut min: Option<usize> = None;
+    let mut max: Option<usize> = None;
+
+    if digit_size != 0 {
+        min = Some(s[..digit_size].parse::<usize>().unwrap());
+    }
+
+    let mut current_index = digit_size;
+
+    if digit_size == s.len() {
+        if let Some(m) = min {
+            return Some((digit_size, RepetitionSpec::Equal(m)));
+        } else {
+            return None;
+        };
+    }
+
+    s = &s[current_index..];
+
+    if s.starts_with('*') {
+        if s.len() == 1 {
+            if let Some(m) = min {
+                return Some((current_index + 1, RepetitionSpec::Min(m)));
+            } else {
+                return Some((current_index + 1, RepetitionSpec::Star));
+            }
+        }
+
+        s = &s[1..];
+        current_index += 1;
+
+        let digit_size = skip_by(s, is_decimal_digit);
+
+        if digit_size != 0 {
+            max = Some(s[..digit_size].parse::<usize>().unwrap());
+        }
+
+        current_index += digit_size;
+
+        match (min, max) {
+            (Some(mi), Some(ma)) => Some((current_index, RepetitionSpec::MinMax(mi, ma))),
+            (Some(m), None) => Some((current_index, RepetitionSpec::Min(m))),
+            (None, Some(m)) => Some((current_index, RepetitionSpec::Max(m))),
+            _ => Some((current_index, RepetitionSpec::Star)),
+        }
+    } else {
+        min.map(|m| (current_index, RepetitionSpec::Equal(m)))
+    }
+}
+
+fn parse_rulename(s: &str) -> Result<usize, Error> {
+    let first_char = s.chars().next().unwrap();
+
+    if !is_alpha(first_char) {
+        return Ok(0);
+    }
+
+    let rulename_end = skip_by(s, is_alpha_or_digit_or_hyphen);
+
+    if rulename_end >= s.len() {
+        // dbg!();
+        return Err(Error::HangingRuleName(0));
+    }
+
+    Ok(rulename_end)
+}
+
+#[derive(Debug, Eq, PartialEq, Hash)]
+enum IntermediateConstruct {
+    Terminal(String),
+    Nonterminal(String),
+    Range(char, char),
+}
+
+impl Display for IntermediateConstruct {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            IntermediateConstruct::Terminal(t) => write!(f, "t{t}"),
+            IntermediateConstruct::Nonterminal(n) => write!(f, "n{n}"),
+            IntermediateConstruct::Range(min, max) => write!(f, "[{min}-{max}]"),
+        }
+    }
+}
+
+#[derive(Debug, Eq, PartialEq, Copy, Clone)]
+enum Radix {
+    Binary,
+    Decimal,
+    Hexadecimal,
+}
+
+fn parse_numeric(s: &str, radix: Radix) -> Result<(usize, IntermediateConstruct), Error> {
+    // first read a number
+
+    let is_radix_digit = match radix {
+        Radix::Binary => is_binary_digit,
+        Radix::Decimal => is_decimal_digit,
+        Radix::Hexadecimal => is_hexadecimal_digit,
+    };
+
+    let radix = match radix {
+        Radix::Binary => 2,
+        Radix::Decimal => 10,
+        Radix::Hexadecimal => 16,
+    };
+
+    let mut s = s;
+    let mut current_index = 0;
+
+    let first_digit_size = skip_by(s, is_radix_digit);
+
+    if first_digit_size == 0 {
+        // dbg!();
+        return Err(Error::InvalidDigit(current_index));
+    }
+
+    let first_number =
+        match std::char::from_u32(u32::from_str_radix(&s[..first_digit_size], radix).unwrap()) {
+            Some(c) => c,
+            None => {
+                // dbg!();
+                return Err(Error::InvalidCharacter(current_index));
+            }
+        };
+
+    if first_digit_size >= s.len() {
+        return Ok((
+            current_index + s.len(),
+            IntermediateConstruct::Terminal(first_number.to_string()),
+        ));
+    }
+
+    s = &s[first_digit_size..];
+
+    current_index += first_digit_size;
+
+    // then read a dot or a dash
+
+    match s.chars().next().unwrap() {
+        '.' => {
+            s = &s[1..];
+            current_index += 1;
+
+            let mut stop_reading = false;
+            let mut chars_vec = vec![first_number];
+
+            while !stop_reading {
+                if s.is_empty() {
+                    return Err(Error::HangingNumeric(current_index - 1));
+                }
+
+                let digit_size = skip_by(s, is_radix_digit);
+
+                if digit_size == 0 {
+                    // dbg!();
+                    return Err(Error::InvalidDigit(current_index));
+                }
+
+                let a_number =
+                    std::char::from_u32(u32::from_str_radix(&s[..digit_size], radix).unwrap())
+                        .ok_or(Error::InvalidCharacter(current_index))?;
+
+                chars_vec.push(a_number);
+
+                s = &s[digit_size..];
+                current_index += digit_size;
+
+                if !s.starts_with('.') {
+                    stop_reading = true;
+                    // println!(
+                    //     "digit_size = {digit_size} a number = {a_number}, s = {}",
+                    //     &s[..10]
+                    // );
+                } else {
+                    s = &s[1..];
+                    current_index += 1;
+                }
+            }
+
+            Ok((
+                current_index,
+                IntermediateConstruct::Terminal(chars_vec.into_iter().collect()),
+            ))
+        }
+        '-' => {
+            s = &s[1..];
+            current_index += 1;
+
+            if s.is_empty() {
+                return Err(Error::HangingNumeric(current_index - 1));
+            }
+
+            let second_digit_size = skip_by(s, is_radix_digit);
+
+            if second_digit_size == 0 {
+                println!(
+                    "s = {}",
+                    s.chars()
+                        .take(if s.len() >= 50 { 50 } else { s.len() })
+                        .collect::<String>()
+                );
+                return Err(Error::InvalidDigit(current_index));
+            }
+
+            let _second_number =
+                std::char::from_u32(u32::from_str_radix(&s[..second_digit_size], radix).unwrap())
+                    .ok_or(Error::InvalidCharacter(current_index))?;
+
+            Err(Error::Unimplemented(current_index - 1))
+
+            // Ok((
+            //     current_index + second_digit_size,
+            //     IntermediateConstruct::Range(first_number, second_number),
+            // ))
+        }
+        c if is_wsp(c) || c == '\n' || c == '\r' || c == ';' => Ok((
+            current_index,
+            IntermediateConstruct::Terminal(first_number.to_string()),
+        )),
+        _ => Err(Error::InvalidCharacter(current_index)),
+    }
+}
+
+#[derive(Debug)]
+struct AlternationConstruct {
+    intermediates: Vec<IntermediateConstruct>,
+    regex: DefaultRegex<usize>,
+}
+
+fn parse_alternation(s: &str, indentation: usize) -> Result<(usize, AlternationConstruct), Error> {
+    let mut stack: Vec<usize> = vec![0, 1];
+    let mut intermediates: Vec<IntermediateConstruct> = Vec::new();
+    let mut types: Vec<RegexType<usize>> = vec![RegexType::Or, RegexType::Empty];
+    let mut edges: Vec<Vec<usize>> = vec![vec![1], Vec::new()];
+
+    fn error_conversion(error: nfa::error::Error) -> Error {
+        match error {
+            nfa::error::Error::UnknownNode(n) => Error::RegexUnknownNode(n),
+            nfa::error::Error::UnsupportedOperation => Error::UnsupportedOperation,
+            nfa::error::Error::NoRoot => Error::RegexNoRoot,
+            nfa::error::Error::Graph(ge) => ge.into(),
+            nfa::error::Error::Cycle => Error::RegexCycle,
+            _ => Error::Invalid,
+        }
+    }
+
+    let mut s = s;
+    let mut current_index = 0usize;
+
+    let mut nonterminal_nodes: HashSet<usize> = HashSet::new();
+
+    if s.is_empty() {
+        let regex = DefaultRegex::new(edges.into(), types).map_err(error_conversion)?;
+
+        return Ok((
+            current_index,
+            AlternationConstruct {
+                intermediates,
+                regex,
+            },
+        ));
+    }
+
+    // A vector of tuples to record some information for each level of
+    // nesting.
+    //
+    // A tuple (len, action) means that the stack grows by LEN many
+    // tokens at this level, and the action ACTION is pending.
+    let mut repetition_action_stack: Vec<(usize, Option<RepetitionAction>)> = vec![];
+
+    while !stack.is_empty() {
+        // First read repetition specifications.
+        let mut repetition_spec = None;
+
+        if let Some((spec_size, spec)) = parse_repetition_spec(s) {
+            if spec_size >= s.len() {
+                return Err(Error::HangingRepetition(current_index));
+            }
+
+            s = &s[spec_size..];
+            current_index += spec_size;
+
+            repetition_spec = Some(spec);
+        }
+
+        let repetition_spec = repetition_spec;
+
+        // if a rulename
+        match parse_rulename(s) {
+            Ok(0) => {}
+            Ok(size) => {
+                let name = &s[..size].to_string();
+
+                intermediates.push(IntermediateConstruct::Nonterminal(name.clone()));
+
+                let nt_index = intermediates.len() - 1;
+
+                let action = repetition_spec.try_into()?;
+                // println!("rep_action = {action:?}");
+
+                let stack_last = stack
+                    .iter()
+                    .copied()
+                    .last()
+                    .ok_or(Error::RegexEmptyStack(current_index))?;
+
+                match action {
+                    RepetitionAction::Ignore => {}
+                    RepetitionAction::Star => {
+                        types.push(RegexType::Kleene);
+                        types.push(RegexType::Lit(nt_index));
+
+                        edges.push(vec![types.len() - 1]);
+                        edges.push(Vec::new());
+
+                        nonterminal_nodes.insert(types.len() - 1);
+
+                        let edges_len = edges.len();
+
+                        edges
+                            .get_mut(stack_last)
+                            .ok_or(Error::RegexIndexOutOfBounds(stack_last, edges_len))?
+                            .push(types.len() - 2);
+                    }
+                    RepetitionAction::Optional => {
+                        types.append(&mut vec![RegexType::Optional]);
+                        types.push(RegexType::Lit(nt_index));
+                        edges.push(vec![types.len() - 1]);
+                        edges.push(vec![]);
+
+                        nonterminal_nodes.insert(types.len() - 1);
+
+                        let edges_len = edges.len();
+
+                        edges
+                            .get_mut(stack_last)
+                            .ok_or(Error::RegexIndexOutOfBounds(stack_last, edges_len))?
+                            .push(types.len() - 2);
+                    }
+                    RepetitionAction::Plus => {
+                        types.append(&mut vec![RegexType::Plus]);
+                        types.push(RegexType::Lit(nt_index));
+                        edges.push(vec![types.len() - 1]);
+                        edges.push(vec![]);
+
+                        nonterminal_nodes.insert(types.len() - 1);
+
+                        let edges_len = edges.len();
+
+                        edges
+                            .get_mut(stack_last)
+                            .ok_or(Error::RegexIndexOutOfBounds(stack_last, edges_len))?
+                            .push(types.len() - 2);
+                    }
+                    RepetitionAction::Copy(n) => {
+                        let old_types_len = types.len();
+
+                        nonterminal_nodes.extend((0..n).map(|i| old_types_len + i));
+
+                        let edges_len = edges.len();
+
+                        edges
+                            .get_mut(stack_last)
+                            .ok_or(Error::RegexIndexOutOfBounds(stack_last, edges_len))?
+                            .extend((0..n).map(|i| old_types_len + i));
+
+                        types.extend(std::iter::repeat(nt_index).map(RegexType::Lit).take(n));
+                        edges.extend(std::iter::repeat_with(Default::default).take(n));
+
+                        // for _ in 0..n {
+                        //     types.push(RegexType::Lit(nt_index));
+                        //     edges.push(vec![]);
+
+                        //     nonterminal_nodes.insert(types.len() - 1);
+
+                        //     edges[stack_last].push(types.len() - 1);
+                        // }
+                    }
+                    RepetitionAction::CopyPlus(n) => {
+                        let old_types_len = types.len();
+
+                        nonterminal_nodes.extend((0..n).map(|i| old_types_len + i));
+
+                        let edges_len = edges.len();
+
+                        edges
+                            .get_mut(stack_last)
+                            .ok_or(Error::RegexIndexOutOfBounds(stack_last, edges_len))?
+                            .extend((0..n).map(|i| old_types_len + i));
+
+                        types.extend(std::iter::repeat(nt_index).map(RegexType::Lit).take(n));
+                        edges.extend(std::iter::repeat_with(Default::default).take(n));
+
+                        // for _ in 0..n {
+                        //     types.push(RegexType::Lit(nt_index));
+                        //     edges.push(vec![]);
+
+                        //     edges[stack_last].push(types.len() - 1);
+                        // }
+
+                        types.append(&mut vec![RegexType::Plus]);
+                        types.push(RegexType::Lit(nt_index));
+
+                        nonterminal_nodes.insert(types.len() - 1);
+
+                        edges.push(vec![types.len() - 1]);
+                        edges.push(vec![]);
+
+                        let edges_len = edges.len();
+
+                        edges
+                            .get_mut(stack_last)
+                            .ok_or(Error::RegexIndexOutOfBounds(stack_last, edges_len))?
+                            .push(types.len() - 2);
+                    }
+                    RepetitionAction::CopyOptional(n, m) => {
+                        let old_types_len = types.len();
+
+                        nonterminal_nodes.extend((0..n).map(|i| old_types_len + i));
+
+                        let edges_len = edges.len();
+
+                        edges
+                            .get_mut(stack_last)
+                            .ok_or(Error::RegexIndexOutOfBounds(stack_last, edges_len))?
+                            .extend((0..n).map(|i| old_types_len + i));
+
+                        types.extend(std::iter::repeat(nt_index).map(RegexType::Lit).take(n));
+                        edges.extend(std::iter::repeat_with(Default::default).take(n));
+
+                        // for _ in 0..n {
+                        //     types.push(RegexType::Lit(nt_index));
+                        //     edges.push(vec![]);
+
+                        //     nonterminal_nodes.insert(types.len() - 1);
+
+                        //     edges[stack_last].push(types.len() - 1);
+                        // }
+
+                        let types_len = types.len();
+
+                        types.extend(
+                            std::iter::repeat([RegexType::Optional, RegexType::Lit(nt_index)])
+                                .take(m)
+                                .flatten(),
+                        );
+
+                        edges.extend((0..(m - 1)).flat_map(|i| {
+                            [
+                                vec![types_len + 1 + 2 * i, types_len + 2 + 2 * i],
+                                Vec::new(),
+                            ]
+                        }));
+
+                        edges.extend([vec![types.len() - 1], Vec::new()]);
+
+                        nonterminal_nodes.extend((0..m).map(|i| types_len + 2 * i + 1));
+
+                        let edges_len = edges.len();
+
+                        edges
+                            .get_mut(stack_last)
+                            .ok_or(Error::RegexIndexOutOfBounds(stack_last, edges_len))?
+                            .push(types_len);
+
+                        // for _ in 0..m {
+                        //     types.extend([RegexType::Optional, RegexType::Lit(nt_index)]);
+                        //     edges.extend([vec![types.len() - 1], Vec::new()]);
+
+                        //     nonterminal_nodes.insert(types.len() - 1);
+
+                        //     edges[stack_last].push(types.len() - 2);
+                        // }
+                    }
+                }
+
+                s = &s[size..];
+                current_index += size;
+
+                // c wsp
+
+                let skip_c_wsp_size = skip_by(s, is_wsp);
+
+                if skip_c_wsp_size >= s.len() {
+                    break;
+                }
+
+                s = &s[skip_c_wsp_size..];
+                current_index += skip_c_wsp_size;
+
+                continue;
+            }
+            Err(_) => {
+                intermediates.push(IntermediateConstruct::Nonterminal(s.to_owned()));
+
+                types.push(RegexType::Lit(intermediates.len() - 1));
+                edges.push(vec![]);
+
+                let stack_last = stack
+                    .last()
+                    .copied()
+                    .ok_or(Error::RegexEmptyStack(current_index))?;
+
+                let edges_len = edges.len();
+
+                edges
+                    .get_mut(stack_last)
+                    .ok_or(Error::RegexIndexOutOfBounds(stack_last, edges_len))?
+                    .push(types.len() - 1);
+                nonterminal_nodes.insert(types.len() - 1);
+
+                break;
+            }
+        }
+
+        // match against the first charcter
+
+        let first_char = s.chars().next().unwrap();
+
+        match first_char {
+            '(' => {
+                // group
+
+                s = &s[1..];
+                current_index += 1;
+
+                let action = repetition_spec.try_into()?;
+
+                let stack_last = stack
+                    .iter()
+                    .copied()
+                    .last()
+                    .ok_or(Error::RegexEmptyStack(current_index))?;
+
+                match action {
+                    RepetitionAction::Star => {
+                        types.extend([RegexType::Kleene, RegexType::Or, RegexType::Empty]);
+                        edges.extend([vec![types.len() - 2], vec![types.len() - 1], Vec::new()]);
+
+                        let edges_len = edges.len();
+
+                        edges
+                            .get_mut(stack_last)
+                            .ok_or(Error::RegexIndexOutOfBounds(stack_last, edges_len))?
+                            .push(types.len() - 3);
+
+                        repetition_action_stack.push((3, None));
+                        stack.extend([types.len() - 3, types.len() - 2, types.len() - 1]);
+                    }
+                    RepetitionAction::Optional => {
+                        types.extend([RegexType::Optional, RegexType::Or, RegexType::Empty]);
+                        edges.extend([vec![types.len() - 2], vec![types.len() - 1], Vec::new()]);
+
+                        let edges_len = edges.len();
+
+                        edges
+                            .get_mut(stack_last)
+                            .ok_or(Error::RegexIndexOutOfBounds(stack_last, edges_len))?
+                            .push(types.len() - 3);
+
+                        stack.extend([types.len() - 3, types.len() - 2, types.len() - 1]);
+                        repetition_action_stack.push((3, None));
+                    }
+                    RepetitionAction::Plus => {
+                        types.extend([RegexType::Plus, RegexType::Or, RegexType::Empty]);
+                        edges.extend([vec![types.len() - 2], vec![types.len() - 1], Vec::new()]);
+
+                        let edges_len = edges.len();
+
+                        edges
+                            .get_mut(stack_last)
+                            .ok_or(Error::RegexIndexOutOfBounds(stack_last, edges_len))?
+                            .push(types.len() - 3);
+
+                        stack.extend([types.len() - 3, types.len() - 2, types.len() - 1]);
+                        repetition_action_stack.push((3, None));
+                    }
+                    RepetitionAction::Copy(1) => {
+                        types.extend([RegexType::Or, RegexType::Empty]);
+                        edges.extend([vec![types.len() - 1], Vec::new()]);
+
+                        let edges_len = edges.len();
+
+                        edges
+                            .get_mut(stack_last)
+                            .ok_or(Error::RegexIndexOutOfBounds(stack_last, edges_len))?
+                            .push(types.len() - 2);
+
+                        stack.extend([types.len() - 2, types.len() - 1]);
+                        repetition_action_stack.push((2, None));
+                    }
+                    RepetitionAction::CopyOptional(0, _) => {
+                        types.extend([RegexType::Optional, RegexType::Or, RegexType::Empty]);
+                        edges.extend([vec![types.len() - 2], vec![types.len() - 1], Vec::new()]);
+
+                        let edges_len = edges.len();
+
+                        edges
+                            .get_mut(stack_last)
+                            .ok_or(Error::RegexIndexOutOfBounds(stack_last, edges_len))?
+                            .push(types.len() - 3);
+
+                        stack.extend([types.len() - 3, types.len() - 2, types.len() - 1]);
+
+                        repetition_action_stack.push((3, Some(action)));
+                    }
+                    _ => {
+                        types.extend([RegexType::Or, RegexType::Empty]);
+                        edges.extend([vec![types.len() - 1], Vec::new()]);
+
+                        let edges_len = edges.len();
+
+                        edges
+                            .get_mut(stack_last)
+                            .ok_or(Error::RegexIndexOutOfBounds(stack_last, edges_len))?
+                            .push(types.len() - 2);
+
+                        stack.extend([types.len() - 2, types.len() - 1]);
+
+                        repetition_action_stack.push((2, Some(action)));
+                    }
+                }
+
+                s = &s[1..];
+                current_index += 1;
+
+                // c wsp
+
+                let skip_c_wsp_size = skip_by(s, is_wsp);
+
+                if skip_c_wsp_size >= s.len() {
+                    return Err(Error::RegexUnbalancedParen(current_index));
+                }
+
+                s = &s[skip_c_wsp_size..];
+                current_index += skip_c_wsp_size;
+
+                continue;
+            }
+            ')' => {
+                // end group
+
+                s = &s[1..];
+                current_index += 1;
+
+                if repetition_spec.is_some() {
+                    return Err(Error::RegexInvalidRepeat(current_index));
+                }
+
+                let (stack_growth, repetition_action) = repetition_action_stack
+                    .pop()
+                    .ok_or(Error::RegexEmptyActionStack(current_index))?;
+
+                match repetition_action {
+                    Some(action) => {
+                        if stack.len() <= stack_growth + 1 {
+                            dbg!();
+                            return Err(Error::RegexEmptyStack(current_index));
+                        }
+
+                        stack.truncate(stack.len() - stack_growth + 1);
+
+                        // this is not going to fail because of the previous check
+                        let stack_last = stack.pop().unwrap();
+                        let stack_second_last = stack.iter().copied().last().unwrap();
+
+                        match action {
+                            RepetitionAction::Ignore => {
+                                edges[stack_second_last].pop();
+
+                                types.truncate(stack_last);
+                                edges.truncate(stack_last);
+
+                                // c wsp
+
+                                let skip_c_wsp_size = skip_by(s, is_wsp);
+
+                                if skip_c_wsp_size >= s.len() {
+                                    break;
+                                }
+
+                                s = &s[skip_c_wsp_size..];
+                                current_index += skip_c_wsp_size;
+
+                                continue;
+                            }
+                            RepetitionAction::Copy(n) | RepetitionAction::CopyOptional(0, n) => {
+                                let nodes_cloned =
+                                    types.iter().skip(stack_last).cloned().collect::<Vec<_>>();
+
+                                let edges_cloned =
+                                    edges.iter().skip(stack_last).cloned().collect::<Vec<_>>();
+
+                                for _ in 0..(n - 1) {
+                                    edges[stack_second_last].push(types.len());
+
+                                    // if stack_second_last == 3 && nodes.len() == 3 {
+                                    //     println!("haha");
+                                    // }
+
+                                    types.extend(nodes_cloned.iter().copied());
+
+                                    let edges_offset = edges.len() - stack_last;
+
+                                    edges.extend(edges_cloned.iter().map(|edges| {
+                                        edges
+                                            .iter()
+                                            .map(|des| des + edges_offset)
+                                            .collect::<Vec<_>>()
+                                    }));
+                                }
+
+                                // c wsp
+
+                                let skip_c_wsp_size = skip_by(s, is_wsp);
+
+                                if skip_c_wsp_size >= s.len() {
+                                    break;
+                                }
+
+                                s = &s[skip_c_wsp_size..];
+                                current_index += skip_c_wsp_size;
+
+                                continue;
+                            }
+                            RepetitionAction::CopyPlus(n) => {
+                                let nodes_cloned =
+                                    types.iter().skip(stack_last).cloned().collect::<Vec<_>>();
+
+                                let edges_cloned =
+                                    edges.iter().skip(stack_last).cloned().collect::<Vec<_>>();
+
+                                for _ in 0..(n - 1) {
+                                    edges[stack_second_last].push(types.len());
+
+                                    // if stack_second_last == 3 && nodes.len() == 3 {
+                                    //     println!("haha");
+                                    // }
+
+                                    types.extend(nodes_cloned.iter().copied());
+
+                                    let edges_offset = edges.len() - stack_last;
+
+                                    edges.extend(edges_cloned.iter().map(|edges| {
+                                        edges
+                                            .iter()
+                                            .map(|des| des + edges_offset)
+                                            .collect::<Vec<_>>()
+                                    }));
+                                }
+
+                                types.push(RegexType::Plus);
+                                edges.push(vec![types.len()]);
+
+                                edges[stack_second_last].push(types.len() - 1);
+
+                                // if stack_second_last == 3 && nodes.len() == 4 {
+                                //     println!("haha");
+                                // }
+
+                                types.extend(nodes_cloned.iter().copied());
+
+                                let edges_offset = edges.len() - stack_last;
+
+                                edges.extend(edges_cloned.iter().map(|edges| {
+                                    edges
+                                        .iter()
+                                        .map(|des| des + edges_offset)
+                                        .collect::<Vec<_>>()
+                                }));
+
+                                // c wsp
+
+                                let skip_c_wsp_size = skip_by(s, is_wsp);
+
+                                if skip_c_wsp_size >= s.len() {
+                                    break;
+                                }
+
+                                s = &s[skip_c_wsp_size..];
+                                current_index += skip_c_wsp_size;
+
+                                continue;
+                            }
+                            RepetitionAction::CopyOptional(n, m) => {
+                                let nodes_cloned =
+                                    types.iter().skip(stack_last).cloned().collect::<Vec<_>>();
+
+                                let edges_cloned =
+                                    edges.iter().skip(stack_last).cloned().collect::<Vec<_>>();
+
+                                for _ in 0..(n - 1) {
+                                    edges[stack_second_last].push(types.len());
+
+                                    // if stack_second_last == 3 && nodes.len() == 3 {
+                                    //     println!("haha");
+                                    // }
+
+                                    types.extend(nodes_cloned.iter().copied());
+
+                                    let edges_offset = edges.len() - stack_last;
+
+                                    edges.extend(edges_cloned.iter().map(|edges| {
+                                        edges
+                                            .iter()
+                                            .map(|des| des + edges_offset)
+                                            .collect::<Vec<_>>()
+                                    }));
+                                }
+
+                                edges[stack_second_last].push(types.len());
+
+                                let nodes_cloned_len = nodes_cloned.len();
+
+                                for _ in 0..(m - 1) {
+                                    // if stack_second_last == 3 && nodes.len() == 3 {
+                                    //     println!("haha");
+                                    // }
+
+                                    types.push(RegexType::Optional);
+                                    edges.push(vec![types.len(), types.len() + nodes_cloned_len]);
+
+                                    types.extend(nodes_cloned.iter().copied());
+
+                                    let edges_offset = edges.len() - stack_last;
+
+                                    edges.extend(edges_cloned.iter().map(|edges| {
+                                        edges
+                                            .iter()
+                                            .map(|des| des + edges_offset)
+                                            .collect::<Vec<_>>()
+                                    }));
+                                }
+
+                                types.push(RegexType::Optional);
+                                edges.push(vec![types.len()]);
+
+                                types.extend(nodes_cloned.iter().copied());
+
+                                let edges_offset = edges.len() - stack_last;
+
+                                edges.extend(edges_cloned.iter().map(|edges| {
+                                    edges
+                                        .iter()
+                                        .map(|des| des + edges_offset)
+                                        .collect::<Vec<_>>()
+                                }));
+
+                                // c wsp
+
+                                let skip_c_wsp_size = skip_by(s, is_wsp);
+
+                                if skip_c_wsp_size >= s.len() {
+                                    break;
+                                }
+
+                                s = &s[skip_c_wsp_size..];
+                                current_index += skip_c_wsp_size;
+
+                                continue;
+                            }
+                            RepetitionAction::Star
+                            | RepetitionAction::Optional
+                            | RepetitionAction::Plus => {
+                                // this will not happen here
+
+                                // c wsp
+
+                                let skip_c_wsp_size = skip_by(s, is_wsp);
+
+                                if skip_c_wsp_size >= s.len() {
+                                    break;
+                                }
+
+                                s = &s[skip_c_wsp_size..];
+                                current_index += skip_c_wsp_size;
+
+                                continue;
+                            }
+                        }
+                    }
+                    None => {
+                        if stack.len() <= stack_growth {
+                            dbg!();
+                            return Err(Error::RegexEmptyStack(current_index));
+                        }
+
+                        stack.truncate(stack.len() - stack_growth);
+
+                        // c wsp
+
+                        let skip_c_wsp_size = skip_by(s, is_wsp);
+
+                        if skip_c_wsp_size >= s.len() {
+                            break;
+                        }
+
+                        s = &s[skip_c_wsp_size..];
+                        current_index += skip_c_wsp_size;
+
+                        continue;
+                    }
+                }
+            }
+            '[' => {
+                // option
+
+                let action = repetition_spec.try_into()?;
+
+                let stack_last = stack
+                    .iter()
+                    .copied()
+                    .last()
+                    .ok_or(Error::RegexEmptyStack(current_index))?;
+
+                match action {
+                    RepetitionAction::Ignore => {
+                        types.extend([RegexType::Optional, RegexType::Or, RegexType::Empty]);
+                        edges.extend([vec![types.len() - 2], vec![types.len() - 1], Vec::new()]);
+
+                        edges[stack_last].push(types.len() - 3);
+
+                        stack.push(types.len() - 3);
+                        stack.push(types.len() - 2);
+                        stack.push(types.len() - 1);
+
+                        repetition_action_stack.push((3, Some(action)));
+                    }
+                    RepetitionAction::Optional => {
+                        types.extend([
+                            RegexType::Optional,
+                            RegexType::Optional,
+                            RegexType::Or,
+                            RegexType::Empty,
+                        ]);
+                        edges.extend([
+                            vec![types.len() - 3],
+                            vec![types.len() - 2],
+                            vec![types.len() - 1],
+                            Vec::new(),
+                        ]);
+
+                        edges[stack_last].push(types.len() - 4);
+
+                        stack.push(types.len() - 4);
+                        stack.push(types.len() - 3);
+                        stack.push(types.len() - 2);
+                        stack.push(types.len() - 1);
+
+                        repetition_action_stack.push((4, None));
+                    }
+                    RepetitionAction::Star => {
+                        types.extend([
+                            RegexType::Kleene,
+                            RegexType::Optional,
+                            RegexType::Or,
+                            RegexType::Empty,
+                        ]);
+                        edges.extend([
+                            vec![types.len() - 3],
+                            vec![types.len() - 2],
+                            vec![types.len() - 1],
+                            Vec::new(),
+                        ]);
+
+                        edges[stack_last].push(types.len() - 4);
+
+                        stack.push(types.len() - 4);
+                        stack.push(types.len() - 3);
+                        stack.push(types.len() - 2);
+                        stack.push(types.len() - 1);
+
+                        repetition_action_stack.push((4, None));
+                    }
+                    RepetitionAction::Plus => {
+                        types.extend([
+                            RegexType::Kleene,
+                            RegexType::Optional,
+                            RegexType::Or,
+                            RegexType::Empty,
+                        ]);
+                        edges.extend([
+                            vec![types.len() - 3],
+                            vec![types.len() - 2],
+                            vec![types.len() - 1],
+                            Vec::new(),
+                        ]);
+
+                        edges[stack_last].push(types.len() - 4);
+
+                        stack.push(types.len() - 4);
+                        stack.push(types.len() - 3);
+                        stack.push(types.len() - 2);
+                        stack.push(types.len() - 1);
+
+                        repetition_action_stack.push((4, None));
+                    }
+                    RepetitionAction::Copy(1) => {
+                        types.extend([RegexType::Optional, RegexType::Or, RegexType::Empty]);
+                        edges.extend([vec![types.len() - 2], vec![types.len() - 1], Vec::new()]);
+
+                        edges[stack_last].push(types.len() - 3);
+
+                        stack.push(types.len() - 3);
+                        stack.push(types.len() - 2);
+                        stack.push(types.len() - 1);
+
+                        repetition_action_stack.push((3, None));
+                    }
+                    _ => {
+                        types.extend([RegexType::Optional, RegexType::Or, RegexType::Empty]);
+                        edges.extend([vec![types.len() - 2], vec![types.len() - 1], Vec::new()]);
+
+                        edges[stack_last].push(types.len() - 3);
+
+                        stack.push(types.len() - 3);
+                        stack.push(types.len() - 2);
+                        stack.push(types.len() - 1);
+
+                        repetition_action_stack.push((3, Some(action)));
+                    }
+                }
+
+                s = &s[1..];
+                current_index += 1;
+
+                // c wsp
+
+                let skip_c_wsp_size = skip_by(s, is_wsp);
+
+                if skip_c_wsp_size >= s.len() {
+                    return Err(Error::RegexUnbalancedParen(current_index));
+                }
+
+                s = &s[skip_c_wsp_size..];
+                current_index += skip_c_wsp_size;
+
+                continue;
+            }
+            ']' => {
+                // end option
+
+                s = &s[1..];
+                current_index += 1;
+
+                if repetition_spec.is_some() {
+                    return Err(Error::RegexInvalidRepeat(current_index));
+                }
+
+                let (stack_growth, repetition_action) = repetition_action_stack
+                    .pop()
+                    .ok_or(Error::RegexEmptyActionStack(current_index))?;
+
+                match repetition_action {
+                    Some(action) => {
+                        if stack.len() <= stack_growth + 1 {
+                            return Err(Error::RegexEmptyStack(current_index));
+                        }
+
+                        stack.truncate(stack.len() - stack_growth + 1);
+
+                        // this is not going to fail because of the previous check
+                        let stack_last = stack.pop().unwrap();
+                        let stack_second_last = stack.iter().copied().last().unwrap();
+
+                        match action {
+                            RepetitionAction::Ignore => {
+                                edges[stack_second_last].pop();
+
+                                types.truncate(stack_last);
+                                edges.truncate(stack_last);
+
+                                // c wsp
+
+                                let skip_c_wsp_size = skip_by(s, is_wsp);
+
+                                if skip_c_wsp_size >= s.len() {
+                                    break;
+                                }
+
+                                s = &s[skip_c_wsp_size..];
+                                current_index += skip_c_wsp_size;
+
+                                continue;
+                            }
+                            RepetitionAction::Copy(n) | RepetitionAction::CopyOptional(0, n) => {
+                                let nodes_cloned =
+                                    types.iter().skip(stack_last).cloned().collect::<Vec<_>>();
+
+                                let edges_cloned =
+                                    edges.iter().skip(stack_last).cloned().collect::<Vec<_>>();
+
+                                for _ in 0..(n - 1) {
+                                    edges[stack_second_last].push(types.len());
+
+                                    // if stack_second_last == 3 && nodes.len() == 3 {
+                                    //     println!("haha");
+                                    // }
+
+                                    types.extend(nodes_cloned.iter().copied());
+
+                                    let edges_offset = edges.len() - stack_last;
+
+                                    edges.extend(edges_cloned.iter().map(|edges| {
+                                        edges
+                                            .iter()
+                                            .map(|des| des + edges_offset)
+                                            .collect::<Vec<_>>()
+                                    }));
+                                }
+
+                                // c wsp
+
+                                let skip_c_wsp_size = skip_by(s, is_wsp);
+
+                                if skip_c_wsp_size >= s.len() {
+                                    break;
+                                }
+
+                                s = &s[skip_c_wsp_size..];
+                                current_index += skip_c_wsp_size;
+
+                                continue;
+                            }
+                            RepetitionAction::CopyPlus(n) => {
+                                let nodes_cloned =
+                                    types.iter().skip(stack_last).cloned().collect::<Vec<_>>();
+
+                                let edges_cloned =
+                                    edges.iter().skip(stack_last).cloned().collect::<Vec<_>>();
+
+                                for _ in 0..(n - 1) {
+                                    edges[stack_second_last].push(types.len());
+
+                                    // if stack_second_last == 3 && nodes.len() == 3 {
+                                    //     println!("haha");
+                                    // }
+
+                                    types.extend(nodes_cloned.iter().copied());
+
+                                    let edges_offset = edges.len() - stack_last;
+
+                                    edges.extend(edges_cloned.iter().map(|edges| {
+                                        edges
+                                            .iter()
+                                            .map(|des| des + edges_offset)
+                                            .collect::<Vec<_>>()
+                                    }));
+                                }
+
+                                types.push(RegexType::Plus);
+                                edges.push(vec![types.len()]);
+
+                                edges[stack_second_last].push(types.len() - 1);
+
+                                // if stack_second_last == 3 && nodes.len() == 4 {
+                                //     println!("haha");
+                                // }
+
+                                types.extend(nodes_cloned.iter().copied());
+
+                                let edges_offset = edges.len() - stack_last;
+
+                                edges.extend(edges_cloned.iter().map(|edges| {
+                                    edges
+                                        .iter()
+                                        .map(|des| des + edges_offset)
+                                        .collect::<Vec<_>>()
+                                }));
+
+                                // c wsp
+
+                                let skip_c_wsp_size = skip_by(s, is_wsp);
+
+                                if skip_c_wsp_size >= s.len() {
+                                    break;
+                                }
+
+                                s = &s[skip_c_wsp_size..];
+                                current_index += skip_c_wsp_size;
+
+                                continue;
+                            }
+                            RepetitionAction::CopyOptional(n, m) => {
+                                let nodes_cloned =
+                                    types.iter().skip(stack_last).cloned().collect::<Vec<_>>();
+
+                                let edges_cloned =
+                                    edges.iter().skip(stack_last).cloned().collect::<Vec<_>>();
+
+                                for _ in 0..(n - 1) {
+                                    edges[stack_second_last].push(types.len());
+
+                                    // if stack_second_last == 3 && nodes.len() == 3 {
+                                    //     println!("haha");
+                                    // }
+
+                                    types.extend(nodes_cloned.iter().copied());
+
+                                    let edges_offset = edges.len() - stack_last;
+
+                                    edges.extend(edges_cloned.iter().map(|edges| {
+                                        edges
+                                            .iter()
+                                            .map(|des| des + edges_offset)
+                                            .collect::<Vec<_>>()
+                                    }));
+                                }
+
+                                let nodes_cloned_len = nodes_cloned.len();
+
+                                for _ in 0..(m - 1) {
+                                    // if stack_second_last == 3 && nodes.len() == 3 {
+                                    //     println!("haha");
+                                    // }
+
+                                    types.push(RegexType::Optional);
+                                    edges.push(vec![types.len(), types.len() + nodes_cloned_len]);
+
+                                    types.extend(nodes_cloned.iter().copied());
+
+                                    let edges_offset = edges.len() - stack_last;
+
+                                    edges.extend(edges_cloned.iter().map(|edges| {
+                                        edges
+                                            .iter()
+                                            .map(|des| des + edges_offset)
+                                            .collect::<Vec<_>>()
+                                    }));
+                                }
+
+                                types.push(RegexType::Optional);
+                                edges.push(vec![types.len()]);
+
+                                types.extend(nodes_cloned.iter().copied());
+
+                                let edges_offset = edges.len() - stack_last;
+
+                                edges.extend(edges_cloned.iter().map(|edges| {
+                                    edges
+                                        .iter()
+                                        .map(|des| des + edges_offset)
+                                        .collect::<Vec<_>>()
+                                }));
+
+                                // c wsp
+
+                                let skip_c_wsp_size = skip_by(s, is_wsp);
+
+                                if skip_c_wsp_size >= s.len() {
+                                    break;
+                                }
+
+                                s = &s[skip_c_wsp_size..];
+                                current_index += skip_c_wsp_size;
+
+                                continue;
+                            }
+                            RepetitionAction::Star
+                            | RepetitionAction::Optional
+                            | RepetitionAction::Plus => {
+                                // this will not happen here
+
+                                // c wsp
+
+                                let skip_c_wsp_size = skip_by(s, is_wsp);
+
+                                if skip_c_wsp_size >= s.len() {
+                                    break;
+                                }
+
+                                s = &s[skip_c_wsp_size..];
+                                current_index += skip_c_wsp_size;
+
+                                continue;
+                            }
+                        }
+                    }
+                    None => {
+                        if stack.len() <= stack_growth {
+                            return Err(Error::RegexEmptyStack(current_index));
+                        }
+
+                        stack.truncate(stack.len() - stack_growth);
+
+                        // c wsp
+
+                        let skip_c_wsp_size = skip_by(s, is_wsp);
+
+                        if skip_c_wsp_size >= s.len() {
+                            break;
+                        }
+
+                        s = &s[skip_c_wsp_size..];
+                        current_index += skip_c_wsp_size;
+
+                        continue;
+                    }
+                }
+            }
+            '/' => {
+                // end concatenation
+
+                s = &s[1..];
+                current_index += 1;
+
+                if repetition_spec.is_some() {
+                    return Err(Error::RegexInvalidRepeat(current_index));
+                }
+
+                if stack.len() <= 1 {
+                    return Err(Error::RegexEmptyStack(current_index));
+                }
+
+                stack.pop().unwrap();
+
+                let stack_last = stack.iter().copied().last().unwrap();
+
+                edges[stack_last].push(types.len());
+
+                types.push(RegexType::Empty);
+                edges.push(vec![]);
+
+                stack.push(types.len() - 1);
+
+                // c wsp
+
+                let skip_c_wsp_size = skip_by(s, is_wsp);
+
+                if skip_c_wsp_size >= s.len() {
+                    break;
+                }
+
+                s = &s[skip_c_wsp_size..];
+                current_index += skip_c_wsp_size;
+
+                continue;
+            }
+            '"' => {
+                // string
+
+                s = &s[1..];
+                current_index += 1;
+
+                let skip_char_value = skip_by(s, is_char_value);
+
+                if skip_char_value >= s.len() {
+                    return Err(Error::HangingDQuote(current_index));
+                }
+
+                let fakes = &s[skip_char_value..];
+
+                if !fakes.starts_with('"') {
+                    return Err(Error::HangingDQuote(current_index));
+                }
+
+                let action = repetition_spec.try_into()?;
+
+                let stack_last = stack
+                    .iter()
+                    .copied()
+                    .last()
+                    .ok_or(Error::RegexEmptyStack(current_index))?;
+
+                let slice_value = s[..skip_char_value].to_owned();
+
+                intermediates.push(IntermediateConstruct::Terminal(slice_value.clone()));
+
+                let t_index = intermediates.len() - 1;
+
+                match action {
+                    RepetitionAction::Ignore => {}
+                    RepetitionAction::Star => {
+                        types.push(RegexType::Kleene);
+                        types.push(RegexType::Lit(t_index));
+                        edges.push(vec![types.len() - 1]);
+                        edges.push(vec![]);
+
+                        edges[stack_last].push(types.len() - 2);
+                    }
+                    RepetitionAction::Optional => {
+                        types.append(&mut vec![RegexType::Optional]);
+                        types.push(RegexType::Lit(t_index));
+                        edges.push(vec![types.len() - 1]);
+                        edges.push(vec![]);
+
+                        edges[stack_last].push(types.len() - 2);
+                    }
+                    RepetitionAction::Plus => {
+                        types.append(&mut vec![RegexType::Plus]);
+                        types.push(RegexType::Lit(t_index));
+                        edges.push(vec![types.len() - 1]);
+                        edges.push(vec![]);
+
+                        edges[stack_last].push(types.len() - 2);
+                    }
+                    RepetitionAction::Copy(n) => {
+                        let types_len = types.len();
+
+                        types.extend(std::iter::repeat(t_index).take(n).map(RegexType::Lit));
+                        edges.extend(std::iter::repeat_with(Default::default).take(n));
+
+                        let edges_len = edges.len();
+
+                        edges
+                            .get_mut(stack_last)
+                            .ok_or(Error::RegexIndexOutOfBounds(stack_last, edges_len))?
+                            .extend((0..n).map(|i| types_len + i));
+
+                        // for _ in 0..n {
+                        //     types.push(RegexType::Lit(t_index));
+                        //     edges.push(vec![]);
+
+                        //     edges[stack_last].push(types.len() - 1);
+                        // }
+                    }
+                    RepetitionAction::CopyPlus(n) => {
+                        let types_len = types.len();
+
+                        types.extend(std::iter::repeat(t_index).take(n).map(RegexType::Lit));
+                        edges.extend(std::iter::repeat_with(Default::default).take(n));
+
+                        let edges_len = edges.len();
+
+                        edges
+                            .get_mut(stack_last)
+                            .ok_or(Error::RegexIndexOutOfBounds(stack_last, edges_len))?
+                            .extend((0..n).map(|i| types_len + i));
+
+                        types.extend([RegexType::Plus, RegexType::Lit(t_index)]);
+                        edges.extend([vec![types.len() - 1], Vec::new()]);
+
+                        let edges_len = edges.len();
+
+                        edges
+                            .get_mut(stack_last)
+                            .ok_or(Error::RegexIndexOutOfBounds(stack_last, edges_len))?
+                            .push(types.len() - 2);
+                    }
+                    RepetitionAction::CopyOptional(n, m) => {
+                        let types_len = types.len();
+
+                        types.extend(std::iter::repeat(t_index).take(n).map(RegexType::Lit));
+                        edges.extend(std::iter::repeat_with(Default::default).take(n));
+
+                        let edges_len = edges.len();
+
+                        edges
+                            .get_mut(stack_last)
+                            .ok_or(Error::RegexIndexOutOfBounds(stack_last, edges_len))?
+                            .extend((0..n).map(|i| types_len + i));
+
+                        let edges_len = edges.len();
+
+                        edges
+                            .get_mut(stack_last)
+                            .ok_or(Error::RegexIndexOutOfBounds(stack_last, edges_len))?
+                            .push(types.len());
+
+                        for _ in 0..(m - 1) {
+                            types.extend([RegexType::Optional, RegexType::Lit(t_index)]);
+                            edges.extend([vec![types.len() - 1, types.len()], Vec::new()]);
+                        }
+
+                        types.extend([RegexType::Optional, RegexType::Lit(t_index)]);
+                        edges.extend([vec![types.len() - 1], Vec::new()]);
+                    }
+                }
+
+                s = &fakes[1..];
+                current_index += skip_char_value + 1;
+
+                // c wsp
+
+                let skip_c_wsp_size = skip_by(s, is_wsp);
+
+                if skip_c_wsp_size >= s.len() {
+                    break;
+                }
+
+                s = &s[skip_c_wsp_size..];
+                current_index += skip_c_wsp_size;
+
+                continue;
+            }
+            '%' => {
+                // numeric value
+
+                s = &s[1..];
+                current_index += 1;
+
+                if s.is_empty() {
+                    return Err(Error::HangingPercent(current_index - 1));
+                }
+
+                let inner_first_char = s.chars().next().unwrap();
+
+                let intermediate_value: IntermediateConstruct;
+
+                match inner_first_char {
+                    'b' => {
+                        s = &s[1..];
+                        current_index += 1;
+
+                        let (parsed_size, parsed_construct) = parse_numeric(s, Radix::Binary)?;
+
+                        intermediate_value = parsed_construct;
+
+                        s = &s[parsed_size..];
+                        current_index += parsed_size;
+                    }
+                    'd' => {
+                        s = &s[1..];
+                        current_index += 1;
+
+                        let (parsed_size, parsed_construct) = parse_numeric(s, Radix::Decimal)?;
+
+                        intermediate_value = parsed_construct;
+
+                        s = &s[parsed_size..];
+                        current_index += parsed_size;
+                    }
+                    'x' => {
+                        s = &s[1..];
+                        current_index += 1;
+
+                        let (parsed_size, parsed_construct) = parse_numeric(s, Radix::Hexadecimal)?;
+                        // println!("size = {parsed_size}, construct = {parsed_construct:#?}");
+
+                        intermediate_value = parsed_construct;
+
+                        s = &s[parsed_size..];
+                        current_index += parsed_size;
+                    }
+                    _ => {
+                        return Err(Error::RegexInvalidNumeric(current_index - 1));
+                    }
+                }
+
+                let action = repetition_spec.try_into()?;
+
+                let stack_last = stack
+                    .iter()
+                    .copied()
+                    .last()
+                    .ok_or(Error::RegexEmptyStack(current_index))?;
+
+                intermediates.push(intermediate_value);
+
+                let inter_index = intermediates.len() - 1;
+
+                match action {
+                    RepetitionAction::Ignore => {}
+                    RepetitionAction::Star => {
+                        types.push(RegexType::Kleene);
+                        edges.push(vec![types.len()]);
+
+                        edges[stack_last].push(types.len() - 1);
+
+                        types.push(RegexType::Lit(inter_index));
+                        edges.push(vec![]);
+                    }
+                    RepetitionAction::Optional => {
+                        types.push(RegexType::Optional);
+                        edges.push(vec![types.len()]);
+
+                        edges[stack_last].push(types.len() - 1);
+
+                        types.push(RegexType::Lit(inter_index));
+                        edges.push(vec![]);
+                    }
+                    RepetitionAction::Plus => {
+                        types.push(RegexType::Plus);
+                        edges.push(vec![types.len()]);
+
+                        edges[stack_last].push(types.len() - 1);
+
+                        types.push(RegexType::Lit(inter_index));
+                        edges.push(vec![]);
+                    }
+                    RepetitionAction::Copy(n) => {
+                        types.extend(std::iter::repeat(inter_index).take(n).map(RegexType::Lit));
+
+                        edges.extend(std::iter::repeat_with(Default::default).take(n));
+
+                        let edges_len = edges.len();
+
+                        edges
+                            .get_mut(stack_last)
+                            .ok_or(Error::RegexIndexOutOfBounds(stack_last, edges_len))?
+                            .push(types.len() - 1);
+
+                        // for _ in 0..n {
+                        //     types.push(RegexType::Lit(inter_index));
+                        //     edges.push(vec![]);
+                        //     edges[stack_last].push(types.len() - 1);
+                        // }
+                    }
+                    RepetitionAction::CopyPlus(n) => {
+                        types.extend(std::iter::repeat(inter_index).take(n).map(RegexType::Lit));
+
+                        edges.extend(std::iter::repeat_with(Default::default).take(n));
+
+                        let edges_len = edges.len();
+
+                        edges
+                            .get_mut(stack_last)
+                            .ok_or(Error::RegexIndexOutOfBounds(stack_last, edges_len))?
+                            .push(types.len() - 1);
+
+                        types.push(RegexType::Plus);
+                        edges.push(vec![types.len()]);
+
+                        edges[stack_last].push(types.len() - 1);
+
+                        types.push(RegexType::Lit(inter_index));
+                        edges.push(vec![]);
+                    }
+                    RepetitionAction::CopyOptional(n, m) => {
+                        types.extend(std::iter::repeat(inter_index).take(n).map(RegexType::Lit));
+
+                        edges.extend(std::iter::repeat_with(Default::default).take(n));
+
+                        let edges_len = edges.len();
+
+                        edges
+                            .get_mut(stack_last)
+                            .ok_or(Error::RegexIndexOutOfBounds(stack_last, edges_len))?
+                            .push(types.len() - 1);
+
+                        for _ in 0..(m - 1) {
+                            types.push(RegexType::Optional);
+                            edges.push(vec![types.len(), types.len() + 1]);
+
+                            edges[stack_last].push(types.len() - 1);
+
+                            types.push(RegexType::Lit(inter_index));
+                            edges.push(vec![]);
+                        }
+
+                        types.push(RegexType::Optional);
+                        edges.push(vec![types.len()]);
+
+                        edges[stack_last].push(types.len() - 1);
+
+                        types.push(RegexType::Lit(inter_index));
+                        edges.push(vec![]);
+                    }
+                }
+
+                // c wsp
+
+                let skip_c_wsp_size = skip_by(s, is_wsp);
+
+                if skip_c_wsp_size >= s.len() {
+                    break;
+                }
+
+                s = &s[skip_c_wsp_size..];
+                current_index += skip_c_wsp_size;
+
+                continue;
+            }
+            '<' => {
+                // prose value
+                return Err(Error::Unimplemented(current_index));
+            }
+            ';' => {
+                // Possibly extend to the next line if the indentation
+                // of the next line is greater than `indentation`.
+
+                s = &s[1..];
+                current_index += 1;
+
+                let mut in_comment = true;
+
+                loop {
+                    let skip_fn = if in_comment { is_wsp_or_v_char } else { is_wsp };
+
+                    let mut after_wsp_and_v_char = skip_by(s, skip_fn);
+
+                    if after_wsp_and_v_char >= s.len() {
+                        current_index += s.len();
+                        stack = vec![];
+                        break;
+                    }
+
+                    s = &s[after_wsp_and_v_char..];
+                    current_index += after_wsp_and_v_char;
+
+                    let first_char = s.chars().next().unwrap();
+
+                    let mut skip_again = false;
+
+                    match first_char {
+                        '\n' | '\r' => {
+                            s = &s[1..];
+                            current_index += 1;
+                            skip_again = true;
+                        }
+                        _ if in_comment => {
+                            // dbg!("1815");
+                            return Err(Error::RegexInvalidComment(current_index));
+                        }
+                        _ => {}
+                    }
+
+                    if skip_again {
+                        let skip_wsp = skip_by(s, is_wsp);
+
+                        if skip_wsp >= s.len() {
+                            current_index += s.len();
+                            stack = vec![];
+                            break;
+                        }
+
+                        after_wsp_and_v_char = skip_wsp;
+
+                        s = &s[skip_wsp..];
+                        current_index += skip_wsp;
+                    }
+
+                    let first_char = s.chars().next().unwrap();
+
+                    match first_char {
+                        ';' => {
+                            // continue looping
+                            s = &s[1..];
+                            current_index += 1;
+                            in_comment = true;
+
+                            continue;
+                        }
+                        '\n' | '\r' => {
+                            in_comment = false;
+                            s = &s[1..];
+                            current_index += 1;
+
+                            continue;
+                        }
+                        _ if after_wsp_and_v_char <= indentation => {
+                            stack = vec![];
+                            break;
+                        }
+                        _ => {
+                            break;
+                        }
+                    }
+                }
+            }
+            '\n' | '\r' => {
+                // Possibly extend to the next line if the indentation
+                // of the next line is greater than `indentation`.
+                // println!("s = {}", s.chars().take(10).collect::<String>());
+
+                s = &s[1..];
+                current_index += 1;
+
+                if s.is_empty() {
+                    break;
+                }
+
+                let mut in_comment = false;
+
+                loop {
+                    let skip_fn = if in_comment { is_wsp_or_v_char } else { is_wsp };
+
+                    let mut after_wsp = skip_by(s, skip_fn);
+
+                    if after_wsp >= s.len() {
+                        current_index += s.len();
+                        stack = vec![];
+                        break;
+                    }
+
+                    s = &s[after_wsp..];
+                    current_index += after_wsp;
+
+                    let first_char = s.chars().next().unwrap();
+
+                    let mut skip_again = false;
+
+                    match first_char {
+                        '\n' | '\r' => {
+                            s = &s[1..];
+                            current_index += 1;
+                            skip_again = true;
+                        }
+                        _ if in_comment => {
+                            // dbg!("1903");
+                            return Err(Error::RegexInvalidComment(current_index));
+                        }
+                        _ => {}
+                    }
+
+                    if skip_again {
+                        let skip_wsp = skip_by(s, is_wsp);
+
+                        if skip_wsp >= s.len() {
+                            current_index += s.len();
+                            stack = vec![];
+                            break;
+                        }
+
+                        s = &s[skip_wsp..];
+                        current_index += skip_wsp;
+
+                        after_wsp = skip_wsp;
+                    }
+
+                    let first_char = s.chars().next().unwrap();
+
+                    match first_char {
+                        ';' => {
+                            // continue looping
+                            s = &s[1..];
+                            current_index += 1;
+                            in_comment = true;
+
+                            continue;
+                        }
+                        '\n' | '\r' => {
+                            in_comment = false;
+                            s = &s[1..];
+                            current_index += 1;
+
+                            continue;
+                        }
+                        _ if after_wsp <= indentation => {
+                            stack = vec![];
+                            break;
+                        }
+                        _ => {
+                            break;
+                        }
+                    }
+                }
+            }
+            _ => {
+                // invalid
+                println!("s = {}", &s[..(if s.len() > 50 { 50 } else { s.len() })]);
+                return Err(Error::InvalidCharacter(current_index));
+            }
+        }
+    }
+
+    let regex = DefaultRegex::new(edges.into(), types).map_err(error_conversion)?;
+
+    Ok((
+        current_index,
+        AlternationConstruct {
+            intermediates,
+            regex,
+        },
+    ))
+}
+
+type RuleConstruct = (
+    // Name
+    String,
+    // Data
+    Vec<IntermediateConstruct>,
+    // Rule
+    DefaultRegex<usize>,
+    // extra_alternation_or_not
+    bool,
+);
+
+fn parse_one_rule(s: &str, indentation: usize) -> Result<(usize, RuleConstruct), Error> {
+    if s.is_empty() {
+        return Ok((0, ("".to_owned(), Vec::new(), Default::default(), false)));
+    }
+
+    let mut s = s;
+
+    let mut current_index = 0usize;
+
+    let mut appending_alternation = false;
+
+    // rule name
+
+    let rulename_end = parse_rulename(s)?;
+    let rule_name = s[..rulename_end].to_string();
+    s = &s[rulename_end..];
+    current_index += rulename_end;
+
+    // c wsp
+
+    let skip_c_wsp_size = skip_by(s, is_wsp);
+
+    if skip_c_wsp_size >= s.len() {
+        // dbg!();
+        return Err(Error::HangingRuleName(0));
+    }
+
+    s = &s[skip_c_wsp_size..];
+    current_index += skip_c_wsp_size;
+
+    // equal or equal forward slash
+
+    let next_char = s.chars().next().unwrap();
+
+    if next_char != '=' {
+        // println!("s = {}", &s[..50]);
+        // println!("rule_name = {rule_name}");
+        return Err(Error::MissingEqualSign(current_index + 1));
+    }
+
+    s = &s[1..];
+    current_index += 1;
+
+    if s.is_empty() {
+        return Err(Error::HangingEqualsSign(current_index - 1));
+    }
+
+    let next_char = s.chars().next().unwrap();
+
+    if next_char == '/' {
+        appending_alternation = true;
+        s = &s[1..];
+        current_index += 1;
+    }
+
+    if s.is_empty() {
+        return Err(Error::HangingEqualsSign(current_index - 1));
+    }
+
+    // c wsp
+
+    let skip_c_wsp_size = skip_c_wsp(s);
+
+    if skip_c_wsp_size >= s.len() {
+        // dbg!();
+        return Err(Error::HangingRuleName(0));
+    }
+
+    s = &s[skip_c_wsp_size..];
+    current_index += skip_c_wsp_size;
+
+    // elements
+
+    // alternation
+
+    let (alternation_size, alternation_result) = parse_alternation(s, indentation)?;
+
+    let intermediates_data = alternation_result.intermediates;
+    let regex = alternation_result.regex;
+
+    // println!("size = {alternation_size}, result = {alternation_result:?}");
+
+    if alternation_size >= s.len() {
+        s = "";
+    } else {
+        s = &s[alternation_size..];
+    }
+
+    current_index += alternation_size;
+
+    // c wsp
+
+    let skip_c_wsp_size = skip_c_wsp(s);
+
+    if skip_c_wsp_size >= s.len() {
+        s = "";
+    } else {
+        s = &s[skip_c_wsp_size..];
+    }
+
+    current_index += skip_c_wsp_size;
+
+    // c-nl
+
+    if let Some(skip_c_nl) = skip_c_nl(s) {
+        current_index += skip_c_nl;
+    }
+
+    Ok((
+        current_index,
+        (rule_name, intermediates_data, regex, appending_alternation),
+    ))
+}
+
+impl std::str::FromStr for Grammar {
+    // First skip whitespaces, linefeeds, carraige returns and
+    // semicolons until the first rule appears.  The basic indentation
+    // level is set to the indentation level of the first rule's line.
+    //
+    // Then enter identifier reading mode, reading a letter followed
+    // by a sequence of letters, digits and hyphens.
+    //
+    // After skipping white spaces, read an equal sign.
+    //
+    // Then enter alternation reading mode, reading alternations of
+    // concatenations.
+    //
+    // Then enter element reading mode, reading optionally repetition
+    // specification followed by an identifier, or a numeral value, or
+    // a quoted string.
+
+    type Err = Error;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        let mut before_first_rule = true;
+        let mut basic_indentation: usize = 0;
+        let mut current_skipped_index: usize = 0;
+
+        let mut s = s;
+
+        while before_first_rule {
+            before_first_rule = false;
+
+            let after_wsp = skip_by(s, is_wsp);
+
+            if after_wsp >= s.len() {
+                // The entire string consists of white-spaces and
+                // comments.
+                return Err(Error::EmptyGrammar);
+            }
+
+            s = &s[after_wsp..];
+            current_skipped_index += after_wsp;
+
+            match skip_c_nl(s) {
+                Some(skipped_size) => {
+                    if skipped_size >= s.len() {
+                        return Err(Error::EmptyGrammar);
+                    }
+
+                    // Repeat skipping until we find the first rule.
+                    before_first_rule = true;
+
+                    s = &s[skipped_size..];
+                    current_skipped_index += skipped_size;
+                }
+                None => {
+                    // Found the first rule.
+                    basic_indentation = after_wsp;
+                }
+            }
+        }
+
+        // TODO: Core rules are not implemented for now.
+
+        #[allow(unused_macros)]
+        macro_rules! log {
+            () => {
+                let mut file = std::fs::OpenOptions::new()
+                    .create(true)
+                    .append(true)
+                    .open("debug.log")
+                    .unwrap();
+
+                let _ = file.write_all(format!("{}\n", line!()).as_bytes());
+            };
+        }
+
+        let mut nonterminals_vec: Vec<String> = vec![];
+        let mut nonterminals_map: HashMap<String, usize> = HashMap::new();
+
+        let mut all_rules: HashMap<String, DefaultRegex<TNT>> = Default::default();
+
+        let mut terminals_vec: Vec<String> = vec![];
+        let mut terminals_map: HashMap<String, usize> = HashMap::new();
+
+        let mut temp = s;
+
+        // First collect non-terminals in the order they are defined.
+        loop {
+            let (rule_size, (rule_name, _rule_data, _rule_regex, appending_p)) =
+                parse_one_rule(temp, basic_indentation)?;
+
+            if rule_size == 0 {
+                break;
+            }
+
+            if appending_p && nonterminals_map.get(&rule_name).is_none() {
+                return Err(Error::AppendToNothing(current_skipped_index));
+            } else if !appending_p && nonterminals_map.get(&rule_name).is_some() {
+                return Err(Error::CreateAgain(current_skipped_index));
+            }
+
+            nonterminals_map.insert(rule_name.clone(), nonterminals_vec.len());
+            nonterminals_vec.push(rule_name);
+
+            if rule_size > 0 && rule_size < temp.len() {
+                temp = &temp[rule_size..];
+                current_skipped_index += rule_size;
+            } else {
+                break;
+            }
+        }
+
+        let mut temp = s;
+
+        // Then gather the data about names of terminals and
+        // non-terminals.
+        loop {
+            let (rule_size, (_rule_name, rule_data, _rule_regex, _appending_p)) =
+                parse_one_rule(temp, basic_indentation)?;
+
+            if rule_size == 0 {
+                break;
+            }
+
+            for data in rule_data {
+                match data {
+                    IntermediateConstruct::Terminal(name) => {
+                        if terminals_map.get(&name).is_none() {
+                            terminals_map.insert(name.clone(), terminals_vec.len());
+                            terminals_vec.push(name);
+                        }
+                    }
+                    IntermediateConstruct::Nonterminal(name) => {
+                        if nonterminals_map.get(&name).is_none() {
+                            return Err(Error::UnknownNonterminal(name));
+                        }
+                    }
+                    IntermediateConstruct::Range(_, _) => unimplemented!(),
+                }
+            }
+
+            if rule_size > 0 && rule_size < temp.len() {
+                temp = &temp[rule_size..];
+                current_skipped_index += rule_size;
+            } else {
+                break;
+            }
+        }
+
+        // Then collect the actual rules.
+        loop {
+            let (rule_size, (rule_name, rule_data, rule_regex, appending_p)) =
+                parse_one_rule(s, basic_indentation)?;
+            // println!("rule name = {rule_name}");
+            // println!("regexp = {rule_regexp:#?}");
+
+            let rule_transform = |old_label: usize| -> Result<TNT, Error> {
+                if let Some(old_value) = rule_data.get(old_label) {
+                    match old_value {
+                        IntermediateConstruct::Terminal(name) => {
+                            if let Some(index) = terminals_map.get(name) {
+                                Ok(TNT::Ter(*index))
+                            } else {
+                                dbg!();
+                                Err(Error::UnknownTerminal(name.clone()))
+                            }
+                        }
+                        IntermediateConstruct::Nonterminal(name) => {
+                            if let Some(index) = nonterminals_map.get(name) {
+                                Ok(TNT::Non(*index))
+                            } else {
+                                dbg!();
+                                Err(Error::UnknownNonterminal(name.clone()))
+                            }
+                        }
+                        IntermediateConstruct::Range(_, _) => {
+                            dbg!();
+                            Err(Error::UnsupportedOperation)
+                        }
+                    }
+                } else {
+                    dbg!();
+                    Err(Error::RegexIndexOutOfBounds(old_label, rule_data.len()))
+                }
+            };
+
+            let transformed_rule = rule_regex.maps_to(rule_transform)?;
+
+            if rule_size == 0 {
+                break;
+            }
+
+            if !appending_p {
+                nonterminals_map.insert(rule_name.clone(), nonterminals_vec.len());
+                nonterminals_vec.push(rule_name.clone());
+
+                all_rules.insert(rule_name, transformed_rule);
+            } else {
+                all_rules
+                    .get_mut(&rule_name)
+                    .unwrap()
+                    .append(transformed_rule);
+            }
+
+            if rule_size > 0 && rule_size < s.len() {
+                s = &s[rule_size..];
+                current_skipped_index += rule_size;
+            } else {
+                break;
+            }
+        }
+
+        let rules_vec: Vec<Rule> = nonterminals_vec
+            .iter()
+            .map(|name| {
+                all_rules
+                    .get(name)
+                    .map(|regex| Rule::new(regex.clone()))
+                    .ok_or(Error::UnknownNonterminal(name.clone()))
+            })
+            .collect::<Result<_, _>>()?;
+
+        let terminals: Vec<Terminal> = terminals_vec.into_iter().map(Terminal::new).collect();
+        let nonterminals: Vec<Nonterminal> =
+            nonterminals_vec.into_iter().map(Nonterminal).collect();
+
+        Ok(Self::new(terminals, nonterminals, rules_vec))
+    }
+}
+
+#[cfg(test)]
+mod tests;
diff --git a/grammar/src/abnf/tests.rs b/grammar/src/abnf/tests.rs
new file mode 100644
index 0000000..67dfaaa
--- /dev/null
+++ b/grammar/src/abnf/tests.rs
@@ -0,0 +1,291 @@
+//! This module contains unit tests for the abnf grammar parser.
+
+use super::*;
+
+#[test]
+fn test_skip_by() -> Result<(), Box<dyn std::error::Error>> {
+    let string = std::iter::repeat('a')
+        .take(10)
+        .chain(std::iter::repeat('哈').take(10))
+        .chain(" \t\n".chars())
+        .collect::<String>();
+
+    let mut s: &str = string.as_str();
+
+    dbg!(&s);
+
+    let alpha_len = skip_by(s, is_alpha);
+
+    assert_eq!(alpha_len, 10);
+
+    s = &s[alpha_len..];
+
+    dbg!(&s);
+
+    let not_newline_len = skip_by(s, is_not_newline);
+
+    // 哈 takes 3 bytes, so the count should be 30 plus two bytes for
+    // the space and the tab.
+    assert_eq!(not_newline_len, 32);
+
+    Ok(())
+}
+
+#[test]
+fn test_skip_c_nl() -> Result<(), Box<dyn std::error::Error>> {
+    let string = std::iter::repeat(' ')
+        .take(10)
+        .chain(std::iter::repeat('哈').take(10))
+        .chain(" \t\n".chars())
+        .collect::<String>();
+
+    let s = string.as_str();
+
+    dbg!(s);
+
+    let c_nl = skip_c_nl(s);
+
+    assert!(c_nl.is_none());
+
+    let new_string = std::iter::once(';')
+        .chain(string.chars())
+        .chain(string.chars())
+        .collect::<String>();
+
+    let s = new_string.as_str();
+
+    dbg!(s);
+
+    let c_nl = skip_c_nl(s);
+
+    assert_eq!(c_nl, Some(string.len() + 1));
+
+    let c_nl = skip_c_nl("\n");
+
+    assert_eq!(c_nl, Some(1));
+
+    Ok(())
+}
+
+#[test]
+fn test_skip_c_wsp() -> Result<(), Box<dyn std::error::Error>> {
+    let mut string: String = std::iter::repeat(' ')
+        .take(10)
+        .chain(std::iter::repeat('\t').take(10))
+        .collect();
+
+    string.extend(
+        std::iter::once(';')
+            .chain(
+                std::iter::repeat(std::iter::once('\n').chain(std::iter::once(';')))
+                    .take(10)
+                    .flatten(),
+            )
+            .chain(std::iter::once('\n'))
+            .chain(std::iter::repeat('a').take(10)),
+    );
+
+    let string = string;
+
+    dbg!(&string);
+
+    let skip_c_wsp_size = skip_c_wsp(&string);
+
+    assert_eq!(skip_c_wsp_size, string.len() - 10);
+
+    Ok(())
+}
+
+#[test]
+fn test_parse_repetition() -> Result<(), Box<dyn std::error::Error>> {
+    let strs = ["*A", "0*A", "*0A", "2*A", "*2A", "5*10A", "A"];
+
+    let results = strs.map(parse_repetition_spec);
+
+    dbg!(&results);
+
+    let answers = [
+        Some((1, RepetitionSpec::Star)),
+        Some((2, RepetitionSpec::Min(0))),
+        Some((2, RepetitionSpec::Max(0))),
+        Some((2, RepetitionSpec::Min(2))),
+        Some((2, RepetitionSpec::Max(2))),
+        Some((4, RepetitionSpec::MinMax(5, 10))),
+        None,
+    ];
+
+    for (result, answer) in results.iter().zip(answers.iter()) {
+        assert_eq!(result, answer);
+    }
+
+    Ok(())
+}
+
+use super::Error;
+
+#[test]
+fn test_parse_rulename() -> Result<(), Box<dyn std::error::Error>> {
+    let s = "NONTERMINAL";
+
+    assert!(matches!(parse_rulename(s), Err(Error::HangingRuleName(0))));
+
+    let s = " NONTERMINAL = ";
+
+    assert!(matches!(parse_rulename(s), Ok(0)));
+
+    let s = "NONTERMINAL = ";
+
+    assert!(matches!(parse_rulename(s), Ok(11)));
+
+    Ok(())
+}
+
+#[test]
+fn test_parse_numeric() -> Result<(), Box<dyn std::error::Error>> {
+    let s = "haha";
+
+    assert!(matches!(
+        parse_numeric(s, Radix::Binary),
+        Err(Error::InvalidDigit(0))
+    ));
+
+    let s = "124";
+
+    assert!(matches!(
+        parse_numeric(s, Radix::Binary),
+        Err(Error::InvalidCharacter(1))
+    ));
+
+    let s = "32";
+
+    assert!(matches!(
+        parse_numeric(s, Radix::Decimal),
+        Ok((2, IntermediateConstruct::Terminal(string))) if string == " ".to_string()
+    ));
+
+    let s = "32.65.66.66.65";
+
+    assert!(matches!(
+        parse_numeric(s, Radix::Decimal),
+        Ok((14, IntermediateConstruct::Terminal(string))) if string == " ABBA".to_string()
+    ));
+
+    let s = "32-";
+
+    assert!(matches!(
+        parse_numeric(s, Radix::Decimal),
+        Err(Error::HangingNumeric(2))
+    ));
+
+    let s = "32-124";
+
+    assert!(matches!(
+        parse_numeric(s, Radix::Decimal),
+        Ok((6, IntermediateConstruct::Range(from, to))) if from == ' ' && to == '|',
+    ));
+
+    Ok(())
+}
+
+#[test]
+fn test_parse_alternation() -> Result<(), Box<dyn std::error::Error>> {
+    let s = "0*10nonterminal\n";
+
+    let (len, alt) = parse_alternation(s, 0)?;
+
+    assert_eq!(len, s.len());
+
+    let regex = alt.regex;
+
+    println!("regex = {regex}");
+
+    assert_eq!(
+        format!("{regex}"),
+        "((0(0(0(0(0(0(0(0(0(0)?)?)?)?)?)?)?)?)?)?)"
+    );
+
+    let s = "0*10nonterminal ( *nonterminal nonterminal / [ nonterminal ] 1*5nonterminal )\n";
+
+    let (len, alt) = parse_alternation(s, 0)?;
+
+    assert_eq!(len, s.len());
+
+    let regex = alt.regex;
+
+    println!("regex = {regex}");
+
+    assert_eq!(
+        format!("{regex}"),
+        "((0(0(0(0(0(0(0(0(0(0)?)?)?)?)?)?)?)?)?)?((1)*2|((3))?4(4(4(4(4)?)?)?)?))"
+    );
+
+    let s = "5nonterminal\n";
+
+    let (len, alt) = parse_alternation(s, 0)?;
+
+    assert_eq!(len, s.len());
+
+    let regex = alt.regex;
+
+    println!("regex = {regex}");
+
+    assert_eq!(format!("{regex}"), "(00000)");
+
+    Ok(())
+}
+
+#[test]
+fn test_parse_onerule() -> Result<(), Box<dyn std::error::Error>> {
+    let s = "start = 1*10nonterminal [ nonterminal ] / 1*nonterminal\n";
+
+    let (len, (name, data, regex, extra_p)) = parse_one_rule(s, 0)?;
+
+    println!("name = {name}");
+    print!("data = ");
+
+    for (index, non_terminal) in data.iter().enumerate() {
+        print!("({index}, {non_terminal})");
+    }
+
+    println!();
+
+    println!("regex = {regex}");
+
+    println!("extra_p = {extra_p}");
+
+    assert_eq!(len, s.len());
+
+    assert_eq!(name, "start".to_string());
+
+    let answers = ["nnonterminal", "nnonterminal", "nnonterminal"];
+
+    assert_eq!(data.len(), answers.len());
+
+    for (tnt, answer) in data.into_iter().zip(answers) {
+        assert_eq!(format!("{tnt}"), answer.to_string());
+    }
+
+    assert_eq!(
+        format!("{regex}"),
+        "(0(0(0(0(0(0(0(0(0(0)?)?)?)?)?)?)?)?)?((1))?|(2)+)".to_string()
+    );
+
+    assert!(!extra_p);
+
+    Ok(())
+}
+
+#[test]
+fn test_parse_grammar() -> Result<(), Box<dyn std::error::Error>> {
+    let grammar_file_name = "abnf grammars/test.abnf";
+
+    let file_content = std::fs::read_to_string(grammar_file_name)?;
+
+    let grammar: Grammar = file_content.parse()?;
+
+    println!("print grammar:");
+
+    println!("{grammar}");
+
+    Ok(())
+}
diff --git a/grammar/src/lib.rs b/grammar/src/lib.rs
index 11cb161..ea1299f 100644
--- a/grammar/src/lib.rs
+++ b/grammar/src/lib.rs
@@ -93,7 +93,7 @@ impl Display for TNT {
 }
 
 /// Errors related to grammar operations.
-#[derive(Debug, Copy, Clone)]
+#[derive(Debug, Clone)]
 #[non_exhaustive]
 pub enum Error {
     /// The operation requires the grammar to be after a certain
@@ -101,6 +101,8 @@ pub enum Error {
     WrongState(GrammarState, GrammarState),
     /// The first component is the index, and the second the bound.
     IndexOutOfBounds(usize, usize),
+    /// The given name of a terminal or a non-terminal is unknown.
+    UnknownTNTName(String),
     /// Fail to build the N-th regular expression, due to the
     /// ParseError.
     BuildFail(usize, ParseError),
@@ -123,6 +125,11 @@ impl Display for Error {
                 "Failed to build the {n}-th regular expression due to error: {pe}"
             ),
             Error::NFAFail(nfae) => write!(f, "failed to build NFA because of {nfae}"),
+            Error::UnknownTNTName(name) => write!(
+                f,
+                "the name {name} is unknown \
+                                                      for a terminal or a non-terminal."
+            ),
             Error::WrongState(current, threshold) => {
                 write!(f, "require state {threshold}, but in state {current}")
             }
@@ -158,6 +165,12 @@ impl Rule {
     pub fn len(&self) -> usize {
         self.regex.len()
     }
+
+    /// Wrap a regular expression into a rule.
+    #[inline]
+    pub fn new(regex: DefaultRegex<TNT>) -> Self {
+        Self { regex }
+    }
 }
 
 /// The state of Grammar.
@@ -190,6 +203,37 @@ impl Display for GrammarState {
     }
 }
 
+/// This enum represents the name of either a terminal or a
+/// non-terminal.
+#[derive(Debug, Clone, Eq, PartialEq, Hash)]
+pub enum TNTName {
+    /// The name of a terminal.
+    Ter(String),
+    /// The name of a non-terminal.
+    Non(String),
+}
+
+impl TNTName {
+    /// Construct the name of a terminal or a non-terminal.
+    #[inline]
+    pub fn new(name: String, terminal_p: bool) -> Self {
+        if terminal_p {
+            Self::Ter(name)
+        } else {
+            Self::Non(name)
+        }
+    }
+
+    /// Return the underlying name.
+    #[inline]
+    pub fn name(&self) -> &str {
+        match self {
+            TNTName::Ter(name) => name,
+            TNTName::Non(name) => name,
+        }
+    }
+}
+
 /// The type of a grammar.
 #[derive(Debug, Clone, Default)]
 pub struct Grammar {
@@ -197,6 +241,8 @@ pub struct Grammar {
     ter: Vec<Terminal>,
     /// A list of non-terminals.
     non: Vec<Nonterminal>,
+    /// A map from the names of terminals or non-terminals to TNT.
+    tnt_map: HashMap<TNTName, TNT>,
     /// A list of rules.
     ///
     /// The length of the list must match that of the list of
@@ -286,6 +332,22 @@ impl Grammar {
         let expansion_map = Default::default();
         let reduction_map = Default::default();
 
+        let mut tnt_map: HashMap<TNTName, TNT> = Default::default();
+
+        for (index, ter_element) in ter.iter().enumerate() {
+            tnt_map.insert(
+                TNTName::new(ter_element.name().to_string(), true),
+                TNT::Ter(index),
+            );
+        }
+
+        for (index, non_element) in non.iter().enumerate() {
+            tnt_map.insert(
+                TNTName::new(non_element.name().to_string(), false),
+                TNT::Non(index),
+            );
+        }
+
         // NOTE: We cannot calculate accumulators here, as we want the
         // accumulators of the regular expression of the left-closure,
         // not of the original one.
@@ -294,6 +356,7 @@ impl Grammar {
         Self {
             ter,
             non,
+            tnt_map,
             rules,
             firsts,
             first_nodes,
@@ -304,6 +367,16 @@ impl Grammar {
         }
     }
 
+    /// Convert from the name of a terminal or a non-terminal to a
+    /// struct TNT.
+    #[inline]
+    pub fn name_to_tnt(&self, name: &TNTName) -> Result<TNT, Error> {
+        self.tnt_map
+            .get(name)
+            .copied()
+            .ok_or_else(|| Error::UnknownTNTName(name.name().to_string()))
+    }
+
     /// Return the name of a terminal or a non-terminal.
     pub fn name_of_tnt(&self, tnt: TNT) -> Result<String, Error> {
         match tnt {
@@ -313,6 +386,14 @@ impl Grammar {
                     .get(t)
                     .ok_or(Error::IndexOutOfBounds(t, self.ter.len()))?
                     .name()
+                    .chars()
+                    .map(|c| if crate::abnf::is_v_char(c) {
+                        c.to_string()
+                    } else {
+                        format!("{:#x}", c as usize)
+                    })
+                    .collect::<Vec<_>>()
+                    .join("")
             )),
             TNT::Non(n) => Ok(format!(
                 "N{}",
@@ -822,7 +903,7 @@ impl Display for Grammar {
                 f,
                 "{}",
                 rule.regex.to_string_with(|tnt| format!(
-                    "({})",
+                    " {} ",
                     self.name_of_tnt(tnt)
                         .unwrap_or_else(|_| format!("Unknown {tnt:?}"))
                 ))?
author	JSDurand <mmemmew@gmail.com>	2023-07-08 12:30:21 +0800
committer	JSDurand <mmemmew@gmail.com>	2023-07-08 12:31:13 +0800
commit	9a317e56f8a6126583f7d0c431bf878d9b1fe7b1 (patch)
tree	7bb6004196b38446a5ab0cb3a0ab642d35f113e9 /grammar
parent	691f969eb104fa3d4c2a1667693fd0382eb9d6b5 (diff)