1 files changed, 368 insertions, 0 deletions
diff --git a/grammar/src/test_grammar_helper.rs b/grammar/src/test_grammar_helper.rs
new file mode 100644
index 0000000..c236952
--- /dev/null
+++ b/grammar/src/test_grammar_helper.rs
@@ -0,0 +1,368 @@
+//! This module provides some grammars for testing.
+
+use super::*;
+use nfa::{
+    default::regex::{DefaultRegParser, ParseDirection, ParseError, RegexType},
+    DesRec,
+};
+use std::fmt::Write;
+
+/// Computes the first sets of `grammar`, then returns its left-closure.
+///
+/// An error from either step is boxed and handed back to the caller.
+pub fn new_closure_regex(
+    grammar: &mut Grammar,
+) -> Result<Vec<DefaultRegex<TNT>>, Box<dyn std::error::Error>> {
+    grammar.compute_firsts()?;
+
+    let closure = grammar.left_closure()?;
+
+    Ok(closure)
+}
+
+/// Scans a single token of a rule string for the regex parser.
+///
+/// Spaces in front of the token are skipped but still counted.  A
+/// token is one of `*`, `+`, `?`, `|`, `(`, `)`, or a `T` (terminal)
+/// or `N` (non-terminal) marker followed by a name made of ASCII
+/// letters, which is looked up in `parser`.
+///
+/// On success the result holds the number of characters consumed,
+/// the kind of regex node the token stands for, and the direction
+/// handed back to the parser.  `Ok(None)` means the input ran out
+/// before any token was found.
+///
+/// # Errors
+///
+/// `ParseError::InvalidCharacter` is returned for any other leading
+/// character, and for a name that `parser` does not know.
+fn scan_tnt(
+    parser: &DefaultRegParser<TNT>,
+    input: &str,
+) -> Result<Option<(usize, RegexType<TNT>, ParseDirection)>, ParseError> {
+    use ParseDirection::*;
+    use RegexType::*;
+    use TNT::*;
+
+    /// Collects the leading run of ASCII letters of `rest`.  The
+    /// character that ends the run is taken out of `rest` as well.
+    fn read_name(rest: &mut std::str::Chars<'_>) -> Result<String, ParseError> {
+        let mut name = String::new();
+
+        for c in rest.by_ref() {
+            if !c.is_ascii_alphabetic() {
+                break;
+            }
+
+            name.write_char(c)
+                .map_err(|_| ParseError::InvalidCharacter(c))?;
+        }
+
+        Ok(name)
+    }
+
+    let mut rest = input.chars();
+
+    // Characters consumed so far, the one under examination included.
+    let mut consumed = 1;
+
+    loop {
+        let first = match rest.next() {
+            Some(c) => c,
+            None => return Ok(None),
+        };
+
+        let (kind, direction) = match first {
+            ' ' => {
+                // a skipped space still counts as consumed
+                consumed += 1;
+                continue;
+            }
+            '*' => (Kleene, Right),
+            '+' => (Plus, Right),
+            '?' => (Optional, Right),
+            '|' => (Empty, Up),
+            '(' => (Or, Down),
+            ')' => (Paren, Up),
+            'T' | 'N' => {
+                let is_terminal = first == 'T';
+                let name = read_name(&mut rest)?;
+
+                // The name is pure ASCII, so bytes and characters agree.
+                consumed += name.len();
+
+                match parser.query(&name, is_terminal) {
+                    Some(index) if is_terminal => (Lit(Ter(index)), Right),
+                    Some(index) => (Lit(Non(index)), Right),
+                    None => return Err(ParseError::InvalidCharacter(first)),
+                }
+            }
+            _ => return Err(ParseError::InvalidCharacter(first)),
+        };
+
+        return Ok(Some((consumed, kind, direction)));
+    }
+}
+
+/// Builds a small grammar for tests, with the terminals `a` and `b`
+/// and the non-terminals `start` and `end`.
+///
+/// Each rule string is written in the syntax read by `scan_tnt` and
+/// is parsed in order; the first failure is returned as the error.
+#[allow(dead_code)]
+pub fn new_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
+    let terminal_names = ["a", "b"];
+    let nonterminal_names = ["start", "end"];
+
+    let ter: Vec<_> = terminal_names
+        .iter()
+        .map(|&name| Terminal::new(name.to_owned()))
+        .collect();
+    let non: Vec<_> = nonterminal_names
+        .iter()
+        .map(|&name| Nonterminal(name.to_owned()))
+        .collect();
+
+    let mut regex_parser: DefaultRegParser<TNT> = Default::default();
+
+    for name in terminal_names {
+        regex_parser.add_tnt(name, true);
+    }
+
+    for name in nonterminal_names {
+        regex_parser.add_tnt(name, false);
+    }
+
+    let mut rules = Vec::new();
+
+    for pattern in ["Ta*Tb+Nend+", "Nstart?Nend*"] {
+        let parsed = regex_parser
+            .parse(pattern, Box::new(scan_tnt), true)?
+            .ok_or(ParseError::Invalid)?;
+
+        rules.push(Rule { regex: parsed.0 });
+    }
+
+    Ok(Grammar::new(ter, non, rules))
+}
+
+/// Builds a grammar that might serve as the grammar for my notes.
+///
+/// NOTE(review): a few names registered with the regex parser are
+/// spelled differently from the grammar's own symbols: `note` and
+/// `price` against the terminals `NOTE` and `PRICE`, and
+/// `notecontent` against the non-terminal `note-content`.  `scan_tnt`
+/// only reads ASCII letters in a name, so a hyphen cannot occur in a
+/// rule string; presumably both lists are matched by position rather
+/// than by name -- confirm before renaming anything.
+#[allow(dead_code)]
+pub fn new_notes_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
+    let ter: Vec<_> = ["NL", "SP", "CON", "STAR", "NOTE", "PRICE", "DIGIT"]
+        .iter()
+        .map(|&name| Terminal::new(name.to_owned()))
+        .collect();
+    let non: Vec<_> = [
+        "document",
+        "item",
+        "header",
+        "title",
+        "note",
+        "note-content",
+        "price",
+    ]
+    .iter()
+    .map(|&name| Nonterminal(name.to_owned()))
+    .collect();
+
+    let mut regex_parser: DefaultRegParser<TNT> = Default::default();
+
+    // Names by which the rule strings refer to the terminals.
+    for name in ["NL", "SP", "CON", "STAR", "note", "price", "DIGIT"] {
+        regex_parser.add_tnt(name, true);
+    }
+
+    // Names by which the rule strings refer to the non-terminals.
+    for name in [
+        "document",
+        "item",
+        "header",
+        "title",
+        "note",
+        "notecontent",
+        "price",
+    ] {
+        regex_parser.add_tnt(name, false);
+    }
+
+    // One pattern per rule, in the order the rules are handed over.
+    let patterns = [
+        "Nitem+",
+        "Nheader Nprice?Nnote?",
+        "TSTAR?TSP Ntitle TNL (TSP|TNL)*",
+        "TCON+",
+        "Tnote Nnotecontent TNL (TSP|TNL)*",
+        "TCON+",
+        "Tprice TSP TDIGIT+ TNL(TSP | TNL)+",
+    ];
+
+    let mut rules = Vec::with_capacity(patterns.len());
+
+    for pattern in patterns {
+        let parsed = regex_parser
+            .parse(pattern, Box::new(scan_tnt), true)?
+            .ok_or(ParseError::Invalid)?;
+
+        rules.push(Rule { regex: parsed.0 });
+    }
+
+    Ok(Grammar::new(ter, non, rules))
+}
+
+/// Builds a grammar that can express parentheses.
+///
+/// The terminals are `LEFT`, `RIGHT` and `A`; the non-terminals are
+/// `start` and `content`.  Each rule string is written in the syntax
+/// read by `scan_tnt` and is parsed in order; the first failure is
+/// returned as the error.
+#[allow(dead_code)]
+pub fn new_paren_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
+    let terminal_names = ["LEFT", "RIGHT", "A"];
+    let nonterminal_names = ["start", "content"];
+
+    let ter: Vec<_> = terminal_names
+        .iter()
+        .map(|&name| Terminal::new(name.to_owned()))
+        .collect();
+    let non: Vec<_> = nonterminal_names
+        .iter()
+        .map(|&name| Nonterminal(name.to_owned()))
+        .collect();
+
+    let mut regex_parser: DefaultRegParser<TNT> = Default::default();
+
+    for name in terminal_names {
+        regex_parser.add_tnt(name, true);
+    }
+
+    for name in nonterminal_names {
+        regex_parser.add_tnt(name, false);
+    }
+
+    let patterns = ["TLEFT Nstart TRIGHT | Ncontent Nstart | ", "TA +"];
+
+    let mut rules = Vec::with_capacity(patterns.len());
+
+    for pattern in patterns {
+        let parsed = regex_parser
+            .parse(pattern, Box::new(scan_tnt), true)?
+            .ok_or(ParseError::Invalid)?;
+
+        rules.push(Rule { regex: parsed.0 });
+    }
+
+    Ok(Grammar::new(ter, non, rules))
+}
+
+/// Builds a left recursive grammar.
+///
+/// The terminals are `B` and `C`; the non-terminals are `start`, `S`
+/// and `A`.  Each rule string is written in the syntax read by
+/// `scan_tnt` and is parsed in order; the first failure is returned
+/// as the error.
+#[allow(dead_code)]
+pub fn new_left_recursive_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
+    let terminal_names = ["B", "C"];
+    let nonterminal_names = ["start", "S", "A"];
+
+    let ter: Vec<_> = terminal_names
+        .iter()
+        .map(|&name| Terminal::new(name.to_owned()))
+        .collect();
+    let non: Vec<_> = nonterminal_names
+        .iter()
+        .map(|&name| Nonterminal(name.to_owned()))
+        .collect();
+
+    let mut regex_parser: DefaultRegParser<TNT> = Default::default();
+
+    for name in terminal_names {
+        regex_parser.add_tnt(name, true);
+    }
+
+    for name in nonterminal_names {
+        regex_parser.add_tnt(name, false);
+    }
+
+    let patterns = ["NA NS TC", "TB | Nstart", "()"];
+
+    let mut rules = Vec::with_capacity(patterns.len());
+
+    for pattern in patterns {
+        let parsed = regex_parser
+            .parse(pattern, Box::new(scan_tnt), true)?
+            .ok_or(ParseError::Invalid)?;
+
+        rules.push(Rule { regex: parsed.0 });
+    }
+
+    Ok(Grammar::new(ter, non, rules))
+}
+
+/// Builds a right recursive grammar.
+///
+/// The terminals are `B` and `C`; the non-terminals are `start`, `S`
+/// and `A`.  Each rule string is written in the syntax read by
+/// `scan_tnt` and is parsed in order; the first failure is returned
+/// as the error.
+#[allow(dead_code)]
+pub fn new_right_recursive_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
+    let terminal_names = ["B", "C"];
+    let nonterminal_names = ["start", "S", "A"];
+
+    let ter: Vec<_> = terminal_names
+        .iter()
+        .map(|&name| Terminal::new(name.to_owned()))
+        .collect();
+    let non: Vec<_> = nonterminal_names
+        .iter()
+        .map(|&name| Nonterminal(name.to_owned()))
+        .collect();
+
+    let mut regex_parser: DefaultRegParser<TNT> = Default::default();
+
+    for name in terminal_names {
+        regex_parser.add_tnt(name, true);
+    }
+
+    for name in nonterminal_names {
+        regex_parser.add_tnt(name, false);
+    }
+
+    let patterns = ["NS TC NA|TB Nstart", "TB", "NA|"];
+
+    let mut rules = Vec::with_capacity(patterns.len());
+
+    for pattern in patterns {
+        let parsed = regex_parser
+            .parse(pattern, Box::new(scan_tnt), true)?
+            .ok_or(ParseError::Invalid)?;
+
+        rules.push(Rule { regex: parsed.0 });
+    }
+
+    Ok(Grammar::new(ter, non, rules))
+}
+// TODO: more grammars