summaryrefslogtreecommitdiff
path: root/grammar/src/test_grammar_helper.rs
diff options
context:
space:
mode:
Diffstat (limited to 'grammar/src/test_grammar_helper.rs')
-rw-r--r--grammar/src/test_grammar_helper.rs368
1 files changed, 368 insertions, 0 deletions
diff --git a/grammar/src/test_grammar_helper.rs b/grammar/src/test_grammar_helper.rs
new file mode 100644
index 0000000..c236952
--- /dev/null
+++ b/grammar/src/test_grammar_helper.rs
@@ -0,0 +1,368 @@
+//! This module provides some grammars for testing.
+
+use super::*;
+use nfa::{
+ default::regex::{DefaultRegParser, ParseDirection, ParseError, RegexType},
+ DesRec,
+};
+use std::fmt::Write;
+
+/// A helper function to compute the first sets of a grammar and
+/// return the left-closure of that grammar.
+pub fn new_closure_regex(
+ grammar: &mut Grammar,
+) -> Result<Vec<DefaultRegex<TNT>>, Box<dyn std::error::Error>> {
+ grammar.compute_firsts()?;
+
+ grammar.left_closure().map_err(Into::into)
+}
+
+/// A function to scan the inputs.
+fn scan_tnt(
+ parser: &DefaultRegParser<TNT>,
+ input: &str,
+) -> Result<Option<(usize, RegexType<TNT>, ParseDirection)>, ParseError> {
+ use ParseDirection::*;
+ use RegexType::*;
+ use TNT::*;
+
+ let mut chars = input.chars();
+
+ let mut len = 1;
+
+ while let Some(first) = chars.next() {
+ match first {
+ ' ' => {
+ // ignore whitespaces
+ len += 1;
+ }
+ '*' => return Ok(Some((len, Kleene, Right))),
+ '+' => return Ok(Some((len, Plus, Right))),
+ '?' => return Ok(Some((len, Optional, Right))),
+ '|' => return Ok(Some((len, Empty, Up))),
+ '(' => return Ok(Some((len, Or, Down))),
+ ')' => return Ok(Some((len, Paren, Up))),
+ 'T' => {
+ let mut name = String::new();
+
+ while let Some(c) = chars.next() {
+ if ('a'..='z').contains(&c) || ('A'..='Z').contains(&c) {
+ len += 1;
+ write!(name, "{c}").map_err(|_| ParseError::InvalidCharacter(c))?;
+ } else {
+ break;
+ }
+ }
+
+ if let Some(t) = parser.query(&name, true) {
+ return Ok(Some((len, Lit(Ter(t)), Right)));
+ } else {
+ return Err(ParseError::InvalidCharacter(first));
+ }
+ }
+ 'N' => {
+ let mut name = String::new();
+
+ while let Some(c) = chars.next() {
+ if ('a'..='z').contains(&c) || ('A'..='Z').contains(&c) {
+ len += 1;
+ write!(name, "{c}").map_err(|_| ParseError::InvalidCharacter(c))?;
+ } else {
+ break;
+ }
+ }
+
+ if let Some(n) = parser.query(&name, false) {
+ return Ok(Some((len, Lit(Non(n)), Right)));
+ } else {
+ return Err(ParseError::InvalidCharacter(first));
+ }
+ }
+ _ => {
+ return Err(ParseError::InvalidCharacter(first));
+ }
+ }
+ }
+
+ Ok(None)
+}
+
+/// Return a simple testing grammar.
+#[allow(dead_code)]
+pub fn new_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
+ let ter = vec![Terminal::new("a".to_owned()), Terminal::new("b".to_owned())];
+ let non = vec![
+ Nonterminal("start".to_owned()),
+ Nonterminal("end".to_owned()),
+ ];
+
+ let mut regex_parser: DefaultRegParser<TNT> = Default::default();
+
+ regex_parser.add_tnt("a", true);
+ regex_parser.add_tnt("b", true);
+ regex_parser.add_tnt("start", false);
+ regex_parser.add_tnt("end", false);
+
+ let regex_parser = regex_parser;
+
+ let rule1 = Rule {
+ regex: regex_parser
+ .parse("Ta*Tb+Nend+", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule2 = Rule {
+ regex: regex_parser
+ .parse("Nstart?Nend*", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rules = vec![rule1, rule2];
+
+ Ok(Grammar::new(ter, non, rules))
+}
+
+/// Return a grammar that might serve as the grammar for my notes,
+/// somehow.
+#[allow(dead_code)]
+pub fn new_notes_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
+ let ter = vec![
+ Terminal::new("NL".to_owned()),
+ Terminal::new("SP".to_owned()),
+ Terminal::new("CON".to_owned()),
+ Terminal::new("STAR".to_owned()),
+ Terminal::new("NOTE".to_owned()),
+ Terminal::new("PRICE".to_owned()),
+ Terminal::new("DIGIT".to_owned()),
+ ];
+ let non = vec![
+ Nonterminal("document".to_owned()),
+ Nonterminal("item".to_owned()),
+ Nonterminal("header".to_owned()),
+ Nonterminal("title".to_owned()),
+ Nonterminal("note".to_owned()),
+ Nonterminal("note-content".to_owned()),
+ Nonterminal("price".to_owned()),
+ ];
+
+ let mut regex_parser: DefaultRegParser<TNT> = Default::default();
+
+ regex_parser.add_tnt("NL", true);
+ regex_parser.add_tnt("SP", true);
+ regex_parser.add_tnt("CON", true);
+ regex_parser.add_tnt("STAR", true);
+ regex_parser.add_tnt("note", true);
+ regex_parser.add_tnt("price", true);
+ regex_parser.add_tnt("DIGIT", true);
+ regex_parser.add_tnt("document", false);
+ regex_parser.add_tnt("item", false);
+ regex_parser.add_tnt("header", false);
+ regex_parser.add_tnt("title", false);
+ regex_parser.add_tnt("note", false);
+ regex_parser.add_tnt("notecontent", false);
+ regex_parser.add_tnt("price", false);
+
+ let regex_parser = regex_parser;
+
+ let rule1 = Rule {
+ regex: regex_parser
+ .parse("Nitem+", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule2 = Rule {
+ regex: regex_parser
+ .parse("Nheader Nprice?Nnote?", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule3 = Rule {
+ regex: regex_parser
+ .parse("TSTAR?TSP Ntitle TNL (TSP|TNL)*", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule4 = Rule {
+ regex: regex_parser
+ .parse("TCON+", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule5 = Rule {
+ regex: regex_parser
+ .parse(
+ "Tnote Nnotecontent TNL (TSP|TNL)*",
+ Box::new(scan_tnt),
+ true,
+ )?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule6 = Rule {
+ regex: regex_parser
+ .parse("TCON+", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule7 = Rule {
+ regex: regex_parser
+ .parse(
+ "Tprice TSP TDIGIT+ TNL(TSP | TNL)+",
+ Box::new(scan_tnt),
+ true,
+ )?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rules = vec![rule1, rule2, rule3, rule4, rule5, rule6, rule7];
+
+ Ok(Grammar::new(ter, non, rules))
+}
+
+/// Return a grammar that can express parentheses.
+#[allow(dead_code)]
+pub fn new_paren_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
+ let ter = vec![
+ Terminal::new("LEFT".to_owned()),
+ Terminal::new("RIGHT".to_owned()),
+ Terminal::new("A".to_owned()),
+ ];
+ let non = vec![
+ Nonterminal("start".to_owned()),
+ Nonterminal("content".to_owned()),
+ ];
+
+ let mut regex_parser: DefaultRegParser<TNT> = Default::default();
+
+ regex_parser.add_tnt("LEFT", true);
+ regex_parser.add_tnt("RIGHT", true);
+ regex_parser.add_tnt("A", true);
+ regex_parser.add_tnt("start", false);
+ regex_parser.add_tnt("content", false);
+
+ let regex_parser = regex_parser;
+
+ let rule1 = Rule {
+ regex: regex_parser
+ .parse(
+ "TLEFT Nstart TRIGHT | Ncontent Nstart | ",
+ Box::new(scan_tnt),
+ true,
+ )?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule2 = Rule {
+ regex: regex_parser
+ .parse("TA +", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rules = vec![rule1, rule2];
+
+ Ok(Grammar::new(ter, non, rules))
+}
+
+/// Return a left recursive grammar.
+#[allow(dead_code)]
+pub fn new_left_recursive_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
+ let ter = vec![Terminal::new("B".to_owned()), Terminal::new("C".to_owned())];
+ let non = vec![
+ Nonterminal("start".to_owned()),
+ Nonterminal("S".to_owned()),
+ Nonterminal("A".to_owned()),
+ ];
+
+ let mut regex_parser: DefaultRegParser<TNT> = Default::default();
+
+ regex_parser.add_tnt("B", true);
+ regex_parser.add_tnt("C", true);
+ regex_parser.add_tnt("start", false);
+ regex_parser.add_tnt("S", false);
+ regex_parser.add_tnt("A", false);
+
+ let regex_parser = regex_parser;
+
+ let rule1 = Rule {
+ regex: regex_parser
+ .parse("NA NS TC", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule2 = Rule {
+ regex: regex_parser
+ .parse("TB | Nstart", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule3 = Rule {
+ regex: regex_parser
+ .parse("()", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rules = vec![rule1, rule2, rule3];
+
+ Ok(Grammar::new(ter, non, rules))
+}
+
+/// Return a right recursive grammar.
+#[allow(dead_code)]
+pub fn new_right_recursive_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
+ let ter = vec![Terminal::new("B".to_owned()), Terminal::new("C".to_owned())];
+ let non = vec![
+ Nonterminal("start".to_owned()),
+ Nonterminal("S".to_owned()),
+ Nonterminal("A".to_owned()),
+ ];
+
+ let mut regex_parser: DefaultRegParser<TNT> = Default::default();
+
+ regex_parser.add_tnt("B", true);
+ regex_parser.add_tnt("C", true);
+ regex_parser.add_tnt("start", false);
+ regex_parser.add_tnt("S", false);
+ regex_parser.add_tnt("A", false);
+
+ let regex_parser = regex_parser;
+
+ let rule1 = Rule {
+ regex: regex_parser
+ .parse("NS TC NA|TB Nstart", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule2 = Rule {
+ regex: regex_parser
+ .parse("TB", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule3 = Rule {
+ regex: regex_parser
+ .parse("NA|", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rules = vec![rule1, rule2, rule3];
+
+ Ok(Grammar::new(ter, non, rules))
+}
+// TODO: more grammars