diff options
Diffstat (limited to 'grammar/src/test_grammar_helper.rs')
-rw-r--r-- | grammar/src/test_grammar_helper.rs | 368 |
1 files changed, 368 insertions, 0 deletions
diff --git a/grammar/src/test_grammar_helper.rs b/grammar/src/test_grammar_helper.rs new file mode 100644 index 0000000..c236952 --- /dev/null +++ b/grammar/src/test_grammar_helper.rs @@ -0,0 +1,368 @@ +//! This module provides some grammars for testing. + +use super::*; +use nfa::{ + default::regex::{DefaultRegParser, ParseDirection, ParseError, RegexType}, + DesRec, +}; +use std::fmt::Write; + +/// A helper function to compute the first sets of a grammar and +/// return the left-closure of that grammar. +pub fn new_closure_regex( + grammar: &mut Grammar, +) -> Result<Vec<DefaultRegex<TNT>>, Box<dyn std::error::Error>> { + grammar.compute_firsts()?; + + grammar.left_closure().map_err(Into::into) +} + +/// A function to scan the inputs. +fn scan_tnt( + parser: &DefaultRegParser<TNT>, + input: &str, +) -> Result<Option<(usize, RegexType<TNT>, ParseDirection)>, ParseError> { + use ParseDirection::*; + use RegexType::*; + use TNT::*; + + let mut chars = input.chars(); + + let mut len = 1; + + while let Some(first) = chars.next() { + match first { + ' ' => { + // ignore whitespaces + len += 1; + } + '*' => return Ok(Some((len, Kleene, Right))), + '+' => return Ok(Some((len, Plus, Right))), + '?' => return Ok(Some((len, Optional, Right))), + '|' => return Ok(Some((len, Empty, Up))), + '(' => return Ok(Some((len, Or, Down))), + ')' => return Ok(Some((len, Paren, Up))), + 'T' => { + let mut name = String::new(); + + while let Some(c) = chars.next() { + if ('a'..='z').contains(&c) || ('A'..='Z').contains(&c) { + len += 1; + write!(name, "{c}").map_err(|_| ParseError::InvalidCharacter(c))?; + } else { + break; + } + } + + if let Some(t) = parser.query(&name, true) { + return Ok(Some((len, Lit(Ter(t)), Right))); + } else { + return Err(ParseError::InvalidCharacter(first)); + } + } + 'N' => { + let mut name = String::new(); + + while let Some(c) = chars.next() { + if ('a'..='z').contains(&c) || ('A'..='Z').contains(&c) { + len += 1; + write!(name, "{c}").map_err(|_| ParseError::InvalidCharacter(c))?; + } else { + break; + } + } + + if let Some(n) = parser.query(&name, false) { + return Ok(Some((len, Lit(Non(n)), Right))); + } else { + return Err(ParseError::InvalidCharacter(first)); + } + } + _ => { + return Err(ParseError::InvalidCharacter(first)); + } + } + } + + Ok(None) +} + +/// Return a simple testing grammar. +#[allow(dead_code)] +pub fn new_grammar() -> Result<Grammar, Box<dyn std::error::Error>> { + let ter = vec![Terminal::new("a".to_owned()), Terminal::new("b".to_owned())]; + let non = vec![ + Nonterminal("start".to_owned()), + Nonterminal("end".to_owned()), + ]; + + let mut regex_parser: DefaultRegParser<TNT> = Default::default(); + + regex_parser.add_tnt("a", true); + regex_parser.add_tnt("b", true); + regex_parser.add_tnt("start", false); + regex_parser.add_tnt("end", false); + + let regex_parser = regex_parser; + + let rule1 = Rule { + regex: regex_parser + .parse("Ta*Tb+Nend+", Box::new(scan_tnt), true)? + .ok_or(ParseError::Invalid)? + .0, + }; + + let rule2 = Rule { + regex: regex_parser + .parse("Nstart?Nend*", Box::new(scan_tnt), true)? + .ok_or(ParseError::Invalid)? + .0, + }; + + let rules = vec![rule1, rule2]; + + Ok(Grammar::new(ter, non, rules)) +} + +/// Return a grammar that might serve as the grammar for my notes, +/// somehow. +#[allow(dead_code)] +pub fn new_notes_grammar() -> Result<Grammar, Box<dyn std::error::Error>> { + let ter = vec![ + Terminal::new("NL".to_owned()), + Terminal::new("SP".to_owned()), + Terminal::new("CON".to_owned()), + Terminal::new("STAR".to_owned()), + Terminal::new("NOTE".to_owned()), + Terminal::new("PRICE".to_owned()), + Terminal::new("DIGIT".to_owned()), + ]; + let non = vec![ + Nonterminal("document".to_owned()), + Nonterminal("item".to_owned()), + Nonterminal("header".to_owned()), + Nonterminal("title".to_owned()), + Nonterminal("note".to_owned()), + Nonterminal("note-content".to_owned()), + Nonterminal("price".to_owned()), + ]; + + let mut regex_parser: DefaultRegParser<TNT> = Default::default(); + + regex_parser.add_tnt("NL", true); + regex_parser.add_tnt("SP", true); + regex_parser.add_tnt("CON", true); + regex_parser.add_tnt("STAR", true); + regex_parser.add_tnt("note", true); + regex_parser.add_tnt("price", true); + regex_parser.add_tnt("DIGIT", true); + regex_parser.add_tnt("document", false); + regex_parser.add_tnt("item", false); + regex_parser.add_tnt("header", false); + regex_parser.add_tnt("title", false); + regex_parser.add_tnt("note", false); + regex_parser.add_tnt("notecontent", false); + regex_parser.add_tnt("price", false); + + let regex_parser = regex_parser; + + let rule1 = Rule { + regex: regex_parser + .parse("Nitem+", Box::new(scan_tnt), true)? + .ok_or(ParseError::Invalid)? + .0, + }; + + let rule2 = Rule { + regex: regex_parser + .parse("Nheader Nprice?Nnote?", Box::new(scan_tnt), true)? + .ok_or(ParseError::Invalid)? + .0, + }; + + let rule3 = Rule { + regex: regex_parser + .parse("TSTAR?TSP Ntitle TNL (TSP|TNL)*", Box::new(scan_tnt), true)? + .ok_or(ParseError::Invalid)? + .0, + }; + + let rule4 = Rule { + regex: regex_parser + .parse("TCON+", Box::new(scan_tnt), true)? + .ok_or(ParseError::Invalid)? + .0, + }; + + let rule5 = Rule { + regex: regex_parser + .parse( + "Tnote Nnotecontent TNL (TSP|TNL)*", + Box::new(scan_tnt), + true, + )? + .ok_or(ParseError::Invalid)? + .0, + }; + + let rule6 = Rule { + regex: regex_parser + .parse("TCON+", Box::new(scan_tnt), true)? + .ok_or(ParseError::Invalid)? + .0, + }; + + let rule7 = Rule { + regex: regex_parser + .parse( + "Tprice TSP TDIGIT+ TNL(TSP | TNL)+", + Box::new(scan_tnt), + true, + )? + .ok_or(ParseError::Invalid)? + .0, + }; + + let rules = vec![rule1, rule2, rule3, rule4, rule5, rule6, rule7]; + + Ok(Grammar::new(ter, non, rules)) +} + +/// Return a grammar that can express parentheses. +#[allow(dead_code)] +pub fn new_paren_grammar() -> Result<Grammar, Box<dyn std::error::Error>> { + let ter = vec![ + Terminal::new("LEFT".to_owned()), + Terminal::new("RIGHT".to_owned()), + Terminal::new("A".to_owned()), + ]; + let non = vec![ + Nonterminal("start".to_owned()), + Nonterminal("content".to_owned()), + ]; + + let mut regex_parser: DefaultRegParser<TNT> = Default::default(); + + regex_parser.add_tnt("LEFT", true); + regex_parser.add_tnt("RIGHT", true); + regex_parser.add_tnt("A", true); + regex_parser.add_tnt("start", false); + regex_parser.add_tnt("content", false); + + let regex_parser = regex_parser; + + let rule1 = Rule { + regex: regex_parser + .parse( + "TLEFT Nstart TRIGHT | Ncontent Nstart | ", + Box::new(scan_tnt), + true, + )? + .ok_or(ParseError::Invalid)? + .0, + }; + + let rule2 = Rule { + regex: regex_parser + .parse("TA +", Box::new(scan_tnt), true)? + .ok_or(ParseError::Invalid)? + .0, + }; + + let rules = vec![rule1, rule2]; + + Ok(Grammar::new(ter, non, rules)) +} + +/// Return a left recursive grammar. +#[allow(dead_code)] +pub fn new_left_recursive_grammar() -> Result<Grammar, Box<dyn std::error::Error>> { + let ter = vec![Terminal::new("B".to_owned()), Terminal::new("C".to_owned())]; + let non = vec![ + Nonterminal("start".to_owned()), + Nonterminal("S".to_owned()), + Nonterminal("A".to_owned()), + ]; + + let mut regex_parser: DefaultRegParser<TNT> = Default::default(); + + regex_parser.add_tnt("B", true); + regex_parser.add_tnt("C", true); + regex_parser.add_tnt("start", false); + regex_parser.add_tnt("S", false); + regex_parser.add_tnt("A", false); + + let regex_parser = regex_parser; + + let rule1 = Rule { + regex: regex_parser + .parse("NA NS TC", Box::new(scan_tnt), true)? + .ok_or(ParseError::Invalid)? + .0, + }; + + let rule2 = Rule { + regex: regex_parser + .parse("TB | Nstart", Box::new(scan_tnt), true)? + .ok_or(ParseError::Invalid)? + .0, + }; + + let rule3 = Rule { + regex: regex_parser + .parse("()", Box::new(scan_tnt), true)? + .ok_or(ParseError::Invalid)? + .0, + }; + + let rules = vec![rule1, rule2, rule3]; + + Ok(Grammar::new(ter, non, rules)) +} + +/// Return a right recursive grammar. +#[allow(dead_code)] +pub fn new_right_recursive_grammar() -> Result<Grammar, Box<dyn std::error::Error>> { + let ter = vec![Terminal::new("B".to_owned()), Terminal::new("C".to_owned())]; + let non = vec![ + Nonterminal("start".to_owned()), + Nonterminal("S".to_owned()), + Nonterminal("A".to_owned()), + ]; + + let mut regex_parser: DefaultRegParser<TNT> = Default::default(); + + regex_parser.add_tnt("B", true); + regex_parser.add_tnt("C", true); + regex_parser.add_tnt("start", false); + regex_parser.add_tnt("S", false); + regex_parser.add_tnt("A", false); + + let regex_parser = regex_parser; + + let rule1 = Rule { + regex: regex_parser + .parse("NS TC NA|TB Nstart", Box::new(scan_tnt), true)? + .ok_or(ParseError::Invalid)? + .0, + }; + + let rule2 = Rule { + regex: regex_parser + .parse("TB", Box::new(scan_tnt), true)? + .ok_or(ParseError::Invalid)? + .0, + }; + + let rule3 = Rule { + regex: regex_parser + .parse("NA|", Box::new(scan_tnt), true)? + .ok_or(ParseError::Invalid)? + .0, + }; + + let rules = vec![rule1, rule2, rule3]; + + Ok(Grammar::new(ter, non, rules)) +} +// TODO: more grammars |