//! This module provides some grammars for testing. use super::*; use nfa::{ default::regex::{DefaultRegParser, ParseDirection, ParseError, RegexType}, DesRec, }; use std::fmt::Write; /// A helper function to compute the first sets of a grammar and /// return the left-closure of that grammar. pub fn new_closure_regex( grammar: &mut Grammar, ) -> Result>, Box> { grammar.compute_firsts()?; grammar.left_closure().map_err(Into::into) } /// A function to scan the inputs. fn scan_tnt( parser: &DefaultRegParser, input: &str, ) -> Result, ParseDirection)>, ParseError> { use ParseDirection::*; use RegexType::*; use TNT::*; let mut chars = input.chars(); let mut len = 1; while let Some(first) = chars.next() { match first { ' ' => { // ignore whitespaces len += 1; } '*' => return Ok(Some((len, Kleene, Right))), '+' => return Ok(Some((len, Plus, Right))), '?' => return Ok(Some((len, Optional, Right))), '|' => return Ok(Some((len, Empty, Up))), '(' => return Ok(Some((len, Or, Down))), ')' => return Ok(Some((len, Paren, Up))), 'T' => { let mut name = String::new(); while let Some(c) = chars.next() { if ('a'..='z').contains(&c) || ('A'..='Z').contains(&c) { len += 1; write!(name, "{c}").map_err(|_| ParseError::InvalidCharacter(c))?; } else { break; } } if let Some(t) = parser.query(&name, true) { return Ok(Some((len, Lit(Ter(t)), Right))); } else { return Err(ParseError::InvalidCharacter(first)); } } 'N' => { let mut name = String::new(); while let Some(c) = chars.next() { if ('a'..='z').contains(&c) || ('A'..='Z').contains(&c) { len += 1; write!(name, "{c}").map_err(|_| ParseError::InvalidCharacter(c))?; } else { break; } } if let Some(n) = parser.query(&name, false) { return Ok(Some((len, Lit(Non(n)), Right))); } else { return Err(ParseError::InvalidCharacter(first)); } } _ => { return Err(ParseError::InvalidCharacter(first)); } } } Ok(None) } /// Return a simple testing grammar. #[allow(dead_code)] pub fn new_grammar() -> Result> { let ter = vec![Terminal::new("a".to_owned()), Terminal::new("b".to_owned())]; let non = vec![ Nonterminal("start".to_owned()), Nonterminal("end".to_owned()), ]; let mut regex_parser: DefaultRegParser = Default::default(); regex_parser.add_tnt("a", true); regex_parser.add_tnt("b", true); regex_parser.add_tnt("start", false); regex_parser.add_tnt("end", false); let regex_parser = regex_parser; let rule1 = Rule { regex: regex_parser .parse("Ta*Tb+Nend+", Box::new(scan_tnt), true)? .ok_or(ParseError::Invalid)? .0, }; let rule2 = Rule { regex: regex_parser .parse("Nstart?Nend*", Box::new(scan_tnt), true)? .ok_or(ParseError::Invalid)? .0, }; let rules = vec![rule1, rule2]; Ok(Grammar::new(ter, non, rules)) } /// Return a grammar that might serve as the grammar for my notes, /// somehow. #[allow(dead_code)] pub fn new_notes_grammar() -> Result> { let ter = vec![ Terminal::new("NL".to_owned()), Terminal::new("SP".to_owned()), Terminal::new("CON".to_owned()), Terminal::new("STAR".to_owned()), Terminal::new("NOTE".to_owned()), Terminal::new("PRICE".to_owned()), Terminal::new("DIGIT".to_owned()), ]; let non = vec![ Nonterminal("document".to_owned()), Nonterminal("item".to_owned()), Nonterminal("header".to_owned()), Nonterminal("title".to_owned()), Nonterminal("note".to_owned()), Nonterminal("note-content".to_owned()), Nonterminal("price".to_owned()), ]; let mut regex_parser: DefaultRegParser = Default::default(); regex_parser.add_tnt("NL", true); regex_parser.add_tnt("SP", true); regex_parser.add_tnt("CON", true); regex_parser.add_tnt("STAR", true); regex_parser.add_tnt("note", true); regex_parser.add_tnt("price", true); regex_parser.add_tnt("DIGIT", true); regex_parser.add_tnt("document", false); regex_parser.add_tnt("item", false); regex_parser.add_tnt("header", false); regex_parser.add_tnt("title", false); regex_parser.add_tnt("note", false); regex_parser.add_tnt("notecontent", false); regex_parser.add_tnt("price", false); let regex_parser = regex_parser; let rule1 = Rule { regex: regex_parser .parse("Nitem+", Box::new(scan_tnt), true)? .ok_or(ParseError::Invalid)? .0, }; let rule2 = Rule { regex: regex_parser .parse("Nheader Nprice?Nnote?", Box::new(scan_tnt), true)? .ok_or(ParseError::Invalid)? .0, }; let rule3 = Rule { regex: regex_parser .parse("TSTAR?TSP Ntitle TNL (TSP|TNL)*", Box::new(scan_tnt), true)? .ok_or(ParseError::Invalid)? .0, }; let rule4 = Rule { regex: regex_parser .parse("TCON+", Box::new(scan_tnt), true)? .ok_or(ParseError::Invalid)? .0, }; let rule5 = Rule { regex: regex_parser .parse( "Tnote Nnotecontent TNL (TSP|TNL)*", Box::new(scan_tnt), true, )? .ok_or(ParseError::Invalid)? .0, }; let rule6 = Rule { regex: regex_parser .parse("TCON+", Box::new(scan_tnt), true)? .ok_or(ParseError::Invalid)? .0, }; let rule7 = Rule { regex: regex_parser .parse( "Tprice TSP TDIGIT+ TNL(TSP | TNL)+", Box::new(scan_tnt), true, )? .ok_or(ParseError::Invalid)? .0, }; let rules = vec![rule1, rule2, rule3, rule4, rule5, rule6, rule7]; Ok(Grammar::new(ter, non, rules)) } /// Return a grammar that can express parentheses. #[allow(dead_code)] pub fn new_paren_grammar() -> Result> { let ter = vec![ Terminal::new("LEFT".to_owned()), Terminal::new("RIGHT".to_owned()), Terminal::new("A".to_owned()), ]; let non = vec![ Nonterminal("start".to_owned()), Nonterminal("content".to_owned()), ]; let mut regex_parser: DefaultRegParser = Default::default(); regex_parser.add_tnt("LEFT", true); regex_parser.add_tnt("RIGHT", true); regex_parser.add_tnt("A", true); regex_parser.add_tnt("start", false); regex_parser.add_tnt("content", false); let regex_parser = regex_parser; let rule1 = Rule { regex: regex_parser .parse( "TLEFT Nstart TRIGHT | Ncontent Nstart | ", Box::new(scan_tnt), true, )? .ok_or(ParseError::Invalid)? .0, }; let rule2 = Rule { regex: regex_parser .parse("TA +", Box::new(scan_tnt), true)? .ok_or(ParseError::Invalid)? .0, }; let rules = vec![rule1, rule2]; Ok(Grammar::new(ter, non, rules)) } /// Return a left recursive grammar. #[allow(dead_code)] pub fn new_left_recursive_grammar() -> Result> { let ter = vec![Terminal::new("B".to_owned()), Terminal::new("C".to_owned())]; let non = vec![ Nonterminal("start".to_owned()), Nonterminal("S".to_owned()), Nonterminal("A".to_owned()), ]; let mut regex_parser: DefaultRegParser = Default::default(); regex_parser.add_tnt("B", true); regex_parser.add_tnt("C", true); regex_parser.add_tnt("start", false); regex_parser.add_tnt("S", false); regex_parser.add_tnt("A", false); let regex_parser = regex_parser; let rule1 = Rule { regex: regex_parser .parse("NA NS TC", Box::new(scan_tnt), true)? .ok_or(ParseError::Invalid)? .0, }; let rule2 = Rule { regex: regex_parser .parse("TB | Nstart", Box::new(scan_tnt), true)? .ok_or(ParseError::Invalid)? .0, }; let rule3 = Rule { regex: regex_parser .parse("()", Box::new(scan_tnt), true)? .ok_or(ParseError::Invalid)? .0, }; let rules = vec![rule1, rule2, rule3]; Ok(Grammar::new(ter, non, rules)) } /// Return a right recursive grammar. #[allow(dead_code)] pub fn new_right_recursive_grammar() -> Result> { let ter = vec![Terminal::new("B".to_owned()), Terminal::new("C".to_owned())]; let non = vec![ Nonterminal("start".to_owned()), Nonterminal("S".to_owned()), Nonterminal("A".to_owned()), ]; let mut regex_parser: DefaultRegParser = Default::default(); regex_parser.add_tnt("B", true); regex_parser.add_tnt("C", true); regex_parser.add_tnt("start", false); regex_parser.add_tnt("S", false); regex_parser.add_tnt("A", false); let regex_parser = regex_parser; let rule1 = Rule { regex: regex_parser .parse("NS TC NA|TB Nstart", Box::new(scan_tnt), true)? .ok_or(ParseError::Invalid)? .0, }; let rule2 = Rule { regex: regex_parser .parse("TB", Box::new(scan_tnt), true)? .ok_or(ParseError::Invalid)? .0, }; let rule3 = Rule { regex: regex_parser .parse("NA|", Box::new(scan_tnt), true)? .ok_or(ParseError::Invalid)? .0, }; let rules = vec![rule1, rule2, rule3]; Ok(Grammar::new(ter, non, rules)) } // TODO: more grammars