author    | JSDurand <mmemmew@gmail.com> | 2023-01-03 23:44:02 +0800
committer | JSDurand <mmemmew@gmail.com> | 2023-01-03 23:44:02 +0800
commit    | bdbd4b4dc21af09711c97d3f903877443199af06 (patch)
tree      | c6a9602f72ee1f6fd7fd3f64b8679a4de50a0159 /grammar/src/tests
parent    | 8463dd24f815fe2b8f25fe9763e0a43023bfbb20 (diff)
structural change: separate crates out
I moved functionalities that are not strictly core into separate crates,
so that the whole package becomes more modular and it is easier to try
other parsing algorithms in the future.
Also, I have to figure the forests out before finishing the core
chain-rule algorithm, as the part about forests directly affects the
labels of the grammars.  From my experience writing the previous
version, changing the label type dramatically at a later point is
asking for trouble: too many places need to be changed.  Thus I
decided to work out the rough shape of the forests now.
Actually, I only have to figure out how to attach forest fragments to
the edges of the underlying atomic languages; the more complex parts
of putting forests together can be left to the recorders, which are my
vision for assembling semi-ring values during the chain-rule machine.
A rough sketch of that vision follows.
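To make the vision concrete, here is a rough sketch of what a recorder
could look like.  None of these names exist in the code yet; they are
only illustrative, not the interface introduced by this commit.

// Hypothetical sketch only: trait and method names are illustrative.

/// A semi-ring of values that a recorder assembles while the
/// chain-rule machine runs.
pub trait Semiring {
    fn zero() -> Self;
    fn one() -> Self;
    fn plus(self, other: Self) -> Self;
    fn times(self, other: Self) -> Self;
}

/// A recorder observes the edges traversed by the chain-rule machine
/// and combines the forest fragments attached to them.
pub trait Recorder {
    type Value: Semiring;

    /// Called whenever the machine follows an edge carrying a forest
    /// fragment; accumulates the corresponding semi-ring value.
    fn record(&mut self, fragment_value: Self::Value);

    /// The final assembled value, for example the complete forest.
    fn finish(self) -> Self::Value;
}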
It should be relatively easy to produce forest fragments from
grammars, since we are only extracting some information from the
grammar, not manipulating that information in a complicated way.  We
do have to perform some manipulation in the process, though, to make
sure that the nulling and epsilon-removal processes do not invalidate
these fragments.
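As an illustration of what has to be preserved, a forest fragment
attached to an edge might be no more than a list of grammar positions,
and epsilon-removal would then have to merge fragments into the
surviving edges instead of discarding them.  Again, the names below
are hypothetical and only sketch the idea.

/// Hypothetical representation of a forest fragment attached to an
/// edge of an atomic language: a small set of grammar positions.
#[derive(Clone, Debug, Default)]
pub struct ForestFragment {
    /// Grammar positions covered by this fragment.
    pub nodes: Vec<usize>,
}

impl ForestFragment {
    /// When epsilon-removal or nulling contracts an edge, the
    /// fragment of the removed edge has to be merged into the
    /// surviving edge instead of being thrown away; otherwise the
    /// fragment would be invalidated.
    pub fn merge(&mut self, other: &ForestFragment) {
        self.nodes.extend_from_slice(&other.nodes);
    }
}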
Diffstat (limited to 'grammar/src/tests')
-rw-r--r-- | grammar/src/tests/mod.rs                       |  30
-rw-r--r-- | grammar/src/tests/test_grammar_left_closure.rs | 272
2 files changed, 302 insertions, 0 deletions
diff --git a/grammar/src/tests/mod.rs b/grammar/src/tests/mod.rs
new file mode 100644
index 0000000..55eedff
--- /dev/null
+++ b/grammar/src/tests/mod.rs
@@ -0,0 +1,30 @@
+#[cfg(test)]
+mod test_grammar_display {
+    use crate::test_grammar_helper::new_grammar;
+
+    #[test]
+    fn test_display() -> Result<(), Box<dyn std::error::Error>> {
+        new_grammar().map(|g| println!("{g}"))
+    }
+}
+
+#[cfg(test)]
+mod test_grammar_firsts {
+    use crate::test_grammar_helper::new_grammar;
+    use std::io::Write;
+
+    #[test]
+    fn test_firsts() -> Result<(), Box<dyn std::error::Error>> {
+        let mut grammar = new_grammar()?;
+
+        grammar.compute_firsts()?;
+
+        let mut lock = std::io::stdout().lock();
+
+        writeln!(lock, "grammar firsts: {:?}", grammar.firsts)?;
+        writeln!(lock, "grammar first nodes: {:?}", grammar.first_nodes).map_err(Into::into)
+    }
+}
+
+#[cfg(test)]
+mod test_grammar_left_closure;
diff --git a/grammar/src/tests/test_grammar_left_closure.rs b/grammar/src/tests/test_grammar_left_closure.rs
new file mode 100644
index 0000000..0bc9f4d
--- /dev/null
+++ b/grammar/src/tests/test_grammar_left_closure.rs
@@ -0,0 +1,272 @@
+use crate::test_grammar_helper::*;
+use crate::*;
+use nfa::Nfa;
+use std::{
+    collections::HashSet,
+    io::{stdout, Write},
+};
+
+#[test]
+fn test_regex() -> Result<(), Box<dyn std::error::Error>> {
+    let mut grammar = new_grammar()?;
+
+    let vec_of_regexps = new_closure_regex(&mut grammar)?;
+
+    let mut lock = stdout().lock();
+
+    writeln!(lock, "grammar firsts: {:?}", grammar.firsts)?;
+    writeln!(lock, "grammar first nodes: {:?}", grammar.first_nodes)?;
+    writeln!(lock, "grammar:")?;
+    writeln!(lock, "{grammar}")?;
+
+    for regex in vec_of_regexps.into_iter().skip(1) {
+        writeln!(
+            lock,
+            "regex: {}",
+            regex.to_string_with(|tnt| format!("{tnt}"))?
+        )?;
+        // println!("regex: {regex:?}",);
+        writeln!(lock, "regex len = {}", regex.nodes_len())?;
+    }
+
+    Ok(())
+}
+
+// We ignore this test by default as it is possibly involved with
+// printing to a graphviz file.
+#[ignore]
+#[test]
+fn test_nfa() -> Result<(), Box<dyn std::error::Error>> {
+    let mut grammar = new_notes_grammar()?;
+    let closure = new_closure_regex(&mut grammar)?;
+
+    let mut lock = stdout().lock();
+
+    for regex in closure.iter() {
+        writeln!(
+            lock,
+            "regex: {}",
+            regex.to_string_with(|tnt| {
+                match tnt {
+                    TNT::Ter(t) => {
+                        format!(
+                            "({})",
+                            grammar.name_of_tnt(grammar.unpack_tnt(t).unwrap()).unwrap()
+                        )
+                    }
+                    TNT::Non(_) => {
+                        // hyper non-terminal
+                        format!("H({})", grammar.name_of_tnt(tnt).unwrap())
+                    }
+                }
+            })?
+        )?;
+        // println!("regex: {regex:?}",);
+        writeln!(lock, "regex len = {}", regex.nodes_len())?;
+    }
+
+    grammar
+        .left_closure_to_nfa(&closure)
+        .map(|_| ())
+        .map_err(Into::into)
+
+    // let _nfa = grammar.left_closure_to_nfa(&closure)?;
+
+    // writeln!(lock, "Not printing nfa to nfa.gv")?;
+
+    // nfa.print_viz("nfa.gv").map_err(Into::into)
+
+    // Ok(())
+}
+
+#[test]
+#[ignore]
+fn test_remove_epsilon() -> Result<(), Box<dyn std::error::Error>> {
+    let mut lock = stdout().lock();
+
+    let mut grammar = new_paren_grammar()?;
+
+    writeln!(lock, "grammar:")?;
+    writeln!(lock, "{grammar}")?;
+
+    let closure = new_closure_regex(&mut grammar)?;
+
+    let mut accumulator_value: usize = 0;
+
+    for regex in closure.iter() {
+        writeln!(
+            lock,
+            "regex: {}",
+            regex.to_string_with(|tnt| {
+                match tnt {
+                    TNT::Ter(t) => {
+                        format!(
+                            "({})",
+                            grammar.name_of_tnt(grammar.unpack_tnt(t).unwrap()).unwrap()
+                        )
+                    }
+                    TNT::Non(_) => {
+                        // hyper non-terminal
+                        format!("({})", grammar.name_of_tnt(tnt).unwrap())
+                    }
+                }
+            })?
+        )?;
+        writeln!(lock, "regex len = {}", regex.nodes_len())?;
+        writeln!(lock, "offset = {accumulator_value}")?;
+
+        accumulator_value += regex.nodes_len();
+    }
+
+    writeln!(lock, "total = {accumulator_value}")?;
+
+    let mut nfa = grammar.left_closure_to_nfa(&closure)?;
+
+    nfa.print_viz("nfa_orig.gv")?;
+
+    nfa.remove_epsilon(|label| label.is_none())?;
+
+    nfa.print_viz("nfa_no_epsilon.gv")?;
+
+    Ok(())
+}
+
+#[test]
+#[ignore]
+fn test_remove_dead() -> Result<(), Box<dyn std::error::Error>> {
+    let mut grammar = new_paren_grammar()?;
+    let closure = new_closure_regex(&mut grammar)?;
+
+    let mut lock = stdout().lock();
+
+    let mut accumulator = 0usize;
+    let mut accumulators = Vec::with_capacity(closure.len() + 1);
+    accumulators.push(accumulator);
+
+    for regex in closure.iter() {
+        writeln!(
+            lock,
+            "regex: {}",
+            regex.to_string_with(|tnt| {
+                match tnt {
+                    TNT::Ter(t) => {
+                        format!(
+                            "({})",
+                            grammar.name_of_tnt(grammar.unpack_tnt(t).unwrap()).unwrap()
+                        )
+                    }
+                    TNT::Non(_) => {
+                        // hyper non-terminal
+                        format!("({})", grammar.name_of_tnt(tnt).unwrap())
+                    }
+                }
+            })?
+        )?;
+        // println!("regex: {regex:?}",);
+        writeln!(lock, "regex len = {}", regex.nodes_len())?;
+
+        accumulator += regex.nodes_len() * 2;
+
+        accumulators.push(accumulator);
+    }
+
+    let mut nfa = grammar.left_closure_to_nfa(&closure)?;
+
+    nfa.print_viz("nfa_orig.gv")?;
+
+    nfa.remove_epsilon(|label| label.is_none())?;
+
+    let accumulators: HashSet<usize> = accumulators.into_iter().collect();
+
+    println!("accumulators = {accumulators:?}");
+
+    let grammar_reserve_node = |node| accumulators.contains(&node);
+
+    nfa.remove_dead(grammar_reserve_node)?;
+
+    nfa.print_viz("nfa_no_dead.gv")?;
+
+    Ok(())
+}
+
+#[test]
+#[ignore]
+fn test_nulling() -> Result<(), Box<dyn std::error::Error>> {
+    let mut grammar = new_left_recursive_grammar()?;
+    let closure = new_closure_regex(&mut grammar)?;
+
+    let mut lock = stdout().lock();
+
+    let mut accumulators = Vec::with_capacity(closure.len() + 1);
+    accumulators.push(0);
+
+    for regex in closure.iter() {
+        writeln!(
+            lock,
+            "regex: {}",
+            regex.to_string_with(|tnt| {
+                match tnt {
+                    TNT::Ter(t) => {
+                        format!(
+                            "({})",
+                            grammar.name_of_tnt(grammar.unpack_tnt(t).unwrap()).unwrap()
+                        )
+                    }
+                    TNT::Non(_) => {
+                        // hyper non-terminal
+                        format!("H({})", grammar.name_of_tnt(tnt).unwrap())
+                    }
+                }
+            })?
+        )?;
+        // println!("regex: {regex:?}",);
+        writeln!(lock, "regex len = {}", regex.nodes_len())?;
+
+        accumulators.push(regex.nodes_len() * 2 + accumulators.last().unwrap());
+    }
+
+    write!(lock, "nullables:")?;
+    let mut first_time = true;
+    for i in 0..(grammar.non_num()) {
+        if grammar.is_nullable(i)? {
+            if first_time {
+                write!(lock, " ")?;
+            } else {
+                write!(lock, ", ")?;
+            }
+            write!(lock, " {i}")?;
+            first_time = false;
+        }
+    }
+    writeln!(lock)?;
+
+    let accumulators: HashSet<usize> = accumulators.into_iter().collect();
+
+    let mut nfa = grammar.left_closure_to_nfa(&closure)?;
+
+    nfa.nulling(|label| {
+        if let Some(label) = *label {
+            match label {
+                TNT::Ter(_) => false,
+                // Panics if a non-terminal references an invalid node
+                // here.
+                TNT::Non(n) => grammar.is_nullable(n).unwrap(),
+            }
+        } else {
+            true
+        }
+    })?;
+
+    let grammar_reserve_nodes = |node| accumulators.contains(&node);
+
+    writeln!(lock, "accumulators are {accumulators:?}")?;
+
+    nfa.remove_epsilon(|label| label.is_none())?;
+    nfa.remove_dead(grammar_reserve_nodes)?;
+
+    writeln!(lock, "Printing nfa to nfa.gv")?;
+
+    nfa.print_viz("nfa.gv")?;
+
+    Ok(())
+}