From 265ff8f87dc7392fdf701f811eb2bf54d7bc6678 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Fri, 3 Feb 2023 10:52:35 +0800 Subject: Finally produced the first correct forest Finally the prototype parser has produced the first correct forest. It is my first time to generate a correct forest, in fact, ever since the beginning of this project. --- chain/src/item/genins.rs | 318 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 287 insertions(+), 31 deletions(-) (limited to 'chain/src/item/genins.rs') diff --git a/chain/src/item/genins.rs b/chain/src/item/genins.rs index 99d9202..9ed3b74 100644 --- a/chain/src/item/genins.rs +++ b/chain/src/item/genins.rs @@ -14,6 +14,17 @@ use graph::Graph; use core::borrow::Borrow; use std::collections::HashSet as Set; +/// Convert an error telling us that an index is out of bounds. +/// +/// # Panics +/// +/// The function panics if the error is not of the expected kind. +fn index_out_of_bounds_conversion(ge: GrammarError) -> Error { + match ge { + GrammarError::IndexOutOfBounds(index, bound) => Error::IndexOutOfBounds(index, bound), + _ => Error::Invalid, + } +} /// A helper function to generate a fragment of forest. /// /// It simply constructs a root node and then appends @@ -56,6 +67,53 @@ pub fn generate_fragment( Ok(result) } +/// Generate a virtual fragment representing the left-linear null +/// closure [nt]^t. +pub fn virtual_generate_fragment( + atom: impl Borrow, + nt: usize, + t: usize, + pos: usize, +) -> Result>, crate::default::Error> { + let atom = atom.borrow(); + + let non_start = atom.nth_accumulator(nt).unwrap() * 2; + + let mut result = DefaultForest::default(); + + for (label, child_iter) in atom.labels_of(non_start)? { + if matches!(*label.get_value(), + Some(TNT::Ter(ter)) if ter == t) + { + for child in child_iter { + // #[allow(unused_must_use)] + // { + // dbg!(non_start, child, atom.query_expansion(non_start, child)); + // } + + let line: Vec = atom + .query_expansion(non_start, child) + .map_err(index_out_of_bounds_conversion)? + .iter() + .copied() + .flatten() + .flat_map(|(nt, rule)| [(*rule).into(), TNT::Non(*nt).into()]) + .rev() + .chain(std::iter::once(TNT::Ter(t).into())) + .collect(); + + if result.is_empty() { + result = generate_fragment(line, pos)?; + } else { + result.plant(0, generate_fragment(line, pos)?, false)?; + } + } + } + } + + Ok(result) +} + impl DefaultForest> { /// Insert an item derivation forest into a recording forest. /// @@ -68,26 +126,39 @@ impl DefaultForest> { atom_child_iter: impl Iterator, atom: &DefaultAtom, ) -> Result { - /// Convert an error telling us that an index is out of bounds. - /// - /// # Panics - /// - /// The function panics if the error is not of the expected - /// kind. - fn index_out_of_bounds_conversion(ge: GrammarError) -> Error { - match ge { - GrammarError::IndexOutOfBounds(index, bound) => { - Error::IndexOutOfBounds(index, bound) - } - _ => Error::Invalid, - } - } - let fragment = fragment.borrow(); let pase = label.forest_source(); - let parents: Vec = { + let _fragment_root = if let Some(root) = fragment.root() { + root + } else { + return Err(Error::EmptyItem); + }; + + // Ensure the last node in the PaSe is a terminal or a + // non-terminal node, as an extra safety guard during + // development. + #[cfg(debug_assertions)] + { + if let Some(parent) = pase.parent() { + assert!(matches!( + self.vertex_label(self.nth_child(parent.node(), parent.edge())?), + Ok(Some(label)) + if label.label().label().tnt().is_some())); + } else if let Some((_, leaf)) = pase.segment() { + assert!(matches!( + self.vertex_label(leaf), + Ok(Some(label)) + if label.label().label().tnt().is_some())); + } else { + unreachable!("a pase is neither a parent nor a segment"); + } + } + + let mut is_empty_segment = false; + + let mut parents: Vec = { let mut result = Vec::new(); if let Some(parent) = pase.parent() { @@ -96,7 +167,15 @@ impl DefaultForest> { let (root, leaf) = pase.segment().unwrap(); let mut seen_nodes = Set::new(); - let mut stack = vec![root]; + let mut stack = if root == leaf { + // an empty segment means the root + is_empty_segment = true; + + result.push(Parent::new(root, 0)); + vec![] + } else { + vec![root] + }; while let Some(top) = stack.pop() { if seen_nodes.contains(&top) { @@ -118,7 +197,35 @@ impl DefaultForest> { result }; + for parent in parents.iter() { + if !self.has_node(parent.node()) { + return Err(Error::IndexOutOfBounds(parent.node(), self.nodes_len())); + } + } + + if !is_empty_segment { + parents = parents + .into_iter() + .flat_map(|parent| { + self.parents_of(parent.node()).unwrap().filter(|n| { + matches!( + self.vertex_label(n.node()) + .unwrap() + .unwrap() + .label() + .label() + .tnt(), + Some(TNT::Non(_)) + ) + }) + }) + .collect(); + } + + let mut non_empty = false; + for atom_child in atom_child_iter { + // dbg!(label.label(), atom_child); // Find reduction information. let reduction_info = atom .query_reduction(label.label(), atom_child) @@ -142,20 +249,20 @@ impl DefaultForest> { ) { for parent in self.parents_of(node.node())? { debug_assert!(matches!( - self.vertex_label(parent.node()), - Ok(Some(label)) if - label - .label() - .label() - .rule() - .is_some())); + self.vertex_label(parent.node()), + Ok(Some(label)) if + label + .label() + .label() + .rule() + .is_some())); second_stack.extend(self.parents_of(parent.node())?.filter(|n| { matches!(self.vertex_label(n.node()), - Ok(Some(label)) - if matches!( - label.label().label().tnt(), - Some(TNT::Non(_)))) + Ok(Some(label)) + if matches!( + label.label().label().tnt(), + Some(TNT::Non(_)))) })); } } @@ -169,12 +276,26 @@ impl DefaultForest> { } if stack.is_empty() { + dbg!( + label, + atom_child, + parents, + reduction_info, + atom.query_reduction(label.label(), atom_child).unwrap() + ); + + self.print_viz("dbg forest.gv").unwrap(); + + #[cfg(test)] + genins_test::print_labels(atom, self).unwrap(); + return Err(Error::CannotPlant); } for parent in stack.into_iter() { if parent.edge() + 1 >= self.degree(parent.node())? { - self.plant(parent.node(), fragment, false)?; + // dbg!(&fragment); + self.plant(parent.node(), fragment, non_empty)?; } else { let nth_child = self.nth_child(parent.node(), parent.edge() + 1)?; @@ -182,11 +303,26 @@ impl DefaultForest> { // do nothing continue; } + dbg!("clone?"); let cloned_node = self.clone_node(nth_child)?; - self.plant(cloned_node, fragment, false)?; + self.plant(cloned_node, fragment, non_empty)?; } + + non_empty = true; + } + } + + // If the iterator is empty, just plant at the specified + // child. + if !non_empty { + for parent in parents.into_iter() { + let nth_child = self.nth_child(parent.node(), parent.edge())?; + + self.plant(nth_child, fragment, non_empty)?; + + non_empty = true; } } @@ -214,11 +350,12 @@ impl DefaultForest> { unreachable!("the forest is wrongly planted"); } }; - + // dbg!(node, edge, root_label, leaf_label); PaSe::Parent(node, edge) } else { let root_label = fragment.vertex_label(0)?.unwrap(); let leaf_label = fragment.vertex_label(fragment.nodes_len() - 1)?.unwrap(); + // dbg!(root_label, leaf_label); PaSe::Segment( self.query_label(root_label).unwrap(), @@ -229,3 +366,122 @@ impl DefaultForest> { Ok(result) } } + +#[cfg(test)] +mod genins_test { + use super::*; + #[allow(unused_imports)] + use crate::{default::DefaultChain, item::default::leaf, Chain}; + + use grammar::test_grammar_helper::*; + + pub fn print_labels( + atom: impl Borrow, + forest: impl Borrow>>, + ) -> Result<(), Box> { + let forest = forest.borrow(); + let atom = atom.borrow(); + + for node in forest.nodes() { + let label = forest.vertex_label(node)?; + + if let Some(label) = label { + let label = label.label.label(); + + match label { + GrammarLabelType::TNT(tnt) => { + println!("{tnt} = {}", atom.name_of_tnt(tnt)?); + } + GrammarLabelType::Rule(pos) => { + println!("pos {pos} = {}", atom.rule_pos_string(pos)?); + } + } + } else { + return Err(Error::NodeNoLabel(node).into()); + } + } + + Ok(()) + } + + #[test] + fn test_generate_fragment() -> Result<(), Box> { + let grammar = new_notes_grammar()?; + + let atom = DefaultAtom::from_grammar(grammar)?; + + #[cfg(feature = "test-print-viz")] + atom.print_nfa("genins nfa.gv")?; + + let fragment = generate_fragment([72.into(), TNT::Non(0).into()], 0)?; + + let mut test_fragment = leaf!( + GrammarLabel::new(GrammarLabelType::from(72), 0), + GrammarLabel + ); + + test_fragment.plant( + 0, + leaf!( + GrammarLabel::new(GrammarLabelType::from(TNT::Non(0)), 0), + GrammarLabel + ), + false, + )?; + + assert_eq!(fragment, test_fragment); + + // virtual fragments + + println!("nt = 0, t = 3"); + + let virtual_fragment = virtual_generate_fragment(&atom, 0, 3, 0)?; + + assert_eq!(virtual_fragment.nodes_len(), 7); + + let virtual_node = virtual_fragment.vertex_label(5)?.unwrap().label(); + + let test_fragment = generate_fragment( + [ + TNT::Non(0).into(), + 2.into(), + TNT::Non(1).into(), + 8.into(), + TNT::Non(2).into(), + virtual_node.label(), + TNT::Ter(3).into(), + ], + 0, + )?; + + print_labels(&atom, &virtual_fragment)?; + + assert_eq!(virtual_fragment, test_fragment); + + #[cfg(feature = "test-print-viz")] + virtual_fragment.print_viz("virtual fragment (0, 3).gv")?; + + println!("nt = 3, t = 2"); + + let virtual_fragment = virtual_generate_fragment(&atom, 3, 2, 1)?; + + let test_fragment = + generate_fragment([TNT::Non(3).into(), 38.into(), TNT::Ter(2).into()], 1)?; + + print_labels(&atom, &virtual_fragment)?; + + assert_eq!(virtual_fragment, test_fragment); + + #[cfg(feature = "test-print-viz")] + virtual_fragment.print_viz("virtual fragment (3, 2).gv")?; + + // querying reductions + + assert!(matches!(atom.query_reduction(17, 9), Ok(Some(&[1])))); + + assert!(matches!(atom.query_reduction(35, 9), Ok(Some(&[1, 2])))); + assert!(matches!(atom.query_reduction(35, 25), Ok(Some(&[2])))); + + Ok(()) + } +} -- cgit v1.2.3-18-g5258