//! This module implements the generation and insertion of item //! derivation forests. //! //! This is used for the chain-rule machine to conveniently produce //! item derivations into a forest. This forest can serve as a rough //! approximation of the parse forests, and can be executed in other //! semirings later on. use super::*; use crate::{ atom::{Atom, DefaultAtom}, default::Error, item::default::DefaultForest, Edge, }; use grammar::{Error as GrammarError, GrammarLabel, GrammarLabelType, TNT}; use graph::Graph; use std::borrow::Borrow; /// Convert an error telling us that an index is out of bounds. /// /// # Panics /// /// The function panics if the error is not of the expected kind. pub(crate) fn index_out_of_bounds_conversion(ge: GrammarError) -> Error { match ge { GrammarError::IndexOutOfBounds(index, bound) => Error::IndexOutOfBounds(index, bound), _ => Error::Invalid, } } /// Determine if a label is labelled by a terminal. fn is_labelled_by_terminal(label: GrammarLabelType) -> bool { matches!(label.tnt(), Some(tnt) if matches!(tnt, TNT::Ter(_))) } /// A helper function to generate a fragment of forest. /// /// It simply constructs a root node and then appends /// successive nodes as successive children of the previous /// node. Also the starting positions will all be set to the /// same position. /// /// If the input is empty, this returns an empty forest; /// otherwise the result is not empty. pub fn generate_fragment( labels: impl AsRef<[GrammarLabelType]>, pos: usize, ) -> Result>, crate::default::Error> { let labels_slice = labels.as_ref(); let labels_len = labels_slice.len(); let last_label = if labels_len > 0 { labels_slice.get(labels_len - 1).copied().unwrap() } else { return Ok(Default::default()); }; let labels_iter = labels_slice.iter(); let labels_iter_zipped = labels_iter .clone() .zip(labels_iter.skip(1).chain(std::iter::once(&last_label))); let mut mapped_iter = labels_iter_zipped.map(|(label, next_label)| { if is_labelled_by_terminal(*next_label) { GrammarLabel::new_closed(*label, pos, pos + 1) } else { GrammarLabel::new(*label, pos) } }); let first_label = mapped_iter.next().unwrap(); let mut result = DefaultForest::new_leaf(first_label); let mut index = 0; for label in mapped_iter { result.plant(index, DefaultForest::new_leaf(label), false)?; index = result .query_label(label.into()) // REVIEW: Perhaps a LabelNoNode error? .ok_or(Error::Invalid)?; } Ok(result) } /// Generate a virtual fragment representing the left-linear null /// closure \[nt\]^t. pub fn virtual_generate_fragment( atom: impl Borrow, nt: usize, t: usize, pos: usize, ) -> Result>, crate::default::Error> { let atom = atom.borrow(); let non_start = atom.nth_accumulator(nt).unwrap() * 2; let mut result = DefaultForest::default(); for (label, child_iter) in atom.labels_of(non_start)? { if matches!(*label.get_value(), Some(TNT::Ter(ter)) if ter == t) { for child in child_iter { let line: Vec = atom .query_expansion(non_start, child) .map_err(index_out_of_bounds_conversion)? .iter() .copied() .flatten() .flat_map(|(nt, rule)| [(*rule).into(), TNT::Non(*nt).into()]) .rev() .chain(std::iter::once(TNT::Ter(t).into())) .collect(); if result.is_empty() { result = generate_fragment(line, pos)?; } else { let mut new_fragment = generate_fragment(line, pos)?; new_fragment.remove_node(0)?; new_fragment.set_root(1)?; let cloned = result.clone_node(0, 0, false)?; result.plant(cloned, new_fragment, false)?; } } } } Ok(result) } impl DefaultForest> { /// Insert an item derivation forest into a recording forest. /// /// We need the help of other things just for finding the correct /// places to insert these item fragments. /// /// # Steps /// /// This function performs the following steps. /// /// # Extra reductions /// /// If the label's true_source is different from its /// forest_source, first splone the node of true_source, then /// query the reducer by the key botop, whose bottom is /// `true_source` and top is `forest_source`. The result is an /// optional set of tuples (nt, rule) of unsigned integers. For /// each tuple, find a parent which is labelled by `nt` and whose /// parent is labelled by `rule`. Then proceed similarly. /// /// # Reductions /// /// Perform splone on the node of forest_source. Then query atom /// for the reduction information by the key (label, atom_child), /// where atom_child runs through every element of /// atom_child_iter. The result is a list of unsigned integers. /// For each unsigned integer `nt`, we find a parent which is /// labelled by `nt`. The last parents found will be the parents /// used in the next step. /// /// # Plant /// /// For parents as found in the previous step, for each node in /// parents, perform splone with an open end, and then plant the /// fragment under the result splone. pub(crate) fn insert_item( &mut self, label: Edge, ter: usize, fragment: impl Borrow>>, atom_child_iter: impl Iterator + ExactSizeIterator + Clone, atom: &DefaultAtom, ) -> Result { let root = if let Some(root) = self.root() { root } else { unreachable!("the forest must be non-empty when we insert items"); }; let pavi = label.forest_source(); let true_source = label.true_source(); let fragment = fragment.borrow(); let fragment_root = if let Some(root) = fragment.root() { root } else { panic!("empty item"); }; let fragment_root_label = fragment .vertex_label(fragment_root)? .ok_or(Error::NodeNoLabel(fragment_root))?; let pos = fragment_root_label.label().start(); // dbg!((pos, label)); // Whether or not to print detailed graphs of each step of // operation for debugging purposes. let mut to_print = false; if std::fs::metadata("output/").is_err() { to_print = false; } let tnt_string = { let empty_p = atom_child_iter.len() == 0; let label = fragment.vertex_label(fragment.nodes_len() - 1)?.unwrap(); match label.label().label() { GrammarLabelType::TNT(TNT::Ter(t)) => { format!("t {t}{}", if empty_p { " second" } else { "" }) } GrammarLabelType::TNT(TNT::Non(n)) => { format!("n {n}") } _ => "error".to_string(), } }; let num = { let mut repetition = 0; while std::fs::metadata(format!("output/pos {pos} - {repetition}.gv")).is_ok() { repetition += 1; } repetition }; if to_print { self.print_viz(&format!("pos {pos} - {num}.gv")).unwrap(); } /// A cute little macro to produce compact representations /// of Parents, Virtual nodes, or empty. #[allow(unused_macros)] macro_rules! pavi_to_short_str { ($pavi:ident) => { match $pavi { PaVi::Parent(node, edge, child) => format!("p{node} {edge} {child}"), PaVi::Virtual(nt, t, node) => format!("v{nt} {t} {node}"), PaVi::Empty => "ε".to_string(), } }; } // Ensure the last node in the PaVi is a terminal or a // non-terminal node, as an extra safety guard during // development. #[cfg(debug_assertions)] { match pavi { PaVi::Parent(_node, _edge, child) => { assert!(matches!( self.vertex_label(child), Ok(Some(label)) if label.label().label().tnt().is_some())); } PaVi::Virtual(nt, t, node) => { if !matches!( self.vertex_label(node), Ok(Some(label)) if matches!( label.label().label().tnt(), Some(TNT::Non(_)))) { dbg!(node, self.vertex_label(node)?, pavi); self.print_viz("dbg forest.gv").unwrap(); panic!("assumption fails"); } if nt >= atom.non_num() { dbg!(); return Err(Error::IndexOutOfBounds(nt, atom.non_num())); } if t >= atom.ter_num() { dbg!(); return Err(Error::IndexOutOfBounds(t, atom.ter_num())); } } PaVi::Empty => {} } } let is_empty_segment = pavi.is_empty(); if true_source.is_virtual() { self.close_pavi(atom.borrow(), true_source, pos)?; if to_print { self.print_viz(&format!( "pos {pos} - {num} {tnt_string} stage 0.1 {}.gv", pavi_to_short_str!(true_source) )) .unwrap(); } } let mut parents: Vec = { let mut result = Vec::new(); match pavi { PaVi::Parent(node, edge, _) => { result.push(Parent::new(node, edge)); } PaVi::Virtual(nt, t, node) => { let node_label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?; for atom_child in atom_child_iter.clone() { for rule in atom.trace(nt, t, atom_child).into_iter().flatten() { let virtual_frag = atom.generate_virtual_frags(nt, t, Some(rule)); if let Some(frag) = virtual_frag { let mut frag = (*frag.get(0).unwrap()).clone(); frag.set_pos(atom, node_label.label().start(), true)?; let frag_nodes_len = frag.nodes_len(); assert!(frag_nodes_len > 1); let last_but_one_label = frag .vertex_label(frag_nodes_len - 2)? .ok_or(Error::NodeNoLabel(frag_nodes_len - 2))?; // NOTE: The function `plant_at_start` // assumes that we want to plant the // fragment as the first child of the // node. This assumption holds in // this case, but not in general. self.plant_at_start(node, frag)?; if to_print { self.print_viz(&format!( "pos {pos} - {num} {tnt_string} stage 0.2 {node}.gv" )) .unwrap(); } let rule_label_pos = self .query_label(last_but_one_label) .expect("the forest was wrongly planted"); result.push(Parent::new(rule_label_pos, 0)); } } } } PaVi::Empty => { result.push(Parent::new(root, 0)); } } result }; if let PaVi::Parent(node, edge, _) = pavi { let nth_child = self.nth_child(node, edge)?; let reduced = self.reduction(nth_child, pos, ter, atom.borrow(), false)?; if reduced != nth_child && !self.is_empty_node(reduced)? { parents.clear(); parents.extend(self.parents_of(reduced)?); } if to_print { self.print_viz(&format!( "pos {pos} - {num} {tnt_string} stage 0.3 {nth_child}.gv" )) .unwrap(); } } for parent in parents.iter() { if !self.has_node(parent.node()) { return Err(Error::IndexOutOfBounds(parent.node(), self.nodes_len())); } } if !is_empty_segment { parents = parents .into_iter() .flat_map(|parent| { self.parents_of(parent.node()).unwrap().filter(|n| { matches!( self.vertex_label(n.node()) .unwrap() .unwrap() .label() .label() .tnt(), Some(TNT::Non(_)) ) }) }) .collect(); } let mut non_empty = false; for atom_child in atom_child_iter { // dbg!(label.label(), atom_child); // Find reduction information. let reduction_info = atom .query_reduction(label.label(), atom_child) .map_err(index_out_of_bounds_conversion)?; let mut stack = parents.clone(); let mut second_stack = Vec::new(); // locate the nodes for reduction_nt in reduction_info.iter().copied().flatten().rev() { while let Some(mut node) = stack.pop() { let mut node_label = self .vertex_label(node.node())? .ok_or_else(|| Error::NodeNoLabel(node.node()))?; if matches!( node_label .label() .label(), GrammarLabelType::TNT(TNT::Non(nt)) if nt == *reduction_nt ) { let sploned_node = self.splone(node.node(), Some(pos), node.edge(), false)?; if to_print { self.print_viz(&format!( "pos {pos} - {num} {tnt_string} stage 1 {} {}.gv", node.node(), node.edge(), )) .unwrap(); } node_label = self .vertex_label(sploned_node)? .ok_or(Error::NodeNoLabel(sploned_node))?; if node_label.clone_index().is_some() { let mut parent_iter = self.parents_of(sploned_node)?; #[cfg(debug_assertions)] assert_eq!(parent_iter.len(), 1); node = parent_iter.next().unwrap(); #[cfg(debug_assertions)] assert!(self .vertex_label(node.node())? .ok_or(Error::NodeNoLabel(node.node()))? .is_packed()); } else { node = Parent::new(sploned_node, node.edge()); } let parents_iter = self.parents_of(node.node())?; for parent in parents_iter { let parent_node = parent.node(); let parent_label = self .vertex_label(parent_node)? .ok_or(Error::NodeNoLabel(parent_node))? .label(); if parent_label.label().rule().is_none() { crate::item::default::print_labels(atom, self.borrow()).unwrap(); self.print_viz("dbg forest.gv").unwrap(); dbg!(parent, parent_label, label, node, sploned_node); panic!("assumption fails"); } second_stack.extend(self.parents_of(parent.node())?.filter(|n| { matches!(self.vertex_label(n.node()), Ok(Some(label)) if matches!( label.label().label().tnt(), Some(TNT::Non(_)))) })); } } } std::mem::swap(&mut stack, &mut second_stack); if stack.is_empty() { break; } } if stack.is_empty() { dbg!( label, atom_child, parents, reduction_info, atom.query_reduction(label.label(), atom_child).unwrap(), is_empty_segment, atom.trace(0, 3, atom_child) .into_iter() .flatten() .collect::>(), ); self.print_viz("dbg forest.gv").unwrap(); #[cfg(test)] crate::item::default::print_labels(atom, self.borrow()).unwrap(); return Err(Error::CannotPlant); } for parent in stack { let splanted = self.splant(parent.node(), parent.edge(), fragment, non_empty)?; let _splanted_child = self.nth_child(splanted, self.degree(splanted)? - 1)?; if to_print { self.print_viz(&format!( "pos {pos} - {num} {tnt_string} stage 2 {} {} {splanted}.gv", parent.node(), parent.edge(), )) .unwrap(); } non_empty = true; } } // If the iterator is empty, assert the fragment has length // one, and do not plant anything. if !non_empty { assert_eq!(fragment.nodes_len(), 1); } let result = if fragment.nodes_len() == 2 { let root_label = fragment_root_label; let leaf_label = fragment .vertex_label(1 - fragment_root)? .ok_or(Error::NodeNoLabel(1 - fragment_root))?; // it has been planted, so should be safe. let node = self .query_label(root_label) .expect("root label was not found"); let edge: usize; let child: usize; let mut result = None; // dbg!(leaf_label, &fragment); // crate::item::default::print_labels(atom, fragment).unwrap(); // dbg!(self.vertex_label(node)?); for (index, child) in self.children_of(node)?.enumerate() { // dbg!(self.vertex_label(child)?, child); if matches!(self.vertex_label(child)?, Some(child_label) if child_label.label() == leaf_label.label()) { result = Some((index, child)); break; } } if let Some((index, edge_child)) = result { edge = index; child = edge_child; } else { unreachable!("the forest is wrongly planted"); } // dbg!(node, edge, root_label, leaf_label); PaVi::Parent(node, edge, child) } else { assert_eq!( fragment.nodes_len(), 1, "a virtual fragment should consist of a single terminal node." ); let root_label = fragment_root_label; let pavi_parent = pavi.parent().expect( "When we insert a virtual fragment, the forest_source of the label must be a parent.", ); let nth_child = self.nth_child(pavi_parent.node(), pavi_parent.edge())?; let nth_child_label = self .vertex_label(nth_child)? .ok_or(Error::NodeNoLabel(nth_child))? .label() .label(); let error_str = "When we insert a virtual fragment, the \ forest source of the label must point to \ a non-terminal node"; let nt = match nth_child_label.tnt().expect(error_str) { TNT::Non(nt) => nt, _ => { dbg!(nth_child, nth_child_label); panic!("{error_str}"); } }; let error_str = "a virtual fragment should consist of a single terminal node"; let t = match root_label.label().label().tnt().expect(error_str) { TNT::Ter(t) => t, _ => { dbg!(root_label); panic!("{error_str}") } }; PaVi::Virtual(nt, t, nth_child) }; Ok(result) } /// Set the end position of the node associated with `pavi` to be `pos`. /// /// The parameter `atom` is used to query the reduction fragment /// if `pavi` is a virtual node. pub(crate) fn close_pavi( &mut self, atom: &DefaultAtom, pavi: PaVi, pos: usize, ) -> Result { match pavi { PaVi::Parent(_node, _edge, child) => { let nth_child = child; let nth_child_label = self .vertex_label(nth_child)? .ok_or(Error::NodeNoLabel(nth_child))?; let nth_child_degree = self.degree(nth_child)?; let nth_child_last = core::cmp::max(nth_child_degree, 1) - 1; if matches!(nth_child_label.label().label().tnt(), Some(TNT::Non(_))) && !nth_child_label.is_packed() { Ok(self.splone(nth_child, Some(pos), nth_child_last, false)?) } else if nth_child_label.is_packed() { // REVIEW: is this really correct? dbg!("this should not really happen?"); // SUMMARY: splone every child of nth_child let mut result: usize = nth_child; for node in self.children_of(nth_child)?.collect::>() { let node_label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?; let degree = self.degree(node)?; let last_index = core::cmp::max(degree, 1) - 1; if matches!(node_label.label().label().tnt(), Some(TNT::Non(_))) { result = self.splone(node, Some(pos), last_index, false)?; } } Ok(result) } else { Ok(nth_child) } } PaVi::Virtual(nt, t, mut node) => { let node_label_start = self .vertex_label(node)? .ok_or(Error::NodeNoLabel(node))? .label() .start(); let reduction_fragment = atom.generate_virtual_frags(nt, t, None); // Maybe we do not have to force the reduciton here? // // NOTE: the case of the root is exceptional // if reduction_fragment.is_none() && self.root() != Some(node) { // dbg!(self.root()); // self.print_viz("cannot close.gv").unwrap(); // return Err(Error::CannotClose(nt, t, node, node_label_start)); // } for frag in reduction_fragment.into_iter().flatten() { let mut frag = frag.clone(); frag.set_pos(atom, node_label_start, true)?; node = self.plant_at_start(node, frag)?; } Ok(node) } _ => self.root().ok_or(Error::IndexOutOfBounds(0, 0)), } } } #[cfg(test)] mod genins_test { use super::*; use crate::item::default::leaf; use grammar::test_grammar_helper::*; #[test] fn test_generate_fragment() -> Result<(), Box> { let grammar = new_notes_grammar()?; let atom = DefaultAtom::from_grammar(grammar)?; #[cfg(feature = "test-print-viz")] atom.print_nfa("genins nfa.gv")?; let fragment = generate_fragment([72.into(), TNT::Non(0).into()], 0)?; let mut test_fragment = leaf!( GrammarLabel::new(GrammarLabelType::from(72), 0), GrammarLabel ); test_fragment.plant( 0, leaf!( GrammarLabel::new(GrammarLabelType::from(TNT::Non(0)), 0), GrammarLabel ), false, )?; assert_eq!(fragment, test_fragment); // virtual fragments println!("nt = 0, t = 3"); let virtual_fragment = virtual_generate_fragment(&atom, 0, 3, 0)?; assert_eq!(virtual_fragment.nodes_len(), 7); let virtual_node = virtual_fragment.vertex_label(5)?.unwrap().label(); let test_fragment = generate_fragment( [ TNT::Non(0).into(), 2.into(), TNT::Non(1).into(), 8.into(), TNT::Non(2).into(), virtual_node.label(), TNT::Ter(3).into(), ], 0, )?; crate::item::default::print_labels(&atom, &virtual_fragment)?; assert_eq!(virtual_fragment, test_fragment); #[cfg(feature = "test-print-viz")] virtual_fragment.print_viz("virtual fragment (0, 3).gv")?; println!("nt = 3, t = 2"); let virtual_fragment = virtual_generate_fragment(&atom, 3, 2, 1)?; let test_fragment = generate_fragment([TNT::Non(3).into(), 38.into(), TNT::Ter(2).into()], 1)?; crate::item::default::print_labels(&atom, &virtual_fragment)?; assert_eq!(virtual_fragment, test_fragment); #[cfg(feature = "test-print-viz")] virtual_fragment.print_viz("virtual fragment (3, 2).gv")?; // querying reductions assert!(matches!(atom.query_reduction(17, 9), Ok(Some(&[1])))); // assert!(matches!(atom.query_reduction(35, 9), Ok(Some(&[1, 2])))); // assert!(matches!(atom.query_reduction(35, 25), Ok(Some(&[2])))); Ok(()) } #[test] fn test_reduction() -> Result<(), Box> { let grammar = new_paren_grammar()?; let atom = DefaultAtom::from_grammar(grammar)?; #[cfg(feature = "test-print-viz")] atom.print_nfa("genins nfa.gv")?; // querying reductions println!("{:?}", atom.query_reduction(32, 17)?); // assert!(matches!(atom.query_reduction(35, 25), Ok(Some(&[2])))); Ok(()) } }