diff options
author | JSDurand <mmemmew@gmail.com> | 2023-02-27 12:36:41 +0800 |
---|---|---|
committer | JSDurand <mmemmew@gmail.com> | 2023-02-27 12:36:41 +0800 |
commit | fbaa420ed550e9c3e7cdc09d4a8ec22bfbd782a6 (patch) | |
tree | fad9722825bb3fa796dd52c3fd4a8bf46b958cf9 /chain/src/default.rs | |
parent | afad02bdff111ecccb0077b9c989e869723c231c (diff) |
before a major refactor
I decide to adopt a new approach of recording and updating item
derivation forests. Since this affects a lot of things, I decide to
commit before the refactor, so that I can create a branch for that
refactor.
Diffstat (limited to 'chain/src/default.rs')
-rw-r--r-- | chain/src/default.rs | 405 |
1 files changed, 307 insertions, 98 deletions
diff --git a/chain/src/default.rs b/chain/src/default.rs index c873de7..08e29ce 100644 --- a/chain/src/default.rs +++ b/chain/src/default.rs @@ -8,14 +8,16 @@ use super::*; use crate::atom::{Atom, DefaultAtom}; use crate::item::{ - default::DefaultForest, generate_fragment, genins::virtual_generate_fragment, Forest, + default::DefaultForest, generate_fragment, genins::index_out_of_bounds_conversion, Forest, ForestLabel, ForestLabelError, }; use core::fmt::Display; -use grammar::{Error as GrammarError, GrammarLabel, GrammarLabelType, TNT}; -use graph::{labelled::DLGBuilder, Builder, DLGraph, Graph, LabelExtGraph, LabelGraph}; +use grammar::{GrammarLabel, GrammarLabelType, START_NONTERMINAL, TNT}; +use graph::{ + labelled::DLGBuilder, Builder, DLGraph, Graph, LabelExtGraph, LabelGraph, ParentsGraph, +}; -use std::collections::{HashMap as Map, TryReserveError}; +use std::collections::{HashMap as Map, HashSet, TryReserveError}; /// The errors related to taking derivatives by chain rule. #[non_exhaustive] @@ -36,10 +38,10 @@ pub enum Error { CannotClone(ForestLabelError), /// Cannot find a suitable node to plant the new forest fragment. CannotPlant, - /// Trying to insert an empty item. - EmptyItem, /// Cannot split a packed node. SplitPack(usize), + /// A cloned node should have exactly one parent. + InvalidClone(usize), /// An invalid situation happens. Invalid, } @@ -57,8 +59,10 @@ impl Display for Error { Self::CannotReserve(e) => write!(f, "cannot reserve memory: {e}"), Self::CannotClone(fe) => write!(f, "cannot clone due to {fe}"), Self::CannotPlant => write!(f, "cannot plant a new forest fragment"), - Self::EmptyItem => write!(f, "trying to insert an empty item"), Self::SplitPack(n) => write!(f, "cannot split the packed node {n}"), + Self::InvalidClone(n) => { + write!(f, "the cloned node {n} should have exactly one parent") + } Self::Invalid => write!(f, "invalid"), } } @@ -86,6 +90,7 @@ impl From<ForestError> for Error { ForestError::NodeNoLabel(n) => Error::NodeNoLabel(n), ForestError::LabelConversion(ce) => Error::CannotClone(ce), ForestError::SplitPack(n) => Error::SplitPack(n), + ForestError::InvalidClone(n) => Error::SplitPack(n), } } } @@ -110,7 +115,7 @@ impl Default for DerIterIndex { } /// A complex type used for storing values of edges with two layers. -type SecondTypeValue = (PaSe, bool, Vec<(Roi, usize)>); +type SecondTypeValue = (PaVi, bool, Vec<(Roi, usize)>); /// An iterator of TwoLayers. #[derive(Debug, Default)] @@ -137,7 +142,7 @@ impl DerIter { fn add_second_layer( &mut self, label: usize, - forest_source: PaSe, + forest_source: PaVi, accepting: bool, edges: Vec<(Roi, usize)>, ) { @@ -166,7 +171,7 @@ impl Iterator for DerIter { Some(TwoLayers::Two(*key, forest_source, accepting, edges)) } else { // this should not happen - println!("a key does not exist in the hashmap: something is wrong when taking derivatives"); + dbg!("a key does not exist in the hashmap: something is wrong when taking derivatives"); None } } else { @@ -176,22 +181,15 @@ impl Iterator for DerIter { // safely set the index to one. self.index = DerIterIndex::Single(1); - if let Some((edge, to)) = self.singles.first() { - Some(TwoLayers::One(*edge, *to)) - } else { - None - } - } - } - DerIterIndex::Single(index) => { - if let Some((edge, to)) = self.singles.get(index) { - self.index = DerIterIndex::Single(index + 1); - - Some(TwoLayers::One(*edge, *to)) - } else { - None + self.singles + .first() + .map(|(edge, to)| TwoLayers::One(*edge, *to)) } } + DerIterIndex::Single(index) => self.singles.get(index).map(|(edge, to)| { + self.index = DerIterIndex::Single(index + 1); + TwoLayers::One(*edge, *to) + }), } } } @@ -205,6 +203,7 @@ pub struct DefaultChain { history: Vec<usize>, forest: DefaultForest<ForestLabel<GrammarLabel>>, accepting_vec: Vec<bool>, + accepting_sources: Vec<PaVi>, } impl DefaultChain { @@ -217,7 +216,7 @@ impl DefaultChain { /// Return the complete slice of histories. #[inline] pub fn history(&self) -> &[usize] { - self.history.as_ref() + self.history.as_slice() } /// Return a reference to the associated forest. @@ -228,7 +227,7 @@ impl DefaultChain { /// Print the rule positions of the labels. pub fn print_rule_positions(&self) -> Result<(), Box<dyn std::error::Error>> { - let mut labels = std::collections::HashSet::<usize>::default(); + let mut labels = HashSet::<usize>::default(); for node in 0..self.graph.nodes_len() { labels.extend(self.graph.labels_of(node)?.map(|(label, _)| label.label)); @@ -395,10 +394,8 @@ impl Chain for DefaultChain { let empty_state = atom.empty(); - // The zero-th non-terminal is assumed to be the starting - // non-terminal, by convention. let initial_nullable = atom - .is_nullable(0) + .is_nullable(START_NONTERMINAL) .map_err(|_| Error::IndexOutOfBounds(0, atom.non_num()))?; builder.add_edge( @@ -421,6 +418,8 @@ impl Chain for DefaultChain { let accepting_vec = vec![true, initial_nullable]; + let accepting_sources = Vec::new(); + Ok(Self { graph, atom, @@ -428,6 +427,7 @@ impl Chain for DefaultChain { history, forest, accepting_vec, + accepting_sources, }) } @@ -455,18 +455,15 @@ impl Chain for DefaultChain { edges: impl Iterator<Item = (Roi, usize)>, ) -> Result<(), Self::Error> { for (roi, _) in edges { - match roi.imaginary_part() { - Some(target) => { - if matches!(self.accepting_vec.get(target).copied(), Some(true)) { - let accepting_vec_len = self.accepting_vec.len(); - - *self - .accepting_vec - .get_mut(node_id) - .ok_or(Error::IndexOutOfBounds(node_id, accepting_vec_len))? = true; - } + if let Some(target) = roi.imaginary_part() { + if matches!(self.accepting_vec.get(target).copied(), Some(true)) { + let accepting_vec_len = self.accepting_vec.len(); + + *self + .accepting_vec + .get_mut(node_id) + .ok_or(Error::IndexOutOfBounds(node_id, accepting_vec_len))? = true; } - None => {} } } @@ -475,24 +472,22 @@ impl Chain for DefaultChain { type DerIter = DerIter; + // EXPERIMENT: Try the approach of keeping an additional vector of + // vectors of unsigned integers. Each element of the vector + // corresponds to an edge of the current node. Each element is a + // vector. This vector represents the list of reductions that are + // implied due to skipping edges without children. + // + // Then when we insert an item, we can use this list to perform + // additional reductions. This can avoid the ugly and inefficient + // position_search method currently adopted. + // + // Of course we can use an optional vector to prevent allocating + // too much memory for edges whose corresponding vector is empty. + fn derive(&mut self, t: usize, pos: usize) -> Result<Self::DerIter, Self::Error> { use TNT::*; - /// Convert an error telling us that an index is out of bounds. - /// - /// # Panics - /// - /// The function panics if the error is not of the expected - /// kind. - fn index_out_of_bounds_conversion(ge: GrammarError) -> Error { - match ge { - GrammarError::IndexOutOfBounds(index, bound) => { - Error::IndexOutOfBounds(index, bound) - } - _ => Error::Invalid, - } - } - /// A helper function to generate edges to join. /// /// It first checks if the base edge is accepting. If yes, @@ -505,11 +500,14 @@ impl Chain for DefaultChain { /// to save some allocations. fn generate_edges( chain: &DefaultChain, - child_iter: impl Iterator<Item = usize> + ExactSizeIterator + Clone, + mut child_iter: impl Iterator<Item = usize> + ExactSizeIterator + Clone, atom_child_iter: impl Iterator<Item = usize> + Clone, - pase: PaSe, + pavi: PaVi, + true_pavi: PaVi, mut output: impl AsMut<Vec<(Roi, usize)>>, - ) -> Result<(), Error> { + ) -> Result<bool, Error> { + let mut accepting = false; + // First check the values from iterators are all valid. let graph_len = chain.graph.nodes_len(); let atom_len = chain.atom.nodes_len(); @@ -540,11 +538,11 @@ impl Chain for DefaultChain { for atom_child in atom_child_iter.clone() { let atom_child_accepting = chain.atom.is_accepting(atom_child).unwrap(); - // let atom_child_empty_node = chain.atom.is_empty_node(atom_child).unwrap(); num += child_iter.len(); if atom_child_accepting { + accepting = true; num += child_iter_total_degree; } } @@ -561,7 +559,7 @@ impl Chain for DefaultChain { let atom_child_accepting = chain.atom.is_accepting(atom_child).unwrap(); let atom_child_empty_node = chain.atom.is_empty_node(atom_child).unwrap(); - let roi = Edge::new(atom_child, pase, atom_child_accepting).into(); + let roi = Edge::new(atom_child, pavi, atom_child_accepting).into(); if atom_child_empty_node { output.extend(child_iter.clone().map(|child| (child.into(), child))); @@ -572,18 +570,30 @@ impl Chain for DefaultChain { if atom_child_accepting { for child in child_iter.clone() { for (child_label, child_child) in chain.graph.labels_of(child).unwrap() { - output - .extend(child_child.map(|target| ((*child_label).into(), target))); + // use the new `pavi` as `true_source` + let mut new_label = *child_label; + new_label.set_true_source(true_pavi); + + output.extend( + std::iter::repeat(new_label.into()) + .take(child_child.len()) + .zip(child_child), + ); } } } } - Ok(()) + accepting = + accepting && child_iter.any(|child| *chain.accepting_vec.get(child).unwrap()); + + Ok(accepting) } let mut der_iter = DerIter::default(); + let mut accepting_pavi: HashSet<PaVi> = HashSet::new(); + for (label, child_iter) in self.graph.labels_of(self.current)? { for (atom_label, atom_child_iter) in self.atom.labels_of(label.label())? { if atom_label.is_left_p() { @@ -594,6 +604,10 @@ impl Chain for DefaultChain { let atom_moved = atom_label.get_moved(); + if pos == 4 { + dbg!(atom_label); + } + match *atom_label.get_value() { Some(Ter(ter)) if ter == t => { // prepare forest fragment @@ -601,20 +615,44 @@ impl Chain for DefaultChain { let fragment = generate_fragment([atom_moved.into(), Ter(ter).into()], pos)?; - let new_pase = self.forest.insert_item( + if pos == 4 { + dbg!(atom_moved, label); + self.forest + .print_viz(&format!( + "pos4tb - {atom_moved}-{:?}.gv", + label.true_source() + )) + .unwrap(); + } + + let new_pavi = self.forest.insert_item( *label, fragment, atom_child_iter.clone(), &self.atom, )?; - generate_edges( + if pos == 4 { + self.forest + .print_viz(&format!( + "pos4ta - {atom_moved}-{:?}.gv", + label.true_source() + )) + .unwrap(); + } + + let accepting = generate_edges( self, child_iter.clone(), atom_child_iter.clone(), - new_pase, + new_pavi, + new_pavi, &mut der_iter.singles, )?; + + if accepting { + accepting_pavi.insert(new_pavi); + } } Some(Non(non)) => { if !self @@ -634,50 +672,75 @@ impl Chain for DefaultChain { let first_fragment = generate_fragment([atom_moved.into(), Non(non).into()], pos)?; - let first_segment_pase = self.forest.insert_item( + if pos == 4 { + dbg!(atom_moved, label); + self.forest + .print_viz(&format!("pos4nb - {atom_moved}-{:?}.gv", label)) + .unwrap(); + } + + let first_segment_pavi = self.forest.insert_item( *label, first_fragment, atom_child_iter.clone(), &self.atom, )?; + if pos == 4 { + self.forest + .print_viz(&format!("pos4na - {atom_moved}-{:?}.gv", label)) + .unwrap(); + } + let accepting = self .atom .is_accepting(virtual_node) .map_err(index_out_of_bounds_conversion)?; let virtual_fragment = - virtual_generate_fragment(&self.atom, non, t, pos)?; + DefaultForest::new_leaf(GrammarLabel::new(Ter(t), pos)); - // NOTE: We only need the PaSe from the + // NOTE: We only need the PaVi from the // first segment, so we pass an empty // iterator, in which case the passed - // label is only used for the PaSe. - let virtual_pase = self.forest.insert_item( - Edge::new(0, first_segment_pase, accepting), + // label is only used for the PaVi. + let virtual_pavi = self.forest.insert_item( + Edge::new(0, first_segment_pavi, accepting), virtual_fragment, std::iter::empty(), &self.atom, )?; + if pos == 4 { + self.forest + .print_viz(&format!("pos4va - {atom_moved}-{:?}.gv", label)) + .unwrap(); + } + let mut new_edges = Vec::new(); - generate_edges( + let virtual_accepting = generate_edges( self, child_iter.clone(), atom_child_iter.clone(), - first_segment_pase, + first_segment_pavi, + virtual_pavi, &mut new_edges, )?; + if virtual_accepting { + accepting_pavi.insert(first_segment_pavi); + } + if accepting { + accepting_pavi.insert(virtual_pavi); der_iter.singles.extend(new_edges.clone()); } if !self.atom.is_empty_node(virtual_node).unwrap() { der_iter.add_second_layer( virtual_node, - virtual_pase, + virtual_pavi, accepting, new_edges, ); @@ -691,7 +754,7 @@ impl Chain for DefaultChain { if self.atom.is_empty_node(atom_child).unwrap() { der_iter.singles.extend(child_iter.clone().map(|child| { ( - Edge::new(virtual_node, virtual_pase, accepting) + Edge::new(virtual_node, virtual_pavi, accepting) .into(), child, ) @@ -728,8 +791,137 @@ impl Chain for DefaultChain { } } + self.accepting_sources.extend(accepting_pavi); + Ok(der_iter) } + + // FIXME: Refactor this. + fn end_of_input(&mut self) -> Result<(), Self::Error> { + self.forest.remove_node(1)?; + + let mut stack = Vec::new(); + + dbg!(&self.accepting_sources); + + self.forest.print_viz("dbg forest before.gv").unwrap(); + + for pavi in self.accepting_sources.iter() { + match pavi { + PaVi::Parent(node, edge) => { + // REVIEW: This is a garbage node that has to be + // added when the machine starts. Is there a way + // to avoid this garbage? + if *node == 1 { + continue; + } + + let nth_child = self.forest.nth_child(*node, *edge)?; + + stack.push(nth_child); + } + PaVi::Virtual(nt, t, node) => { + let node_label = self + .forest + .vertex_label(*node)? + .ok_or(Error::NodeNoLabel(*node))?; + + if node_label.label().end().is_none() { + let reduction_fragment = self.atom.generate_virtual_frags(*nt, *t, None); + + for frag in reduction_fragment.into_iter().flatten() { + let mut frag = frag.clone(); + frag.set_pos(node_label.label().start())?; + + stack.push(self.forest.plant_if_needed(*node, frag)?); + } + } + } + PaVi::Empty => {} + } + } + + let mut seen_nodes: HashSet<usize> = HashSet::with_capacity(stack.len()); + + dbg!(&stack); + + self.forest.print_viz("dbg forest.gv").unwrap(); + + while let Some(mut top) = stack.pop() { + if seen_nodes.contains(&top) { + continue; + } + + seen_nodes.insert(top); + + let mut top_label = self + .forest + .vertex_label(top)? + .ok_or(Error::NodeNoLabel(top))?; + + if !top_label.is_packed() + && matches!(top_label.label().label().tnt(), Some(TNT::Non(_))) + && top_label.label().end().is_none() + { + let degree = self.forest.degree(top)?; + let last_index = if degree > 0 { degree - 1 } else { 0 }; + + let pos = if degree > 0 { + let last_child = self.forest.nth_child(top, last_index)?; + let last_child_label = self + .forest + .vertex_label(last_child)? + .ok_or(Error::NodeNoLabel(last_child))? + .label(); + let last_child_end = last_child_label.end(); + + if !matches!(last_child_label.label().rule(), + Some(rule) if + self + .atom + .is_accepting(2*rule+1) + .map_err(index_out_of_bounds_conversion)?) + { + continue; + } + + if let Some(pos) = last_child_end { + pos + } else { + stack.extend(self.forest.children_of(last_child)?); + continue; + } + } else { + top_label.label().start() + 1 + }; + + top = self.forest.splone(top, Some(pos), last_index, true)?; + top_label = self + .forest + .vertex_label(top)? + .ok_or(Error::NodeNoLabel(top))?; + } else if top_label.is_packed() + || top_label.label().label().rule().is_some() && top_label.label().end().is_none() + { + stack.extend(self.forest.children_of(top)?); + } + + if top_label.clone_index().is_some() { + let mut parents = self.forest.parents_of(top)?; + if parents.len() != 1 { + dbg!(top, top_label, parents.len()); + self.forest.print_viz("dbg forest.gv").unwrap(); + panic!("0 parent?"); + } + + top = parents.next().unwrap().node(); + } + + stack.extend(self.forest.parents_of(top)?.map(|parent| parent.node())); + } + + Ok(()) + } } #[cfg(test)] @@ -847,23 +1039,11 @@ mod test_chain { chain.chain(0, 11)?; chain.chain(0, 12)?; - let forest = &mut chain.forest; - - let node = forest - .query_label(ForestLabel::from(GrammarLabel::new(TNT::Non(6), 6))) - .unwrap(); - - forest.splone(node, Some(13), forest.degree(node)?)?; - - let node = forest - .query_label(ForestLabel::from(GrammarLabel::new(TNT::Non(1), 0))) - .unwrap(); - - forest.splone(node, Some(13), forest.degree(node)?)?; - - forest.splone(0, Some(13), forest.degree(0)?)?; + chain.end_of_input()?; - forest.print_viz("forest.gv")?; + for label in chain.labels_of(chain.current())?.map(|(label, _)| label) { + dbg!(label); + } assert!(matches!(chain.epsilon(), Ok(true))); @@ -871,7 +1051,9 @@ mod test_chain { { chain.graph.print_viz("chain.gv")?; chain.atom.print_nfa("nfa.gv")?; + chain.forest.print_viz("forest.gv")?; + item::default::print_labels(&chain.atom, &chain.forest)?; } @@ -886,21 +1068,38 @@ mod test_chain { let mut chain = DefaultChain::unit(atom)?; chain.chain(0, 0)?; + chain.forest.print_viz("forest0.gv")?; chain.chain(2, 1)?; + chain.forest.print_viz("forest1.gv")?; chain.chain(2, 2)?; + chain.forest.print_viz("forest2.gv")?; chain.chain(2, 3)?; + chain.forest.print_viz("forest3.gv")?; chain.chain(1, 4)?; - + chain.forest.print_viz("forest4.gv")?; + chain.end_of_input()?; chain.forest.print_viz("forest.gv")?; + chain.forest.print_closed_viz("closed.gv")?; - dbg!(chain.current(), chain.history()); + chain.graph.print_viz("chain.gv")?; + chain.atom.print_nfa("nfa.gv")?; item::default::print_labels(&chain.atom, &chain.forest)?; + for label in chain.labels_of(chain.current())?.map(|(label, _)| label) { + dbg!(label); + } + + dbg!(chain.current(), chain.history()); + #[cfg(feature = "test-print-viz")] { chain.graph.print_viz("chain.gv")?; chain.atom.print_nfa("nfa.gv")?; + chain.forest.print_viz("forest.gv")?; + + chain.forest.print_closed_viz("closed.gv")?; + item::default::print_labels(&chain.atom, &chain.forest)?; } @@ -950,21 +1149,22 @@ mod test_chain { let elapsed = start.elapsed(); - // assert!(matches!(chain.epsilon(), Ok(true))); + assert!(matches!(chain.epsilon(), Ok(true))); dbg!(elapsed); dbg!(chain.current()); assert_eq!(input.len(), chain.history().len()); - if std::fs::metadata("output/history").is_ok() { - std::fs::remove_file("output/history")?; + let history_file_name = "output/history"; + if std::fs::metadata(history_file_name).is_ok() { + std::fs::remove_file(history_file_name)?; } let mut history_file = std::fs::OpenOptions::new() .create(true) .write(true) - .open("output/history")?; + .open(history_file_name)?; use std::fmt::Write; use std::io::Write as IOWrite; @@ -985,10 +1185,19 @@ mod test_chain { history_file.write_all(log_string.as_bytes())?; + for label in chain.labels_of(chain.current())?.map(|(label, _)| label) { + dbg!(label); + } + #[cfg(feature = "test-print-viz")] { chain.graph.print_viz("chain.gv")?; chain.atom.print_nfa("nfa.gv")?; + item::default::print_labels(&chain.atom, &chain.forest)?; + + chain.forest.print_viz("forest.gv")?; + + chain.forest.print_closed_viz("closed.gv")?; } Ok(()) |