diff options
Diffstat (limited to 'chain/src/default.rs')
-rw-r--r-- | chain/src/default.rs | 353 |
1 files changed, 155 insertions, 198 deletions
diff --git a/chain/src/default.rs b/chain/src/default.rs index e61df41..ed63ff4 100644 --- a/chain/src/default.rs +++ b/chain/src/default.rs @@ -38,6 +38,10 @@ pub enum Error { CannotReserve(TryReserveError), /// Cannot create label for cloning nodes. CannotClone(ForestLabelError), + /// Cannot close the virtual node. + /// + /// The format is (nt, t, node, pos). + CannotClose(usize, usize, usize, usize), /// Cannot find a suitable node to plant the new forest fragment. CannotPlant, /// Cannot split a packed node. @@ -60,6 +64,12 @@ impl Display for Error { Self::NodeNoLabel(n) => write!(f, "node {n} has no labels while it should have one"), Self::CannotReserve(e) => write!(f, "cannot reserve memory: {e}"), Self::CannotClone(fe) => write!(f, "cannot clone due to {fe}"), + Self::CannotClose(nt, t, node, pos) => write!( + f, + "cannot close the virtual node {node} \ + with the expansion from nt {nt} by t \ + {t} at pos {pos}" + ), Self::CannotPlant => write!(f, "cannot plant a new forest fragment"), Self::SplitPack(n) => write!(f, "cannot split the packed node {n}"), Self::InvalidClone(n) => { @@ -196,58 +206,6 @@ impl Iterator for DerIter { } } -// TODO: Add a hashmap mapping tuples of forest positions (top, -// bottom) to vectors of tuples of unsigned integers. Each tuple -// represents a non-terminal and a rule. The tuple means that we need -// to close the expansion of the non-terminal by the rule position, if -// we want to move from bottom to top. -// -// After we move up one layer, we are now in a new forest position, so -// we just query again with the new bottom and top. This means we do -// not need to clear the map. - -// REVIEW: Why is the above needed? - -/// This represents a tuple of bottom and top forest positions. -/// -/// It is supposed to be used as keys to query the reduction -/// information. See the type [`Reducer`] for more details. -#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Default)] -pub(crate) struct BoTop { - bottom: PaVi, - top: PaVi, -} - -impl BoTop { - pub(crate) fn new(bottom: PaVi, top: PaVi) -> Self { - Self { bottom, top } - } - - pub(crate) fn top(&self) -> PaVi { - self.top - } - - pub(crate) fn bottom(&self) -> PaVi { - self.bottom - } -} - -/// This hashmap records information for performing extra reductions -/// when we insert items. -/// -/// When we want to *jump* from the bottom to the top positions in the -/// forest, we need to perform a set of reductions, in order to jump -/// one layer upwards. After one step, we query again for the next -/// step of reductions to perform. -/// -/// # Reduction format -/// -/// The value records a set of tuples of the form `(non-terminal, -/// starting_position)`. Such a tuple means we want to reduce from -/// the expansion of the given `non-terminal`, which expands from the -/// given starting position. We need to restrict the -pub(crate) type Reducer = Map<(usize, usize), HashSet<usize>>; - /// A default implementation for the [`Chain`] trait. #[derive(Debug, Clone, Default, Graph)] pub struct DefaultChain { @@ -259,7 +217,6 @@ pub struct DefaultChain { forest: DefaultForest<ForestLabel<GrammarLabel>>, accepting_vec: Vec<bool>, accepting_sources: Vec<PaVi>, - reducer: Reducer, } impl DefaultChain { @@ -431,8 +388,6 @@ impl Chain for DefaultChain { let accepting_sources = Vec::new(); - let reducer = Default::default(); - Ok(Self { graph, atom, @@ -441,7 +396,6 @@ impl Chain for DefaultChain { forest, accepting_vec, accepting_sources, - reducer, }) } @@ -486,19 +440,6 @@ impl Chain for DefaultChain { type DerIter = DerIter; - // EXPERIMENT: Try the approach of keeping an additional vector of - // vectors of unsigned integers. Each element of the vector - // corresponds to an edge of the current node. Each element is a - // vector. This vector represents the list of reductions that are - // implied due to skipping edges without children. - // - // Then when we insert an item, we can use this list to perform - // additional reductions. This can avoid the ugly and inefficient - // position_search method currently adopted. - // - // Of course we can use an optional vector to prevent allocating - // too much memory for edges whose corresponding vector is empty. - fn derive( &mut self, t: usize, @@ -536,12 +477,11 @@ impl Chain for DefaultChain { input: GeneratorInput<'_>, mut child_iter: impl Iterator<Item = usize> + ExactSizeIterator + Clone, atom_child_iter: impl Iterator<Item = usize> + Clone, - reducer: &mut Reducer, mut output: impl AsMut<Vec<(Roi, usize)>>, ) -> Result<bool, Error> { let mut accepting = false; - let (graph, atom, forest, accepting_vec, pavi, true_pavi) = input; + let (graph, atom, _forest, accepting_vec, pavi, true_pavi) = input; // First check the values from iterators are all valid. let graph_len = graph.nodes_len(); @@ -606,19 +546,8 @@ impl Chain for DefaultChain { for child in child_iter.clone() { for (child_label, child_child) in graph.labels_of(child).unwrap() { let mut new_label = *child_label; - new_label.set_true_source(true_pavi); - - let botop = BoTop::new(true_pavi, new_label.forest_source()); - let key = ( - forest.node_from_pavi(botop.top())?, - forest.node_from_pavi(botop.bottom)?, - ); - - reducer - .entry(key) - .or_insert_with(Default::default) - .insert(forest.node_from_pavi(new_label.true_source())?); + new_label.set_true_source(true_pavi); output.extend( std::iter::repeat(new_label.into()) @@ -658,8 +587,8 @@ impl Chain for DefaultChain { generate_fragment([atom_moved.into(), Ter(ter).into()], pos)?; self.forest.insert_item( - &self.reducer, *label, + t, fragment, atom_child_iter.clone(), &self.atom, @@ -681,7 +610,6 @@ impl Chain for DefaultChain { generator_input, child_iter.clone(), atom_child_iter.clone(), - &mut self.reducer, &mut der_iter.singles, )?; @@ -717,8 +645,8 @@ impl Chain for DefaultChain { generate_fragment([atom_moved.into(), Non(non).into()], pos)?; first_segment_pavi = self.forest.insert_item( - &self.reducer, *label, + t, first_fragment, atom_child_iter.clone(), &self.atom, @@ -732,8 +660,8 @@ impl Chain for DefaultChain { // iterator, in which case the passed // label is only used for the PaVi. virtual_pavi = self.forest.insert_item( - &self.reducer, Edge::new(0, first_segment_pavi, accepting), + t, virtual_fragment, std::iter::empty(), &self.atom, @@ -758,7 +686,6 @@ impl Chain for DefaultChain { generator_input, child_iter.clone(), atom_child_iter.clone(), - &mut self.reducer, &mut new_edges, )?; @@ -773,27 +700,8 @@ impl Chain for DefaultChain { match roi { Roi::Re(edge) => { let mut edge = *edge; - edge.set_true_source(virtual_pavi); - - // FIXME: Modify reducer after first experiments. - - // let botop = BoTop::new(virtual_pavi, edge.forest_source()); - // let rule_pos = atom_child.div_euclid(2); - - // let rule_nt = atom - // .get_rule_num(rule_pos) - // .map_err(index_out_of_bounds_conversion)?; - - // let rule_pos = rule_pos - // - atom - // .nth_accumulator(rule_nt) - // .map_err(index_out_of_bounds_conversion)?; - - // reducer - // .entry(botop) - // .or_insert_with(Default::default) - // .insert((rule_nt, rule_pos)); + edge.set_true_source(virtual_pavi); der_iter.singles.push((Roi::Re(edge), *target)); } @@ -831,23 +739,32 @@ impl Chain for DefaultChain { // this has been checked in // `generate_edges` if self.atom.is_empty_node(atom_child).unwrap() { - // REVIEW: flat flat map, - // hmm... - - // FIXME: Set true_source as well. - - // FIXME: Modify two layers of reducer. - der_iter.singles.extend(child_iter.clone().flat_map( - |child| { - self.graph.labels_of(child).unwrap().flat_map( - |(child_label, child_child_iter)| { - child_child_iter.map(|child_child| { - ((*child_label).into(), child_child) - }) - }, - ) - }, - )); + let mut extra_len = 0usize; + + for child in child_iter.clone() { + extra_len += self + .graph + .labels_of(child) + .unwrap() + .map(|(_, child_child_iter)| child_child_iter.len()) + .sum::<usize>(); + } + + der_iter.singles.try_reserve(extra_len)?; + + for child in child_iter.clone() { + for (child_label, child_child_iter) in + self.graph.labels_of(child).unwrap() + { + let mut child_label = *child_label; + + child_label.set_true_source(virtual_pavi); + + der_iter.singles.extend(child_child_iter.map( + |child_child| (child_label.into(), child_child), + )); + } + } } } } @@ -866,8 +783,21 @@ impl Chain for DefaultChain { Ok(der_iter) } - // FIXME: Refactor this. + // FIXME: Do nothing more than to make this at the end of input, + // actually. + // + // This means we only close the expansions at the last accepting + // sources. + // + // As to the remaining still open fragments, they should be + // considered as bugs and should be fixed in the function + // `insert_item` instead. + fn end_of_input(&mut self) -> Result<(), Self::Error> { + if !matches!(self.epsilon(), Ok(true)) { + return Ok(()); + } + self.forest.remove_node(1)?; let mut stack = Vec::new(); @@ -878,7 +808,7 @@ impl Chain for DefaultChain { for pavi in self.accepting_sources.iter() { match pavi { - PaVi::Parent(node, edge) => { + PaVi::Parent(node, _edge, child) => { // REVIEW: This is a garbage node that has to be // added when the machine starts. Is there a way // to avoid this garbage? @@ -886,9 +816,7 @@ impl Chain for DefaultChain { continue; } - let nth_child = self.forest.nth_child(*node, *edge)?; - - stack.push(nth_child); + stack.push(*child); } PaVi::Virtual(nt, t, node) => { let node_label = self @@ -911,83 +839,87 @@ impl Chain for DefaultChain { } } - let mut seen_nodes: HashSet<usize> = HashSet::with_capacity(stack.len()); - - // dbg!(&stack); + let forest_nodes_len = self.forest.nodes_len(); - // self.forest.print_viz("dbg forest.gv").unwrap(); + let mut node_ends: Vec<Vec<usize>> = std::iter::repeat_with(Default::default) + .take(forest_nodes_len) + .collect(); - while let Some(mut top) = stack.pop() { - if seen_nodes.contains(&top) { - continue; + // NOTE: Use iterator to avoid checking bounds when accessing + // `node_ends`. + for (node, node_end) in self.forest.nodes().zip(node_ends.iter_mut()) { + if let Some(end) = self + .forest + .vertex_label(node)? + .ok_or(Error::NodeNoLabel(node))? + .label() + .end() + { + node_end.push(end); } + } - seen_nodes.insert(top); + #[derive(Copy, Clone)] + enum StackElement { + Seen(usize), + Unseen(usize), + } - let mut top_label = self - .forest - .vertex_label(top)? - .ok_or(Error::NodeNoLabel(top))?; + use StackElement::{Seen, Unseen}; - if !top_label.is_packed() - && matches!(top_label.label().label().tnt(), Some(TNT::Non(_))) - && top_label.label().end().is_none() - { - let degree = self.forest.degree(top)?; - let last_index = if degree > 0 { degree - 1 } else { 0 }; + // maybe we do not need so much space? + let mut stack = Vec::with_capacity(forest_nodes_len); - let pos = if degree > 0 { - let last_child = self.forest.nth_child(top, last_index)?; - let last_child_label = self - .forest - .vertex_label(last_child)? - .ok_or(Error::NodeNoLabel(last_child))? - .label(); - let last_child_end = last_child_label.end(); - - if !matches!(last_child_label.label().rule(), - Some(rule) if - self - .atom - .is_accepting(2*rule+1) - .map_err(index_out_of_bounds_conversion)?) - { + stack.push(Unseen( + self.forest.root().ok_or(Error::IndexOutOfBounds(0, 0))?, + )); + + let mut seen_nodes = HashSet::with_capacity(forest_nodes_len); + + // depth-first search + while let Some(top) = stack.pop() { + match top { + Seen(top) => { + // Every of its children should be processed + // already. + + if !node_ends.get(top).unwrap().is_empty() { continue; } - if let Some(pos) = last_child_end { - pos - } else { - stack.extend(self.forest.children_of(last_child)?); + let last_index = self.forest.degree(top).unwrap() - 1; + let last_child = self.forest.nth_child(top, last_index)?; + + *node_ends.get_mut(top).unwrap() = node_ends + .get(last_child) + .ok_or(Error::IndexOutOfBounds(last_child, forest_nodes_len))? + .clone(); + } + Unseen(top) => { + if seen_nodes.contains(&top) { continue; } - } else { - top_label.label().start() + 1 - }; + seen_nodes.insert(top); - top = self.forest.splone(top, Some(pos), last_index, true)?; - top_label = self - .forest - .vertex_label(top)? - .ok_or(Error::NodeNoLabel(top))?; - } else if top_label.is_packed() - || top_label.label().label().rule().is_some() && top_label.label().end().is_none() - { - stack.extend(self.forest.children_of(top)?); - } + // NOTE: After the above check we can safely unwrap. - if top_label.clone_index().is_some() { - let mut parents = self.forest.parents_of(top)?; - if parents.len() != 1 { - dbg!(top, top_label, parents.len()); - self.forest.print_viz("dbg forest.gv").unwrap(); - panic!("0 parent?"); - } + let mut top_no_children = true; - top = parents.next().unwrap().node(); - } + for child in self.forest.children_of(top).unwrap() { + if top_no_children { + stack.push(Seen(top)); + } + + top_no_children = false; + + stack.push(Unseen(child)); + } - stack.extend(self.forest.parents_of(top)?.map(|parent| parent.node())); + if !top_no_children { + continue; + } + } + } } Ok(()) @@ -1120,6 +1052,11 @@ mod test_chain { if !no_item { chain.end_of_input()?; + chain.forest.print_viz("forest.gv")?; + + chain.graph.print_viz("chain.gv")?; + chain.atom.print_nfa("nfa.gv")?; + item::default::print_labels(&chain.atom, &chain.forest)?; } for label in chain.labels_of(chain.current())?.map(|(label, _)| label) { @@ -1143,6 +1080,10 @@ mod test_chain { #[test] fn test_ambiguity() -> Result<(), Box<dyn std::error::Error>> { + if std::fs::metadata("debug.log").is_ok() { + std::fs::remove_file("debug.log")?; + } + let mut no_item = false; for arg in std::env::args() { @@ -1158,21 +1099,37 @@ mod test_chain { let mut chain = DefaultChain::unit(atom)?; chain.chain(0, 0, no_item)?; - chain.forest.print_viz("forest0.gv")?; + + if !no_item { + chain.forest.print_viz("forest0.gv")?; + } + chain.chain(2, 1, no_item)?; - chain.forest.print_viz("forest1.gv")?; + + if !no_item { + chain.forest.print_viz("forest1.gv")?; + } + chain.chain(2, 2, no_item)?; - chain.forest.print_viz("forest2.gv")?; + + if !no_item { + chain.forest.print_viz("forest2.gv")?; + } + chain.chain(2, 3, no_item)?; - chain.forest.print_viz("forest3.gv")?; + + if !no_item { + chain.forest.print_viz("forest3.gv")?; + } + chain.chain(1, 4, no_item)?; - chain.forest.print_viz("forest4.gv")?; if !no_item { - chain.end_of_input()?; + // chain.end_of_input()?; chain.forest.print_viz("forest.gv")?; - chain.forest.print_closed_viz("closed.gv")?; + // chain.forest.print_closed_viz("closed.gv")?; } + chain.graph.print_viz("chain.gv")?; chain.atom.print_nfa("nfa.gv")?; item::default::print_labels(&chain.atom, &chain.forest)?; |