diff options
Diffstat (limited to 'chain')
-rw-r--r-- | chain/src/archive.txt | 495 | ||||
-rw-r--r-- | chain/src/atom/default.rs | 7 | ||||
-rw-r--r-- | chain/src/atom/mod.rs | 3 | ||||
-rw-r--r-- | chain/src/default.rs | 353 | ||||
-rw-r--r-- | chain/src/item/default/mod.rs | 257 | ||||
-rw-r--r-- | chain/src/item/default/splone.rs | 7 | ||||
-rw-r--r-- | chain/src/item/genins.rs | 288 | ||||
-rw-r--r-- | chain/src/item/mod.rs | 25 | ||||
-rw-r--r-- | chain/src/item/reduction.rs | 428 | ||||
-rw-r--r-- | chain/src/item/thoughts-on-reducer.org | 121 | ||||
-rw-r--r-- | chain/src/reducer.rs | 191 |
11 files changed, 1561 insertions, 614 deletions
diff --git a/chain/src/archive.txt b/chain/src/archive.txt new file mode 100644 index 0000000..030ccba --- /dev/null +++ b/chain/src/archive.txt @@ -0,0 +1,495 @@ +// Temporary archive + +// let mut seen_nodes: HashSet<usize> = HashSet::with_capacity(stack.len()); +// +// // dbg!(&stack); +// +// // self.forest.print_viz("dbg forest.gv").unwrap(); +// +// while let Some(mut top) = stack.pop() { +// if seen_nodes.contains(&top) { +// continue; +// } +// +// seen_nodes.insert(top); +// +// let mut top_label = self +// .forest +// .vertex_label(top)? +// .ok_or(Error::NodeNoLabel(top))?; +// +// if !top_label.is_packed() +// && matches!(top_label.label().label().tnt(), Some(TNT::Non(_))) +// && top_label.label().end().is_none() +// { +// let degree = self.forest.degree(top)?; +// let last_index = if degree > 0 { degree - 1 } else { 0 }; +// +// let pos = if degree > 0 { +// let last_child = self.forest.nth_child(top, last_index)?; +// let last_child_label = self +// .forest +// .vertex_label(last_child)? +// .ok_or(Error::NodeNoLabel(last_child))? +// .label(); +// let last_child_end = last_child_label.end(); +// +// if !matches!(last_child_label.label().rule(), +// Some(rule) if +// self +// .atom +// .is_accepting(2*rule+1) +// .map_err(index_out_of_bounds_conversion)?) +// { +// continue; +// } +// +// if let Some(pos) = last_child_end { +// pos +// } else { +// stack.extend(self.forest.children_of(last_child)?); +// continue; +// } +// } else { +// top_label.label().start() + 1 +// }; +// +// top = self.forest.splone(top, Some(pos), last_index, true)?; +// top_label = self +// .forest +// .vertex_label(top)? 
+// .ok_or(Error::NodeNoLabel(top))?; +// } else if top_label.is_packed() +// || top_label.label().label().rule().is_some() && top_label.label().end().is_none() +// { +// stack.extend(self.forest.children_of(top)?); +// } +// +// if top_label.clone_index().is_some() { +// let mut parents = self.forest.parents_of(top)?; +// if parents.len() != 1 { +// dbg!(top, top_label, parents.len()); +// self.forest.print_viz("dbg forest.gv").unwrap(); +// panic!("0 parent?"); +// } +// +// top = parents.next().unwrap().node(); +// } +// +// stack.extend(self.forest.parents_of(top)?.map(|parent| parent.node())); +// } + + +// extra reduction archive function + +// /// Perform extra reductions. +// /// +// /// To be precise, this function first splones the bottom node, +// /// and then queries the reducer to find the next nodes to splone, +// /// and repeats this process until either we find the top node, or +// /// the reducer says to stop. +// /// +// /// One nuance to note is that after we splone a node, if we split +// /// that node, then the splitted node will have a cloned parent, +// /// see +// /// [`split_node`][DefaultForest::<ForestLabel<GrammarLabel>>::split_node] +// /// for details. So for each parent pointed out by the reducer, +// /// we need to find its clone that has the splitted node as a +// /// child. +// #[allow(dead_code)] +// fn extra_reductions( +// &mut self, +// botop: BoTop, +// pos: usize, +// reducer: &Reducer, +// atom: &DefaultAtom, +// ) -> Result<Vec<usize>, Error> { +// if botop.bottom() == botop.top() { +// return Ok(vec![self.node_from_pavi(botop.top())?]); +// } +// +// let top = botop.top(); +// let bottom = botop.bottom(); +// +// let bottom_closed = self.close_pavi(atom.borrow(), bottom, pos)?; +// +// let top_node = self.node_from_pavi(top)?; +// let bottom_node = self.node_from_pavi(bottom)?; +// +// let mut stack = vec![(bottom_closed, bottom_node)]; +// +// // Exclude duplicate nodes to ensure that no infinite +// // recursion occurs. 
In the future I shall experiment if this +// // is necessary, and get rid of this if possible. +// // let mut seen_nodes: HashSet<usize> = Default::default(); +// +// let mut result = Vec::new(); +// +// // NOTE: We must query the reducer by the original nodes, +// // otherwise the reducer has no chance to know about the new +// // nodes that were created by a previous iteration here. At +// // the same time, we must splone a clone of the queried result +// // which is the parent of the previously sploned node, so that +// // we are on the right track. +// // +// // A summary is that we record both the original node and the +// // sploned node in the stack, so that we can query by the +// // original node, and then find the correct clone of the +// // queried result by means of the sploned node. +// +// let mut seen_nodes = HashSet::new(); +// +// while let Some((bottom_sploned, bottom_original)) = stack.pop() { +// if seen_nodes.contains(&bottom_original) { +// continue; +// } +// +// seen_nodes.insert(bottom_original); +// +// if pos == 4 { +// println!("stack popped: {bottom_sploned}, {bottom_original}"); +// } +// +// let query_result = reducer.query(bottom_original, top_node); +// +// let mut sploned = bottom_sploned; +// +// let mut label_set: HashSet<GrammarLabel> = HashSet::with_capacity(query_result.len()); +// +// if query_result.len() != 0 { +// if pos == 4 { +// println!("reducer:"); +// } +// +// for node in query_result { +// if pos == 4 { +// println!("{node}"); +// } +// +// label_set.insert( +// self.vertex_label(node)? +// .ok_or(Error::NodeNoLabel(node))? +// .label(), +// ); +// } +// +// let sploned_label = self +// .vertex_label(sploned)? 
+// .ok_or(Error::NodeNoLabel(sploned))?; +// +// if sploned_label.clone_index().is_some() { +// let mut parents = self.parents_of(sploned)?; +// +// #[cfg(debug_assertions)] +// if parents.len() != 1 { +// panic!("assumption fails"); +// } +// +// sploned = parents.next().unwrap().node(); +// } +// +// // NOTE: We cannot borrow self mutably and immutably +// // at the same time, so we have to collect the nodes +// // first. +// let mut to_splone = Vec::with_capacity(label_set.len()); +// +// for rule_parent in self.parents_of(sploned)? { +// if self.degree(rule_parent.node())? != 1 { +// panic!("assumption fails"); +// } +// +// for parent in self.parents_of(rule_parent.node())? { +// let parent_node = parent.node(); +// +// let parent_label = self +// .vertex_label(parent_node)? +// .ok_or(Error::NodeNoLabel(parent_node))? +// .label(); +// +// if label_set.contains(&parent_label) { +// to_splone.push(parent_node); +// } +// } +// } +// +// for parent_node in to_splone { +// let degree = self.degree(parent_node)?; +// let last_index = std::cmp::max(degree, 1) - 1; +// let parent_sploned = self.splone(parent_node, Some(pos), last_index, false)?; +// +// stack.push((parent_sploned, parent_node)); +// } +// } else { +// result.push(bottom_sploned); +// } +// } +// +// Ok(result) +// } + +// BoTop is now unused + +// / TODO: Add a hashmap mapping tuples of forest positions (top, +// / bottom) to vectors of tuples of unsigned integers. Each tuple +// / represents a non-terminal and a rule. The tuple means that we need +// / to close the expansion of the non-terminal by the rule position, if +// / we want to move from bottom to top. +// / +// / After we move up one layer, we are now in a new forest position, so +// / we just query again with the new bottom and top. This means we do +// / not need to clear the map. +// +// / REVIEW: Why is the above needed? +// +// // This represents a tuple of bottom and top forest positions. 
+// // +// // It is supposed to be used as keys to query the reduction +// // information. See the type [`Reducer`] for more details. +// [derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Default)] +// ub(crate) struct BoTop { +// bottom: PaVi, +// top: PaVi, +// +// +// mpl BoTop { +// #[allow(dead_code)] +// pub(crate) fn new(bottom: PaVi, top: PaVi) -> Self { +// Self { bottom, top } +// } +// +// pub(crate) fn top(&self) -> PaVi { +// self.top +// } +// +// pub(crate) fn bottom(&self) -> PaVi { +// self.bottom +// } +// + + +// FIXME: Fix this outdated documentation. +// This hashmap records information for performing extra reductions +// when we insert items. + +// When we want to *jump* from the bottom to the top positions in the +// forest, we need to perform a set of reductions, in order to jump +// one layer upwards. After one step, we query again for the next +// step of reductions to perform. + +// # Reduction format + +// The value records a set of tuples of the form `(non-terminal, +// starting_position)`. Such a tuple means we want to reduce from +// the expansion of the given `non-terminal`, which expands from the +// given starting position. We need to restrict the +// pub(crate) type Reducer = Map<usize, HashSet<usize>>; + +// /// Check if every child already has an end. +// fn every_child_is_completed(&self, node_id: usize, atom: &DefaultAtom) -> Result<bool, Error> { +// let children = self.children_of(node_id)?; + +// if children.len() == 0 { +// return Ok(true); +// } + +// let mut pos = self +// .vertex_label(node_id)? +// .ok_or(Error::NodeNoLabel(node_id))? +// .label() +// .start(); + +// let mut last_child_label = None; + +// for child in children { +// let child_label = self +// .vertex_label(child)? +// .ok_or(Error::NodeNoLabel(child))? 
+// .label(); + +// last_child_label = Some(child_label); + +// if child_label.start() != pos || child_label.end().is_none() { +// return Ok(false); +// } + +// pos = child_label.end().unwrap(); +// } + +// if let Some(label) = last_child_label { +// if let Some(rule) = label.label().rule() { +// if !atom +// .is_accepting(2 * rule + 1) +// .map_err(|_| Error::IndexOutOfBounds(2 * rule + 1, atom.nodes_len()))? +// { +// return Ok(false); +// } +// } +// } + +// Ok(true) +// } + +// /// Complete the forest by trying to set the ending position of +// /// every node that does not have an end, by the ending position +// /// of its last child. +// pub fn complete(&mut self, atom: &DefaultAtom) -> Result<(), Error> { +// let mut stack: Vec<_> = self +// .nodes() +// .filter(|node| { +// let label = self.vertex_label(*node).unwrap().unwrap().label(); + +// label.label().rule().is_some() && label.end().is_some() +// }) +// .collect(); + +// let mut second_stack: Vec<usize> = Vec::new(); + +// let mut pending_candidates: Vec<usize> = Vec::new(); + +// let nodes_len = self.nodes_len(); + +// let mut seen_nodes: HashSet<usize> = HashSet::with_capacity(nodes_len); + +// while !stack.is_empty() { +// while let Some(mut top) = stack.pop() { +// if seen_nodes.contains(&top) { +// continue; +// } + +// seen_nodes.insert(top); + +// let top_label = self.vertex_label(top)?.unwrap(); + +// if top_label.clone_index().is_some() { +// let mut top_parents = self.parents_of(top)?; + +// if top_parents.len() != 1 { +// return Err(Error::InvalidClone(top)); +// } + +// top = top_parents.next().unwrap().node(); +// } + +// let rule_parents = self.parents_of(top)?; + +// let candidates = { +// let mut result = Vec::with_capacity(rule_parents.len()); + +// for parent in rule_parents { +// // for parent in self.parents_of(parent.node())? { +// // if self.degree(parent.node())? 
<= parent.edge() + 1 { +// result.push(parent); +// // } +// // } +// } + +// result +// }; + +// for candidate in candidates { +// if matches!(self.vertex_label(candidate.node())?, Some(label) if label.label().end().is_none()) +// { +// if self.every_child_is_completed(candidate.node(), atom)? { +// let last_child = self +// .nth_child(candidate.node(), self.degree(candidate.node())? - 1)?; +// let end = self +// .vertex_label(last_child)? +// .ok_or(Error::NodeNoLabel(last_child))? +// .label() +// .end(); + +// let sploned_node = self.splone( +// candidate.node(), +// end, +// self.degree(candidate.node())? - 1, +// true, +// )?; + +// second_stack.push(sploned_node); +// } else { +// pending_candidates.push(candidate.node()); +// } +// } else { +// second_stack.push(candidate.node()); +// } +// } + +// let mut new_pending = Vec::with_capacity(pending_candidates.len()); + +// while let Some(node) = pending_candidates.pop() { +// if self.every_child_is_completed(node, atom)? { +// let last_edge = self.degree(node)? - 1; +// let last_child = self.nth_child(node, last_edge)?; +// let end = self +// .vertex_label(last_child)? +// .ok_or(Error::NodeNoLabel(last_child))? +// .label() +// .end(); + +// let sploned_node = self.splone(node, end, last_edge, true)?; + +// second_stack.push(sploned_node); +// } else { +// new_pending.push(node); +// } +// } + +// std::mem::swap(&mut pending_candidates, &mut new_pending); +// } + +// std::mem::swap(&mut stack, &mut second_stack); +// } + +// Ok(()) +// } + +// /// Unconditionally set the label of the node to be the new label. +// /// +// /// # Warning +// /// +// /// Use with caution: it does not do anything special, so it is +// /// the responsibility of the caller to ensure this operation is +// /// legal. +// #[allow(dead_code)] +// pub(crate) fn set_label(&mut self, node: usize, label: GrammarLabel) -> Result<(), Error> { +// let status = self +// .vertex_label(node)? +// .ok_or(Error::NodeNoLabel(node))? 
+// .status; + +// let label = ForestLabel::new(label, status); + +// let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + +// builder.set_label(node, label)?; + +// Ok(()) +// } + +// /// Extract the node from `pavi`. +// /// +// /// If pavi is a parent, this returns the child pointed to by the +// /// represented edge. +// /// +// /// If pavi is a virtual node, this simply returns the virtual +// /// node. +// /// +// /// If pavi is empty, this returns the root, unless the forest is +// /// empty. +// /// +// /// # Guarantee +// /// +// /// The returned node is guaranteed to be a valid node. +// pub(crate) fn node_from_pavi(&self, pavi: PaVi) -> Result<usize, Error> { +// match pavi { +// PaVi::Parent(_node, _edge, child) => Ok(child), +// PaVi::Virtual(_, _, node) => { +// if node >= self.nodes_len() { +// return Err(Error::IndexOutOfBounds(node, self.nodes_len())); +// } +// +// Ok(node) +// } +// PaVi::Empty => Ok(self.root().ok_or(Error::IndexOutOfBounds(0, 0))?), +// } +// } diff --git a/chain/src/atom/default.rs b/chain/src/atom/default.rs index 6883907..317090e 100644 --- a/chain/src/atom/default.rs +++ b/chain/src/atom/default.rs @@ -507,6 +507,9 @@ impl DefaultAtom { DError::InvalidClone(_) => { unreachable!("we are not cloning") } + DError::CannotClose(_, _, _, _) => { + unreachable!("we are not closing virtual nodes") + } DError::Invalid => { panic!("a label is wrongly planted?") } @@ -713,4 +716,8 @@ impl Atom for DefaultAtom { .get(&trace) .map(|set| set.iter().copied()) } + + fn accepting_len(&self) -> usize { + self.accepting_vec.len() + } } diff --git a/chain/src/atom/mod.rs b/chain/src/atom/mod.rs index c9dadb2..4b3bfce 100644 --- a/chain/src/atom/mod.rs +++ b/chain/src/atom/mod.rs @@ -32,6 +32,9 @@ pub trait Atom: Nfa<LabelType<TNT>> + Deref<Target = Grammar> { /// Tell whether a node is accepting. fn is_accepting(&self, node_id: usize) -> Result<bool, GrammarError>; + + /// Return the bound for `is_accepting`. 
+ fn accepting_len(&self) -> usize; } pub mod default; diff --git a/chain/src/default.rs b/chain/src/default.rs index e61df41..ed63ff4 100644 --- a/chain/src/default.rs +++ b/chain/src/default.rs @@ -38,6 +38,10 @@ pub enum Error { CannotReserve(TryReserveError), /// Cannot create label for cloning nodes. CannotClone(ForestLabelError), + /// Cannot close the virtual node. + /// + /// The format is (nt, t, node, pos). + CannotClose(usize, usize, usize, usize), /// Cannot find a suitable node to plant the new forest fragment. CannotPlant, /// Cannot split a packed node. @@ -60,6 +64,12 @@ impl Display for Error { Self::NodeNoLabel(n) => write!(f, "node {n} has no labels while it should have one"), Self::CannotReserve(e) => write!(f, "cannot reserve memory: {e}"), Self::CannotClone(fe) => write!(f, "cannot clone due to {fe}"), + Self::CannotClose(nt, t, node, pos) => write!( + f, + "cannot close the virtual node {node} \ + with the expansion from nt {nt} by t \ + {t} at pos {pos}" + ), Self::CannotPlant => write!(f, "cannot plant a new forest fragment"), Self::SplitPack(n) => write!(f, "cannot split the packed node {n}"), Self::InvalidClone(n) => { @@ -196,58 +206,6 @@ impl Iterator for DerIter { } } -// TODO: Add a hashmap mapping tuples of forest positions (top, -// bottom) to vectors of tuples of unsigned integers. Each tuple -// represents a non-terminal and a rule. The tuple means that we need -// to close the expansion of the non-terminal by the rule position, if -// we want to move from bottom to top. -// -// After we move up one layer, we are now in a new forest position, so -// we just query again with the new bottom and top. This means we do -// not need to clear the map. - -// REVIEW: Why is the above needed? - -/// This represents a tuple of bottom and top forest positions. -/// -/// It is supposed to be used as keys to query the reduction -/// information. See the type [`Reducer`] for more details. 
-#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Default)] -pub(crate) struct BoTop { - bottom: PaVi, - top: PaVi, -} - -impl BoTop { - pub(crate) fn new(bottom: PaVi, top: PaVi) -> Self { - Self { bottom, top } - } - - pub(crate) fn top(&self) -> PaVi { - self.top - } - - pub(crate) fn bottom(&self) -> PaVi { - self.bottom - } -} - -/// This hashmap records information for performing extra reductions -/// when we insert items. -/// -/// When we want to *jump* from the bottom to the top positions in the -/// forest, we need to perform a set of reductions, in order to jump -/// one layer upwards. After one step, we query again for the next -/// step of reductions to perform. -/// -/// # Reduction format -/// -/// The value records a set of tuples of the form `(non-terminal, -/// starting_position)`. Such a tuple means we want to reduce from -/// the expansion of the given `non-terminal`, which expands from the -/// given starting position. We need to restrict the -pub(crate) type Reducer = Map<(usize, usize), HashSet<usize>>; - /// A default implementation for the [`Chain`] trait. #[derive(Debug, Clone, Default, Graph)] pub struct DefaultChain { @@ -259,7 +217,6 @@ pub struct DefaultChain { forest: DefaultForest<ForestLabel<GrammarLabel>>, accepting_vec: Vec<bool>, accepting_sources: Vec<PaVi>, - reducer: Reducer, } impl DefaultChain { @@ -431,8 +388,6 @@ impl Chain for DefaultChain { let accepting_sources = Vec::new(); - let reducer = Default::default(); - Ok(Self { graph, atom, @@ -441,7 +396,6 @@ impl Chain for DefaultChain { forest, accepting_vec, accepting_sources, - reducer, }) } @@ -486,19 +440,6 @@ impl Chain for DefaultChain { type DerIter = DerIter; - // EXPERIMENT: Try the approach of keeping an additional vector of - // vectors of unsigned integers. Each element of the vector - // corresponds to an edge of the current node. Each element is a - // vector. 
This vector represents the list of reductions that are - // implied due to skipping edges without children. - // - // Then when we insert an item, we can use this list to perform - // additional reductions. This can avoid the ugly and inefficient - // position_search method currently adopted. - // - // Of course we can use an optional vector to prevent allocating - // too much memory for edges whose corresponding vector is empty. - fn derive( &mut self, t: usize, @@ -536,12 +477,11 @@ impl Chain for DefaultChain { input: GeneratorInput<'_>, mut child_iter: impl Iterator<Item = usize> + ExactSizeIterator + Clone, atom_child_iter: impl Iterator<Item = usize> + Clone, - reducer: &mut Reducer, mut output: impl AsMut<Vec<(Roi, usize)>>, ) -> Result<bool, Error> { let mut accepting = false; - let (graph, atom, forest, accepting_vec, pavi, true_pavi) = input; + let (graph, atom, _forest, accepting_vec, pavi, true_pavi) = input; // First check the values from iterators are all valid. let graph_len = graph.nodes_len(); @@ -606,19 +546,8 @@ impl Chain for DefaultChain { for child in child_iter.clone() { for (child_label, child_child) in graph.labels_of(child).unwrap() { let mut new_label = *child_label; - new_label.set_true_source(true_pavi); - - let botop = BoTop::new(true_pavi, new_label.forest_source()); - let key = ( - forest.node_from_pavi(botop.top())?, - forest.node_from_pavi(botop.bottom)?, - ); - - reducer - .entry(key) - .or_insert_with(Default::default) - .insert(forest.node_from_pavi(new_label.true_source())?); + new_label.set_true_source(true_pavi); output.extend( std::iter::repeat(new_label.into()) @@ -658,8 +587,8 @@ impl Chain for DefaultChain { generate_fragment([atom_moved.into(), Ter(ter).into()], pos)?; self.forest.insert_item( - &self.reducer, *label, + t, fragment, atom_child_iter.clone(), &self.atom, @@ -681,7 +610,6 @@ impl Chain for DefaultChain { generator_input, child_iter.clone(), atom_child_iter.clone(), - &mut self.reducer, &mut 
der_iter.singles, )?; @@ -717,8 +645,8 @@ impl Chain for DefaultChain { generate_fragment([atom_moved.into(), Non(non).into()], pos)?; first_segment_pavi = self.forest.insert_item( - &self.reducer, *label, + t, first_fragment, atom_child_iter.clone(), &self.atom, @@ -732,8 +660,8 @@ impl Chain for DefaultChain { // iterator, in which case the passed // label is only used for the PaVi. virtual_pavi = self.forest.insert_item( - &self.reducer, Edge::new(0, first_segment_pavi, accepting), + t, virtual_fragment, std::iter::empty(), &self.atom, @@ -758,7 +686,6 @@ impl Chain for DefaultChain { generator_input, child_iter.clone(), atom_child_iter.clone(), - &mut self.reducer, &mut new_edges, )?; @@ -773,27 +700,8 @@ impl Chain for DefaultChain { match roi { Roi::Re(edge) => { let mut edge = *edge; - edge.set_true_source(virtual_pavi); - - // FIXME: Modify reducer after first experiments. - - // let botop = BoTop::new(virtual_pavi, edge.forest_source()); - // let rule_pos = atom_child.div_euclid(2); - - // let rule_nt = atom - // .get_rule_num(rule_pos) - // .map_err(index_out_of_bounds_conversion)?; - - // let rule_pos = rule_pos - // - atom - // .nth_accumulator(rule_nt) - // .map_err(index_out_of_bounds_conversion)?; - - // reducer - // .entry(botop) - // .or_insert_with(Default::default) - // .insert((rule_nt, rule_pos)); + edge.set_true_source(virtual_pavi); der_iter.singles.push((Roi::Re(edge), *target)); } @@ -831,23 +739,32 @@ impl Chain for DefaultChain { // this has been checked in // `generate_edges` if self.atom.is_empty_node(atom_child).unwrap() { - // REVIEW: flat flat map, - // hmm... - - // FIXME: Set true_source as well. - - // FIXME: Modify two layers of reducer. 
- der_iter.singles.extend(child_iter.clone().flat_map( - |child| { - self.graph.labels_of(child).unwrap().flat_map( - |(child_label, child_child_iter)| { - child_child_iter.map(|child_child| { - ((*child_label).into(), child_child) - }) - }, - ) - }, - )); + let mut extra_len = 0usize; + + for child in child_iter.clone() { + extra_len += self + .graph + .labels_of(child) + .unwrap() + .map(|(_, child_child_iter)| child_child_iter.len()) + .sum::<usize>(); + } + + der_iter.singles.try_reserve(extra_len)?; + + for child in child_iter.clone() { + for (child_label, child_child_iter) in + self.graph.labels_of(child).unwrap() + { + let mut child_label = *child_label; + + child_label.set_true_source(virtual_pavi); + + der_iter.singles.extend(child_child_iter.map( + |child_child| (child_label.into(), child_child), + )); + } + } } } } @@ -866,8 +783,21 @@ impl Chain for DefaultChain { Ok(der_iter) } - // FIXME: Refactor this. + // FIXME: Do nothing more than to make this at the end of input, + // actually. + // + // This means we only close the expansions at the last accepting + // sources. + // + // As to the remaining still open fragments, they should be + // considered as bugs and should be fixed in the function + // `insert_item` instead. + fn end_of_input(&mut self) -> Result<(), Self::Error> { + if !matches!(self.epsilon(), Ok(true)) { + return Ok(()); + } + self.forest.remove_node(1)?; let mut stack = Vec::new(); @@ -878,7 +808,7 @@ impl Chain for DefaultChain { for pavi in self.accepting_sources.iter() { match pavi { - PaVi::Parent(node, edge) => { + PaVi::Parent(node, _edge, child) => { // REVIEW: This is a garbage node that has to be // added when the machine starts. Is there a way // to avoid this garbage? 
@@ -886,9 +816,7 @@ impl Chain for DefaultChain { continue; } - let nth_child = self.forest.nth_child(*node, *edge)?; - - stack.push(nth_child); + stack.push(*child); } PaVi::Virtual(nt, t, node) => { let node_label = self @@ -911,83 +839,87 @@ impl Chain for DefaultChain { } } - let mut seen_nodes: HashSet<usize> = HashSet::with_capacity(stack.len()); - - // dbg!(&stack); + let forest_nodes_len = self.forest.nodes_len(); - // self.forest.print_viz("dbg forest.gv").unwrap(); + let mut node_ends: Vec<Vec<usize>> = std::iter::repeat_with(Default::default) + .take(forest_nodes_len) + .collect(); - while let Some(mut top) = stack.pop() { - if seen_nodes.contains(&top) { - continue; + // NOTE: Use iterator to avoid checking bounds when accessing + // `node_ends`. + for (node, node_end) in self.forest.nodes().zip(node_ends.iter_mut()) { + if let Some(end) = self + .forest + .vertex_label(node)? + .ok_or(Error::NodeNoLabel(node))? + .label() + .end() + { + node_end.push(end); } + } - seen_nodes.insert(top); + #[derive(Copy, Clone)] + enum StackElement { + Seen(usize), + Unseen(usize), + } - let mut top_label = self - .forest - .vertex_label(top)? - .ok_or(Error::NodeNoLabel(top))?; + use StackElement::{Seen, Unseen}; - if !top_label.is_packed() - && matches!(top_label.label().label().tnt(), Some(TNT::Non(_))) - && top_label.label().end().is_none() - { - let degree = self.forest.degree(top)?; - let last_index = if degree > 0 { degree - 1 } else { 0 }; + // maybe we do not need so much space? + let mut stack = Vec::with_capacity(forest_nodes_len); - let pos = if degree > 0 { - let last_child = self.forest.nth_child(top, last_index)?; - let last_child_label = self - .forest - .vertex_label(last_child)? - .ok_or(Error::NodeNoLabel(last_child))? - .label(); - let last_child_end = last_child_label.end(); - - if !matches!(last_child_label.label().rule(), - Some(rule) if - self - .atom - .is_accepting(2*rule+1) - .map_err(index_out_of_bounds_conversion)?) 
- { + stack.push(Unseen( + self.forest.root().ok_or(Error::IndexOutOfBounds(0, 0))?, + )); + + let mut seen_nodes = HashSet::with_capacity(forest_nodes_len); + + // depth-first search + while let Some(top) = stack.pop() { + match top { + Seen(top) => { + // Every of its children should be processed + // already. + + if !node_ends.get(top).unwrap().is_empty() { continue; } - if let Some(pos) = last_child_end { - pos - } else { - stack.extend(self.forest.children_of(last_child)?); + let last_index = self.forest.degree(top).unwrap() - 1; + let last_child = self.forest.nth_child(top, last_index)?; + + *node_ends.get_mut(top).unwrap() = node_ends + .get(last_child) + .ok_or(Error::IndexOutOfBounds(last_child, forest_nodes_len))? + .clone(); + } + Unseen(top) => { + if seen_nodes.contains(&top) { continue; } - } else { - top_label.label().start() + 1 - }; + seen_nodes.insert(top); - top = self.forest.splone(top, Some(pos), last_index, true)?; - top_label = self - .forest - .vertex_label(top)? - .ok_or(Error::NodeNoLabel(top))?; - } else if top_label.is_packed() - || top_label.label().label().rule().is_some() && top_label.label().end().is_none() - { - stack.extend(self.forest.children_of(top)?); - } + // NOTE: After the above check we can safely unwrap. 
- if top_label.clone_index().is_some() { - let mut parents = self.forest.parents_of(top)?; - if parents.len() != 1 { - dbg!(top, top_label, parents.len()); - self.forest.print_viz("dbg forest.gv").unwrap(); - panic!("0 parent?"); - } + let mut top_no_children = true; - top = parents.next().unwrap().node(); - } + for child in self.forest.children_of(top).unwrap() { + if top_no_children { + stack.push(Seen(top)); + } + + top_no_children = false; + + stack.push(Unseen(child)); + } - stack.extend(self.forest.parents_of(top)?.map(|parent| parent.node())); + if !top_no_children { + continue; + } + } + } } Ok(()) @@ -1120,6 +1052,11 @@ mod test_chain { if !no_item { chain.end_of_input()?; + chain.forest.print_viz("forest.gv")?; + + chain.graph.print_viz("chain.gv")?; + chain.atom.print_nfa("nfa.gv")?; + item::default::print_labels(&chain.atom, &chain.forest)?; } for label in chain.labels_of(chain.current())?.map(|(label, _)| label) { @@ -1143,6 +1080,10 @@ mod test_chain { #[test] fn test_ambiguity() -> Result<(), Box<dyn std::error::Error>> { + if std::fs::metadata("debug.log").is_ok() { + std::fs::remove_file("debug.log")?; + } + let mut no_item = false; for arg in std::env::args() { @@ -1158,21 +1099,37 @@ mod test_chain { let mut chain = DefaultChain::unit(atom)?; chain.chain(0, 0, no_item)?; - chain.forest.print_viz("forest0.gv")?; + + if !no_item { + chain.forest.print_viz("forest0.gv")?; + } + chain.chain(2, 1, no_item)?; - chain.forest.print_viz("forest1.gv")?; + + if !no_item { + chain.forest.print_viz("forest1.gv")?; + } + chain.chain(2, 2, no_item)?; - chain.forest.print_viz("forest2.gv")?; + + if !no_item { + chain.forest.print_viz("forest2.gv")?; + } + chain.chain(2, 3, no_item)?; - chain.forest.print_viz("forest3.gv")?; + + if !no_item { + chain.forest.print_viz("forest3.gv")?; + } + chain.chain(1, 4, no_item)?; - chain.forest.print_viz("forest4.gv")?; if !no_item { - chain.end_of_input()?; + // chain.end_of_input()?; chain.forest.print_viz("forest.gv")?; - 
chain.forest.print_closed_viz("closed.gv")?; + // chain.forest.print_closed_viz("closed.gv")?; } + chain.graph.print_viz("chain.gv")?; chain.atom.print_nfa("nfa.gv")?; item::default::print_labels(&chain.atom, &chain.forest)?; diff --git a/chain/src/item/default/mod.rs b/chain/src/item/default/mod.rs index 6d28956..47e8c90 100644 --- a/chain/src/item/default/mod.rs +++ b/chain/src/item/default/mod.rs @@ -250,7 +250,12 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> { frag_stack.pop(); self_stack.pop(); - if fragment.vertex_label(frag_top)? != self.vertex_label(self_top)? { + // NOTE: The labels might defer by the status. We test + // for the equalities of inner labels only. + + if fragment.vertex_label(frag_top)?.map(|label| label.label()) + != self.vertex_label(self_top)?.map(|label| label.label()) + { // not a prefix // dbg!( // frag_top, @@ -589,11 +594,15 @@ impl<T: GraphLabel> DefaultForest<ForestLabel<T>> { return Ok(node); } + fragment.set_root(1)?; + let node_label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?; if node_label.is_packed() || node_label.clone_index().is_some() { let mut planted = false; + let mut to_plant: Option<usize> = None; + let mut node = node; if node_label.clone_index().is_some() { @@ -607,17 +616,25 @@ impl<T: GraphLabel> DefaultForest<ForestLabel<T>> { for clone in self.children_of(node)? { node = clone; - if self.is_prefix(clone, &fragment)? { - planted = true; + if !self.is_empty_node(clone)? { + let first_child = self.nth_child(clone, 0)?; + + if self.is_prefix(first_child, &fragment)? { + planted = true; - break; + break; + } + } else if to_plant.is_none() { + to_plant = Some(clone); } } if !planted { - let clone = self.clone_node(node, 0, false)?; - - fragment.set_root(1)?; + let clone = if let Some(cloned_node) = to_plant { + cloned_node + } else { + self.clone_node(node, 0, false)? 
+ }; self.plant(clone, fragment, false)?; @@ -626,51 +643,29 @@ impl<T: GraphLabel> DefaultForest<ForestLabel<T>> { } else { let clone_threshold = if self.root == Some(node) { 1 } else { 0 }; - let planted = self.is_prefix(node, &fragment)?; + let satisfy_threshold = self.degree(node)? > clone_threshold; - fragment.set_root(1)?; + if !satisfy_threshold { + self.plant(node, fragment, false)?; - if !planted && self.degree(node)? > clone_threshold { + return Ok(node); + } + + let first_child = self.nth_child(node, clone_threshold)?; + + let planted = self.is_prefix(first_child, &fragment)?; + + if !planted { let clone = self.clone_node(node, 0, false)?; self.plant(clone, fragment, false)?; return Ok(clone); - } else if !planted { - self.plant(node, fragment, false)?; } } Ok(node) } - - /// Extract the node from `pavi`. - /// - /// If pavi is a parent, this returns the child pointed to by the - /// represented edge. - /// - /// If pavi is a virtual node, this simply returns the virtual - /// node. - /// - /// If pavi is empty, this returns the root, unless the forest is - /// empty. - /// - /// # Guarantee - /// - /// The returned node is guaranteed to be a valid node. - pub(crate) fn node_from_pavi(&self, pavi: PaVi) -> Result<usize, Error> { - match pavi { - PaVi::Parent(node, edge) => Ok(self.nth_child(node, edge)?), - PaVi::Virtual(_, _, node) => { - if node >= self.nodes_len() { - return Err(Error::IndexOutOfBounds(node, self.nodes_len())); - } - - Ok(node) - } - PaVi::Empty => Ok(self.root().ok_or(Error::IndexOutOfBounds(0, 0))?), - } - } } pub mod splone; @@ -850,188 +845,6 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { Ok(None) } - // /// Check if every child already has an end. - // fn every_child_is_completed(&self, node_id: usize, atom: &DefaultAtom) -> Result<bool, Error> { - // let children = self.children_of(node_id)?; - - // if children.len() == 0 { - // return Ok(true); - // } - - // let mut pos = self - // .vertex_label(node_id)? 
- // .ok_or(Error::NodeNoLabel(node_id))? - // .label() - // .start(); - - // let mut last_child_label = None; - - // for child in children { - // let child_label = self - // .vertex_label(child)? - // .ok_or(Error::NodeNoLabel(child))? - // .label(); - - // last_child_label = Some(child_label); - - // if child_label.start() != pos || child_label.end().is_none() { - // return Ok(false); - // } - - // pos = child_label.end().unwrap(); - // } - - // if let Some(label) = last_child_label { - // if let Some(rule) = label.label().rule() { - // if !atom - // .is_accepting(2 * rule + 1) - // .map_err(|_| Error::IndexOutOfBounds(2 * rule + 1, atom.nodes_len()))? - // { - // return Ok(false); - // } - // } - // } - - // Ok(true) - // } - - // /// Complete the forest by trying to set the ending position of - // /// every node that does not have an end, by the ending position - // /// of its last child. - // pub fn complete(&mut self, atom: &DefaultAtom) -> Result<(), Error> { - // let mut stack: Vec<_> = self - // .nodes() - // .filter(|node| { - // let label = self.vertex_label(*node).unwrap().unwrap().label(); - - // label.label().rule().is_some() && label.end().is_some() - // }) - // .collect(); - - // let mut second_stack: Vec<usize> = Vec::new(); - - // let mut pending_candidates: Vec<usize> = Vec::new(); - - // let nodes_len = self.nodes_len(); - - // let mut seen_nodes: HashSet<usize> = HashSet::with_capacity(nodes_len); - - // while !stack.is_empty() { - // while let Some(mut top) = stack.pop() { - // if seen_nodes.contains(&top) { - // continue; - // } - - // seen_nodes.insert(top); - - // let top_label = self.vertex_label(top)?.unwrap(); - - // if top_label.clone_index().is_some() { - // let mut top_parents = self.parents_of(top)?; - - // if top_parents.len() != 1 { - // return Err(Error::InvalidClone(top)); - // } - - // top = top_parents.next().unwrap().node(); - // } - - // let rule_parents = self.parents_of(top)?; - - // let candidates = { - // let mut result = 
Vec::with_capacity(rule_parents.len()); - - // for parent in rule_parents { - // // for parent in self.parents_of(parent.node())? { - // // if self.degree(parent.node())? <= parent.edge() + 1 { - // result.push(parent); - // // } - // // } - // } - - // result - // }; - - // for candidate in candidates { - // if matches!(self.vertex_label(candidate.node())?, Some(label) if label.label().end().is_none()) - // { - // if self.every_child_is_completed(candidate.node(), atom)? { - // let last_child = self - // .nth_child(candidate.node(), self.degree(candidate.node())? - 1)?; - // let end = self - // .vertex_label(last_child)? - // .ok_or(Error::NodeNoLabel(last_child))? - // .label() - // .end(); - - // let sploned_node = self.splone( - // candidate.node(), - // end, - // self.degree(candidate.node())? - 1, - // true, - // )?; - - // second_stack.push(sploned_node); - // } else { - // pending_candidates.push(candidate.node()); - // } - // } else { - // second_stack.push(candidate.node()); - // } - // } - - // let mut new_pending = Vec::with_capacity(pending_candidates.len()); - - // while let Some(node) = pending_candidates.pop() { - // if self.every_child_is_completed(node, atom)? { - // let last_edge = self.degree(node)? - 1; - // let last_child = self.nth_child(node, last_edge)?; - // let end = self - // .vertex_label(last_child)? - // .ok_or(Error::NodeNoLabel(last_child))? - // .label() - // .end(); - - // let sploned_node = self.splone(node, end, last_edge, true)?; - - // second_stack.push(sploned_node); - // } else { - // new_pending.push(node); - // } - // } - - // std::mem::swap(&mut pending_candidates, &mut new_pending); - // } - - // std::mem::swap(&mut stack, &mut second_stack); - // } - - // Ok(()) - // } - - // /// Unconditionally set the label of the node to be the new label. 
- // /// - // /// # Warning - // /// - // /// Use with caution: it does not do anything special, so it is - // /// the responsibility of the caller to ensure this operation is - // /// legal. - // #[allow(dead_code)] - // pub(crate) fn set_label(&mut self, node: usize, label: GrammarLabel) -> Result<(), Error> { - // let status = self - // .vertex_label(node)? - // .ok_or(Error::NodeNoLabel(node))? - // .status; - - // let label = ForestLabel::new(label, status); - - // let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); - - // builder.set_label(node, label)?; - - // Ok(()) - // } - /// Set the position of every node. /// /// If ALLP is non-nil or if the node is a terminal node, also set diff --git a/chain/src/item/default/splone.rs b/chain/src/item/default/splone.rs index d77686e..581d1dc 100644 --- a/chain/src/item/default/splone.rs +++ b/chain/src/item/default/splone.rs @@ -555,7 +555,12 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { let modified_degree = std::cmp::max(existing_degree, 1) - 1; if existing_degree != 0 - && self.has_same_children(existing, node, modified_degree, edge_index)? + && self.has_same_children( + existing, + node, + modified_degree, + edge_index + 1, + )? { let last_child = self.nth_child(existing, existing_degree - 1)?; diff --git a/chain/src/item/genins.rs b/chain/src/item/genins.rs index a2bb22c..19f835b 100644 --- a/chain/src/item/genins.rs +++ b/chain/src/item/genins.rs @@ -9,16 +9,14 @@ use super::*; use crate::{ atom::{Atom, DefaultAtom}, - default::{BoTop, Error, Reducer}, + default::Error, item::default::DefaultForest, Edge, }; use grammar::{Error as GrammarError, GrammarLabel, GrammarLabelType, TNT}; use graph::Graph; -use core::borrow::Borrow; - -use std::collections::HashSet; +use std::borrow::Borrow; /// Convert an error telling us that an index is out of bounds. /// @@ -179,8 +177,8 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { /// fragment under the result splone. 
pub(crate) fn insert_item( &mut self, - reducer: &Reducer, label: Edge, + ter: usize, fragment: impl Borrow<DefaultForest<ForestLabel<GrammarLabel>>>, atom_child_iter: impl Iterator<Item = usize> + ExactSizeIterator + Clone, atom: &DefaultAtom, @@ -200,7 +198,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { let fragment_root = if let Some(root) = fragment.root() { root } else { - unreachable!("empty item"); + panic!("empty item"); }; let fragment_root_label = fragment @@ -209,7 +207,11 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { let pos = fragment_root_label.label().start(); - dbg!((pos, label)); + // dbg!((pos, label)); + + // Whether or not to print detailed graphs of each step of + // operation for debugging purposes. + let to_print = false; let tnt_string = { let empty_p = atom_child_iter.len() == 0; @@ -217,10 +219,10 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { match label.label().label() { GrammarLabelType::TNT(TNT::Ter(t)) => { - format!("t {t}") + format!("t {t}{}", if empty_p { " second" } else { "" }) } GrammarLabelType::TNT(TNT::Non(n)) => { - format!("n {n} {}", if empty_p { "second" } else { "first" }) + format!("n {n}") } _ => "error".to_string(), } @@ -230,7 +232,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { let mut repetition = 0; while std::fs::metadata(format!( - "/Users/durand/Desktop/Centre/A propos de programmes/Rust/rep/chain/output/pos {pos} {tnt_string} {repetition}.gv" + "/Users/durand/Desktop/Centre/A propos de programmes/Rust/rep/chain/output/pos {pos} - {repetition}.gv" )) .is_ok() { @@ -240,44 +242,32 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { repetition }; - self.print_viz(&format!("pos {pos} {tnt_string} {num}.gv")) - .unwrap(); - - self.extra_reductions( - BoTop::new(true_source, pavi), - pos, - reducer.borrow(), - atom.borrow(), - )?; + if to_print { + self.print_viz(&format!("pos {pos} - {num}.gv")).unwrap(); + } /// A cute little macro to produce compact representations /// of Parents, Virtual 
nodes, or empty. + #[allow(unused_macros)] macro_rules! pavi_to_short_str { ($pavi:ident) => { match $pavi { - PaVi::Parent(node, edge) => format!("{node} {edge}"), - PaVi::Virtual(nt, t, node) => format!("{nt} {t} {node}"), + PaVi::Parent(node, edge, child) => format!("p{node} {edge} {child}"), + PaVi::Virtual(nt, t, node) => format!("v{nt} {t} {node}"), PaVi::Empty => "ε".to_string(), } }; } - self.print_viz(&format!( - "pos {pos} {tnt_string} {num} stage 1 {}, {}.gv", - pavi_to_short_str!(true_source), - pavi_to_short_str!(pavi) - )) - .unwrap(); - // Ensure the last node in the PaVi is a terminal or a // non-terminal node, as an extra safety guard during // development. #[cfg(debug_assertions)] { match pavi { - PaVi::Parent(node, edge) => { + PaVi::Parent(_node, _edge, child) => { assert!(matches!( - self.vertex_label(self.nth_child(node, edge)?), + self.vertex_label(child), Ok(Some(label)) if label.label().label().tnt().is_some())); } @@ -312,11 +302,23 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { let is_empty_segment = pavi.is_empty(); + if true_source.is_virtual() { + self.close_pavi(atom.borrow(), true_source, pos)?; + + if to_print { + self.print_viz(&format!( + "pos {pos} - {num} {tnt_string} stage 0.1 {}.gv", + pavi_to_short_str!(true_source) + )) + .unwrap(); + } + } + let mut parents: Vec<Parent> = { let mut result = Vec::new(); match pavi { - PaVi::Parent(node, edge) => { + PaVi::Parent(node, edge, _) => { result.push(Parent::new(node, edge)); } PaVi::Virtual(nt, t, node) => { @@ -347,10 +349,12 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { self.plant_at_start(node, frag)?; - self.print_viz(&format!( - "pos {pos} {tnt_string} {num} stage 1.5 {node}.gv" - )) - .unwrap(); + if to_print { + self.print_viz(&format!( + "pos {pos} - {num} {tnt_string} stage 0.2 {node}.gv" + )) + .unwrap(); + } let rule_label_pos = self .query_label(last_but_one_label) @@ -369,6 +373,24 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { result }; + if let 
PaVi::Parent(node, edge, _) = pavi { + let nth_child = self.nth_child(node, edge)?; + + let reduced = self.reduction(nth_child, pos, ter, atom.borrow())?; + + if reduced != nth_child && !self.is_empty_node(reduced)? { + parents.clear(); + parents.extend(self.parents_of(reduced)?); + } + + if to_print { + self.print_viz(&format!( + "pos {pos} - {num} {tnt_string} stage 0.3 {nth_child}.gv" + )) + .unwrap(); + } + } + for parent in parents.iter() { if !self.has_node(parent.node()) { return Err(Error::IndexOutOfBounds(parent.node(), self.nodes_len())); @@ -423,12 +445,14 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { let sploned_node = self.splone(node.node(), Some(pos), node.edge(), false)?; - self.print_viz(&format!( - "pos {pos} {tnt_string} {num} stage 2 {} {}.gv", - node.node(), - node.edge(), - )) - .unwrap(); + if to_print { + self.print_viz(&format!( + "pos {pos} - {num} {tnt_string} stage 1 {} {}.gv", + node.node(), + node.edge(), + )) + .unwrap(); + } node_label = self .vertex_label(sploned_node)? @@ -513,12 +537,16 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { for parent in stack { let splanted = self.splant(parent.node(), parent.edge(), fragment, non_empty)?; - self.print_viz(&format!( - "pos {pos} {tnt_string} {num} stage 3 {} {} {splanted}.gv", - parent.node(), - parent.edge(), - )) - .unwrap(); + let _splanted_child = self.nth_child(splanted, self.degree(splanted)? 
- 1)?; + + if to_print { + self.print_viz(&format!( + "pos {pos} - {num} {tnt_string} stage 2 {} {} {splanted}.gv", + parent.node(), + parent.edge(), + )) + .unwrap(); + } non_empty = true; } @@ -541,25 +569,28 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { .query_label(root_label) .expect("root label was not found"); - let edge = { - let mut result = None; + let edge: usize; + let child: usize; - for (index, child) in self.children_of(node)?.enumerate() { - if matches!(self.vertex_label(child)?, Some(child_label) if child_label == leaf_label) - { - result = Some(index); - break; - } - } + let mut result = None; - if let Some(result) = result { - result - } else { - unreachable!("the forest is wrongly planted"); + for (index, child) in self.children_of(node)?.enumerate() { + if matches!(self.vertex_label(child)?, Some(child_label) if child_label == leaf_label) + { + result = Some((index, child)); + break; } - }; + } + + if let Some((index, edge_child)) = result { + edge = index; + child = edge_child; + } else { + unreachable!("the forest is wrongly planted"); + } + // dbg!(node, edge, root_label, leaf_label); - PaVi::Parent(node, edge) + PaVi::Parent(node, edge, child) } else { assert_eq!( fragment.nodes_len(), @@ -612,131 +643,19 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { Ok(result) } - // REVIEW: Is this function really correct? - - /// Perform extra reductions. - /// - /// To be precise, this function first splones the bottom node, - /// and then queries the reducer to find the next nodes to splone, - /// and repeats this process until either we find the top node, or - /// the reducer says to stop. - /// - /// One nuance to note is that after we splone a node, if we split - /// that node, then the splitted node will have a cloned parent, - /// see - /// [`split_node`][DefaultForest::<ForestLabel<GrammarLabel>>::split_node] - /// for details. 
So for each parent pointed out by the reducer, - /// we need to find its clone that has the splitted node as a - /// child. - fn extra_reductions( - &mut self, - botop: BoTop, - pos: usize, - reducer: &Reducer, - atom: &DefaultAtom, - ) -> Result<Vec<usize>, Error> { - if botop.bottom() == botop.top() { - return Ok(vec![self.node_from_pavi(botop.top())?]); - } - - let top = botop.top(); - let bottom = botop.bottom(); - - let bottom_closed = self.close_pavi(atom.borrow(), bottom, pos)?; - - let top_node = self.node_from_pavi(top)?; - let bottom_node = self.node_from_pavi(bottom)?; - - let mut stack = vec![bottom_node]; - - // Exclude duplicate nodes to ensure that no infinite - // recursion occurs. In the future I shall experiment if this - // is necessary, and get rid of this if possible. - let mut seen_nodes: HashSet<usize> = Default::default(); - - let mut result = Vec::new(); - - while let Some(bottom) = stack.pop() { - if seen_nodes.contains(&bottom) { - continue; - } - - seen_nodes.insert(bottom); - - if let Some(set) = reducer.get(&(top_node, bottom)) { - // First splone the bottom, except for the bottom one. - - let mut sploned = if bottom == bottom_node { - bottom_closed - } else { - let degree = self.degree(bottom)?; - let last_index = core::cmp::max(degree, 1) - 1; - - self.splone(bottom, Some(pos), last_index, false)? - }; - - // Then add parents whose labels are what we want into - // the stack. - - let mut label_set: HashSet<GrammarLabel> = HashSet::with_capacity(set.len()); - - for node in set.iter().copied() { - label_set.insert( - self.vertex_label(node)? - .ok_or(Error::NodeNoLabel(node))? - .label(), - ); - } - - let sploned_label = self - .vertex_label(sploned)? 
- .ok_or(Error::NodeNoLabel(sploned))?; - - if sploned_label.clone_index().is_some() { - let mut parents = self.parents_of(sploned)?; - - #[cfg(debug_assertions)] - if parents.len() != 1 { - panic!("assumption fails"); - } - - sploned = parents.next().unwrap().node(); - } - - for rule_parent in self.parents_of(sploned)? { - if self.degree(rule_parent.node())? != 1 { - panic!("assumption fails"); - } - - for parent in self.parents_of(rule_parent.node())? { - let parent_node = parent.node(); - - let parent_label = self - .vertex_label(parent_node)? - .ok_or(Error::NodeNoLabel(parent_node))? - .label(); - - if label_set.contains(&parent_label) { - stack.push(parent_node); - } - } - } - } else { - result.push(bottom); - } - } - - Ok(result) - } - /// Set the end position of the node associated with `pavi` to be `pos`. /// /// The parameter `atom` is used to query the reduction fragment /// if `pavi` is a virtual node. - fn close_pavi(&mut self, atom: &DefaultAtom, pavi: PaVi, pos: usize) -> Result<usize, Error> { + pub(crate) fn close_pavi( + &mut self, + atom: &DefaultAtom, + pavi: PaVi, + pos: usize, + ) -> Result<usize, Error> { match pavi { - PaVi::Parent(node, edge) => { - let nth_child = self.nth_child(node, edge)?; + PaVi::Parent(_node, _edge, child) => { + let nth_child = child; let nth_child_label = self .vertex_label(nth_child)? 
.ok_or(Error::NodeNoLabel(nth_child))?; @@ -781,6 +700,11 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { let reduction_fragment = atom.generate_virtual_frags(nt, t, None); + // NOTE: the case of the root is exceptional + if reduction_fragment.is_none() && self.root() != Some(node) { + return Err(Error::CannotClose(nt, t, node, node_label_start)); + } + for frag in reduction_fragment.into_iter().flatten() { let mut frag = frag.clone(); frag.set_pos(node_label_start, true)?; diff --git a/chain/src/item/mod.rs b/chain/src/item/mod.rs index 5efa710..54ca946 100644 --- a/chain/src/item/mod.rs +++ b/chain/src/item/mod.rs @@ -32,8 +32,9 @@ use core::borrow::Borrow; /// Also this might be empty if it represents the root node. #[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Default)] pub enum PaVi { - /// An edge from a node, as the n-th child. - Parent(usize, usize), + /// An edge from a node, as the n-th child, along with the + /// nth-child node number. + Parent(usize, usize, usize), /// A virtual segment from a non-terminal by a terminal, rooted at /// a node. /// @@ -50,16 +51,12 @@ pub enum PaVi { Empty, } -impl From<Parent> for PaVi { - fn from(value: Parent) -> Self { - Self::Parent(value.node(), value.edge()) - } -} - -impl core::fmt::Display for PaVi { +impl std::fmt::Display for PaVi { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Self::Parent(node, edge) => write!(f, "the {edge}-th edge from {node}"), + Self::Parent(node, edge, child) => { + write!(f, "the {edge}-th edge from {node} to {child}") + } Self::Virtual(nt, t, node) => write!( f, "a virtual node for non-terminal {nt} and terminal {t} at node {node}" @@ -72,13 +69,17 @@ impl core::fmt::Display for PaVi { impl PaVi { /// Get the Parent variant. 
fn parent(self) -> Option<Parent> { - if let Self::Parent(node, edge) = self { + if let Self::Parent(node, edge, _) = self { Some(Parent::new(node, edge)) } else { None } } + fn is_virtual(self) -> bool { + matches!(self, Self::Virtual(_, _, _)) + } + /// Is this an empty segment? fn is_empty(self) -> bool { self == Self::Empty @@ -290,3 +291,5 @@ pub mod default; pub mod genins; pub use genins::generate_fragment; + +pub mod reduction; diff --git a/chain/src/item/reduction.rs b/chain/src/item/reduction.rs new file mode 100644 index 0000000..89306e6 --- /dev/null +++ b/chain/src/item/reduction.rs @@ -0,0 +1,428 @@ +//! This module implements the operation of reductions on the forest. +//! +//! # What are reductions +//! +//! To explain this in detail, we first investigate the +//! nullable-closure process, then discuss what reduction is and what +//! it means in the chain-rule machine, and then explain the problem. +//! +//! ## Nullable-closure process +//! +//! In the process of running the chain-rule machine, we will collapse +//! edges, in the sense that, if there is an edge from node a to node +//! b and another from node b to node c, and if the edge from node a +//! to node b is "nullable", that is if the edge corresponds to a +//! position in the atomic language that is deemed nullable by the +//! atomic language, then we also make an edge from node a to node c. +//! +//! The purpose of this process of forming the nullable closure is to +//! ensure that the chain-rule machine can save the time to traverse +//! the entire machine to find the nullable closure later on. But a +//! side-effect of this process is that reductions are not explicitly +//! marked. +//! +//! Note that we must perform this nullable closure process in order +//! that the time complexity of the chain-rule machine lies within the +//! cubic bound. +//! +//! ## Three types of actions +//! +//! We can imagine a "traditional parser generator" as a stack +//! 
machine: there are three types of actions associated with it, +//! depending on the current state and the current input token. The +//! first is expansion: it means that we are expanding from a +//! non-terminal, by some rule. The second is a normal push: we just +//! continue parsing according to the current rule. The final one is +//! reduction: it means the current expansion from a non-terminal has +//! terminated and we go back to the previous level of expansion. +//! +//! ## Relation to the chain-rule machine +//! +//! For our chain-rule machine, expansion means to create a new node +//! pointing at the current node, forming a path with one more length. +//! A normal push means to create a new node that points to the target +//! of an edge going out from the current node, which was not created +//! by the process of forming nullable closures. And the reduction +//! means to create a new node that points to the target of an edge +//! going out from the current node, which *was* created by the +//! process of forming nullable closures. +//! +//! # Problem +//! +//! As can be seen from the previous paragraph, the distinction +//! between a normal push and a reduction in the chain-rule machine is +//! simply whether or not the original edge was created by the process +//! of forming nullable closures. For the chain-rule machine, this +//! does not matter: it can function well. For the formation of the +//! derivation forest, however, this is not so well: we cannot +//! read-off immediately whether or not to perform reductions from the +//! chain-rule machine. +//! +//! # Solutions +//! +//! I tried some solutions to solve this problem. +//! +//! ## Complete at the end +//! +//! The first idea I tried is to find the nodes that were not closed +//! properly and fill in the needed reductions, at the end of the +//! parse. This idea did not end up well, as we already lost a lot of +//! information at the end of the parse, so it becomes quite difficult +//! 
to know on which nodes we need to perform reductions. +//! +//! ## Record precise information +//! +//! The next idea is to record the nodes that were skipped by the +//! nullable-closure process, and then when we encounter a skipped +//! segment, we perform the reductions there. This idea sounds +//! feasible at first, but it turns out that we cannot track the nodes +//! properly. That is, when running the chain-rule machine, we will +//! split and clone nodes from time to time. A consequence is that +//! the old node numbers that were recorded previously will not be +//! correct afterwards. This means we cannot know exactly which +//! reductions to perform later. +//! +//! ## Guided brute-force +//! +//! Now I am trying out the third idea. The motivation is simple: if +//! we cannot properly track nodes, then we track no nodes. Then, in +//! order to perform reductions, we do not distinguish between +//! necessary reductions and unnecessary reductions, and perform all +//! possible reductions. In other words, when we are about to plant a +//! new node in the forest, we first check the last child of the +//! to-be-parent. If it is properly closed, we do nothing. +//! Otherwise, we recursively descend into its children(s) to find all +//! last children, and perform all possible valid reductions. + +use super::*; +use crate::{ + atom::{Atom, DefaultAtom}, + default::Error, + item::default::DefaultForest, +}; +use grammar::{GrammarLabel, TNT}; +use graph::Graph; + +use std::collections::HashMap as Map; + +impl DefaultForest<ForestLabel<GrammarLabel>> { + /// Perform reduction at last descendents of `node`. + /// + /// # Parameters + /// + /// The parameter `pos` is the next starting position. It is used + /// to find the descendents that need reductions: only those nodes + /// which have descendents with the correct ending positions will + /// receive reductions. + /// + /// The parameter `atom` is used to know which rule numbers are + /// deemed as accepting. 
Only accepting rule numbers can receive + /// reductions: this is the definition of being accepting. + /// + /// The parameter `ter` is used to fill in segments for virtual + /// nodes if they are found along the way. + /// + /// # Errors + /// + /// 1. Index out of bounds: some node number is corrupted. + /// 2. Node has no label: some node label is lost. + pub(crate) fn reduction( + &mut self, + node: usize, + pos: usize, + ter: usize, + atom: &DefaultAtom, + ) -> Result<usize, Error> { + let mut result = node; + + // step 1: Determine if this needs reductions. + + if self.root() == Some(node) { + return Ok(result); + } + + // REVIEW: Do we need to check the end matches the position? + + let mut node_label = self + .vertex_label(node)? + .ok_or(Error::NodeNoLabel(node))? + .label(); + + if node_label.end().is_some() { + return Ok(result); + } + + // step 2: Find descendents that need reductions. + + let mut correct_ends: Map<usize, bool> = Default::default(); + + let mut order_of_correct_ends: Vec<usize> = Vec::new(); + + let mut stack: Vec<usize> = vec![node]; + + let mut file = std::fs::OpenOptions::new().append(true).open("debug.log"); + + use std::{borrow::BorrowMut, io::Write}; + + // Whether or not to write a debug file. 
+ let to_write = false; + + if to_write { + if let Ok(ref mut file) = file.borrow_mut() { + file.write_all(format!("beginning, pos = {pos}, node = {node}\n").as_bytes()) + .unwrap(); + } + } + + 'stack_loop: while let Some(top) = stack.pop() { + if to_write { + if let Ok(ref mut file) = file.borrow_mut() { + file.write_all(format!("top: {top}\n").as_bytes()).unwrap(); + } + } + + if correct_ends.get(&top).is_some() { + continue 'stack_loop; + } + + let top_label = self.vertex_label(top)?.ok_or(Error::NodeNoLabel(top))?; + + if let Some(end) = top_label.label().end() { + correct_ends.insert(top, end == pos); + + continue 'stack_loop; + } + + if let Some(rule) = top_label.label().label().rule() { + // A rule node is not considered directly: it should + // be affected by its child implicitly. + // + // We only consider a rule node if it is deemed + // accepting by the atom. + + if to_write { + if let Ok(ref mut file) = file.borrow_mut() { + file.write_all(format!("{}: {rule}, {stack:?}\n", line!()).as_bytes()) + .unwrap(); + } + } + + if atom + .is_accepting(rule * 2 + 1) + .map_err(|_| Error::IndexOutOfBounds(2 * rule + 1, atom.accepting_len()))? + { + let mut has_unexplored_children = false; + let mut inserted = false; + + 'child_loop: for child in self.children_of(top)? 
{ + match correct_ends.get(&child).copied() { + Some(true) => { + correct_ends.insert(top, true); + + inserted = true; + } + None => { + has_unexplored_children = true; + + break 'child_loop; + } + _ => {} + } + } + + if has_unexplored_children { + stack.push(top); + stack.extend(self.children_of(top)?); + } else if !inserted { + correct_ends.insert(top, false); + } + } else { + correct_ends.insert(top, false); + } + + continue 'stack_loop; + } + + if top_label.is_packed() { + let mut has_unexplored_children = false; + + if to_write { + if let Ok(ref mut file) = file.borrow_mut() { + file.write_all(format!("{}: packed, {stack:?}\n", line!()).as_bytes()) + .unwrap(); + } + } + + for child in self.children_of(top)? { + match correct_ends.get(&child).copied() { + Some(true) => { + // NOTE: This is not recorded in the + // correct orders, as we do not handle + // packed nodes directly. + + correct_ends.insert(top, true); + } + None => { + has_unexplored_children = true; + } + _ => {} + } + } + + if to_write { + if let Ok(ref mut file) = file.borrow_mut() { + file.write_all( + format!("{}: packed, {has_unexplored_children}\n", line!()).as_bytes(), + ) + .unwrap(); + } + } + + if has_unexplored_children { + stack.push(top); + stack.extend(self.children_of(top)?); + } else if correct_ends.get(&top).is_none() { + correct_ends.insert(top, false); + } + + continue 'stack_loop; + } + + let degree = self.degree(top)?; + + if to_write { + if let Ok(ref mut file) = file.borrow_mut() { + file.write_all(format!("{}: degree = {degree}\n", line!()).as_bytes()) + .unwrap(); + } + } + + let last_index = if degree != 0 { + degree - 1 + } else { + // a leaf is supposed to be a terminal node and hence + // should have an ending position + + let end = match top_label.label().end() { + None => match top_label.label().label().tnt() { + Some(TNT::Ter(_)) => { + panic!("a terminal node {top} has no ending position?"); + } + Some(TNT::Non(nt)) => { + correct_ends.insert(top, true); + + 
self.close_pavi(atom.borrow(), PaVi::Virtual(nt, ter, top), pos)?; + + continue 'stack_loop; + } + None => { + unreachable!("we already handled the rule case above"); + } + }, + Some(end) => end, + }; + + correct_ends.insert(top, end == pos); + + if end == pos { + order_of_correct_ends.push(top); + } + + continue 'stack_loop; + }; + + if to_write { + if let Ok(ref mut file) = file.borrow_mut() { + file.write_all(format!("{}: last = {last_index}\n", line!()).as_bytes()) + .unwrap(); + } + } + + let last_child = self.nth_child(top, last_index)?; + + if let Some(child_correctness_value) = correct_ends.get(&last_child).copied() { + correct_ends.insert(top, child_correctness_value); + + if child_correctness_value { + order_of_correct_ends.push(top); + } + } else { + stack.extend([top, last_child]); + } + } + + if to_write { + if let Ok(ref mut file) = file.borrow_mut() { + file.write_all( + format!("{}: orders = {order_of_correct_ends:?}\n", line!()).as_bytes(), + ) + .unwrap(); + } + } + + // step 3: perform reductions by `splone`. + // + // NOTE: We must fix the order from top to bottom: this is the + // reverse order of `order_of_correct_ends` . 
+ + for node in order_of_correct_ends.into_iter().rev() { + let label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?; + let degree = self.degree(node)?; + + if !matches!(label.label().label().tnt(), Some(TNT::Non(_))) { + continue; + } + + #[cfg(debug_assertions)] + { + let label_label_label = label.label().label(); + + assert!(label_label_label.rule().is_none()); + + assert!(matches!(label_label_label.tnt(), Some(TNT::Non(_)))); + + assert!(label.label().end().is_none()); + + assert_ne!(degree, 0); + } + + let last_index = degree - 1; + + self.splone(node, Some(pos), last_index, false)?; + } + + node_label.set_end(pos); + + if let Some(packed) = + self.query_label(ForestLabel::new(node_label, ForestLabelType::Packed)) + { + result = packed; + + if to_write { + if let Ok(ref mut file) = file.borrow_mut() { + file.write_all(format!("{}: packed = {packed}\n", line!()).as_bytes()) + .unwrap(); + } + } + } else if let Some(plain) = + self.query_label(ForestLabel::new(node_label, ForestLabelType::Plain)) + { + result = plain; + + if to_write { + if let Ok(ref mut file) = file.borrow_mut() { + file.write_all(format!("{}: plain = {plain}\n", line!()).as_bytes()) + .unwrap(); + } + } + } + + if to_write { + if let Ok(ref mut file) = file.borrow_mut() { + file.write_all(&[101, 110, 100, 10]).unwrap(); + } + } + + Ok(result) + } +} diff --git a/chain/src/item/thoughts-on-reducer.org b/chain/src/item/thoughts-on-reducer.org new file mode 100644 index 0000000..39a799a --- /dev/null +++ b/chain/src/item/thoughts-on-reducer.org @@ -0,0 +1,121 @@ +#+TITLE: Thoughts on the reducer +#+AUTHOR: Durand +#+DATE: <2023-06-05 Lun 22:08> + +This module implements a data type for storing reduction +information. + +* Questions + +What is reduction information, and why is it necessary to store it? 
+ +* Nullable-closure process + +In the process of running the chain-rule machine, we will collapse +edges, in the sense that, if there is an edge from node a to node b +and another from node b to node c, and if the edge from node a to node +b is "nullable", that is if the edge corresponds to a position in the +atomic language that is deemed nullable by the atomic language, then +we also make an edge from node a to node c. + +The purpose of this process of forming the nullable closure is to +ensure that the chain-rule machine can save the time to traverse the +entire machine to find the nullable closure later on. But a +side-effect of this process is that reductions are not explicitly +marked. + +To explain this in detail, we first investigate what reduction is and +what it means in the chain-rule machine, and then explain the problem. + +* Three types of actions + +We can imagine a "traditional parser generator" as a stack machine: +there are three types of actions associated with it, depending on the +current state and the current input token. The first is expansion: it +means that we are expanding from a non-terminal, by some rule. The +second is a normal push: we just continue parsing according to the +current rule. The final one is reduction: it means the current +expansion from a non-terminal has terminated and we go back to the +previous level of expansion. + +* Relation to the chain-rule machine + +For our chain-rule machine, expansion means to create a new node +pointing at the current node, forming a path of length increased by +one. A normal push means to create a new node that points to the +target of an edge going out from the current node, which was not +created by the process of forming nullable closures. And the +reduction means to create a new node that points to the target of an +edge going out from the current node, which *was* created by the +process of forming nullable closures. 
+
+* Problem
+
+As can be seen from the previous paragraph, the distinction between a
+normal push and a reduction in the chain-rule machine is simply
+whether or not the original edge was created by the process of forming
+nullable closures. For the chain-rule machine, this does not matter:
+it can function well. For the formation of the derivation forest,
+however, this is not so well: we cannot read-off immediately whether
+or not to perform reductions from the chain-rule machine.
+
+* Solution
+
+Since we cannot read-off the reduction information directly from the
+chain-rule machine, we have to store that information somehow.
+
+** Digression: past experiences
+
+When developing this part, I did not have a clear picture of the
+situation in mind: I was experimenting with the ways to construct the
+derivation forest from the chain-rule machine, as the paper describing
+the algorithm does not construct the derivation forest directly: it
+constructs an alternative format. A consequence is that I spent quite
+some time in figuring out how to construct derivation forests
+correctly.
+
+During the experiments, I tried various ideas: including to "fill-in"
+the reductions after we have parsed the entire input. This seemed
+ostensibly a good idea, as I seem to be able to "read-off" the
+reductions from the resulting partially complete derivation forests.
+
+As it turned out, this was actually a bad idea. In fact, I can now
+prove that this will not work by the presence of a specific grammar
+that will cause this approach to fail definitely. This led me to
+believe that the only way is to store the needed reduction information
+in order to fill in this gap.
+
+** Storing reduction information
+
+Now we want to store the reduction information, so we need to be clear
+about what we are storing.
+
+In the derivation forest, a reduction happens when we reach the
+right-most descendent of a subtree, which marks the end of an
+expansion from a non-terminal. 
Moreover, our derivation forests
+usually contain half-open nodes, which mean that they are not
+completed yet and can keep growing, until a reduction happens when we
+"close" the half-open node. Thus, what we really need is the list of
+nodes to close.
+
+This information is readily available when we form nullable closures:
+the skipped nodes are the nodes we need to close later on. To be more
+exact, when we skip nodes, we have two nodes: the top node and the
+bottom node, and we want to store the middle node that is skipped.
+
+This naturally led to the structure of a nondeterministic finite
+automaton: when we want to close nodes, we start from the bottom-most
+node, and query the nodes upward by the top node, until we reach the
+top node or until we have no more nodes to go.
+
+The special characteristic about this structure is that we need to
+label both the vertices and the edges. Since we already have a
+structure that labels edges, we can simply extend that structure by
+adding an array of labels and possibly a map from labels to nodes, to
+make sure the labels of vertices are unique and can be queried
+quickly.
+
+One thing to note is that we actually only need the ability to query
+children by the labels, and do not need to query labels by the target.
+We need experiments to see if this will be the bottleneck and hence
+needs to be optimized out.
diff --git a/chain/src/reducer.rs b/chain/src/reducer.rs
new file mode 100644
index 0000000..7ff1858
--- /dev/null
+++ b/chain/src/reducer.rs
@@ -0,0 +1,191 @@
+//! This module implements a data type for storing reduction
+//! information.
+//!
+//! # Questions
+//!
+//! What is reduction information, and why is it necessary to store
+//! it?
+//!
+//! # Nullable-closure process
+//!
+//! In the process of running the chain-rule machine, we will collapse
+//! edges, in the sense that, if there is an edge from node a to node
+//! b and another from node b to node c, and if the edge from node a
+//! 
to node b is "nullable", that is if the edge corresponds to a +//! position in the atomic language that is deemed nullable by the +//! atomic language, then we also make an edge from node a to node c. +//! +//! The purpose of this process of forming the nullable closure is to +//! ensure that the chain-rule machine can save the time to traverse +//! the entire machine to find the nullable closure later on. But a +//! side-effect of this process is that reductions are not explicitly +//! marked. +//! +//! To explain this in detail, we first investigate what reduction is +//! and what it means in the chain-rule machine, and then explain the +//! problem. +//! +//! # Three types of actions +//! +//! We can imagine a "traditional parser generator" as a stack +//! machine: there are three types of actions associated with it, +//! depending on the current state and the current input token. The +//! first is expansion: it means that we are expanding from a +//! non-terminal, by some rule. The second is a normal push: we just +//! continue parsing according to the current rule. The final one is +//! reduction: it means the current expansion from a non-terminal has +//! terminated and we go back to the previous level of expansion. +//! +//! # Relation to the chain-rule machine +//! +//! For our chain-rule machine, expansion means to create a new node +//! pointing at the current node, forming a path of length increased +//! by one. A normal push means to create a new node that points to +//! the target of an edge going out from the current node, which was +//! not created by the process of forming nullable closures. And the +//! reduction means to create a new node that points to the target of +//! an edge going out from the current node, which *was* created by +//! the process of forming nullable closures. +//! +//! # Problem +//! +//! As can be seen from the previous paragraph, the distinction +//! between a normal push and a reduction in the chain-rule machine is +//! 
simply whether or not the original edge was created by the process
+//! of forming nullable closures. For the chain-rule machine, this
+//! does not matter: it can function well. For the formation of the
+//! derivation forest, however, this is not so well: we cannot
+//! read-off immediately whether or not to perform reductions from the
+//! chain-rule machine.
+//!
+//! # Solution
+//!
+//! Since we cannot read-off the reduction information directly from
+//! the chain-rule machine, we have to store that information somehow.
+//!
+//! ## Digression: past experiences
+//!
+//! When developing this part, I did not have a clear picture of the
+//! situation in mind: I was experimenting with the ways to construct
+//! the derivation forest from the chain-rule machine, as the paper
+//! describing the algorithm does not construct the derivation forest
+//! directly: it constructs an alternative format. A consequence is
+//! that I spent quite some time in figuring out how to construct
+//! derivation forests correctly.
+//!
+//! During the experiments, I tried various ideas: including to
+//! "fill-in" the reductions after we have parsed the entire input.
+//! This seemed ostensibly a good idea, as I seem to be able to
+//! "read-off" the reductions from the resulting partially complete
+//! derivation forests.
+//!
+//! As it turned out, this was actually a bad idea. In fact, I can
+//! now prove that this will not work by the presence of a specific
+//! grammar that will cause this approach to fail definitely. This
+//! led me to believe that the only way is to store the needed
+//! reduction information in order to fill in this gap.
+//!
+//! ## Storing reduction information
+//!
+//! Now we want to store the reduction information, so we need to be
+//! clear about what we are storing.
+//!
+//! In the derivation forest, a reduction happens when we reach the
+//! right-most descendent of a subtree, which marks the end of an
+//! expansion from a non-terminal. 
Moreover, our derivation forests +//! usually contain half-open nodes, which mean that they are not +//! completed yet and can keep growing, until a reduction happens when +//! we "close" the half-open node. Thus, what we really need is the +//! list of nodes to close. +//! +//! This information is readily available when we form nullable +//! closures: the skipped nodes are the nodes we need to close later +//! on. To be more exact, when we skip nodes, we have two nodes: the +//! top node and the bottom node, and we want to store the middle node +//! that is skipped. +//! +//! This naturally led to the structure of a nondeterministic finite +//! automaton: when we want to close nodes, we start from the +//! bottom-most node, and query the nodes upward by the top node, +//! until we reach the top node or until we have no more nodes to go. +//! +//! The special characteristic about this structure is that we need to +//! label both the vertices and the edges. Since we already have a +//! structure that labels edges, we can simply extend that structure +//! by adding an array of labels and possibly a map from labels to +//! nodes, to make sure the labels of vertices are unique and can be +//! queried quickly. +//! +//! One thing to note is that we actually only need the ability to +//! query children by the labels, and do not need to query labels by +//! the target. So we can represent this nondeterministic finite +//! automaton by a plain hashmap. 
+ +#![allow(unused)] + +use std::collections::{hash_set::Iter, HashMap, HashSet}; + +#[derive(Debug, Default, Clone)] +pub(crate) struct Reducer(HashMap<(usize, usize), HashSet<usize>>); + +#[derive(Debug, Default)] +pub(crate) enum ReducerIter<'a> { + #[default] + Empty, + NonEmpty(Iter<'a, usize>), +} + +impl<'a> Iterator for ReducerIter<'a> { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option<Self::Item> { + match self { + ReducerIter::Empty => None, + ReducerIter::NonEmpty(iter) => iter.next().copied(), + } + } + + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + match self { + ReducerIter::Empty => (0, Some(0)), + ReducerIter::NonEmpty(iter) => iter.size_hint(), + } + } +} + +impl<'a> ExactSizeIterator for ReducerIter<'a> { + #[inline] + fn len(&self) -> usize { + match self { + ReducerIter::Empty => 0, + ReducerIter::NonEmpty(iter) => iter.len(), + } + } +} + +impl<'a> ReducerIter<'a> { + #[inline] + pub(crate) fn new(iter: Option<Iter<'a, usize>>) -> Self { + match iter { + Some(iter) => Self::NonEmpty(iter), + None => Self::default(), + } + } +} + +impl Reducer { + #[inline] + pub(crate) fn query(&self, bottom: usize, top: usize) -> ReducerIter<'_> { + ReducerIter::new(self.0.get(&(bottom, top)).map(|set| set.iter())) + } + + #[inline] + pub(crate) fn save(&mut self, bottom: usize, top: usize, middle: usize) { + self.0 + .entry((bottom, top)) + .or_insert_with(Default::default) + .insert(middle); + } +} |