diff options
Diffstat (limited to 'chain/src/item')
-rw-r--r-- | chain/src/item/default/mod.rs | 257 | ||||
-rw-r--r-- | chain/src/item/default/splone.rs | 7 | ||||
-rw-r--r-- | chain/src/item/genins.rs | 288 | ||||
-rw-r--r-- | chain/src/item/mod.rs | 25 | ||||
-rw-r--r-- | chain/src/item/reduction.rs | 428 | ||||
-rw-r--r-- | chain/src/item/thoughts-on-reducer.org | 121 |
6 files changed, 710 insertions, 416 deletions
diff --git a/chain/src/item/default/mod.rs b/chain/src/item/default/mod.rs index 6d28956..47e8c90 100644 --- a/chain/src/item/default/mod.rs +++ b/chain/src/item/default/mod.rs @@ -250,7 +250,12 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> { frag_stack.pop(); self_stack.pop(); - if fragment.vertex_label(frag_top)? != self.vertex_label(self_top)? { + // NOTE: The labels might defer by the status. We test + // for the equalities of inner labels only. + + if fragment.vertex_label(frag_top)?.map(|label| label.label()) + != self.vertex_label(self_top)?.map(|label| label.label()) + { // not a prefix // dbg!( // frag_top, @@ -589,11 +594,15 @@ impl<T: GraphLabel> DefaultForest<ForestLabel<T>> { return Ok(node); } + fragment.set_root(1)?; + let node_label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?; if node_label.is_packed() || node_label.clone_index().is_some() { let mut planted = false; + let mut to_plant: Option<usize> = None; + let mut node = node; if node_label.clone_index().is_some() { @@ -607,17 +616,25 @@ impl<T: GraphLabel> DefaultForest<ForestLabel<T>> { for clone in self.children_of(node)? { node = clone; - if self.is_prefix(clone, &fragment)? { - planted = true; + if !self.is_empty_node(clone)? { + let first_child = self.nth_child(clone, 0)?; + + if self.is_prefix(first_child, &fragment)? { + planted = true; - break; + break; + } + } else if to_plant.is_none() { + to_plant = Some(clone); } } if !planted { - let clone = self.clone_node(node, 0, false)?; - - fragment.set_root(1)?; + let clone = if let Some(cloned_node) = to_plant { + cloned_node + } else { + self.clone_node(node, 0, false)? + }; self.plant(clone, fragment, false)?; @@ -626,51 +643,29 @@ impl<T: GraphLabel> DefaultForest<ForestLabel<T>> { } else { let clone_threshold = if self.root == Some(node) { 1 } else { 0 }; - let planted = self.is_prefix(node, &fragment)?; + let satisfy_threshold = self.degree(node)? 
> clone_threshold; - fragment.set_root(1)?; + if !satisfy_threshold { + self.plant(node, fragment, false)?; - if !planted && self.degree(node)? > clone_threshold { + return Ok(node); + } + + let first_child = self.nth_child(node, clone_threshold)?; + + let planted = self.is_prefix(first_child, &fragment)?; + + if !planted { let clone = self.clone_node(node, 0, false)?; self.plant(clone, fragment, false)?; return Ok(clone); - } else if !planted { - self.plant(node, fragment, false)?; } } Ok(node) } - - /// Extract the node from `pavi`. - /// - /// If pavi is a parent, this returns the child pointed to by the - /// represented edge. - /// - /// If pavi is a virtual node, this simply returns the virtual - /// node. - /// - /// If pavi is empty, this returns the root, unless the forest is - /// empty. - /// - /// # Guarantee - /// - /// The returned node is guaranteed to be a valid node. - pub(crate) fn node_from_pavi(&self, pavi: PaVi) -> Result<usize, Error> { - match pavi { - PaVi::Parent(node, edge) => Ok(self.nth_child(node, edge)?), - PaVi::Virtual(_, _, node) => { - if node >= self.nodes_len() { - return Err(Error::IndexOutOfBounds(node, self.nodes_len())); - } - - Ok(node) - } - PaVi::Empty => Ok(self.root().ok_or(Error::IndexOutOfBounds(0, 0))?), - } - } } pub mod splone; @@ -850,188 +845,6 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { Ok(None) } - // /// Check if every child already has an end. - // fn every_child_is_completed(&self, node_id: usize, atom: &DefaultAtom) -> Result<bool, Error> { - // let children = self.children_of(node_id)?; - - // if children.len() == 0 { - // return Ok(true); - // } - - // let mut pos = self - // .vertex_label(node_id)? - // .ok_or(Error::NodeNoLabel(node_id))? - // .label() - // .start(); - - // let mut last_child_label = None; - - // for child in children { - // let child_label = self - // .vertex_label(child)? - // .ok_or(Error::NodeNoLabel(child))? 
- // .label(); - - // last_child_label = Some(child_label); - - // if child_label.start() != pos || child_label.end().is_none() { - // return Ok(false); - // } - - // pos = child_label.end().unwrap(); - // } - - // if let Some(label) = last_child_label { - // if let Some(rule) = label.label().rule() { - // if !atom - // .is_accepting(2 * rule + 1) - // .map_err(|_| Error::IndexOutOfBounds(2 * rule + 1, atom.nodes_len()))? - // { - // return Ok(false); - // } - // } - // } - - // Ok(true) - // } - - // /// Complete the forest by trying to set the ending position of - // /// every node that does not have an end, by the ending position - // /// of its last child. - // pub fn complete(&mut self, atom: &DefaultAtom) -> Result<(), Error> { - // let mut stack: Vec<_> = self - // .nodes() - // .filter(|node| { - // let label = self.vertex_label(*node).unwrap().unwrap().label(); - - // label.label().rule().is_some() && label.end().is_some() - // }) - // .collect(); - - // let mut second_stack: Vec<usize> = Vec::new(); - - // let mut pending_candidates: Vec<usize> = Vec::new(); - - // let nodes_len = self.nodes_len(); - - // let mut seen_nodes: HashSet<usize> = HashSet::with_capacity(nodes_len); - - // while !stack.is_empty() { - // while let Some(mut top) = stack.pop() { - // if seen_nodes.contains(&top) { - // continue; - // } - - // seen_nodes.insert(top); - - // let top_label = self.vertex_label(top)?.unwrap(); - - // if top_label.clone_index().is_some() { - // let mut top_parents = self.parents_of(top)?; - - // if top_parents.len() != 1 { - // return Err(Error::InvalidClone(top)); - // } - - // top = top_parents.next().unwrap().node(); - // } - - // let rule_parents = self.parents_of(top)?; - - // let candidates = { - // let mut result = Vec::with_capacity(rule_parents.len()); - - // for parent in rule_parents { - // // for parent in self.parents_of(parent.node())? { - // // if self.degree(parent.node())? 
<= parent.edge() + 1 { - // result.push(parent); - // // } - // // } - // } - - // result - // }; - - // for candidate in candidates { - // if matches!(self.vertex_label(candidate.node())?, Some(label) if label.label().end().is_none()) - // { - // if self.every_child_is_completed(candidate.node(), atom)? { - // let last_child = self - // .nth_child(candidate.node(), self.degree(candidate.node())? - 1)?; - // let end = self - // .vertex_label(last_child)? - // .ok_or(Error::NodeNoLabel(last_child))? - // .label() - // .end(); - - // let sploned_node = self.splone( - // candidate.node(), - // end, - // self.degree(candidate.node())? - 1, - // true, - // )?; - - // second_stack.push(sploned_node); - // } else { - // pending_candidates.push(candidate.node()); - // } - // } else { - // second_stack.push(candidate.node()); - // } - // } - - // let mut new_pending = Vec::with_capacity(pending_candidates.len()); - - // while let Some(node) = pending_candidates.pop() { - // if self.every_child_is_completed(node, atom)? { - // let last_edge = self.degree(node)? - 1; - // let last_child = self.nth_child(node, last_edge)?; - // let end = self - // .vertex_label(last_child)? - // .ok_or(Error::NodeNoLabel(last_child))? - // .label() - // .end(); - - // let sploned_node = self.splone(node, end, last_edge, true)?; - - // second_stack.push(sploned_node); - // } else { - // new_pending.push(node); - // } - // } - - // std::mem::swap(&mut pending_candidates, &mut new_pending); - // } - - // std::mem::swap(&mut stack, &mut second_stack); - // } - - // Ok(()) - // } - - // /// Unconditionally set the label of the node to be the new label. - // /// - // /// # Warning - // /// - // /// Use with caution: it does not do anything special, so it is - // /// the responsibility of the caller to ensure this operation is - // /// legal. 
- // #[allow(dead_code)] - // pub(crate) fn set_label(&mut self, node: usize, label: GrammarLabel) -> Result<(), Error> { - // let status = self - // .vertex_label(node)? - // .ok_or(Error::NodeNoLabel(node))? - // .status; - - // let label = ForestLabel::new(label, status); - - // let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); - - // builder.set_label(node, label)?; - - // Ok(()) - // } - /// Set the position of every node. /// /// If ALLP is non-nil or if the node is a terminal node, also set diff --git a/chain/src/item/default/splone.rs b/chain/src/item/default/splone.rs index d77686e..581d1dc 100644 --- a/chain/src/item/default/splone.rs +++ b/chain/src/item/default/splone.rs @@ -555,7 +555,12 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { let modified_degree = std::cmp::max(existing_degree, 1) - 1; if existing_degree != 0 - && self.has_same_children(existing, node, modified_degree, edge_index)? + && self.has_same_children( + existing, + node, + modified_degree, + edge_index + 1, + )? { let last_child = self.nth_child(existing, existing_degree - 1)?; diff --git a/chain/src/item/genins.rs b/chain/src/item/genins.rs index a2bb22c..19f835b 100644 --- a/chain/src/item/genins.rs +++ b/chain/src/item/genins.rs @@ -9,16 +9,14 @@ use super::*; use crate::{ atom::{Atom, DefaultAtom}, - default::{BoTop, Error, Reducer}, + default::Error, item::default::DefaultForest, Edge, }; use grammar::{Error as GrammarError, GrammarLabel, GrammarLabelType, TNT}; use graph::Graph; -use core::borrow::Borrow; - -use std::collections::HashSet; +use std::borrow::Borrow; /// Convert an error telling us that an index is out of bounds. /// @@ -179,8 +177,8 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { /// fragment under the result splone. 
pub(crate) fn insert_item( &mut self, - reducer: &Reducer, label: Edge, + ter: usize, fragment: impl Borrow<DefaultForest<ForestLabel<GrammarLabel>>>, atom_child_iter: impl Iterator<Item = usize> + ExactSizeIterator + Clone, atom: &DefaultAtom, @@ -200,7 +198,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { let fragment_root = if let Some(root) = fragment.root() { root } else { - unreachable!("empty item"); + panic!("empty item"); }; let fragment_root_label = fragment @@ -209,7 +207,11 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { let pos = fragment_root_label.label().start(); - dbg!((pos, label)); + // dbg!((pos, label)); + + // Whether or not to print detailed graphs of each step of + // operation for debugging purposes. + let to_print = false; let tnt_string = { let empty_p = atom_child_iter.len() == 0; @@ -217,10 +219,10 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { match label.label().label() { GrammarLabelType::TNT(TNT::Ter(t)) => { - format!("t {t}") + format!("t {t}{}", if empty_p { " second" } else { "" }) } GrammarLabelType::TNT(TNT::Non(n)) => { - format!("n {n} {}", if empty_p { "second" } else { "first" }) + format!("n {n}") } _ => "error".to_string(), } @@ -230,7 +232,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { let mut repetition = 0; while std::fs::metadata(format!( - "/Users/durand/Desktop/Centre/A propos de programmes/Rust/rep/chain/output/pos {pos} {tnt_string} {repetition}.gv" + "/Users/durand/Desktop/Centre/A propos de programmes/Rust/rep/chain/output/pos {pos} - {repetition}.gv" )) .is_ok() { @@ -240,44 +242,32 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { repetition }; - self.print_viz(&format!("pos {pos} {tnt_string} {num}.gv")) - .unwrap(); - - self.extra_reductions( - BoTop::new(true_source, pavi), - pos, - reducer.borrow(), - atom.borrow(), - )?; + if to_print { + self.print_viz(&format!("pos {pos} - {num}.gv")).unwrap(); + } /// A cute little macro to produce compact representations /// of Parents, Virtual 
nodes, or empty. + #[allow(unused_macros)] macro_rules! pavi_to_short_str { ($pavi:ident) => { match $pavi { - PaVi::Parent(node, edge) => format!("{node} {edge}"), - PaVi::Virtual(nt, t, node) => format!("{nt} {t} {node}"), + PaVi::Parent(node, edge, child) => format!("p{node} {edge} {child}"), + PaVi::Virtual(nt, t, node) => format!("v{nt} {t} {node}"), PaVi::Empty => "ε".to_string(), } }; } - self.print_viz(&format!( - "pos {pos} {tnt_string} {num} stage 1 {}, {}.gv", - pavi_to_short_str!(true_source), - pavi_to_short_str!(pavi) - )) - .unwrap(); - // Ensure the last node in the PaVi is a terminal or a // non-terminal node, as an extra safety guard during // development. #[cfg(debug_assertions)] { match pavi { - PaVi::Parent(node, edge) => { + PaVi::Parent(_node, _edge, child) => { assert!(matches!( - self.vertex_label(self.nth_child(node, edge)?), + self.vertex_label(child), Ok(Some(label)) if label.label().label().tnt().is_some())); } @@ -312,11 +302,23 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { let is_empty_segment = pavi.is_empty(); + if true_source.is_virtual() { + self.close_pavi(atom.borrow(), true_source, pos)?; + + if to_print { + self.print_viz(&format!( + "pos {pos} - {num} {tnt_string} stage 0.1 {}.gv", + pavi_to_short_str!(true_source) + )) + .unwrap(); + } + } + let mut parents: Vec<Parent> = { let mut result = Vec::new(); match pavi { - PaVi::Parent(node, edge) => { + PaVi::Parent(node, edge, _) => { result.push(Parent::new(node, edge)); } PaVi::Virtual(nt, t, node) => { @@ -347,10 +349,12 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { self.plant_at_start(node, frag)?; - self.print_viz(&format!( - "pos {pos} {tnt_string} {num} stage 1.5 {node}.gv" - )) - .unwrap(); + if to_print { + self.print_viz(&format!( + "pos {pos} - {num} {tnt_string} stage 0.2 {node}.gv" + )) + .unwrap(); + } let rule_label_pos = self .query_label(last_but_one_label) @@ -369,6 +373,24 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { result }; + if let 
PaVi::Parent(node, edge, _) = pavi { + let nth_child = self.nth_child(node, edge)?; + + let reduced = self.reduction(nth_child, pos, ter, atom.borrow())?; + + if reduced != nth_child && !self.is_empty_node(reduced)? { + parents.clear(); + parents.extend(self.parents_of(reduced)?); + } + + if to_print { + self.print_viz(&format!( + "pos {pos} - {num} {tnt_string} stage 0.3 {nth_child}.gv" + )) + .unwrap(); + } + } + for parent in parents.iter() { if !self.has_node(parent.node()) { return Err(Error::IndexOutOfBounds(parent.node(), self.nodes_len())); @@ -423,12 +445,14 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { let sploned_node = self.splone(node.node(), Some(pos), node.edge(), false)?; - self.print_viz(&format!( - "pos {pos} {tnt_string} {num} stage 2 {} {}.gv", - node.node(), - node.edge(), - )) - .unwrap(); + if to_print { + self.print_viz(&format!( + "pos {pos} - {num} {tnt_string} stage 1 {} {}.gv", + node.node(), + node.edge(), + )) + .unwrap(); + } node_label = self .vertex_label(sploned_node)? @@ -513,12 +537,16 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { for parent in stack { let splanted = self.splant(parent.node(), parent.edge(), fragment, non_empty)?; - self.print_viz(&format!( - "pos {pos} {tnt_string} {num} stage 3 {} {} {splanted}.gv", - parent.node(), - parent.edge(), - )) - .unwrap(); + let _splanted_child = self.nth_child(splanted, self.degree(splanted)? 
- 1)?; + + if to_print { + self.print_viz(&format!( + "pos {pos} - {num} {tnt_string} stage 2 {} {} {splanted}.gv", + parent.node(), + parent.edge(), + )) + .unwrap(); + } non_empty = true; } @@ -541,25 +569,28 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { .query_label(root_label) .expect("root label was not found"); - let edge = { - let mut result = None; + let edge: usize; + let child: usize; - for (index, child) in self.children_of(node)?.enumerate() { - if matches!(self.vertex_label(child)?, Some(child_label) if child_label == leaf_label) - { - result = Some(index); - break; - } - } + let mut result = None; - if let Some(result) = result { - result - } else { - unreachable!("the forest is wrongly planted"); + for (index, child) in self.children_of(node)?.enumerate() { + if matches!(self.vertex_label(child)?, Some(child_label) if child_label == leaf_label) + { + result = Some((index, child)); + break; } - }; + } + + if let Some((index, edge_child)) = result { + edge = index; + child = edge_child; + } else { + unreachable!("the forest is wrongly planted"); + } + // dbg!(node, edge, root_label, leaf_label); - PaVi::Parent(node, edge) + PaVi::Parent(node, edge, child) } else { assert_eq!( fragment.nodes_len(), @@ -612,131 +643,19 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { Ok(result) } - // REVIEW: Is this function really correct? - - /// Perform extra reductions. - /// - /// To be precise, this function first splones the bottom node, - /// and then queries the reducer to find the next nodes to splone, - /// and repeats this process until either we find the top node, or - /// the reducer says to stop. - /// - /// One nuance to note is that after we splone a node, if we split - /// that node, then the splitted node will have a cloned parent, - /// see - /// [`split_node`][DefaultForest::<ForestLabel<GrammarLabel>>::split_node] - /// for details. 
So for each parent pointed out by the reducer, - /// we need to find its clone that has the splitted node as a - /// child. - fn extra_reductions( - &mut self, - botop: BoTop, - pos: usize, - reducer: &Reducer, - atom: &DefaultAtom, - ) -> Result<Vec<usize>, Error> { - if botop.bottom() == botop.top() { - return Ok(vec![self.node_from_pavi(botop.top())?]); - } - - let top = botop.top(); - let bottom = botop.bottom(); - - let bottom_closed = self.close_pavi(atom.borrow(), bottom, pos)?; - - let top_node = self.node_from_pavi(top)?; - let bottom_node = self.node_from_pavi(bottom)?; - - let mut stack = vec![bottom_node]; - - // Exclude duplicate nodes to ensure that no infinite - // recursion occurs. In the future I shall experiment if this - // is necessary, and get rid of this if possible. - let mut seen_nodes: HashSet<usize> = Default::default(); - - let mut result = Vec::new(); - - while let Some(bottom) = stack.pop() { - if seen_nodes.contains(&bottom) { - continue; - } - - seen_nodes.insert(bottom); - - if let Some(set) = reducer.get(&(top_node, bottom)) { - // First splone the bottom, except for the bottom one. - - let mut sploned = if bottom == bottom_node { - bottom_closed - } else { - let degree = self.degree(bottom)?; - let last_index = core::cmp::max(degree, 1) - 1; - - self.splone(bottom, Some(pos), last_index, false)? - }; - - // Then add parents whose labels are what we want into - // the stack. - - let mut label_set: HashSet<GrammarLabel> = HashSet::with_capacity(set.len()); - - for node in set.iter().copied() { - label_set.insert( - self.vertex_label(node)? - .ok_or(Error::NodeNoLabel(node))? - .label(), - ); - } - - let sploned_label = self - .vertex_label(sploned)? 
- .ok_or(Error::NodeNoLabel(sploned))?; - - if sploned_label.clone_index().is_some() { - let mut parents = self.parents_of(sploned)?; - - #[cfg(debug_assertions)] - if parents.len() != 1 { - panic!("assumption fails"); - } - - sploned = parents.next().unwrap().node(); - } - - for rule_parent in self.parents_of(sploned)? { - if self.degree(rule_parent.node())? != 1 { - panic!("assumption fails"); - } - - for parent in self.parents_of(rule_parent.node())? { - let parent_node = parent.node(); - - let parent_label = self - .vertex_label(parent_node)? - .ok_or(Error::NodeNoLabel(parent_node))? - .label(); - - if label_set.contains(&parent_label) { - stack.push(parent_node); - } - } - } - } else { - result.push(bottom); - } - } - - Ok(result) - } - /// Set the end position of the node associated with `pavi` to be `pos`. /// /// The parameter `atom` is used to query the reduction fragment /// if `pavi` is a virtual node. - fn close_pavi(&mut self, atom: &DefaultAtom, pavi: PaVi, pos: usize) -> Result<usize, Error> { + pub(crate) fn close_pavi( + &mut self, + atom: &DefaultAtom, + pavi: PaVi, + pos: usize, + ) -> Result<usize, Error> { match pavi { - PaVi::Parent(node, edge) => { - let nth_child = self.nth_child(node, edge)?; + PaVi::Parent(_node, _edge, child) => { + let nth_child = child; let nth_child_label = self .vertex_label(nth_child)? 
.ok_or(Error::NodeNoLabel(nth_child))?; @@ -781,6 +700,11 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { let reduction_fragment = atom.generate_virtual_frags(nt, t, None); + // NOTE: the case of the root is exceptional + if reduction_fragment.is_none() && self.root() != Some(node) { + return Err(Error::CannotClose(nt, t, node, node_label_start)); + } + for frag in reduction_fragment.into_iter().flatten() { let mut frag = frag.clone(); frag.set_pos(node_label_start, true)?; diff --git a/chain/src/item/mod.rs b/chain/src/item/mod.rs index 5efa710..54ca946 100644 --- a/chain/src/item/mod.rs +++ b/chain/src/item/mod.rs @@ -32,8 +32,9 @@ use core::borrow::Borrow; /// Also this might be empty if it represents the root node. #[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Default)] pub enum PaVi { - /// An edge from a node, as the n-th child. - Parent(usize, usize), + /// An edge from a node, as the n-th child, along with the + /// nth-child node number. + Parent(usize, usize, usize), /// A virtual segment from a non-terminal by a terminal, rooted at /// a node. /// @@ -50,16 +51,12 @@ pub enum PaVi { Empty, } -impl From<Parent> for PaVi { - fn from(value: Parent) -> Self { - Self::Parent(value.node(), value.edge()) - } -} - -impl core::fmt::Display for PaVi { +impl std::fmt::Display for PaVi { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Self::Parent(node, edge) => write!(f, "the {edge}-th edge from {node}"), + Self::Parent(node, edge, child) => { + write!(f, "the {edge}-th edge from {node} to {child}") + } Self::Virtual(nt, t, node) => write!( f, "a virtual node for non-terminal {nt} and terminal {t} at node {node}" @@ -72,13 +69,17 @@ impl core::fmt::Display for PaVi { impl PaVi { /// Get the Parent variant. 
fn parent(self) -> Option<Parent> { - if let Self::Parent(node, edge) = self { + if let Self::Parent(node, edge, _) = self { Some(Parent::new(node, edge)) } else { None } } + fn is_virtual(self) -> bool { + matches!(self, Self::Virtual(_, _, _)) + } + /// Is this an empty segment? fn is_empty(self) -> bool { self == Self::Empty @@ -290,3 +291,5 @@ pub mod default; pub mod genins; pub use genins::generate_fragment; + +pub mod reduction; diff --git a/chain/src/item/reduction.rs b/chain/src/item/reduction.rs new file mode 100644 index 0000000..89306e6 --- /dev/null +++ b/chain/src/item/reduction.rs @@ -0,0 +1,428 @@ +//! This module implements the operation of reductions on the forest. +//! +//! # What are reductions +//! +//! To explain this in detail, we first investigate the +//! nullable-closure process, then discuss what reduction is and what +//! it means in the chain-rule machine, and then explain the problem. +//! +//! ## Nullable-closure process +//! +//! In the process of running the chain-rule machine, we will collapse +//! edges, in the sense that, if there is an edge from node a to node +//! b and another from node b to node c, and if the edge from node a +//! to node b is "nullable", that is if the edge corresponds to a +//! position in the atomic language that is deemed nullable by the +//! atomic language, then we also make an edge from node a to node c. +//! +//! The purpose of this process of forming the nullable closure is to +//! ensure that the chain-rule machine can save the time to traverse +//! the entire machine to find the nullable closure later on. But a +//! side-effect of this process is that reductions are not explicitly +//! marked. +//! +//! Note that we must perform this nullable closure process in order +//! that the time complexity of the chain-rule machine lies within the +//! cubic bound. +//! +//! ## Three types of actions +//! +//! We can imagine a "traditional parser generator" as a stack +//! 
machine: there are three types of actions associated with it, +//! depending on the current state and the current input token. The +//! first is expansion: it means that we are expanding from a +//! non-terminal, by some rule. The second is a normal push: we just +//! continue parsing according to the current rule. The final one is +//! reduction: it means the current expansion from a non-terminal has +//! terminated and we go back to the previous level of expansion. +//! +//! ## Relation to the chain-rule machine +//! +//! For our chain-rule machine, expansion means to create a new node +//! pointing at the current node, forming a path with one more length. +//! A normal push means to create a new node that points to the target +//! of an edge going out from the current node, which was not created +//! by the process of forming nullable closures. And the reduction +//! means to create a new node that points to the target of an edge +//! going out from the current node, which *was* created by the +//! process of forming nullable closures. +//! +//! # Problem +//! +//! As can be seen from the previous paragraph, the distinction +//! between a normal push and a reduction in the chain-rule machine is +//! simply whether or not the original edge was created by the process +//! of forming nullable closures. For the chain-rule machine, this +//! does not matter: it can function well. For the formation of the +//! derivation forest, however, this is not so well: we cannot +//! read-off immediately whether or not to perform reductions from the +//! chain-rule machine. +//! +//! # Solutions +//! +//! I tried some solutions to solve this problem. +//! +//! ## Complete at the end +//! +//! The first idea I tried is to find the nodes that were not closed +//! properly and fill in the needed reductions, at the end of the +//! parse. This idea did not end up well, as we already lost a lot of +//! information at the end of the parse, so it becomes quite difficult +//! 
to know on which nodes we need to perform reductions. +//! +//! ## Record precise information +//! +//! The next idea is to record the nodes that were skipped by the +//! nullable-closure process, and then when we encounter a skipped +//! segment, we perform the reductions there. This idea sounds +//! feasible at first, but it turns out that we cannot track the nodes +//! properly. That is, when running the chain-rule machine, we will +//! split and clone nodes from time to time. A consequence is that +//! the old node numbers that were recorded previously will not be +//! correct afterwards. This means we cannot know exactly which +//! reductions to perform later. +//! +//! ## Guided brute-force +//! +//! Now I am trying out the third idea. The motivation is simple: if +//! we cannot properly track nodes, then we track no nodes. Then, in +//! order to perform reductions, we do not distinguish between +//! necessary reductions and unnecessary reductions, and perform all +//! possible reductions. In other words, when we are about to plant a +//! new node in the forest, we first check the last child of the +//! to-be-parent. If it is properly closed, we do nothing. +//! Otherwise, we recursively descend into its children(s) to find all +//! last children, and perform all possible valid reductions. + +use super::*; +use crate::{ + atom::{Atom, DefaultAtom}, + default::Error, + item::default::DefaultForest, +}; +use grammar::{GrammarLabel, TNT}; +use graph::Graph; + +use std::collections::HashMap as Map; + +impl DefaultForest<ForestLabel<GrammarLabel>> { + /// Perform reduction at last descendents of `node`. + /// + /// # Parameters + /// + /// The parameter `pos` is the next starting position. It is used + /// to find the descendents that need reductions: only those nodes + /// which have descendents with the correct ending positions will + /// receive reductions. + /// + /// The parameter `atom` is used to know which rule numbers are + /// deemed as accepting. 
Only accepting rule numbers can receive + /// reductions: this is the definition of being accepting. + /// + /// The parameter `ter` is used to fill in segments for virtual + /// nodes if they are found along the way. + /// + /// # Errors + /// + /// 1. Index out of bounds: some node number is corrupted. + /// 2. Node has no label: some node label is lost. + pub(crate) fn reduction( + &mut self, + node: usize, + pos: usize, + ter: usize, + atom: &DefaultAtom, + ) -> Result<usize, Error> { + let mut result = node; + + // step 1: Determine if this needs reductions. + + if self.root() == Some(node) { + return Ok(result); + } + + // REVIEW: Do we need to check the end matches the position? + + let mut node_label = self + .vertex_label(node)? + .ok_or(Error::NodeNoLabel(node))? + .label(); + + if node_label.end().is_some() { + return Ok(result); + } + + // step 2: Find descendents that need reductions. + + let mut correct_ends: Map<usize, bool> = Default::default(); + + let mut order_of_correct_ends: Vec<usize> = Vec::new(); + + let mut stack: Vec<usize> = vec![node]; + + let mut file = std::fs::OpenOptions::new().append(true).open("debug.log"); + + use std::{borrow::BorrowMut, io::Write}; + + // Whether or not to write a debug file. 
+ let to_write = false; + + if to_write { + if let Ok(ref mut file) = file.borrow_mut() { + file.write_all(format!("beginning, pos = {pos}, node = {node}\n").as_bytes()) + .unwrap(); + } + } + + 'stack_loop: while let Some(top) = stack.pop() { + if to_write { + if let Ok(ref mut file) = file.borrow_mut() { + file.write_all(format!("top: {top}\n").as_bytes()).unwrap(); + } + } + + if correct_ends.get(&top).is_some() { + continue 'stack_loop; + } + + let top_label = self.vertex_label(top)?.ok_or(Error::NodeNoLabel(top))?; + + if let Some(end) = top_label.label().end() { + correct_ends.insert(top, end == pos); + + continue 'stack_loop; + } + + if let Some(rule) = top_label.label().label().rule() { + // A rule node is not considered directly: it should + // be affected by its child implicitly. + // + // We only consider a rule node if it is deemed + // accepting by the atom. + + if to_write { + if let Ok(ref mut file) = file.borrow_mut() { + file.write_all(format!("{}: {rule}, {stack:?}\n", line!()).as_bytes()) + .unwrap(); + } + } + + if atom + .is_accepting(rule * 2 + 1) + .map_err(|_| Error::IndexOutOfBounds(2 * rule + 1, atom.accepting_len()))? + { + let mut has_unexplored_children = false; + let mut inserted = false; + + 'child_loop: for child in self.children_of(top)? 
{ + match correct_ends.get(&child).copied() { + Some(true) => { + correct_ends.insert(top, true); + + inserted = true; + } + None => { + has_unexplored_children = true; + + break 'child_loop; + } + _ => {} + } + } + + if has_unexplored_children { + stack.push(top); + stack.extend(self.children_of(top)?); + } else if !inserted { + correct_ends.insert(top, false); + } + } else { + correct_ends.insert(top, false); + } + + continue 'stack_loop; + } + + if top_label.is_packed() { + let mut has_unexplored_children = false; + + if to_write { + if let Ok(ref mut file) = file.borrow_mut() { + file.write_all(format!("{}: packed, {stack:?}\n", line!()).as_bytes()) + .unwrap(); + } + } + + for child in self.children_of(top)? { + match correct_ends.get(&child).copied() { + Some(true) => { + // NOTE: This is not recorded in the + // correct orders, as we do not handle + // packed nodes directly. + + correct_ends.insert(top, true); + } + None => { + has_unexplored_children = true; + } + _ => {} + } + } + + if to_write { + if let Ok(ref mut file) = file.borrow_mut() { + file.write_all( + format!("{}: packed, {has_unexplored_children}\n", line!()).as_bytes(), + ) + .unwrap(); + } + } + + if has_unexplored_children { + stack.push(top); + stack.extend(self.children_of(top)?); + } else if correct_ends.get(&top).is_none() { + correct_ends.insert(top, false); + } + + continue 'stack_loop; + } + + let degree = self.degree(top)?; + + if to_write { + if let Ok(ref mut file) = file.borrow_mut() { + file.write_all(format!("{}: degree = {degree}\n", line!()).as_bytes()) + .unwrap(); + } + } + + let last_index = if degree != 0 { + degree - 1 + } else { + // a leaf is supposed to be a terminal node and hence + // should have an ending position + + let end = match top_label.label().end() { + None => match top_label.label().label().tnt() { + Some(TNT::Ter(_)) => { + panic!("a terminal node {top} has no ending position?"); + } + Some(TNT::Non(nt)) => { + correct_ends.insert(top, true); + + 
self.close_pavi(atom.borrow(), PaVi::Virtual(nt, ter, top), pos)?; + + continue 'stack_loop; + } + None => { + unreachable!("we already handled the rule case above"); + } + }, + Some(end) => end, + }; + + correct_ends.insert(top, end == pos); + + if end == pos { + order_of_correct_ends.push(top); + } + + continue 'stack_loop; + }; + + if to_write { + if let Ok(ref mut file) = file.borrow_mut() { + file.write_all(format!("{}: last = {last_index}\n", line!()).as_bytes()) + .unwrap(); + } + } + + let last_child = self.nth_child(top, last_index)?; + + if let Some(child_correctness_value) = correct_ends.get(&last_child).copied() { + correct_ends.insert(top, child_correctness_value); + + if child_correctness_value { + order_of_correct_ends.push(top); + } + } else { + stack.extend([top, last_child]); + } + } + + if to_write { + if let Ok(ref mut file) = file.borrow_mut() { + file.write_all( + format!("{}: orders = {order_of_correct_ends:?}\n", line!()).as_bytes(), + ) + .unwrap(); + } + } + + // step 3: perform reductions by `splone`. + // + // NOTE: We must fix the order from top to bottom: this is the + // reverse order of `order_of_correct_ends` . 
+ + for node in order_of_correct_ends.into_iter().rev() { + let label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?; + let degree = self.degree(node)?; + + if !matches!(label.label().label().tnt(), Some(TNT::Non(_))) { + continue; + } + + #[cfg(debug_assertions)] + { + let label_label_label = label.label().label(); + + assert!(label_label_label.rule().is_none()); + + assert!(matches!(label_label_label.tnt(), Some(TNT::Non(_)))); + + assert!(label.label().end().is_none()); + + assert_ne!(degree, 0); + } + + let last_index = degree - 1; + + self.splone(node, Some(pos), last_index, false)?; + } + + node_label.set_end(pos); + + if let Some(packed) = + self.query_label(ForestLabel::new(node_label, ForestLabelType::Packed)) + { + result = packed; + + if to_write { + if let Ok(ref mut file) = file.borrow_mut() { + file.write_all(format!("{}: packed = {packed}\n", line!()).as_bytes()) + .unwrap(); + } + } + } else if let Some(plain) = + self.query_label(ForestLabel::new(node_label, ForestLabelType::Plain)) + { + result = plain; + + if to_write { + if let Ok(ref mut file) = file.borrow_mut() { + file.write_all(format!("{}: plain = {plain}\n", line!()).as_bytes()) + .unwrap(); + } + } + } + + if to_write { + if let Ok(ref mut file) = file.borrow_mut() { + file.write_all(&[101, 110, 100, 10]).unwrap(); + } + } + + Ok(result) + } +} diff --git a/chain/src/item/thoughts-on-reducer.org b/chain/src/item/thoughts-on-reducer.org new file mode 100644 index 0000000..39a799a --- /dev/null +++ b/chain/src/item/thoughts-on-reducer.org @@ -0,0 +1,121 @@ +#+TITLE: Thoughts on the reducer +#+AUTHOR: Durand +#+DATE: <2023-06-05 Lun 22:08> + +This module implements a data type for storing reduction +information. + +* Questions + +What is reduction information, and why is it necessary to store it? 
+ +* Nullable-closure process + +In the process of running the chain-rule machine, we will collapse +edges, in the sense that, if there is an edge from node a to node b +and another from node b to node c, and if the edge from node a to node +b is "nullable", that is if the edge corresponds to a position in the +atomic language that is deemed nullable by the atomic language, then +we also make an edge from node a to node c. + +The purpose of this process of forming the nullable closure is to +ensure that the chain-rule machine can save the time to traverse the +entire machine to find the nullable closure later on. But a +side-effect of this process is that reductions are not explicitly +marked. + +To explain this in detail, we first investigate what reduction is and +what it means in the chain-rule machine, and then explain the problem. + +* Three types of actions + +We can imagine a "traditional parser generator" as a stack machine: +there are three types of actions associated with it, depending on the +current state and the current input token. The first is expansion: it +means that we are expanding from a non-terminal, by some rule. The +second is a normal push: we just continue parsing according to the +current rule. The final one is reduction: it means the current +expansion from a non-terminal has terminated and we go back to the +previous level of expansion. + +* Relation to the chain-rule machine + +For our chain-rule machine, expansion means to create a new node +pointing at the current node, forming a path of length increased by +one. A normal push means to create a new node that points to the +target of an edge going out from the current node, which was not +created by the process of forming nullable closures. And the +reduction means to create a new node that points to the target of an +edge going out from the current node, which *was* created by the +process of forming nullable closures. 
+ +* Problem + +As can be seen from the previous paragraph, the distinction between a +normal push and a reduction in the chain-rule machine is simply +whether or not the original edge was created by the process of forming +nullable closures. For the chain-rule machine, this does not matter: +it can function well. For the formation of the derivation forest, +however, this does not work so well: we cannot read off immediately whether +or not to perform reductions from the chain-rule machine. + +* Solution + +Since we cannot read off the reduction information directly from the +chain-rule machine, we have to store that information somehow. + +** Digression: past experiences + +When developing this part, I did not have a clear picture of the +situation in mind: I was experimenting with the ways to construct the +derivation forest from the chain-rule machine, as the paper describing +the algorithm does not construct the derivation forest directly: it +constructs an alternative format. A consequence is that I spent quite +some time in figuring out how to construct derivation forests +correctly. + +During the experiments, I tried various ideas, including to "fill in" +the reductions after we have parsed the entire input. This seemed +ostensibly a good idea, as I seemed to be able to "read off" the +reductions from the resulting partially complete derivation forests. + +As it turned out, this was actually a bad idea. In fact, I can now +prove that this will not work, by exhibiting a specific grammar +that causes this approach to fail definitely. This led me to +believe that the only way is to store the needed reduction information +in order to fill in this gap. + +** Storing reduction information + +Now we want to store the reduction information, so we need to be clear +about what we are storing. + +In the derivation forest, a reduction happens when we reach the +right-most descendant of a subtree, which marks the end of an +expansion from a non-terminal.
+ Moreover, our derivation forests +usually contain half-open nodes, which means that they are not +completed yet and can keep growing, until a reduction happens, at which +point we "close" the half-open node. Thus, what we really need is the list of +nodes to close. + +This information is readily available when we form nullable closures: +the skipped nodes are the nodes we need to close later on. To be more +exact, when we skip nodes, we have two nodes: the top node and the +bottom node, and we want to store the middle node that is skipped. + +This naturally leads to the structure of a nondeterministic finite +automaton: when we want to close nodes, we start from the bottom-most +node, and query the nodes upward by the top node, until we reach the +top node or until we have no more nodes to go. + +The special characteristic of this structure is that we need to +label both the vertices and the edges. Since we already have a +structure that labels edges, we can simply extend that structure by +adding an array of labels and possibly a map from labels to nodes, to +make sure the labels of vertices are unique and can be queried +quickly. + +One thing to note is that we actually only need the ability to query +children by the labels, and do not need to query labels by the target. +We need experiments to see if this will be the bottleneck and hence +needs to be optimized out. |