summaryrefslogtreecommitdiff
path: root/chain/src/item
diff options
context:
space:
mode:
Diffstat (limited to 'chain/src/item')
-rw-r--r--chain/src/item/default/mod.rs257
-rw-r--r--chain/src/item/default/splone.rs7
-rw-r--r--chain/src/item/genins.rs288
-rw-r--r--chain/src/item/mod.rs25
-rw-r--r--chain/src/item/reduction.rs428
-rw-r--r--chain/src/item/thoughts-on-reducer.org121
6 files changed, 710 insertions, 416 deletions
diff --git a/chain/src/item/default/mod.rs b/chain/src/item/default/mod.rs
index 6d28956..47e8c90 100644
--- a/chain/src/item/default/mod.rs
+++ b/chain/src/item/default/mod.rs
@@ -250,7 +250,12 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> {
frag_stack.pop();
self_stack.pop();
- if fragment.vertex_label(frag_top)? != self.vertex_label(self_top)? {
+ // NOTE: The labels might differ in their statuses.  We
+ // test for the equality of the inner labels only.
+
+ if fragment.vertex_label(frag_top)?.map(|label| label.label())
+ != self.vertex_label(self_top)?.map(|label| label.label())
+ {
// not a prefix
// dbg!(
// frag_top,
@@ -589,11 +594,15 @@ impl<T: GraphLabel> DefaultForest<ForestLabel<T>> {
return Ok(node);
}
+ fragment.set_root(1)?;
+
let node_label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?;
if node_label.is_packed() || node_label.clone_index().is_some() {
let mut planted = false;
+ let mut to_plant: Option<usize> = None;
+
let mut node = node;
if node_label.clone_index().is_some() {
@@ -607,17 +616,25 @@ impl<T: GraphLabel> DefaultForest<ForestLabel<T>> {
for clone in self.children_of(node)? {
node = clone;
- if self.is_prefix(clone, &fragment)? {
- planted = true;
+ if !self.is_empty_node(clone)? {
+ let first_child = self.nth_child(clone, 0)?;
+
+ if self.is_prefix(first_child, &fragment)? {
+ planted = true;
- break;
+ break;
+ }
+ } else if to_plant.is_none() {
+ to_plant = Some(clone);
}
}
if !planted {
- let clone = self.clone_node(node, 0, false)?;
-
- fragment.set_root(1)?;
+ let clone = if let Some(cloned_node) = to_plant {
+ cloned_node
+ } else {
+ self.clone_node(node, 0, false)?
+ };
self.plant(clone, fragment, false)?;
@@ -626,51 +643,29 @@ impl<T: GraphLabel> DefaultForest<ForestLabel<T>> {
} else {
let clone_threshold = if self.root == Some(node) { 1 } else { 0 };
- let planted = self.is_prefix(node, &fragment)?;
+ let satisfy_threshold = self.degree(node)? > clone_threshold;
- fragment.set_root(1)?;
+ if !satisfy_threshold {
+ self.plant(node, fragment, false)?;
- if !planted && self.degree(node)? > clone_threshold {
+ return Ok(node);
+ }
+
+ let first_child = self.nth_child(node, clone_threshold)?;
+
+ let planted = self.is_prefix(first_child, &fragment)?;
+
+ if !planted {
let clone = self.clone_node(node, 0, false)?;
self.plant(clone, fragment, false)?;
return Ok(clone);
- } else if !planted {
- self.plant(node, fragment, false)?;
}
}
Ok(node)
}
-
- /// Extract the node from `pavi`.
- ///
- /// If pavi is a parent, this returns the child pointed to by the
- /// represented edge.
- ///
- /// If pavi is a virtual node, this simply returns the virtual
- /// node.
- ///
- /// If pavi is empty, this returns the root, unless the forest is
- /// empty.
- ///
- /// # Guarantee
- ///
- /// The returned node is guaranteed to be a valid node.
- pub(crate) fn node_from_pavi(&self, pavi: PaVi) -> Result<usize, Error> {
- match pavi {
- PaVi::Parent(node, edge) => Ok(self.nth_child(node, edge)?),
- PaVi::Virtual(_, _, node) => {
- if node >= self.nodes_len() {
- return Err(Error::IndexOutOfBounds(node, self.nodes_len()));
- }
-
- Ok(node)
- }
- PaVi::Empty => Ok(self.root().ok_or(Error::IndexOutOfBounds(0, 0))?),
- }
- }
}
pub mod splone;
@@ -850,188 +845,6 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
Ok(None)
}
- // /// Check if every child already has an end.
- // fn every_child_is_completed(&self, node_id: usize, atom: &DefaultAtom) -> Result<bool, Error> {
- // let children = self.children_of(node_id)?;
-
- // if children.len() == 0 {
- // return Ok(true);
- // }
-
- // let mut pos = self
- // .vertex_label(node_id)?
- // .ok_or(Error::NodeNoLabel(node_id))?
- // .label()
- // .start();
-
- // let mut last_child_label = None;
-
- // for child in children {
- // let child_label = self
- // .vertex_label(child)?
- // .ok_or(Error::NodeNoLabel(child))?
- // .label();
-
- // last_child_label = Some(child_label);
-
- // if child_label.start() != pos || child_label.end().is_none() {
- // return Ok(false);
- // }
-
- // pos = child_label.end().unwrap();
- // }
-
- // if let Some(label) = last_child_label {
- // if let Some(rule) = label.label().rule() {
- // if !atom
- // .is_accepting(2 * rule + 1)
- // .map_err(|_| Error::IndexOutOfBounds(2 * rule + 1, atom.nodes_len()))?
- // {
- // return Ok(false);
- // }
- // }
- // }
-
- // Ok(true)
- // }
-
- // /// Complete the forest by trying to set the ending position of
- // /// every node that does not have an end, by the ending position
- // /// of its last child.
- // pub fn complete(&mut self, atom: &DefaultAtom) -> Result<(), Error> {
- // let mut stack: Vec<_> = self
- // .nodes()
- // .filter(|node| {
- // let label = self.vertex_label(*node).unwrap().unwrap().label();
-
- // label.label().rule().is_some() && label.end().is_some()
- // })
- // .collect();
-
- // let mut second_stack: Vec<usize> = Vec::new();
-
- // let mut pending_candidates: Vec<usize> = Vec::new();
-
- // let nodes_len = self.nodes_len();
-
- // let mut seen_nodes: HashSet<usize> = HashSet::with_capacity(nodes_len);
-
- // while !stack.is_empty() {
- // while let Some(mut top) = stack.pop() {
- // if seen_nodes.contains(&top) {
- // continue;
- // }
-
- // seen_nodes.insert(top);
-
- // let top_label = self.vertex_label(top)?.unwrap();
-
- // if top_label.clone_index().is_some() {
- // let mut top_parents = self.parents_of(top)?;
-
- // if top_parents.len() != 1 {
- // return Err(Error::InvalidClone(top));
- // }
-
- // top = top_parents.next().unwrap().node();
- // }
-
- // let rule_parents = self.parents_of(top)?;
-
- // let candidates = {
- // let mut result = Vec::with_capacity(rule_parents.len());
-
- // for parent in rule_parents {
- // // for parent in self.parents_of(parent.node())? {
- // // if self.degree(parent.node())? <= parent.edge() + 1 {
- // result.push(parent);
- // // }
- // // }
- // }
-
- // result
- // };
-
- // for candidate in candidates {
- // if matches!(self.vertex_label(candidate.node())?, Some(label) if label.label().end().is_none())
- // {
- // if self.every_child_is_completed(candidate.node(), atom)? {
- // let last_child = self
- // .nth_child(candidate.node(), self.degree(candidate.node())? - 1)?;
- // let end = self
- // .vertex_label(last_child)?
- // .ok_or(Error::NodeNoLabel(last_child))?
- // .label()
- // .end();
-
- // let sploned_node = self.splone(
- // candidate.node(),
- // end,
- // self.degree(candidate.node())? - 1,
- // true,
- // )?;
-
- // second_stack.push(sploned_node);
- // } else {
- // pending_candidates.push(candidate.node());
- // }
- // } else {
- // second_stack.push(candidate.node());
- // }
- // }
-
- // let mut new_pending = Vec::with_capacity(pending_candidates.len());
-
- // while let Some(node) = pending_candidates.pop() {
- // if self.every_child_is_completed(node, atom)? {
- // let last_edge = self.degree(node)? - 1;
- // let last_child = self.nth_child(node, last_edge)?;
- // let end = self
- // .vertex_label(last_child)?
- // .ok_or(Error::NodeNoLabel(last_child))?
- // .label()
- // .end();
-
- // let sploned_node = self.splone(node, end, last_edge, true)?;
-
- // second_stack.push(sploned_node);
- // } else {
- // new_pending.push(node);
- // }
- // }
-
- // std::mem::swap(&mut pending_candidates, &mut new_pending);
- // }
-
- // std::mem::swap(&mut stack, &mut second_stack);
- // }
-
- // Ok(())
- // }
-
- // /// Unconditionally set the label of the node to be the new label.
- // ///
- // /// # Warning
- // ///
- // /// Use with caution: it does not do anything special, so it is
- // /// the responsibility of the caller to ensure this operation is
- // /// legal.
- // #[allow(dead_code)]
- // pub(crate) fn set_label(&mut self, node: usize, label: GrammarLabel) -> Result<(), Error> {
- // let status = self
- // .vertex_label(node)?
- // .ok_or(Error::NodeNoLabel(node))?
- // .status;
-
- // let label = ForestLabel::new(label, status);
-
- // let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph);
-
- // builder.set_label(node, label)?;
-
- // Ok(())
- // }
-
/// Set the position of every node.
///
/// If ALLP is non-nil or if the node is a terminal node, also set
diff --git a/chain/src/item/default/splone.rs b/chain/src/item/default/splone.rs
index d77686e..581d1dc 100644
--- a/chain/src/item/default/splone.rs
+++ b/chain/src/item/default/splone.rs
@@ -555,7 +555,12 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
let modified_degree = std::cmp::max(existing_degree, 1) - 1;
if existing_degree != 0
- && self.has_same_children(existing, node, modified_degree, edge_index)?
+ && self.has_same_children(
+ existing,
+ node,
+ modified_degree,
+ edge_index + 1,
+ )?
{
let last_child = self.nth_child(existing, existing_degree - 1)?;
diff --git a/chain/src/item/genins.rs b/chain/src/item/genins.rs
index a2bb22c..19f835b 100644
--- a/chain/src/item/genins.rs
+++ b/chain/src/item/genins.rs
@@ -9,16 +9,14 @@
use super::*;
use crate::{
atom::{Atom, DefaultAtom},
- default::{BoTop, Error, Reducer},
+ default::Error,
item::default::DefaultForest,
Edge,
};
use grammar::{Error as GrammarError, GrammarLabel, GrammarLabelType, TNT};
use graph::Graph;
-use core::borrow::Borrow;
-
-use std::collections::HashSet;
+use std::borrow::Borrow;
/// Convert an error telling us that an index is out of bounds.
///
@@ -179,8 +177,8 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
/// fragment under the result splone.
pub(crate) fn insert_item(
&mut self,
- reducer: &Reducer,
label: Edge,
+ ter: usize,
fragment: impl Borrow<DefaultForest<ForestLabel<GrammarLabel>>>,
atom_child_iter: impl Iterator<Item = usize> + ExactSizeIterator + Clone,
atom: &DefaultAtom,
@@ -200,7 +198,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
let fragment_root = if let Some(root) = fragment.root() {
root
} else {
- unreachable!("empty item");
+ panic!("empty item");
};
let fragment_root_label = fragment
@@ -209,7 +207,11 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
let pos = fragment_root_label.label().start();
- dbg!((pos, label));
+ // dbg!((pos, label));
+
+ // Whether or not to print detailed graphs of each step of
+ // operation for debugging purposes.
+ let to_print = false;
let tnt_string = {
let empty_p = atom_child_iter.len() == 0;
@@ -217,10 +219,10 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
match label.label().label() {
GrammarLabelType::TNT(TNT::Ter(t)) => {
- format!("t {t}")
+ format!("t {t}{}", if empty_p { " second" } else { "" })
}
GrammarLabelType::TNT(TNT::Non(n)) => {
- format!("n {n} {}", if empty_p { "second" } else { "first" })
+ format!("n {n}")
}
_ => "error".to_string(),
}
@@ -230,7 +232,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
let mut repetition = 0;
while std::fs::metadata(format!(
- "/Users/durand/Desktop/Centre/A propos de programmes/Rust/rep/chain/output/pos {pos} {tnt_string} {repetition}.gv"
+ "/Users/durand/Desktop/Centre/A propos de programmes/Rust/rep/chain/output/pos {pos} - {repetition}.gv"
))
.is_ok()
{
@@ -240,44 +242,32 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
repetition
};
- self.print_viz(&format!("pos {pos} {tnt_string} {num}.gv"))
- .unwrap();
-
- self.extra_reductions(
- BoTop::new(true_source, pavi),
- pos,
- reducer.borrow(),
- atom.borrow(),
- )?;
+ if to_print {
+ self.print_viz(&format!("pos {pos} - {num}.gv")).unwrap();
+ }
/// A cute little macro to produce compact representations
/// of Parents, Virtual nodes, or empty.
+ #[allow(unused_macros)]
macro_rules! pavi_to_short_str {
($pavi:ident) => {
match $pavi {
- PaVi::Parent(node, edge) => format!("{node} {edge}"),
- PaVi::Virtual(nt, t, node) => format!("{nt} {t} {node}"),
+ PaVi::Parent(node, edge, child) => format!("p{node} {edge} {child}"),
+ PaVi::Virtual(nt, t, node) => format!("v{nt} {t} {node}"),
PaVi::Empty => "ε".to_string(),
}
};
}
- self.print_viz(&format!(
- "pos {pos} {tnt_string} {num} stage 1 {}, {}.gv",
- pavi_to_short_str!(true_source),
- pavi_to_short_str!(pavi)
- ))
- .unwrap();
-
// Ensure the last node in the PaVi is a terminal or a
// non-terminal node, as an extra safety guard during
// development.
#[cfg(debug_assertions)]
{
match pavi {
- PaVi::Parent(node, edge) => {
+ PaVi::Parent(_node, _edge, child) => {
assert!(matches!(
- self.vertex_label(self.nth_child(node, edge)?),
+ self.vertex_label(child),
Ok(Some(label))
if label.label().label().tnt().is_some()));
}
@@ -312,11 +302,23 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
let is_empty_segment = pavi.is_empty();
+ if true_source.is_virtual() {
+ self.close_pavi(atom.borrow(), true_source, pos)?;
+
+ if to_print {
+ self.print_viz(&format!(
+ "pos {pos} - {num} {tnt_string} stage 0.1 {}.gv",
+ pavi_to_short_str!(true_source)
+ ))
+ .unwrap();
+ }
+ }
+
let mut parents: Vec<Parent> = {
let mut result = Vec::new();
match pavi {
- PaVi::Parent(node, edge) => {
+ PaVi::Parent(node, edge, _) => {
result.push(Parent::new(node, edge));
}
PaVi::Virtual(nt, t, node) => {
@@ -347,10 +349,12 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
self.plant_at_start(node, frag)?;
- self.print_viz(&format!(
- "pos {pos} {tnt_string} {num} stage 1.5 {node}.gv"
- ))
- .unwrap();
+ if to_print {
+ self.print_viz(&format!(
+ "pos {pos} - {num} {tnt_string} stage 0.2 {node}.gv"
+ ))
+ .unwrap();
+ }
let rule_label_pos = self
.query_label(last_but_one_label)
@@ -369,6 +373,24 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
result
};
+ if let PaVi::Parent(node, edge, _) = pavi {
+ let nth_child = self.nth_child(node, edge)?;
+
+ let reduced = self.reduction(nth_child, pos, ter, atom.borrow())?;
+
+ if reduced != nth_child && !self.is_empty_node(reduced)? {
+ parents.clear();
+ parents.extend(self.parents_of(reduced)?);
+ }
+
+ if to_print {
+ self.print_viz(&format!(
+ "pos {pos} - {num} {tnt_string} stage 0.3 {nth_child}.gv"
+ ))
+ .unwrap();
+ }
+ }
+
for parent in parents.iter() {
if !self.has_node(parent.node()) {
return Err(Error::IndexOutOfBounds(parent.node(), self.nodes_len()));
@@ -423,12 +445,14 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
let sploned_node =
self.splone(node.node(), Some(pos), node.edge(), false)?;
- self.print_viz(&format!(
- "pos {pos} {tnt_string} {num} stage 2 {} {}.gv",
- node.node(),
- node.edge(),
- ))
- .unwrap();
+ if to_print {
+ self.print_viz(&format!(
+ "pos {pos} - {num} {tnt_string} stage 1 {} {}.gv",
+ node.node(),
+ node.edge(),
+ ))
+ .unwrap();
+ }
node_label = self
.vertex_label(sploned_node)?
@@ -513,12 +537,16 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
for parent in stack {
let splanted = self.splant(parent.node(), parent.edge(), fragment, non_empty)?;
- self.print_viz(&format!(
- "pos {pos} {tnt_string} {num} stage 3 {} {} {splanted}.gv",
- parent.node(),
- parent.edge(),
- ))
- .unwrap();
+ let _splanted_child = self.nth_child(splanted, self.degree(splanted)? - 1)?;
+
+ if to_print {
+ self.print_viz(&format!(
+ "pos {pos} - {num} {tnt_string} stage 2 {} {} {splanted}.gv",
+ parent.node(),
+ parent.edge(),
+ ))
+ .unwrap();
+ }
non_empty = true;
}
@@ -541,25 +569,28 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
.query_label(root_label)
.expect("root label was not found");
- let edge = {
- let mut result = None;
+ let edge: usize;
+ let child: usize;
- for (index, child) in self.children_of(node)?.enumerate() {
- if matches!(self.vertex_label(child)?, Some(child_label) if child_label == leaf_label)
- {
- result = Some(index);
- break;
- }
- }
+ let mut result = None;
- if let Some(result) = result {
- result
- } else {
- unreachable!("the forest is wrongly planted");
+ for (index, child) in self.children_of(node)?.enumerate() {
+ if matches!(self.vertex_label(child)?, Some(child_label) if child_label == leaf_label)
+ {
+ result = Some((index, child));
+ break;
}
- };
+ }
+
+ if let Some((index, edge_child)) = result {
+ edge = index;
+ child = edge_child;
+ } else {
+ unreachable!("the forest is wrongly planted");
+ }
+
// dbg!(node, edge, root_label, leaf_label);
- PaVi::Parent(node, edge)
+ PaVi::Parent(node, edge, child)
} else {
assert_eq!(
fragment.nodes_len(),
@@ -612,131 +643,19 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
Ok(result)
}
- // REVIEW: Is this function really correct?
-
- /// Perform extra reductions.
- ///
- /// To be precise, this function first splones the bottom node,
- /// and then queries the reducer to find the next nodes to splone,
- /// and repeats this process until either we find the top node, or
- /// the reducer says to stop.
- ///
- /// One nuance to note is that after we splone a node, if we split
- /// that node, then the splitted node will have a cloned parent,
- /// see
- /// [`split_node`][DefaultForest::<ForestLabel<GrammarLabel>>::split_node]
- /// for details. So for each parent pointed out by the reducer,
- /// we need to find its clone that has the splitted node as a
- /// child.
- fn extra_reductions(
- &mut self,
- botop: BoTop,
- pos: usize,
- reducer: &Reducer,
- atom: &DefaultAtom,
- ) -> Result<Vec<usize>, Error> {
- if botop.bottom() == botop.top() {
- return Ok(vec![self.node_from_pavi(botop.top())?]);
- }
-
- let top = botop.top();
- let bottom = botop.bottom();
-
- let bottom_closed = self.close_pavi(atom.borrow(), bottom, pos)?;
-
- let top_node = self.node_from_pavi(top)?;
- let bottom_node = self.node_from_pavi(bottom)?;
-
- let mut stack = vec![bottom_node];
-
- // Exclude duplicate nodes to ensure that no infinite
- // recursion occurs. In the future I shall experiment if this
- // is necessary, and get rid of this if possible.
- let mut seen_nodes: HashSet<usize> = Default::default();
-
- let mut result = Vec::new();
-
- while let Some(bottom) = stack.pop() {
- if seen_nodes.contains(&bottom) {
- continue;
- }
-
- seen_nodes.insert(bottom);
-
- if let Some(set) = reducer.get(&(top_node, bottom)) {
- // First splone the bottom, except for the bottom one.
-
- let mut sploned = if bottom == bottom_node {
- bottom_closed
- } else {
- let degree = self.degree(bottom)?;
- let last_index = core::cmp::max(degree, 1) - 1;
-
- self.splone(bottom, Some(pos), last_index, false)?
- };
-
- // Then add parents whose labels are what we want into
- // the stack.
-
- let mut label_set: HashSet<GrammarLabel> = HashSet::with_capacity(set.len());
-
- for node in set.iter().copied() {
- label_set.insert(
- self.vertex_label(node)?
- .ok_or(Error::NodeNoLabel(node))?
- .label(),
- );
- }
-
- let sploned_label = self
- .vertex_label(sploned)?
- .ok_or(Error::NodeNoLabel(sploned))?;
-
- if sploned_label.clone_index().is_some() {
- let mut parents = self.parents_of(sploned)?;
-
- #[cfg(debug_assertions)]
- if parents.len() != 1 {
- panic!("assumption fails");
- }
-
- sploned = parents.next().unwrap().node();
- }
-
- for rule_parent in self.parents_of(sploned)? {
- if self.degree(rule_parent.node())? != 1 {
- panic!("assumption fails");
- }
-
- for parent in self.parents_of(rule_parent.node())? {
- let parent_node = parent.node();
-
- let parent_label = self
- .vertex_label(parent_node)?
- .ok_or(Error::NodeNoLabel(parent_node))?
- .label();
-
- if label_set.contains(&parent_label) {
- stack.push(parent_node);
- }
- }
- }
- } else {
- result.push(bottom);
- }
- }
-
- Ok(result)
- }
-
/// Set the end position of the node associated with `pavi` to be `pos`.
///
/// The parameter `atom` is used to query the reduction fragment
/// if `pavi` is a virtual node.
- fn close_pavi(&mut self, atom: &DefaultAtom, pavi: PaVi, pos: usize) -> Result<usize, Error> {
+ pub(crate) fn close_pavi(
+ &mut self,
+ atom: &DefaultAtom,
+ pavi: PaVi,
+ pos: usize,
+ ) -> Result<usize, Error> {
match pavi {
- PaVi::Parent(node, edge) => {
- let nth_child = self.nth_child(node, edge)?;
+ PaVi::Parent(_node, _edge, child) => {
+ let nth_child = child;
let nth_child_label = self
.vertex_label(nth_child)?
.ok_or(Error::NodeNoLabel(nth_child))?;
@@ -781,6 +700,11 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
let reduction_fragment = atom.generate_virtual_frags(nt, t, None);
+ // NOTE: the case of the root is exceptional
+ if reduction_fragment.is_none() && self.root() != Some(node) {
+ return Err(Error::CannotClose(nt, t, node, node_label_start));
+ }
+
for frag in reduction_fragment.into_iter().flatten() {
let mut frag = frag.clone();
frag.set_pos(node_label_start, true)?;
diff --git a/chain/src/item/mod.rs b/chain/src/item/mod.rs
index 5efa710..54ca946 100644
--- a/chain/src/item/mod.rs
+++ b/chain/src/item/mod.rs
@@ -32,8 +32,9 @@ use core::borrow::Borrow;
/// Also this might be empty if it represents the root node.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Default)]
pub enum PaVi {
- /// An edge from a node, as the n-th child.
- Parent(usize, usize),
+ /// An edge from a node, as the n-th child, along with the
+ /// nth-child node number.
+ Parent(usize, usize, usize),
/// A virtual segment from a non-terminal by a terminal, rooted at
/// a node.
///
@@ -50,16 +51,12 @@ pub enum PaVi {
Empty,
}
-impl From<Parent> for PaVi {
- fn from(value: Parent) -> Self {
- Self::Parent(value.node(), value.edge())
- }
-}
-
-impl core::fmt::Display for PaVi {
+impl std::fmt::Display for PaVi {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
- Self::Parent(node, edge) => write!(f, "the {edge}-th edge from {node}"),
+ Self::Parent(node, edge, child) => {
+ write!(f, "the {edge}-th edge from {node} to {child}")
+ }
Self::Virtual(nt, t, node) => write!(
f,
"a virtual node for non-terminal {nt} and terminal {t} at node {node}"
@@ -72,13 +69,17 @@ impl core::fmt::Display for PaVi {
impl PaVi {
/// Get the Parent variant.
fn parent(self) -> Option<Parent> {
- if let Self::Parent(node, edge) = self {
+ if let Self::Parent(node, edge, _) = self {
Some(Parent::new(node, edge))
} else {
None
}
}
+ fn is_virtual(self) -> bool {
+ matches!(self, Self::Virtual(_, _, _))
+ }
+
/// Is this an empty segment?
fn is_empty(self) -> bool {
self == Self::Empty
@@ -290,3 +291,5 @@ pub mod default;
pub mod genins;
pub use genins::generate_fragment;
+
+pub mod reduction;
diff --git a/chain/src/item/reduction.rs b/chain/src/item/reduction.rs
new file mode 100644
index 0000000..89306e6
--- /dev/null
+++ b/chain/src/item/reduction.rs
@@ -0,0 +1,428 @@
+//! This module implements the operation of reductions on the forest.
+//!
+//! # What are reductions
+//!
+//! To explain this in detail, we first investigate the
+//! nullable-closure process, then discuss what reduction is and what
+//! it means in the chain-rule machine, and then explain the problem.
+//!
+//! ## Nullable-closure process
+//!
+//! In the process of running the chain-rule machine, we will collapse
+//! edges, in the sense that, if there is an edge from node a to node
+//! b and another from node b to node c, and if the edge from node a
+//! to node b is "nullable", that is if the edge corresponds to a
+//! position in the atomic language that is deemed nullable by the
+//! atomic language, then we also make an edge from node a to node c.
+//!
+//! The purpose of this process of forming the nullable closure is to
+//! ensure that the chain-rule machine can save the time to traverse
+//! the entire machine to find the nullable closure later on. But a
+//! side-effect of this process is that reductions are not explicitly
+//! marked.
+//!
+//! Note that we must perform this nullable closure process in order
+//! that the time complexity of the chain-rule machine lies within the
+//! cubic bound.
+//!
+//! ## Three types of actions
+//!
+//! We can imagine a "traditional parser generator" as a stack
+//! machine: there are three types of actions associated with it,
+//! depending on the current state and the current input token. The
+//! first is expansion: it means that we are expanding from a
+//! non-terminal, by some rule. The second is a normal push: we just
+//! continue parsing according to the current rule. The final one is
+//! reduction: it means the current expansion from a non-terminal has
+//! terminated and we go back to the previous level of expansion.
+//!
+//! ## Relation to the chain-rule machine
+//!
+//! For our chain-rule machine, expansion means to create a new node
+//! pointing at the current node, forming a path that is one edge longer.
+//! A normal push means to create a new node that points to the target
+//! of an edge going out from the current node, which was not created
+//! by the process of forming nullable closures. And the reduction
+//! means to create a new node that points to the target of an edge
+//! going out from the current node, which *was* created by the
+//! process of forming nullable closures.
+//!
+//! # Problem
+//!
+//! As can be seen from the previous paragraph, the distinction
+//! between a normal push and a reduction in the chain-rule machine is
+//! simply whether or not the original edge was created by the process
+//! of forming nullable closures. For the chain-rule machine, this
+//! does not matter: it can function well. For the formation of the
+//! derivation forest, however, this is not so well: we cannot
+//! read-off immediately whether or not to perform reductions from the
+//! chain-rule machine.
+//!
+//! # Solutions
+//!
+//! I tried some solutions to solve this problem.
+//!
+//! ## Complete at the end
+//!
+//! The first idea I tried is to find the nodes that were not closed
+//! properly and fill in the needed reductions, at the end of the
+//! parse.  This idea did not work out well, as we have already lost a
+//! lot of information by the end of the parse, so it becomes quite
+//! difficult to know on which nodes we need to perform reductions.
+//!
+//! ## Record precise information
+//!
+//! The next idea is to record the nodes that were skipped by the
+//! nullable-closure process, and then when we encounter a skipped
+//! segment, we perform the reductions there. This idea sounds
+//! feasible at first, but it turns out that we cannot track the nodes
+//! properly. That is, when running the chain-rule machine, we will
+//! split and clone nodes from time to time. A consequence is that
+//! the node numbers that were recorded earlier will no longer be
+//! correct afterwards.  This means we cannot know exactly which
+//! reductions to perform later.
+//!
+//! ## Guided brute-force
+//!
+//! Now I am trying out the third idea. The motivation is simple: if
+//! we cannot properly track nodes, then we track no nodes. Then, in
+//! order to perform reductions, we do not distinguish between
+//! necessary reductions and unnecessary reductions, and perform all
+//! possible reductions. In other words, when we are about to plant a
+//! new node in the forest, we first check the last child of the
+//! to-be-parent. If it is properly closed, we do nothing.
+//! Otherwise, we recursively descend into its children(s) to find all
+//! last children, and perform all possible valid reductions.
+
+use super::*;
+use crate::{
+ atom::{Atom, DefaultAtom},
+ default::Error,
+ item::default::DefaultForest,
+};
+use grammar::{GrammarLabel, TNT};
+use graph::Graph;
+
+use std::collections::HashMap as Map;
+
+impl DefaultForest<ForestLabel<GrammarLabel>> {
+ /// Perform reduction at the last descendants of `node`.
+ ///
+ /// # Parameters
+ ///
+ /// The parameter `pos` is the next starting position. It is used
+ /// to find the descendants that need reductions: only those nodes
+ /// which have descendants with the correct ending positions will
+ /// receive reductions.
+ ///
+ /// The parameter `atom` is used to know which rule numbers are
+ /// deemed accepting.  Only accepting rule numbers can receive
+ /// reductions: this is the definition of being accepting.
+ ///
+ /// The parameter `ter` is used to fill in segments for virtual
+ /// nodes if they are found along the way.
+ ///
+ /// # Errors
+ ///
+ /// 1. Index out of bounds: some node number is corrupted.
+ /// 2. Node has no label: some node label is lost.
+ pub(crate) fn reduction(
+ &mut self,
+ node: usize,
+ pos: usize,
+ ter: usize,
+ atom: &DefaultAtom,
+ ) -> Result<usize, Error> {
+ let mut result = node;
+
+ // step 1: Determine if this needs reductions.
+
+ if self.root() == Some(node) {
+ return Ok(result);
+ }
+
+ // REVIEW: Do we need to check the end matches the position?
+
+ let mut node_label = self
+ .vertex_label(node)?
+ .ok_or(Error::NodeNoLabel(node))?
+ .label();
+
+ if node_label.end().is_some() {
+ return Ok(result);
+ }
+
+ // step 2: Find descendents that need reductions.
+
+ let mut correct_ends: Map<usize, bool> = Default::default();
+
+ let mut order_of_correct_ends: Vec<usize> = Vec::new();
+
+ let mut stack: Vec<usize> = vec![node];
+
+ let mut file = std::fs::OpenOptions::new().append(true).open("debug.log");
+
+ use std::{borrow::BorrowMut, io::Write};
+
+ // Whether or not to write a debug file.
+ let to_write = false;
+
+ if to_write {
+ if let Ok(ref mut file) = file.borrow_mut() {
+ file.write_all(format!("beginning, pos = {pos}, node = {node}\n").as_bytes())
+ .unwrap();
+ }
+ }
+
+ 'stack_loop: while let Some(top) = stack.pop() {
+ if to_write {
+ if let Ok(ref mut file) = file.borrow_mut() {
+ file.write_all(format!("top: {top}\n").as_bytes()).unwrap();
+ }
+ }
+
+ if correct_ends.get(&top).is_some() {
+ continue 'stack_loop;
+ }
+
+ let top_label = self.vertex_label(top)?.ok_or(Error::NodeNoLabel(top))?;
+
+ if let Some(end) = top_label.label().end() {
+ correct_ends.insert(top, end == pos);
+
+ continue 'stack_loop;
+ }
+
+ if let Some(rule) = top_label.label().label().rule() {
+ // A rule node is not considered directly: it should
+ // be affected by its child implicitly.
+ //
+ // We only consider a rule node if it is deemed
+ // accepting by the atom.
+
+ if to_write {
+ if let Ok(ref mut file) = file.borrow_mut() {
+ file.write_all(format!("{}: {rule}, {stack:?}\n", line!()).as_bytes())
+ .unwrap();
+ }
+ }
+
+ if atom
+ .is_accepting(rule * 2 + 1)
+ .map_err(|_| Error::IndexOutOfBounds(2 * rule + 1, atom.accepting_len()))?
+ {
+ let mut has_unexplored_children = false;
+ let mut inserted = false;
+
+ 'child_loop: for child in self.children_of(top)? {
+ match correct_ends.get(&child).copied() {
+ Some(true) => {
+ correct_ends.insert(top, true);
+
+ inserted = true;
+ }
+ None => {
+ has_unexplored_children = true;
+
+ break 'child_loop;
+ }
+ _ => {}
+ }
+ }
+
+ if has_unexplored_children {
+ stack.push(top);
+ stack.extend(self.children_of(top)?);
+ } else if !inserted {
+ correct_ends.insert(top, false);
+ }
+ } else {
+ correct_ends.insert(top, false);
+ }
+
+ continue 'stack_loop;
+ }
+
+ if top_label.is_packed() {
+ let mut has_unexplored_children = false;
+
+ if to_write {
+ if let Ok(ref mut file) = file.borrow_mut() {
+ file.write_all(format!("{}: packed, {stack:?}\n", line!()).as_bytes())
+ .unwrap();
+ }
+ }
+
+ for child in self.children_of(top)? {
+ match correct_ends.get(&child).copied() {
+ Some(true) => {
+ // NOTE: This is not recorded in the
+ // correct orders, as we do not handle
+ // packed nodes directly.
+
+ correct_ends.insert(top, true);
+ }
+ None => {
+ has_unexplored_children = true;
+ }
+ _ => {}
+ }
+ }
+
+ if to_write {
+ if let Ok(ref mut file) = file.borrow_mut() {
+ file.write_all(
+ format!("{}: packed, {has_unexplored_children}\n", line!()).as_bytes(),
+ )
+ .unwrap();
+ }
+ }
+
+ if has_unexplored_children {
+ stack.push(top);
+ stack.extend(self.children_of(top)?);
+ } else if correct_ends.get(&top).is_none() {
+ correct_ends.insert(top, false);
+ }
+
+ continue 'stack_loop;
+ }
+
+ let degree = self.degree(top)?;
+
+ if to_write {
+ if let Ok(ref mut file) = file.borrow_mut() {
+ file.write_all(format!("{}: degree = {degree}\n", line!()).as_bytes())
+ .unwrap();
+ }
+ }
+
+ let last_index = if degree != 0 {
+ degree - 1
+ } else {
+ // a leaf is supposed to be a terminal node and hence
+ // should have an ending position
+
+ let end = match top_label.label().end() {
+ None => match top_label.label().label().tnt() {
+ Some(TNT::Ter(_)) => {
+ panic!("a terminal node {top} has no ending position?");
+ }
+ Some(TNT::Non(nt)) => {
+ correct_ends.insert(top, true);
+
+ self.close_pavi(atom.borrow(), PaVi::Virtual(nt, ter, top), pos)?;
+
+ continue 'stack_loop;
+ }
+ None => {
+ unreachable!("we already handled the rule case above");
+ }
+ },
+ Some(end) => end,
+ };
+
+ correct_ends.insert(top, end == pos);
+
+ if end == pos {
+ order_of_correct_ends.push(top);
+ }
+
+ continue 'stack_loop;
+ };
+
+ if to_write {
+ if let Ok(ref mut file) = file.borrow_mut() {
+ file.write_all(format!("{}: last = {last_index}\n", line!()).as_bytes())
+ .unwrap();
+ }
+ }
+
+ let last_child = self.nth_child(top, last_index)?;
+
+ if let Some(child_correctness_value) = correct_ends.get(&last_child).copied() {
+ correct_ends.insert(top, child_correctness_value);
+
+ if child_correctness_value {
+ order_of_correct_ends.push(top);
+ }
+ } else {
+ stack.extend([top, last_child]);
+ }
+ }
+
+ if to_write {
+ if let Ok(ref mut file) = file.borrow_mut() {
+ file.write_all(
+ format!("{}: orders = {order_of_correct_ends:?}\n", line!()).as_bytes(),
+ )
+ .unwrap();
+ }
+ }
+
+ // step 3: perform reductions by `splone`.
+ //
+ // NOTE: We must fix the order from top to bottom: this is the
+ // reverse order of `order_of_correct_ends` .
+
+ for node in order_of_correct_ends.into_iter().rev() {
+ let label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?;
+ let degree = self.degree(node)?;
+
+ if !matches!(label.label().label().tnt(), Some(TNT::Non(_))) {
+ continue;
+ }
+
+ #[cfg(debug_assertions)]
+ {
+ let label_label_label = label.label().label();
+
+ assert!(label_label_label.rule().is_none());
+
+ assert!(matches!(label_label_label.tnt(), Some(TNT::Non(_))));
+
+ assert!(label.label().end().is_none());
+
+ assert_ne!(degree, 0);
+ }
+
+ let last_index = degree - 1;
+
+ self.splone(node, Some(pos), last_index, false)?;
+ }
+
+ node_label.set_end(pos);
+
+ if let Some(packed) =
+ self.query_label(ForestLabel::new(node_label, ForestLabelType::Packed))
+ {
+ result = packed;
+
+ if to_write {
+ if let Ok(ref mut file) = file.borrow_mut() {
+ file.write_all(format!("{}: packed = {packed}\n", line!()).as_bytes())
+ .unwrap();
+ }
+ }
+ } else if let Some(plain) =
+ self.query_label(ForestLabel::new(node_label, ForestLabelType::Plain))
+ {
+ result = plain;
+
+ if to_write {
+ if let Ok(ref mut file) = file.borrow_mut() {
+ file.write_all(format!("{}: plain = {plain}\n", line!()).as_bytes())
+ .unwrap();
+ }
+ }
+ }
+
+ if to_write {
+ if let Ok(ref mut file) = file.borrow_mut() {
+ file.write_all(&[101, 110, 100, 10]).unwrap();
+ }
+ }
+
+ Ok(result)
+ }
+}
diff --git a/chain/src/item/thoughts-on-reducer.org b/chain/src/item/thoughts-on-reducer.org
new file mode 100644
index 0000000..39a799a
--- /dev/null
+++ b/chain/src/item/thoughts-on-reducer.org
@@ -0,0 +1,121 @@
+#+TITLE: Thoughts on the reducer
+#+AUTHOR: Durand
+#+DATE: <2023-06-05 Lun 22:08>
+
+This module implements a data type for storing reduction
+information.
+
+* Questions
+
+What is reduction information, and why is it necessary to store it?
+
+* Nullable-closure process
+
+In the process of running the chain-rule machine, we will collapse
+edges, in the sense that, if there is an edge from node a to node b
+and another from node b to node c, and if the edge from node a to node
+b is "nullable", that is if the edge corresponds to a position in the
+atomic language that is deemed nullable by the atomic language, then
+we also make an edge from node a to node c.
+
+The purpose of this process of forming the nullable closure is to
+ensure that the chain-rule machine can save the time to traverse the
+entire machine to find the nullable closure later on. But a
+side-effect of this process is that reductions are not explicitly
+marked.
+
+To explain this in detail, we first investigate what reduction is and
+what it means in the chain-rule machine, and then explain the problem.
+
+* Three types of actions
+
+We can imagine a "traditional parser generator" as a stack machine:
+there are three types of actions associated with it, depending on the
+current state and the current input token. The first is expansion: it
+means that we are expanding from a non-terminal, by some rule. The
+second is a normal push: we just continue parsing according to the
+current rule. The final one is reduction: it means the current
+expansion from a non-terminal has terminated and we go back to the
+previous level of expansion.
+
+* Relation to the chain-rule machine
+
+For our chain-rule machine, expansion means to create a new node
+pointing at the current node, forming a path of length increased by
+one. A normal push means to create a new node that points to the
+target of an edge going out from the current node, which was not
+created by the process of forming nullable closures. And the
+reduction means to create a new node that points to the target of an
+edge going out from the current node, which *was* created by the
+process of forming nullable closures.
+
+* Problem
+
+As can be seen from the previous paragraph, the distinction between a
+normal push and a reduction in the chain-rule machine is simply
+whether or not the original edge was created by the process of forming
+nullable closures. For the chain-rule machine, this does not matter:
+it can function well. For the formation of the derivation forest,
+however, this does not work so well: we cannot immediately read off whether
+or not to perform reductions from the chain-rule machine.
+
+* Solution
+
+Since we cannot read off the reduction information directly from the
+chain-rule machine, we have to store that information somehow.
+
+** Digression: past experiences
+
+When developing this part, I did not have a clear picture of the
+situation in mind: I was experimenting with the ways to construct the
+derivation forest from the chain-rule machine, as the paper describing
+the algorithm does not construct the derivation forest directly: it
+constructs an alternative format. A consequence is that I spent quite
+some time in figuring out how to construct derivation forests
+correctly.
+
+During the experiments, I tried various ideas: including to "fill-in"
+the reductions after we have parsed the entire input. This seemed
+ostensibly a good idea, as I seem to be able to "read-off" the
+reductions from the resulting partially complete derivation forests.
+
+As it turned out, this was actually a bad idea. In fact, I can now
+prove that this will not work by the presence of a specific grammar
+that will cause this approach to fail definitely. This led me to
+believe that the only way is to store the needed reduction information
+in order to fill in this gap.
+
+** Storing reduction information
+
+Now we want to store the reduction information, so we need to be clear
+about what we are storing.
+
+In the derivation forest, a reduction happens when we reach the
+right-most descendant of a subtree, which marks the end of an
+expansion from a non-terminal. Moreover, our derivation forests
+usually contain half-open nodes, which means that they are not
+completed yet and can keep growing, until a reduction happens when we
+"close" the half-open node. Thus, what we really need is the list of
+nodes to close.
+
+This information is readily available when we form nullable closures:
+the skipped nodes are the nodes we need to close later on. To be more
+exact, when we skip nodes, we have two nodes: the top node and the
+bottom node, and we want to store the middle node that is skipped.
+
+This naturally led to the structure of a nondeterministic finite
+automaton: when we want to close nodes, we start from the bottom-most
+node, and query the nodes upward by the top node, until we reach the
+top node or until we have no more nodes to go.
+
+The special characteristic about this structure is that we need to
+label both the vertices and the edges. Since we already have a
+structure that labels edges, we can simply extend that structure by
+adding an array of labels and possibly a map from labels to nodes, to
+make sure the labels of vertices are unique and can be queried
+quickly.
+
+One thing to note is that we actually only need the ability to query
+children by the labels, and do not need to query labels by the target.
+We need experiments to see if this will be the bottleneck and hence
+need to be optimized out.