-rw-r--r--  chain/src/archive.txt                  495
-rw-r--r--  chain/src/atom/default.rs                7
-rw-r--r--  chain/src/atom/mod.rs                    3
-rw-r--r--  chain/src/default.rs                   353
-rw-r--r--  chain/src/item/default/mod.rs          257
-rw-r--r--  chain/src/item/default/splone.rs         7
-rw-r--r--  chain/src/item/genins.rs               288
-rw-r--r--  chain/src/item/mod.rs                   25
-rw-r--r--  chain/src/item/reduction.rs            428
-rw-r--r--  chain/src/item/thoughts-on-reducer.org 121
-rw-r--r--  chain/src/reducer.rs                   191
-rw-r--r--  grammar/src/abnf.rs                      4
-rw-r--r--  nfa/src/default/nfa.rs                  32
13 files changed, 1574 insertions, 637 deletions
diff --git a/chain/src/archive.txt b/chain/src/archive.txt
new file mode 100644
index 0000000..030ccba
--- /dev/null
+++ b/chain/src/archive.txt
@@ -0,0 +1,495 @@
+// Temporary archive
+
+// let mut seen_nodes: HashSet<usize> = HashSet::with_capacity(stack.len());
+//
+// // dbg!(&stack);
+//
+// // self.forest.print_viz("dbg forest.gv").unwrap();
+//
+// while let Some(mut top) = stack.pop() {
+// if seen_nodes.contains(&top) {
+// continue;
+// }
+//
+// seen_nodes.insert(top);
+//
+// let mut top_label = self
+// .forest
+// .vertex_label(top)?
+// .ok_or(Error::NodeNoLabel(top))?;
+//
+// if !top_label.is_packed()
+// && matches!(top_label.label().label().tnt(), Some(TNT::Non(_)))
+// && top_label.label().end().is_none()
+// {
+// let degree = self.forest.degree(top)?;
+// let last_index = if degree > 0 { degree - 1 } else { 0 };
+//
+// let pos = if degree > 0 {
+// let last_child = self.forest.nth_child(top, last_index)?;
+// let last_child_label = self
+// .forest
+// .vertex_label(last_child)?
+// .ok_or(Error::NodeNoLabel(last_child))?
+// .label();
+// let last_child_end = last_child_label.end();
+//
+// if !matches!(last_child_label.label().rule(),
+// Some(rule) if
+// self
+// .atom
+// .is_accepting(2*rule+1)
+// .map_err(index_out_of_bounds_conversion)?)
+// {
+// continue;
+// }
+//
+// if let Some(pos) = last_child_end {
+// pos
+// } else {
+// stack.extend(self.forest.children_of(last_child)?);
+// continue;
+// }
+// } else {
+// top_label.label().start() + 1
+// };
+//
+// top = self.forest.splone(top, Some(pos), last_index, true)?;
+// top_label = self
+// .forest
+// .vertex_label(top)?
+// .ok_or(Error::NodeNoLabel(top))?;
+// } else if top_label.is_packed()
+// || top_label.label().label().rule().is_some() && top_label.label().end().is_none()
+// {
+// stack.extend(self.forest.children_of(top)?);
+// }
+//
+// if top_label.clone_index().is_some() {
+// let mut parents = self.forest.parents_of(top)?;
+// if parents.len() != 1 {
+// dbg!(top, top_label, parents.len());
+// self.forest.print_viz("dbg forest.gv").unwrap();
+// panic!("0 parent?");
+// }
+//
+// top = parents.next().unwrap().node();
+// }
+//
+// stack.extend(self.forest.parents_of(top)?.map(|parent| parent.node()));
+// }
+
+
+// extra reduction archive function
+
+// /// Perform extra reductions.
+// ///
+// /// To be precise, this function first splones the bottom node,
+// /// and then queries the reducer to find the next nodes to splone,
+// /// and repeats this process until either we find the top node, or
+// /// the reducer says to stop.
+// ///
+// /// One nuance to note is that after we splone a node, if we split
+// /// that node, then the splitted node will have a cloned parent,
+// /// see
+// /// [`split_node`][DefaultForest::<ForestLabel<GrammarLabel>>::split_node]
+// /// for details. So for each parent pointed out by the reducer,
+// /// we need to find its clone that has the splitted node as a
+// /// child.
+// #[allow(dead_code)]
+// fn extra_reductions(
+// &mut self,
+// botop: BoTop,
+// pos: usize,
+// reducer: &Reducer,
+// atom: &DefaultAtom,
+// ) -> Result<Vec<usize>, Error> {
+// if botop.bottom() == botop.top() {
+// return Ok(vec![self.node_from_pavi(botop.top())?]);
+// }
+//
+// let top = botop.top();
+// let bottom = botop.bottom();
+//
+// let bottom_closed = self.close_pavi(atom.borrow(), bottom, pos)?;
+//
+// let top_node = self.node_from_pavi(top)?;
+// let bottom_node = self.node_from_pavi(bottom)?;
+//
+// let mut stack = vec![(bottom_closed, bottom_node)];
+//
+// // Exclude duplicate nodes to ensure that no infinite
+// // recursion occurs. In the future I shall experiment if this
+// // is necessary, and get rid of this if possible.
+// // let mut seen_nodes: HashSet<usize> = Default::default();
+//
+// let mut result = Vec::new();
+//
+// // NOTE: We must query the reducer by the original nodes,
+// // otherwise the reducer has no chance to know about the new
+// // nodes that were created by a previous iteration here. At
+// // the same time, we must splone a clone of the queried result
+// // which is the parent of the previously sploned node, so that
+// // we are on the right track.
+// //
+// // A summary is that we record both the original node and the
+// // sploned node in the stack, so that we can query by the
+// // original node, and then find the correct clone of the
+// // queried result by means of the sploned node.
+//
+// let mut seen_nodes = HashSet::new();
+//
+// while let Some((bottom_sploned, bottom_original)) = stack.pop() {
+// if seen_nodes.contains(&bottom_original) {
+// continue;
+// }
+//
+// seen_nodes.insert(bottom_original);
+//
+// if pos == 4 {
+// println!("stack popped: {bottom_sploned}, {bottom_original}");
+// }
+//
+// let query_result = reducer.query(bottom_original, top_node);
+//
+// let mut sploned = bottom_sploned;
+//
+// let mut label_set: HashSet<GrammarLabel> = HashSet::with_capacity(query_result.len());
+//
+// if query_result.len() != 0 {
+// if pos == 4 {
+// println!("reducer:");
+// }
+//
+// for node in query_result {
+// if pos == 4 {
+// println!("{node}");
+// }
+//
+// label_set.insert(
+// self.vertex_label(node)?
+// .ok_or(Error::NodeNoLabel(node))?
+// .label(),
+// );
+// }
+//
+// let sploned_label = self
+// .vertex_label(sploned)?
+// .ok_or(Error::NodeNoLabel(sploned))?;
+//
+// if sploned_label.clone_index().is_some() {
+// let mut parents = self.parents_of(sploned)?;
+//
+// #[cfg(debug_assertions)]
+// if parents.len() != 1 {
+// panic!("assumption fails");
+// }
+//
+// sploned = parents.next().unwrap().node();
+// }
+//
+// // NOTE: We cannot borrow self mutably and immutably
+// // at the same time, so we have to collect the nodes
+// // first.
+// let mut to_splone = Vec::with_capacity(label_set.len());
+//
+// for rule_parent in self.parents_of(sploned)? {
+// if self.degree(rule_parent.node())? != 1 {
+// panic!("assumption fails");
+// }
+//
+// for parent in self.parents_of(rule_parent.node())? {
+// let parent_node = parent.node();
+//
+// let parent_label = self
+// .vertex_label(parent_node)?
+// .ok_or(Error::NodeNoLabel(parent_node))?
+// .label();
+//
+// if label_set.contains(&parent_label) {
+// to_splone.push(parent_node);
+// }
+// }
+// }
+//
+// for parent_node in to_splone {
+// let degree = self.degree(parent_node)?;
+// let last_index = std::cmp::max(degree, 1) - 1;
+// let parent_sploned = self.splone(parent_node, Some(pos), last_index, false)?;
+//
+// stack.push((parent_sploned, parent_node));
+// }
+// } else {
+// result.push(bottom_sploned);
+// }
+// }
+//
+// Ok(result)
+// }
+
+// BoTop is now unused
+
+// // TODO: Add a hashmap mapping tuples of forest positions (top,
+// // bottom) to vectors of tuples of unsigned integers.  Each tuple
+// // represents a non-terminal and a rule.  The tuple means that we need
+// // to close the expansion of the non-terminal by the rule position, if
+// // we want to move from bottom to top.
+// //
+// // After we move up one layer, we are now in a new forest position, so
+// // we just query again with the new bottom and top.  This means we do
+// // not need to clear the map.
+//
+// // REVIEW: Why is the above needed?
+//
+// /// This represents a tuple of bottom and top forest positions.
+// ///
+// /// It is supposed to be used as keys to query the reduction
+// /// information.  See the type [`Reducer`] for more details.
+// #[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Default)]
+// pub(crate) struct BoTop {
+//     bottom: PaVi,
+//     top: PaVi,
+// }
+//
+// impl BoTop {
+// #[allow(dead_code)]
+// pub(crate) fn new(bottom: PaVi, top: PaVi) -> Self {
+// Self { bottom, top }
+// }
+//
+// pub(crate) fn top(&self) -> PaVi {
+// self.top
+// }
+//
+// pub(crate) fn bottom(&self) -> PaVi {
+// self.bottom
+// }
+// }
+
+
+// FIXME: Fix this outdated documentation.
+// This hashmap records information for performing extra reductions
+// when we insert items.
+
+// When we want to *jump* from the bottom to the top positions in the
+// forest, we need to perform a set of reductions, in order to jump
+// one layer upwards. After one step, we query again for the next
+// step of reductions to perform.
+
+// # Reduction format
+
+// The value records a set of tuples of the form `(non-terminal,
+// starting_position)`. Such a tuple means we want to reduce from
+// the expansion of the given `non-terminal`, which expands from the
+// given starting position. We need to restrict the
+// pub(crate) type Reducer = Map<usize, HashSet<usize>>;
+
+// /// Check if every child already has an end.
+// fn every_child_is_completed(&self, node_id: usize, atom: &DefaultAtom) -> Result<bool, Error> {
+// let children = self.children_of(node_id)?;
+
+// if children.len() == 0 {
+// return Ok(true);
+// }
+
+// let mut pos = self
+// .vertex_label(node_id)?
+// .ok_or(Error::NodeNoLabel(node_id))?
+// .label()
+// .start();
+
+// let mut last_child_label = None;
+
+// for child in children {
+// let child_label = self
+// .vertex_label(child)?
+// .ok_or(Error::NodeNoLabel(child))?
+// .label();
+
+// last_child_label = Some(child_label);
+
+// if child_label.start() != pos || child_label.end().is_none() {
+// return Ok(false);
+// }
+
+// pos = child_label.end().unwrap();
+// }
+
+// if let Some(label) = last_child_label {
+// if let Some(rule) = label.label().rule() {
+// if !atom
+// .is_accepting(2 * rule + 1)
+// .map_err(|_| Error::IndexOutOfBounds(2 * rule + 1, atom.nodes_len()))?
+// {
+// return Ok(false);
+// }
+// }
+// }
+
+// Ok(true)
+// }
+
+// /// Complete the forest by trying to set the ending position of
+// /// every node that does not have an end, by the ending position
+// /// of its last child.
+// pub fn complete(&mut self, atom: &DefaultAtom) -> Result<(), Error> {
+// let mut stack: Vec<_> = self
+// .nodes()
+// .filter(|node| {
+// let label = self.vertex_label(*node).unwrap().unwrap().label();
+
+// label.label().rule().is_some() && label.end().is_some()
+// })
+// .collect();
+
+// let mut second_stack: Vec<usize> = Vec::new();
+
+// let mut pending_candidates: Vec<usize> = Vec::new();
+
+// let nodes_len = self.nodes_len();
+
+// let mut seen_nodes: HashSet<usize> = HashSet::with_capacity(nodes_len);
+
+// while !stack.is_empty() {
+// while let Some(mut top) = stack.pop() {
+// if seen_nodes.contains(&top) {
+// continue;
+// }
+
+// seen_nodes.insert(top);
+
+// let top_label = self.vertex_label(top)?.unwrap();
+
+// if top_label.clone_index().is_some() {
+// let mut top_parents = self.parents_of(top)?;
+
+// if top_parents.len() != 1 {
+// return Err(Error::InvalidClone(top));
+// }
+
+// top = top_parents.next().unwrap().node();
+// }
+
+// let rule_parents = self.parents_of(top)?;
+
+// let candidates = {
+// let mut result = Vec::with_capacity(rule_parents.len());
+
+// for parent in rule_parents {
+// // for parent in self.parents_of(parent.node())? {
+// // if self.degree(parent.node())? <= parent.edge() + 1 {
+// result.push(parent);
+// // }
+// // }
+// }
+
+// result
+// };
+
+// for candidate in candidates {
+// if matches!(self.vertex_label(candidate.node())?, Some(label) if label.label().end().is_none())
+// {
+// if self.every_child_is_completed(candidate.node(), atom)? {
+// let last_child = self
+// .nth_child(candidate.node(), self.degree(candidate.node())? - 1)?;
+// let end = self
+// .vertex_label(last_child)?
+// .ok_or(Error::NodeNoLabel(last_child))?
+// .label()
+// .end();
+
+// let sploned_node = self.splone(
+// candidate.node(),
+// end,
+// self.degree(candidate.node())? - 1,
+// true,
+// )?;
+
+// second_stack.push(sploned_node);
+// } else {
+// pending_candidates.push(candidate.node());
+// }
+// } else {
+// second_stack.push(candidate.node());
+// }
+// }
+
+// let mut new_pending = Vec::with_capacity(pending_candidates.len());
+
+// while let Some(node) = pending_candidates.pop() {
+// if self.every_child_is_completed(node, atom)? {
+// let last_edge = self.degree(node)? - 1;
+// let last_child = self.nth_child(node, last_edge)?;
+// let end = self
+// .vertex_label(last_child)?
+// .ok_or(Error::NodeNoLabel(last_child))?
+// .label()
+// .end();
+
+// let sploned_node = self.splone(node, end, last_edge, true)?;
+
+// second_stack.push(sploned_node);
+// } else {
+// new_pending.push(node);
+// }
+// }
+
+// std::mem::swap(&mut pending_candidates, &mut new_pending);
+// }
+
+// std::mem::swap(&mut stack, &mut second_stack);
+// }
+
+// Ok(())
+// }
+
+// /// Unconditionally set the label of the node to be the new label.
+// ///
+// /// # Warning
+// ///
+// /// Use with caution: it does not do anything special, so it is
+// /// the responsibility of the caller to ensure this operation is
+// /// legal.
+// #[allow(dead_code)]
+// pub(crate) fn set_label(&mut self, node: usize, label: GrammarLabel) -> Result<(), Error> {
+// let status = self
+// .vertex_label(node)?
+// .ok_or(Error::NodeNoLabel(node))?
+// .status;
+
+// let label = ForestLabel::new(label, status);
+
+// let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph);
+
+// builder.set_label(node, label)?;
+
+// Ok(())
+// }
+
+// /// Extract the node from `pavi`.
+// ///
+// /// If pavi is a parent, this returns the child pointed to by the
+// /// represented edge.
+// ///
+// /// If pavi is a virtual node, this simply returns the virtual
+// /// node.
+// ///
+// /// If pavi is empty, this returns the root, unless the forest is
+// /// empty.
+// ///
+// /// # Guarantee
+// ///
+// /// The returned node is guaranteed to be a valid node.
+// pub(crate) fn node_from_pavi(&self, pavi: PaVi) -> Result<usize, Error> {
+// match pavi {
+// PaVi::Parent(_node, _edge, child) => Ok(child),
+// PaVi::Virtual(_, _, node) => {
+// if node >= self.nodes_len() {
+// return Err(Error::IndexOutOfBounds(node, self.nodes_len()));
+// }
+//
+// Ok(node)
+// }
+// PaVi::Empty => Ok(self.root().ok_or(Error::IndexOutOfBounds(0, 0))?),
+// }
+// }
diff --git a/chain/src/atom/default.rs b/chain/src/atom/default.rs
index 6883907..317090e 100644
--- a/chain/src/atom/default.rs
+++ b/chain/src/atom/default.rs
@@ -507,6 +507,9 @@ impl DefaultAtom {
DError::InvalidClone(_) => {
unreachable!("we are not cloning")
}
+ DError::CannotClose(_, _, _, _) => {
+ unreachable!("we are not closing virtual nodes")
+ }
DError::Invalid => {
panic!("a label is wrongly planted?")
}
@@ -713,4 +716,8 @@ impl Atom for DefaultAtom {
.get(&trace)
.map(|set| set.iter().copied())
}
+
+ fn accepting_len(&self) -> usize {
+ self.accepting_vec.len()
+ }
}
diff --git a/chain/src/atom/mod.rs b/chain/src/atom/mod.rs
index c9dadb2..4b3bfce 100644
--- a/chain/src/atom/mod.rs
+++ b/chain/src/atom/mod.rs
@@ -32,6 +32,9 @@ pub trait Atom: Nfa<LabelType<TNT>> + Deref<Target = Grammar> {
/// Tell whether a node is accepting.
fn is_accepting(&self, node_id: usize) -> Result<bool, GrammarError>;
+
+ /// Return the bound for `is_accepting`.
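+    ///
+    /// That is, every node index strictly below this bound is a valid
+    /// argument to [`is_accepting`][Atom::is_accepting].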
+ fn accepting_len(&self) -> usize;
}
pub mod default;
diff --git a/chain/src/default.rs b/chain/src/default.rs
index e61df41..ed63ff4 100644
--- a/chain/src/default.rs
+++ b/chain/src/default.rs
@@ -38,6 +38,10 @@ pub enum Error {
CannotReserve(TryReserveError),
/// Cannot create label for cloning nodes.
CannotClone(ForestLabelError),
+ /// Cannot close the virtual node.
+ ///
+ /// The format is (nt, t, node, pos).
+ CannotClose(usize, usize, usize, usize),
/// Cannot find a suitable node to plant the new forest fragment.
CannotPlant,
/// Cannot split a packed node.
@@ -60,6 +64,12 @@ impl Display for Error {
Self::NodeNoLabel(n) => write!(f, "node {n} has no labels while it should have one"),
Self::CannotReserve(e) => write!(f, "cannot reserve memory: {e}"),
Self::CannotClone(fe) => write!(f, "cannot clone due to {fe}"),
+ Self::CannotClose(nt, t, node, pos) => write!(
+ f,
+ "cannot close the virtual node {node} \
+ with the expansion from nt {nt} by t \
+ {t} at pos {pos}"
+ ),
Self::CannotPlant => write!(f, "cannot plant a new forest fragment"),
Self::SplitPack(n) => write!(f, "cannot split the packed node {n}"),
Self::InvalidClone(n) => {
@@ -196,58 +206,6 @@ impl Iterator for DerIter {
}
}
-// TODO: Add a hashmap mapping tuples of forest positions (top,
-// bottom) to vectors of tuples of unsigned integers. Each tuple
-// represents a non-terminal and a rule. The tuple means that we need
-// to close the expansion of the non-terminal by the rule position, if
-// we want to move from bottom to top.
-//
-// After we move up one layer, we are now in a new forest position, so
-// we just query again with the new bottom and top. This means we do
-// not need to clear the map.
-
-// REVIEW: Why is the above needed?
-
-/// This represents a tuple of bottom and top forest positions.
-///
-/// It is supposed to be used as keys to query the reduction
-/// information. See the type [`Reducer`] for more details.
-#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Default)]
-pub(crate) struct BoTop {
- bottom: PaVi,
- top: PaVi,
-}
-
-impl BoTop {
- pub(crate) fn new(bottom: PaVi, top: PaVi) -> Self {
- Self { bottom, top }
- }
-
- pub(crate) fn top(&self) -> PaVi {
- self.top
- }
-
- pub(crate) fn bottom(&self) -> PaVi {
- self.bottom
- }
-}
-
-/// This hashmap records information for performing extra reductions
-/// when we insert items.
-///
-/// When we want to *jump* from the bottom to the top positions in the
-/// forest, we need to perform a set of reductions, in order to jump
-/// one layer upwards. After one step, we query again for the next
-/// step of reductions to perform.
-///
-/// # Reduction format
-///
-/// The value records a set of tuples of the form `(non-terminal,
-/// starting_position)`. Such a tuple means we want to reduce from
-/// the expansion of the given `non-terminal`, which expands from the
-/// given starting position. We need to restrict the
-pub(crate) type Reducer = Map<(usize, usize), HashSet<usize>>;
-
/// A default implementation for the [`Chain`] trait.
#[derive(Debug, Clone, Default, Graph)]
pub struct DefaultChain {
@@ -259,7 +217,6 @@ pub struct DefaultChain {
forest: DefaultForest<ForestLabel<GrammarLabel>>,
accepting_vec: Vec<bool>,
accepting_sources: Vec<PaVi>,
- reducer: Reducer,
}
impl DefaultChain {
@@ -431,8 +388,6 @@ impl Chain for DefaultChain {
let accepting_sources = Vec::new();
- let reducer = Default::default();
-
Ok(Self {
graph,
atom,
@@ -441,7 +396,6 @@ impl Chain for DefaultChain {
forest,
accepting_vec,
accepting_sources,
- reducer,
})
}
@@ -486,19 +440,6 @@ impl Chain for DefaultChain {
type DerIter = DerIter;
- // EXPERIMENT: Try the approach of keeping an additional vector of
- // vectors of unsigned integers. Each element of the vector
- // corresponds to an edge of the current node. Each element is a
- // vector. This vector represents the list of reductions that are
- // implied due to skipping edges without children.
- //
- // Then when we insert an item, we can use this list to perform
- // additional reductions. This can avoid the ugly and inefficient
- // position_search method currently adopted.
- //
- // Of course we can use an optional vector to prevent allocating
- // too much memory for edges whose corresponding vector is empty.
-
fn derive(
&mut self,
t: usize,
@@ -536,12 +477,11 @@ impl Chain for DefaultChain {
input: GeneratorInput<'_>,
mut child_iter: impl Iterator<Item = usize> + ExactSizeIterator + Clone,
atom_child_iter: impl Iterator<Item = usize> + Clone,
- reducer: &mut Reducer,
mut output: impl AsMut<Vec<(Roi, usize)>>,
) -> Result<bool, Error> {
let mut accepting = false;
- let (graph, atom, forest, accepting_vec, pavi, true_pavi) = input;
+ let (graph, atom, _forest, accepting_vec, pavi, true_pavi) = input;
// First check the values from iterators are all valid.
let graph_len = graph.nodes_len();
@@ -606,19 +546,8 @@ impl Chain for DefaultChain {
for child in child_iter.clone() {
for (child_label, child_child) in graph.labels_of(child).unwrap() {
let mut new_label = *child_label;
- new_label.set_true_source(true_pavi);
-
- let botop = BoTop::new(true_pavi, new_label.forest_source());
- let key = (
- forest.node_from_pavi(botop.top())?,
- forest.node_from_pavi(botop.bottom)?,
- );
-
- reducer
- .entry(key)
- .or_insert_with(Default::default)
- .insert(forest.node_from_pavi(new_label.true_source())?);
+ new_label.set_true_source(true_pavi);
output.extend(
std::iter::repeat(new_label.into())
@@ -658,8 +587,8 @@ impl Chain for DefaultChain {
generate_fragment([atom_moved.into(), Ter(ter).into()], pos)?;
self.forest.insert_item(
- &self.reducer,
*label,
+ t,
fragment,
atom_child_iter.clone(),
&self.atom,
@@ -681,7 +610,6 @@ impl Chain for DefaultChain {
generator_input,
child_iter.clone(),
atom_child_iter.clone(),
- &mut self.reducer,
&mut der_iter.singles,
)?;
@@ -717,8 +645,8 @@ impl Chain for DefaultChain {
generate_fragment([atom_moved.into(), Non(non).into()], pos)?;
first_segment_pavi = self.forest.insert_item(
- &self.reducer,
*label,
+ t,
first_fragment,
atom_child_iter.clone(),
&self.atom,
@@ -732,8 +660,8 @@ impl Chain for DefaultChain {
// iterator, in which case the passed
// label is only used for the PaVi.
virtual_pavi = self.forest.insert_item(
- &self.reducer,
Edge::new(0, first_segment_pavi, accepting),
+ t,
virtual_fragment,
std::iter::empty(),
&self.atom,
@@ -758,7 +686,6 @@ impl Chain for DefaultChain {
generator_input,
child_iter.clone(),
atom_child_iter.clone(),
- &mut self.reducer,
&mut new_edges,
)?;
@@ -773,27 +700,8 @@ impl Chain for DefaultChain {
match roi {
Roi::Re(edge) => {
let mut edge = *edge;
- edge.set_true_source(virtual_pavi);
-
- // FIXME: Modify reducer after first experiments.
-
- // let botop = BoTop::new(virtual_pavi, edge.forest_source());
- // let rule_pos = atom_child.div_euclid(2);
-
- // let rule_nt = atom
- // .get_rule_num(rule_pos)
- // .map_err(index_out_of_bounds_conversion)?;
-
- // let rule_pos = rule_pos
- // - atom
- // .nth_accumulator(rule_nt)
- // .map_err(index_out_of_bounds_conversion)?;
-
- // reducer
- // .entry(botop)
- // .or_insert_with(Default::default)
- // .insert((rule_nt, rule_pos));
+ edge.set_true_source(virtual_pavi);
der_iter.singles.push((Roi::Re(edge), *target));
}
@@ -831,23 +739,32 @@ impl Chain for DefaultChain {
// this has been checked in
// `generate_edges`
if self.atom.is_empty_node(atom_child).unwrap() {
- // REVIEW: flat flat map,
- // hmm...
-
- // FIXME: Set true_source as well.
-
- // FIXME: Modify two layers of reducer.
- der_iter.singles.extend(child_iter.clone().flat_map(
- |child| {
- self.graph.labels_of(child).unwrap().flat_map(
- |(child_label, child_child_iter)| {
- child_child_iter.map(|child_child| {
- ((*child_label).into(), child_child)
- })
- },
- )
- },
- ));
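+                                // First count the number of new
+                                // entries, so that we can reserve
+                                // the capacity once instead of
+                                // growing the vector repeatedly.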
+ let mut extra_len = 0usize;
+
+ for child in child_iter.clone() {
+ extra_len += self
+ .graph
+ .labels_of(child)
+ .unwrap()
+ .map(|(_, child_child_iter)| child_child_iter.len())
+ .sum::<usize>();
+ }
+
+ der_iter.singles.try_reserve(extra_len)?;
+
+ for child in child_iter.clone() {
+ for (child_label, child_child_iter) in
+ self.graph.labels_of(child).unwrap()
+ {
+ let mut child_label = *child_label;
+
+ child_label.set_true_source(virtual_pavi);
+
+ der_iter.singles.extend(child_child_iter.map(
+ |child_child| (child_label.into(), child_child),
+ ));
+ }
+ }
}
}
}
@@ -866,8 +783,21 @@ impl Chain for DefaultChain {
Ok(der_iter)
}
- // FIXME: Refactor this.
+    // FIXME: This should do nothing more than handle the end of the
+    // input, actually.
+ //
+ // This means we only close the expansions at the last accepting
+ // sources.
+ //
+    // Any fragments that remain open after that should be considered
+    // bugs and fixed in the function `insert_item` instead.
+
fn end_of_input(&mut self) -> Result<(), Self::Error> {
+ if !matches!(self.epsilon(), Ok(true)) {
+ return Ok(());
+ }
+
self.forest.remove_node(1)?;
let mut stack = Vec::new();
@@ -878,7 +808,7 @@ impl Chain for DefaultChain {
for pavi in self.accepting_sources.iter() {
match pavi {
- PaVi::Parent(node, edge) => {
+ PaVi::Parent(node, _edge, child) => {
// REVIEW: This is a garbage node that has to be
// added when the machine starts. Is there a way
// to avoid this garbage?
@@ -886,9 +816,7 @@ impl Chain for DefaultChain {
continue;
}
- let nth_child = self.forest.nth_child(*node, *edge)?;
-
- stack.push(nth_child);
+ stack.push(*child);
}
PaVi::Virtual(nt, t, node) => {
let node_label = self
@@ -911,83 +839,87 @@ impl Chain for DefaultChain {
}
}
- let mut seen_nodes: HashSet<usize> = HashSet::with_capacity(stack.len());
-
- // dbg!(&stack);
+ let forest_nodes_len = self.forest.nodes_len();
- // self.forest.print_viz("dbg forest.gv").unwrap();
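+        // `node_ends[n]` collects the known ending positions of node
+        // `n`; an empty entry means the node is not closed yet.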
+ let mut node_ends: Vec<Vec<usize>> = std::iter::repeat_with(Default::default)
+ .take(forest_nodes_len)
+ .collect();
- while let Some(mut top) = stack.pop() {
- if seen_nodes.contains(&top) {
- continue;
+        // NOTE: Use an iterator to avoid bounds checks when
+        // accessing `node_ends`.
+ for (node, node_end) in self.forest.nodes().zip(node_ends.iter_mut()) {
+ if let Some(end) = self
+ .forest
+ .vertex_label(node)?
+ .ok_or(Error::NodeNoLabel(node))?
+ .label()
+ .end()
+ {
+ node_end.push(end);
}
+ }
- seen_nodes.insert(top);
+ #[derive(Copy, Clone)]
+ enum StackElement {
+ Seen(usize),
+ Unseen(usize),
+ }
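+
+        // An `Unseen` node is visited for the first time: it gets
+        // pushed back as `Seen`, followed by its children, so that
+        // when we pop a `Seen` node, all of its children have
+        // already been processed.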
- let mut top_label = self
- .forest
- .vertex_label(top)?
- .ok_or(Error::NodeNoLabel(top))?;
+ use StackElement::{Seen, Unseen};
- if !top_label.is_packed()
- && matches!(top_label.label().label().tnt(), Some(TNT::Non(_)))
- && top_label.label().end().is_none()
- {
- let degree = self.forest.degree(top)?;
- let last_index = if degree > 0 { degree - 1 } else { 0 };
+        // REVIEW: Maybe we do not need this much space?
+ let mut stack = Vec::with_capacity(forest_nodes_len);
- let pos = if degree > 0 {
- let last_child = self.forest.nth_child(top, last_index)?;
- let last_child_label = self
- .forest
- .vertex_label(last_child)?
- .ok_or(Error::NodeNoLabel(last_child))?
- .label();
- let last_child_end = last_child_label.end();
-
- if !matches!(last_child_label.label().rule(),
- Some(rule) if
- self
- .atom
- .is_accepting(2*rule+1)
- .map_err(index_out_of_bounds_conversion)?)
- {
+ stack.push(Unseen(
+ self.forest.root().ok_or(Error::IndexOutOfBounds(0, 0))?,
+ ));
+
+ let mut seen_nodes = HashSet::with_capacity(forest_nodes_len);
+
+ // depth-first search
+ while let Some(top) = stack.pop() {
+ match top {
+ Seen(top) => {
+                    // Each of its children should have been
+                    // processed already.
+
+ if !node_ends.get(top).unwrap().is_empty() {
continue;
}
- if let Some(pos) = last_child_end {
- pos
- } else {
- stack.extend(self.forest.children_of(last_child)?);
+ let last_index = self.forest.degree(top).unwrap() - 1;
+ let last_child = self.forest.nth_child(top, last_index)?;
+
+ *node_ends.get_mut(top).unwrap() = node_ends
+ .get(last_child)
+ .ok_or(Error::IndexOutOfBounds(last_child, forest_nodes_len))?
+ .clone();
+ }
+ Unseen(top) => {
+ if seen_nodes.contains(&top) {
continue;
}
- } else {
- top_label.label().start() + 1
- };
+ seen_nodes.insert(top);
- top = self.forest.splone(top, Some(pos), last_index, true)?;
- top_label = self
- .forest
- .vertex_label(top)?
- .ok_or(Error::NodeNoLabel(top))?;
- } else if top_label.is_packed()
- || top_label.label().label().rule().is_some() && top_label.label().end().is_none()
- {
- stack.extend(self.forest.children_of(top)?);
- }
+ // NOTE: After the above check we can safely unwrap.
- if top_label.clone_index().is_some() {
- let mut parents = self.forest.parents_of(top)?;
- if parents.len() != 1 {
- dbg!(top, top_label, parents.len());
- self.forest.print_viz("dbg forest.gv").unwrap();
- panic!("0 parent?");
- }
+ let mut top_no_children = true;
- top = parents.next().unwrap().node();
- }
+ for child in self.forest.children_of(top).unwrap() {
+ if top_no_children {
+ stack.push(Seen(top));
+ }
+
+ top_no_children = false;
+
+ stack.push(Unseen(child));
+ }
- stack.extend(self.forest.parents_of(top)?.map(|parent| parent.node()));
+ if !top_no_children {
+ continue;
+ }
+ }
+ }
}
Ok(())
@@ -1120,6 +1052,11 @@ mod test_chain {
if !no_item {
chain.end_of_input()?;
+ chain.forest.print_viz("forest.gv")?;
+
+ chain.graph.print_viz("chain.gv")?;
+ chain.atom.print_nfa("nfa.gv")?;
+ item::default::print_labels(&chain.atom, &chain.forest)?;
}
for label in chain.labels_of(chain.current())?.map(|(label, _)| label) {
@@ -1143,6 +1080,10 @@ mod test_chain {
#[test]
fn test_ambiguity() -> Result<(), Box<dyn std::error::Error>> {
+ if std::fs::metadata("debug.log").is_ok() {
+ std::fs::remove_file("debug.log")?;
+ }
+
let mut no_item = false;
for arg in std::env::args() {
@@ -1158,21 +1099,37 @@ mod test_chain {
let mut chain = DefaultChain::unit(atom)?;
chain.chain(0, 0, no_item)?;
- chain.forest.print_viz("forest0.gv")?;
+
+ if !no_item {
+ chain.forest.print_viz("forest0.gv")?;
+ }
+
chain.chain(2, 1, no_item)?;
- chain.forest.print_viz("forest1.gv")?;
+
+ if !no_item {
+ chain.forest.print_viz("forest1.gv")?;
+ }
+
chain.chain(2, 2, no_item)?;
- chain.forest.print_viz("forest2.gv")?;
+
+ if !no_item {
+ chain.forest.print_viz("forest2.gv")?;
+ }
+
chain.chain(2, 3, no_item)?;
- chain.forest.print_viz("forest3.gv")?;
+
+ if !no_item {
+ chain.forest.print_viz("forest3.gv")?;
+ }
+
chain.chain(1, 4, no_item)?;
- chain.forest.print_viz("forest4.gv")?;
if !no_item {
- chain.end_of_input()?;
+ // chain.end_of_input()?;
chain.forest.print_viz("forest.gv")?;
- chain.forest.print_closed_viz("closed.gv")?;
+ // chain.forest.print_closed_viz("closed.gv")?;
}
+
chain.graph.print_viz("chain.gv")?;
chain.atom.print_nfa("nfa.gv")?;
item::default::print_labels(&chain.atom, &chain.forest)?;
diff --git a/chain/src/item/default/mod.rs b/chain/src/item/default/mod.rs
index 6d28956..47e8c90 100644
--- a/chain/src/item/default/mod.rs
+++ b/chain/src/item/default/mod.rs
@@ -250,7 +250,12 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> {
frag_stack.pop();
self_stack.pop();
- if fragment.vertex_label(frag_top)? != self.vertex_label(self_top)? {
+        // NOTE: The labels might differ in status.  We test for
+        // equality of the inner labels only.
+
+ if fragment.vertex_label(frag_top)?.map(|label| label.label())
+ != self.vertex_label(self_top)?.map(|label| label.label())
+ {
// not a prefix
// dbg!(
// frag_top,
@@ -589,11 +594,15 @@ impl<T: GraphLabel> DefaultForest<ForestLabel<T>> {
return Ok(node);
}
+ fragment.set_root(1)?;
+
let node_label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?;
if node_label.is_packed() || node_label.clone_index().is_some() {
let mut planted = false;
+ let mut to_plant: Option<usize> = None;
+
let mut node = node;
if node_label.clone_index().is_some() {
@@ -607,17 +616,25 @@ impl<T: GraphLabel> DefaultForest<ForestLabel<T>> {
for clone in self.children_of(node)? {
node = clone;
- if self.is_prefix(clone, &fragment)? {
- planted = true;
+ if !self.is_empty_node(clone)? {
+ let first_child = self.nth_child(clone, 0)?;
+
+ if self.is_prefix(first_child, &fragment)? {
+ planted = true;
- break;
+ break;
+ }
+ } else if to_plant.is_none() {
+ to_plant = Some(clone);
}
}
if !planted {
- let clone = self.clone_node(node, 0, false)?;
-
- fragment.set_root(1)?;
+ let clone = if let Some(cloned_node) = to_plant {
+ cloned_node
+ } else {
+ self.clone_node(node, 0, false)?
+ };
self.plant(clone, fragment, false)?;
@@ -626,51 +643,29 @@ impl<T: GraphLabel> DefaultForest<ForestLabel<T>> {
} else {
let clone_threshold = if self.root == Some(node) { 1 } else { 0 };
- let planted = self.is_prefix(node, &fragment)?;
+ let satisfy_threshold = self.degree(node)? > clone_threshold;
- fragment.set_root(1)?;
+ if !satisfy_threshold {
+ self.plant(node, fragment, false)?;
- if !planted && self.degree(node)? > clone_threshold {
+ return Ok(node);
+ }
+
+ let first_child = self.nth_child(node, clone_threshold)?;
+
+ let planted = self.is_prefix(first_child, &fragment)?;
+
+ if !planted {
let clone = self.clone_node(node, 0, false)?;
self.plant(clone, fragment, false)?;
return Ok(clone);
- } else if !planted {
- self.plant(node, fragment, false)?;
}
}
Ok(node)
}
-
- /// Extract the node from `pavi`.
- ///
- /// If pavi is a parent, this returns the child pointed to by the
- /// represented edge.
- ///
- /// If pavi is a virtual node, this simply returns the virtual
- /// node.
- ///
- /// If pavi is empty, this returns the root, unless the forest is
- /// empty.
- ///
- /// # Guarantee
- ///
- /// The returned node is guaranteed to be a valid node.
- pub(crate) fn node_from_pavi(&self, pavi: PaVi) -> Result<usize, Error> {
- match pavi {
- PaVi::Parent(node, edge) => Ok(self.nth_child(node, edge)?),
- PaVi::Virtual(_, _, node) => {
- if node >= self.nodes_len() {
- return Err(Error::IndexOutOfBounds(node, self.nodes_len()));
- }
-
- Ok(node)
- }
- PaVi::Empty => Ok(self.root().ok_or(Error::IndexOutOfBounds(0, 0))?),
- }
- }
}
pub mod splone;
@@ -850,188 +845,6 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
Ok(None)
}
- // /// Check if every child already has an end.
- // fn every_child_is_completed(&self, node_id: usize, atom: &DefaultAtom) -> Result<bool, Error> {
- // let children = self.children_of(node_id)?;
-
- // if children.len() == 0 {
- // return Ok(true);
- // }
-
- // let mut pos = self
- // .vertex_label(node_id)?
- // .ok_or(Error::NodeNoLabel(node_id))?
- // .label()
- // .start();
-
- // let mut last_child_label = None;
-
- // for child in children {
- // let child_label = self
- // .vertex_label(child)?
- // .ok_or(Error::NodeNoLabel(child))?
- // .label();
-
- // last_child_label = Some(child_label);
-
- // if child_label.start() != pos || child_label.end().is_none() {
- // return Ok(false);
- // }
-
- // pos = child_label.end().unwrap();
- // }
-
- // if let Some(label) = last_child_label {
- // if let Some(rule) = label.label().rule() {
- // if !atom
- // .is_accepting(2 * rule + 1)
- // .map_err(|_| Error::IndexOutOfBounds(2 * rule + 1, atom.nodes_len()))?
- // {
- // return Ok(false);
- // }
- // }
- // }
-
- // Ok(true)
- // }
-
- // /// Complete the forest by trying to set the ending position of
- // /// every node that does not have an end, by the ending position
- // /// of its last child.
- // pub fn complete(&mut self, atom: &DefaultAtom) -> Result<(), Error> {
- // let mut stack: Vec<_> = self
- // .nodes()
- // .filter(|node| {
- // let label = self.vertex_label(*node).unwrap().unwrap().label();
-
- // label.label().rule().is_some() && label.end().is_some()
- // })
- // .collect();
-
- // let mut second_stack: Vec<usize> = Vec::new();
-
- // let mut pending_candidates: Vec<usize> = Vec::new();
-
- // let nodes_len = self.nodes_len();
-
- // let mut seen_nodes: HashSet<usize> = HashSet::with_capacity(nodes_len);
-
- // while !stack.is_empty() {
- // while let Some(mut top) = stack.pop() {
- // if seen_nodes.contains(&top) {
- // continue;
- // }
-
- // seen_nodes.insert(top);
-
- // let top_label = self.vertex_label(top)?.unwrap();
-
- // if top_label.clone_index().is_some() {
- // let mut top_parents = self.parents_of(top)?;
-
- // if top_parents.len() != 1 {
- // return Err(Error::InvalidClone(top));
- // }
-
- // top = top_parents.next().unwrap().node();
- // }
-
- // let rule_parents = self.parents_of(top)?;
-
- // let candidates = {
- // let mut result = Vec::with_capacity(rule_parents.len());
-
- // for parent in rule_parents {
- // // for parent in self.parents_of(parent.node())? {
- // // if self.degree(parent.node())? <= parent.edge() + 1 {
- // result.push(parent);
- // // }
- // // }
- // }
-
- // result
- // };
-
- // for candidate in candidates {
- // if matches!(self.vertex_label(candidate.node())?, Some(label) if label.label().end().is_none())
- // {
- // if self.every_child_is_completed(candidate.node(), atom)? {
- // let last_child = self
- // .nth_child(candidate.node(), self.degree(candidate.node())? - 1)?;
- // let end = self
- // .vertex_label(last_child)?
- // .ok_or(Error::NodeNoLabel(last_child))?
- // .label()
- // .end();
-
- // let sploned_node = self.splone(
- // candidate.node(),
- // end,
- // self.degree(candidate.node())? - 1,
- // true,
- // )?;
-
- // second_stack.push(sploned_node);
- // } else {
- // pending_candidates.push(candidate.node());
- // }
- // } else {
- // second_stack.push(candidate.node());
- // }
- // }
-
- // let mut new_pending = Vec::with_capacity(pending_candidates.len());
-
- // while let Some(node) = pending_candidates.pop() {
- // if self.every_child_is_completed(node, atom)? {
- // let last_edge = self.degree(node)? - 1;
- // let last_child = self.nth_child(node, last_edge)?;
- // let end = self
- // .vertex_label(last_child)?
- // .ok_or(Error::NodeNoLabel(last_child))?
- // .label()
- // .end();
-
- // let sploned_node = self.splone(node, end, last_edge, true)?;
-
- // second_stack.push(sploned_node);
- // } else {
- // new_pending.push(node);
- // }
- // }
-
- // std::mem::swap(&mut pending_candidates, &mut new_pending);
- // }
-
- // std::mem::swap(&mut stack, &mut second_stack);
- // }
-
- // Ok(())
- // }
-
- // /// Unconditionally set the label of the node to be the new label.
- // ///
- // /// # Warning
- // ///
- // /// Use with caution: it does not do anything special, so it is
- // /// the responsibility of the caller to ensure this operation is
- // /// legal.
- // #[allow(dead_code)]
- // pub(crate) fn set_label(&mut self, node: usize, label: GrammarLabel) -> Result<(), Error> {
- // let status = self
- // .vertex_label(node)?
- // .ok_or(Error::NodeNoLabel(node))?
- // .status;
-
- // let label = ForestLabel::new(label, status);
-
- // let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph);
-
- // builder.set_label(node, label)?;
-
- // Ok(())
- // }
-
/// Set the position of every node.
///
/// If ALLP is non-nil or if the node is a terminal node, also set
diff --git a/chain/src/item/default/splone.rs b/chain/src/item/default/splone.rs
index d77686e..581d1dc 100644
--- a/chain/src/item/default/splone.rs
+++ b/chain/src/item/default/splone.rs
@@ -555,7 +555,12 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
let modified_degree = std::cmp::max(existing_degree, 1) - 1;
if existing_degree != 0
- && self.has_same_children(existing, node, modified_degree, edge_index)?
+ && self.has_same_children(
+ existing,
+ node,
+ modified_degree,
+ edge_index + 1,
+ )?
{
let last_child = self.nth_child(existing, existing_degree - 1)?;
diff --git a/chain/src/item/genins.rs b/chain/src/item/genins.rs
index a2bb22c..19f835b 100644
--- a/chain/src/item/genins.rs
+++ b/chain/src/item/genins.rs
@@ -9,16 +9,14 @@
use super::*;
use crate::{
atom::{Atom, DefaultAtom},
- default::{BoTop, Error, Reducer},
+ default::Error,
item::default::DefaultForest,
Edge,
};
use grammar::{Error as GrammarError, GrammarLabel, GrammarLabelType, TNT};
use graph::Graph;
-use core::borrow::Borrow;
-
-use std::collections::HashSet;
+use std::borrow::Borrow;
/// Convert an error telling us that an index is out of bounds.
///
@@ -179,8 +177,8 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
/// fragment under the result splone.
pub(crate) fn insert_item(
&mut self,
- reducer: &Reducer,
label: Edge,
+ ter: usize,
fragment: impl Borrow<DefaultForest<ForestLabel<GrammarLabel>>>,
atom_child_iter: impl Iterator<Item = usize> + ExactSizeIterator + Clone,
atom: &DefaultAtom,
@@ -200,7 +198,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
let fragment_root = if let Some(root) = fragment.root() {
root
} else {
- unreachable!("empty item");
+ panic!("empty item");
};
let fragment_root_label = fragment
@@ -209,7 +207,11 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
let pos = fragment_root_label.label().start();
- dbg!((pos, label));
+ // dbg!((pos, label));
+
+    // Whether or not to print detailed graphs at each step of the
+    // operation, for debugging purposes.
+ let to_print = false;
let tnt_string = {
let empty_p = atom_child_iter.len() == 0;
@@ -217,10 +219,10 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
match label.label().label() {
GrammarLabelType::TNT(TNT::Ter(t)) => {
- format!("t {t}")
+ format!("t {t}{}", if empty_p { " second" } else { "" })
}
GrammarLabelType::TNT(TNT::Non(n)) => {
- format!("n {n} {}", if empty_p { "second" } else { "first" })
+ format!("n {n}")
}
_ => "error".to_string(),
}
@@ -230,7 +232,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
let mut repetition = 0;
while std::fs::metadata(format!(
- "/Users/durand/Desktop/Centre/A propos de programmes/Rust/rep/chain/output/pos {pos} {tnt_string} {repetition}.gv"
+ "/Users/durand/Desktop/Centre/A propos de programmes/Rust/rep/chain/output/pos {pos} - {repetition}.gv"
))
.is_ok()
{
@@ -240,44 +242,32 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
repetition
};
- self.print_viz(&format!("pos {pos} {tnt_string} {num}.gv"))
- .unwrap();
-
- self.extra_reductions(
- BoTop::new(true_source, pavi),
- pos,
- reducer.borrow(),
- atom.borrow(),
- )?;
+ if to_print {
+ self.print_viz(&format!("pos {pos} - {num}.gv")).unwrap();
+ }
/// A cute little macro to produce compact representations
/// of Parents, Virtual nodes, or empty.
+ #[allow(unused_macros)]
macro_rules! pavi_to_short_str {
($pavi:ident) => {
match $pavi {
- PaVi::Parent(node, edge) => format!("{node} {edge}"),
- PaVi::Virtual(nt, t, node) => format!("{nt} {t} {node}"),
+ PaVi::Parent(node, edge, child) => format!("p{node} {edge} {child}"),
+ PaVi::Virtual(nt, t, node) => format!("v{nt} {t} {node}"),
PaVi::Empty => "ε".to_string(),
}
};
}
- self.print_viz(&format!(
- "pos {pos} {tnt_string} {num} stage 1 {}, {}.gv",
- pavi_to_short_str!(true_source),
- pavi_to_short_str!(pavi)
- ))
- .unwrap();
-
// Ensure the last node in the PaVi is a terminal or a
// non-terminal node, as an extra safety guard during
// development.
#[cfg(debug_assertions)]
{
match pavi {
- PaVi::Parent(node, edge) => {
+ PaVi::Parent(_node, _edge, child) => {
assert!(matches!(
- self.vertex_label(self.nth_child(node, edge)?),
+ self.vertex_label(child),
Ok(Some(label))
if label.label().label().tnt().is_some()));
}
@@ -312,11 +302,23 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
let is_empty_segment = pavi.is_empty();
+ if true_source.is_virtual() {
+ self.close_pavi(atom.borrow(), true_source, pos)?;
+
+ if to_print {
+ self.print_viz(&format!(
+ "pos {pos} - {num} {tnt_string} stage 0.1 {}.gv",
+ pavi_to_short_str!(true_source)
+ ))
+ .unwrap();
+ }
+ }
+
let mut parents: Vec<Parent> = {
let mut result = Vec::new();
match pavi {
- PaVi::Parent(node, edge) => {
+ PaVi::Parent(node, edge, _) => {
result.push(Parent::new(node, edge));
}
PaVi::Virtual(nt, t, node) => {
@@ -347,10 +349,12 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
self.plant_at_start(node, frag)?;
- self.print_viz(&format!(
- "pos {pos} {tnt_string} {num} stage 1.5 {node}.gv"
- ))
- .unwrap();
+ if to_print {
+ self.print_viz(&format!(
+ "pos {pos} - {num} {tnt_string} stage 0.2 {node}.gv"
+ ))
+ .unwrap();
+ }
let rule_label_pos = self
.query_label(last_but_one_label)
@@ -369,6 +373,24 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
result
};
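+
+        // Perform reductions at the child pointed to by the parent
+        // edge; if this produces a new, non-empty node, re-target
+        // the parents at the parents of the reduced node.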
+ if let PaVi::Parent(node, edge, _) = pavi {
+ let nth_child = self.nth_child(node, edge)?;
+
+ let reduced = self.reduction(nth_child, pos, ter, atom.borrow())?;
+
+ if reduced != nth_child && !self.is_empty_node(reduced)? {
+ parents.clear();
+ parents.extend(self.parents_of(reduced)?);
+ }
+
+ if to_print {
+ self.print_viz(&format!(
+ "pos {pos} - {num} {tnt_string} stage 0.3 {nth_child}.gv"
+ ))
+ .unwrap();
+ }
+ }
+
for parent in parents.iter() {
if !self.has_node(parent.node()) {
return Err(Error::IndexOutOfBounds(parent.node(), self.nodes_len()));
@@ -423,12 +445,14 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
let sploned_node =
self.splone(node.node(), Some(pos), node.edge(), false)?;
- self.print_viz(&format!(
- "pos {pos} {tnt_string} {num} stage 2 {} {}.gv",
- node.node(),
- node.edge(),
- ))
- .unwrap();
+ if to_print {
+ self.print_viz(&format!(
+ "pos {pos} - {num} {tnt_string} stage 1 {} {}.gv",
+ node.node(),
+ node.edge(),
+ ))
+ .unwrap();
+ }
node_label = self
.vertex_label(sploned_node)?
@@ -513,12 +537,16 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
for parent in stack {
let splanted = self.splant(parent.node(), parent.edge(), fragment, non_empty)?;
- self.print_viz(&format!(
- "pos {pos} {tnt_string} {num} stage 3 {} {} {splanted}.gv",
- parent.node(),
- parent.edge(),
- ))
- .unwrap();
+ let _splanted_child = self.nth_child(splanted, self.degree(splanted)? - 1)?;
+
+ if to_print {
+ self.print_viz(&format!(
+ "pos {pos} - {num} {tnt_string} stage 2 {} {} {splanted}.gv",
+ parent.node(),
+ parent.edge(),
+ ))
+ .unwrap();
+ }
non_empty = true;
}
@@ -541,25 +569,28 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
.query_label(root_label)
.expect("root label was not found");
- let edge = {
- let mut result = None;
+ let edge: usize;
+ let child: usize;
- for (index, child) in self.children_of(node)?.enumerate() {
- if matches!(self.vertex_label(child)?, Some(child_label) if child_label == leaf_label)
- {
- result = Some(index);
- break;
- }
- }
+ let mut result = None;
- if let Some(result) = result {
- result
- } else {
- unreachable!("the forest is wrongly planted");
+ for (index, child) in self.children_of(node)?.enumerate() {
+ if matches!(self.vertex_label(child)?, Some(child_label) if child_label == leaf_label)
+ {
+ result = Some((index, child));
+ break;
}
- };
+ }
+
+ if let Some((index, edge_child)) = result {
+ edge = index;
+ child = edge_child;
+ } else {
+ unreachable!("the forest is wrongly planted");
+ }
+
// dbg!(node, edge, root_label, leaf_label);
- PaVi::Parent(node, edge)
+ PaVi::Parent(node, edge, child)
} else {
assert_eq!(
fragment.nodes_len(),
@@ -612,131 +643,19 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
Ok(result)
}
- // REVIEW: Is this function really correct?
-
- /// Perform extra reductions.
- ///
- /// To be precise, this function first splones the bottom node,
- /// and then queries the reducer to find the next nodes to splone,
- /// and repeats this process until either we find the top node, or
- /// the reducer says to stop.
- ///
- /// One nuance to note is that after we splone a node, if we split
- /// that node, then the splitted node will have a cloned parent,
- /// see
- /// [`split_node`][DefaultForest::<ForestLabel<GrammarLabel>>::split_node]
- /// for details. So for each parent pointed out by the reducer,
- /// we need to find its clone that has the splitted node as a
- /// child.
- fn extra_reductions(
- &mut self,
- botop: BoTop,
- pos: usize,
- reducer: &Reducer,
- atom: &DefaultAtom,
- ) -> Result<Vec<usize>, Error> {
- if botop.bottom() == botop.top() {
- return Ok(vec![self.node_from_pavi(botop.top())?]);
- }
-
- let top = botop.top();
- let bottom = botop.bottom();
-
- let bottom_closed = self.close_pavi(atom.borrow(), bottom, pos)?;
-
- let top_node = self.node_from_pavi(top)?;
- let bottom_node = self.node_from_pavi(bottom)?;
-
- let mut stack = vec![bottom_node];
-
- // Exclude duplicate nodes to ensure that no infinite
- // recursion occurs. In the future I shall experiment if this
- // is necessary, and get rid of this if possible.
- let mut seen_nodes: HashSet<usize> = Default::default();
-
- let mut result = Vec::new();
-
- while let Some(bottom) = stack.pop() {
- if seen_nodes.contains(&bottom) {
- continue;
- }
-
- seen_nodes.insert(bottom);
-
- if let Some(set) = reducer.get(&(top_node, bottom)) {
- // First splone the bottom, except for the bottom one.
-
- let mut sploned = if bottom == bottom_node {
- bottom_closed
- } else {
- let degree = self.degree(bottom)?;
- let last_index = core::cmp::max(degree, 1) - 1;
-
- self.splone(bottom, Some(pos), last_index, false)?
- };
-
- // Then add parents whose labels are what we want into
- // the stack.
-
- let mut label_set: HashSet<GrammarLabel> = HashSet::with_capacity(set.len());
-
- for node in set.iter().copied() {
- label_set.insert(
- self.vertex_label(node)?
- .ok_or(Error::NodeNoLabel(node))?
- .label(),
- );
- }
-
- let sploned_label = self
- .vertex_label(sploned)?
- .ok_or(Error::NodeNoLabel(sploned))?;
-
- if sploned_label.clone_index().is_some() {
- let mut parents = self.parents_of(sploned)?;
-
- #[cfg(debug_assertions)]
- if parents.len() != 1 {
- panic!("assumption fails");
- }
-
- sploned = parents.next().unwrap().node();
- }
-
- for rule_parent in self.parents_of(sploned)? {
- if self.degree(rule_parent.node())? != 1 {
- panic!("assumption fails");
- }
-
- for parent in self.parents_of(rule_parent.node())? {
- let parent_node = parent.node();
-
- let parent_label = self
- .vertex_label(parent_node)?
- .ok_or(Error::NodeNoLabel(parent_node))?
- .label();
-
- if label_set.contains(&parent_label) {
- stack.push(parent_node);
- }
- }
- }
- } else {
- result.push(bottom);
- }
- }
-
- Ok(result)
- }
-
/// Set the end position of the node associated with `pavi` to be `pos`.
///
/// The parameter `atom` is used to query the reduction fragment
/// if `pavi` is a virtual node.
- fn close_pavi(&mut self, atom: &DefaultAtom, pavi: PaVi, pos: usize) -> Result<usize, Error> {
+ pub(crate) fn close_pavi(
+ &mut self,
+ atom: &DefaultAtom,
+ pavi: PaVi,
+ pos: usize,
+ ) -> Result<usize, Error> {
match pavi {
- PaVi::Parent(node, edge) => {
- let nth_child = self.nth_child(node, edge)?;
+ PaVi::Parent(_node, _edge, child) => {
+ let nth_child = child;
let nth_child_label = self
.vertex_label(nth_child)?
.ok_or(Error::NodeNoLabel(nth_child))?;
@@ -781,6 +700,11 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
let reduction_fragment = atom.generate_virtual_frags(nt, t, None);
+ // NOTE: the case of the root is exceptional
+ if reduction_fragment.is_none() && self.root() != Some(node) {
+ return Err(Error::CannotClose(nt, t, node, node_label_start));
+ }
+
for frag in reduction_fragment.into_iter().flatten() {
let mut frag = frag.clone();
frag.set_pos(node_label_start, true)?;
diff --git a/chain/src/item/mod.rs b/chain/src/item/mod.rs
index 5efa710..54ca946 100644
--- a/chain/src/item/mod.rs
+++ b/chain/src/item/mod.rs
@@ -32,8 +32,9 @@ use core::borrow::Borrow;
/// Also this might be empty if it represents the root node.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Default)]
pub enum PaVi {
- /// An edge from a node, as the n-th child.
- Parent(usize, usize),
+    /// An edge from a node to its n-th child, along with the number
+    /// of the child node.
+ Parent(usize, usize, usize),
/// A virtual segment from a non-terminal by a terminal, rooted at
/// a node.
///
@@ -50,16 +51,12 @@ pub enum PaVi {
Empty,
}
-impl From<Parent> for PaVi {
- fn from(value: Parent) -> Self {
- Self::Parent(value.node(), value.edge())
- }
-}
-
-impl core::fmt::Display for PaVi {
+impl std::fmt::Display for PaVi {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
- Self::Parent(node, edge) => write!(f, "the {edge}-th edge from {node}"),
+ Self::Parent(node, edge, child) => {
+ write!(f, "the {edge}-th edge from {node} to {child}")
+ }
Self::Virtual(nt, t, node) => write!(
f,
"a virtual node for non-terminal {nt} and terminal {t} at node {node}"
@@ -72,13 +69,17 @@ impl core::fmt::Display for PaVi {
impl PaVi {
/// Get the Parent variant.
fn parent(self) -> Option<Parent> {
- if let Self::Parent(node, edge) = self {
+ if let Self::Parent(node, edge, _) = self {
Some(Parent::new(node, edge))
} else {
None
}
}
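+
+    /// Is this a virtual segment?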
+ fn is_virtual(self) -> bool {
+ matches!(self, Self::Virtual(_, _, _))
+ }
+
/// Is this an empty segment?
fn is_empty(self) -> bool {
self == Self::Empty
@@ -290,3 +291,5 @@ pub mod default;
pub mod genins;
pub use genins::generate_fragment;
+
+pub mod reduction;
diff --git a/chain/src/item/reduction.rs b/chain/src/item/reduction.rs
new file mode 100644
index 0000000..89306e6
--- /dev/null
+++ b/chain/src/item/reduction.rs
@@ -0,0 +1,428 @@
+//! This module implements the operation of reductions on the forest.
+//!
+//! # What are reductions
+//!
+//! To explain this in detail, we first investigate the
+//! nullable-closure process, then discuss what reduction is and what
+//! it means in the chain-rule machine, and then explain the problem.
+//!
+//! ## Nullable-closure process
+//!
+//! In the process of running the chain-rule machine, we will collapse
+//! edges, in the sense that, if there is an edge from node a to node
+//! b and another from node b to node c, and if the edge from node a
+//! to node b is "nullable", that is, if the edge corresponds to a
+//! position that the atomic language deems nullable, then we also
+//! make an edge from node a to node c.
+//!
+//! The purpose of this process of forming the nullable closure is to
+//! spare the chain-rule machine the cost of traversing the entire
+//! machine to find the nullable closure later on.  But a
+//! side-effect of this process is that reductions are not explicitly
+//! marked.
+//!
+//! Note that we must perform this nullable closure process in order
+//! that the time complexity of the chain-rule machine lies within the
+//! cubic bound.
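+//!
+//! As a rough illustration, with a hypothetical edge list `edges:
+//! Vec<(usize, usize)>` and a hypothetical predicate `nullable`
+//! (neither is the machine's actual representation), one pass of the
+//! closure looks like this, to be repeated until no new edges appear:
+//!
+//! ```ignore
+//! let mut new_edges = Vec::new();
+//!
+//! for &(a, b) in edges.iter() {
+//!     if !nullable(a, b) {
+//!         continue;
+//!     }
+//!
+//!     // The edge (a, b) is nullable: for every edge (b, c), also
+//!     // make an edge (a, c).
+//!     for &(b2, c) in edges.iter() {
+//!         if b2 == b && !edges.contains(&(a, c)) {
+//!             new_edges.push((a, c));
+//!         }
+//!     }
+//! }
+//!
+//! edges.extend(new_edges);
+//! ```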
+//!
+//! ## Three types of actions
+//!
+//! We can imagine a "traditional parser generator" as a stack
+//! machine: there are three types of actions associated with it,
+//! depending on the current state and the current input token. The
+//! first is expansion: it means that we are expanding from a
+//! non-terminal, by some rule. The second is a normal push: we just
+//! continue parsing according to the current rule. The final one is
+//! reduction: it means the current expansion from a non-terminal has
+//! terminated and we go back to the previous level of expansion.
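+//!
+//! Schematically, with hypothetical names that are not types of this
+//! crate:
+//!
+//! ```ignore
+//! enum Action {
+//!     /// Expand from a non-terminal by one of its rules.
+//!     Expansion { nt: usize, rule: usize },
+//!     /// Continue parsing according to the current rule.
+//!     Push,
+//!     /// The current expansion has terminated; go back to the
+//!     /// previous level of expansion.
+//!     Reduction,
+//! }
+//! ```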
+//!
+//! ## Relation to the chain-rule machine
+//!
+//! For our chain-rule machine, expansion means to create a new node
+//! pointing at the current node, forming a path that is one edge longer.
+//! A normal push means to create a new node that points to the target
+//! of an edge going out from the current node, which was not created
+//! by the process of forming nullable closures. And the reduction
+//! means to create a new node that points to the target of an edge
+//! going out from the current node, which *was* created by the
+//! process of forming nullable closures.
+//!
+//! # Problem
+//!
+//! As can be seen from the previous paragraph, the distinction
+//! between a normal push and a reduction in the chain-rule machine is
+//! simply whether or not the original edge was created by the process
+//! of forming nullable closures. For the chain-rule machine, this
+//! does not matter: it can function well. For the formation of the
+//! derivation forest, however, this does not work so well: we cannot
+//! immediately read off from the chain-rule machine whether or not to
+//! perform reductions.
+//!
+//! # Solutions
+//!
+//! I tried some solutions to solve this problem.
+//!
+//! ## Complete at the end
+//!
+//! The first idea I tried is to find the nodes that were not closed
+//! properly and fill in the needed reductions, at the end of the
+//! parse.  This idea did not turn out well: by the end of the parse
+//! we have already lost a lot of information, so it becomes quite
+//! difficult to know on which nodes we need to perform reductions.
+//!
+//! ## Record precise information
+//!
+//! The next idea is to record the nodes that were skipped by the
+//! nullable-closure process, and then when we encounter a skipped
+//! segment, we perform the reductions there. This idea sounds
+//! feasible at first, but it turns out that we cannot track the nodes
+//! properly. That is, when running the chain-rule machine, we will
+//! split and clone nodes from time to time. A consequence is that
+//! the node numbers recorded previously will no longer be correct.
+//! This means we cannot know exactly which
+//! reductions to perform later.
+//!
+//! ## Guided brute-force
+//!
+//! Now I am trying out the third idea. The motivation is simple: if
+//! we cannot properly track nodes, then we track no nodes. Then, in
+//! order to perform reductions, we do not distinguish between
+//! necessary reductions and unnecessary reductions, and perform all
+//! possible reductions. In other words, when we are about to plant a
+//! new node in the forest, we first check the last child of the
+//! to-be-parent. If it is properly closed, we do nothing.
+//! Otherwise, we recursively descend into its children to find all
+//! last children, and perform all possible valid reductions.
+
+use super::*;
+use crate::{
+ atom::{Atom, DefaultAtom},
+ default::Error,
+ item::default::DefaultForest,
+};
+use grammar::{GrammarLabel, TNT};
+use graph::Graph;
+
+use std::collections::HashMap as Map;
+
+impl DefaultForest<ForestLabel<GrammarLabel>> {
+    /// Perform reduction at the last descendants of `node`.
+ ///
+ /// # Parameters
+ ///
+ /// The parameter `pos` is the next starting position. It is used
+    /// to find the descendants that need reductions: only those nodes
+    /// which have descendants with the correct ending positions will
+ /// receive reductions.
+ ///
+ /// The parameter `atom` is used to know which rule numbers are
+ /// deemed as accepting. Only accepting rule numbers can receive
+ /// reductions: this is the definition of being accepting.
+ ///
+ /// The parameter `ter` is used to fill in segments for virtual
+ /// nodes if they are found along the way.
+ ///
+ /// # Errors
+ ///
+ /// 1. Index out of bounds: some node number is corrupted.
+ /// 2. Node has no label: some node label is lost.
+ pub(crate) fn reduction(
+ &mut self,
+ node: usize,
+ pos: usize,
+ ter: usize,
+ atom: &DefaultAtom,
+ ) -> Result<usize, Error> {
+ let mut result = node;
+
+ // step 1: Determine if this needs reductions.
+
+ if self.root() == Some(node) {
+ return Ok(result);
+ }
+
+ // REVIEW: Do we need to check the end matches the position?
+
+ let mut node_label = self
+ .vertex_label(node)?
+ .ok_or(Error::NodeNoLabel(node))?
+ .label();
+
+ if node_label.end().is_some() {
+ return Ok(result);
+ }
+
+        // step 2: Find descendants that need reductions.
+
+ let mut correct_ends: Map<usize, bool> = Default::default();
+
+ let mut order_of_correct_ends: Vec<usize> = Vec::new();
+
+ let mut stack: Vec<usize> = vec![node];
+
+ let mut file = std::fs::OpenOptions::new().append(true).open("debug.log");
+
+ use std::{borrow::BorrowMut, io::Write};
+
+ // Whether or not to write a debug file.
+ let to_write = false;
+
+ if to_write {
+ if let Ok(ref mut file) = file.borrow_mut() {
+ file.write_all(format!("beginning, pos = {pos}, node = {node}\n").as_bytes())
+ .unwrap();
+ }
+ }
+
+ 'stack_loop: while let Some(top) = stack.pop() {
+ if to_write {
+ if let Ok(ref mut file) = file.borrow_mut() {
+ file.write_all(format!("top: {top}\n").as_bytes()).unwrap();
+ }
+ }
+
+            if correct_ends.contains_key(&top) {
+ continue 'stack_loop;
+ }
+
+ let top_label = self.vertex_label(top)?.ok_or(Error::NodeNoLabel(top))?;
+
+ if let Some(end) = top_label.label().end() {
+ correct_ends.insert(top, end == pos);
+
+ continue 'stack_loop;
+ }
+
+ if let Some(rule) = top_label.label().label().rule() {
+ // A rule node is not considered directly: it should
+ // be affected by its child implicitly.
+ //
+ // We only consider a rule node if it is deemed
+ // accepting by the atom.
+
+ if to_write {
+ if let Ok(ref mut file) = file.borrow_mut() {
+ file.write_all(format!("{}: {rule}, {stack:?}\n", line!()).as_bytes())
+ .unwrap();
+ }
+ }
+
+ if atom
+                    .is_accepting(2 * rule + 1)
+ .map_err(|_| Error::IndexOutOfBounds(2 * rule + 1, atom.accepting_len()))?
+ {
+ let mut has_unexplored_children = false;
+ let mut inserted = false;
+
+ 'child_loop: for child in self.children_of(top)? {
+ match correct_ends.get(&child).copied() {
+ Some(true) => {
+ correct_ends.insert(top, true);
+
+ inserted = true;
+ }
+ None => {
+ has_unexplored_children = true;
+
+ break 'child_loop;
+ }
+ _ => {}
+ }
+ }
+
+ if has_unexplored_children {
+ stack.push(top);
+ stack.extend(self.children_of(top)?);
+ } else if !inserted {
+ correct_ends.insert(top, false);
+ }
+ } else {
+ correct_ends.insert(top, false);
+ }
+
+ continue 'stack_loop;
+ }
+
+ if top_label.is_packed() {
+ let mut has_unexplored_children = false;
+
+ if to_write {
+ if let Ok(ref mut file) = file.borrow_mut() {
+ file.write_all(format!("{}: packed, {stack:?}\n", line!()).as_bytes())
+ .unwrap();
+ }
+ }
+
+ for child in self.children_of(top)? {
+ match correct_ends.get(&child).copied() {
+ Some(true) => {
+                        // NOTE: This is not recorded in
+                        // `order_of_correct_ends`, as we do not
+                        // handle packed nodes directly.
+
+ correct_ends.insert(top, true);
+ }
+ None => {
+ has_unexplored_children = true;
+ }
+ _ => {}
+ }
+ }
+
+ if to_write {
+ if let Ok(ref mut file) = file.borrow_mut() {
+ file.write_all(
+ format!("{}: packed, {has_unexplored_children}\n", line!()).as_bytes(),
+ )
+ .unwrap();
+ }
+ }
+
+ if has_unexplored_children {
+ stack.push(top);
+ stack.extend(self.children_of(top)?);
+            } else if !correct_ends.contains_key(&top) {
+ correct_ends.insert(top, false);
+ }
+
+ continue 'stack_loop;
+ }
+
+ let degree = self.degree(top)?;
+
+ if to_write {
+ if let Ok(ref mut file) = file.borrow_mut() {
+ file.write_all(format!("{}: degree = {degree}\n", line!()).as_bytes())
+ .unwrap();
+ }
+ }
+
+ let last_index = if degree != 0 {
+ degree - 1
+ } else {
+                // A leaf is normally a terminal node and hence
+                // should have an ending position; a non-terminal
+                // leaf is a virtual node, which we close below.
+
+ let end = match top_label.label().end() {
+ None => match top_label.label().label().tnt() {
+ Some(TNT::Ter(_)) => {
+ panic!("a terminal node {top} has no ending position?");
+ }
+ Some(TNT::Non(nt)) => {
+ correct_ends.insert(top, true);
+
+ self.close_pavi(atom.borrow(), PaVi::Virtual(nt, ter, top), pos)?;
+
+ continue 'stack_loop;
+ }
+ None => {
+ unreachable!("we already handled the rule case above");
+ }
+ },
+ Some(end) => end,
+ };
+
+ correct_ends.insert(top, end == pos);
+
+ if end == pos {
+ order_of_correct_ends.push(top);
+ }
+
+ continue 'stack_loop;
+ };
+
+ if to_write {
+ if let Ok(ref mut file) = file.borrow_mut() {
+ file.write_all(format!("{}: last = {last_index}\n", line!()).as_bytes())
+ .unwrap();
+ }
+ }
+
+ let last_child = self.nth_child(top, last_index)?;
+
+ if let Some(child_correctness_value) = correct_ends.get(&last_child).copied() {
+ correct_ends.insert(top, child_correctness_value);
+
+ if child_correctness_value {
+ order_of_correct_ends.push(top);
+ }
+ } else {
+ stack.extend([top, last_child]);
+ }
+ }
+
+ if to_write {
+ if let Ok(ref mut file) = file.borrow_mut() {
+ file.write_all(
+ format!("{}: orders = {order_of_correct_ends:?}\n", line!()).as_bytes(),
+ )
+ .unwrap();
+ }
+ }
+
+        // step 3: Perform reductions by `splone`.
+        //
+        // NOTE: We must fix the order from top to bottom: this is the
+        // reverse order of `order_of_correct_ends`.
+
+ for node in order_of_correct_ends.into_iter().rev() {
+ let label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?;
+ let degree = self.degree(node)?;
+
+ if !matches!(label.label().label().tnt(), Some(TNT::Non(_))) {
+ continue;
+ }
+
+ #[cfg(debug_assertions)]
+ {
+ let label_label_label = label.label().label();
+
+ assert!(label_label_label.rule().is_none());
+
+ assert!(matches!(label_label_label.tnt(), Some(TNT::Non(_))));
+
+ assert!(label.label().end().is_none());
+
+ assert_ne!(degree, 0);
+ }
+
+ let last_index = degree - 1;
+
+ self.splone(node, Some(pos), last_index, false)?;
+ }
+
+ node_label.set_end(pos);
+
+ if let Some(packed) =
+ self.query_label(ForestLabel::new(node_label, ForestLabelType::Packed))
+ {
+ result = packed;
+
+ if to_write {
+ if let Ok(ref mut file) = file.borrow_mut() {
+ file.write_all(format!("{}: packed = {packed}\n", line!()).as_bytes())
+ .unwrap();
+ }
+ }
+ } else if let Some(plain) =
+ self.query_label(ForestLabel::new(node_label, ForestLabelType::Plain))
+ {
+ result = plain;
+
+ if to_write {
+ if let Ok(ref mut file) = file.borrow_mut() {
+ file.write_all(format!("{}: plain = {plain}\n", line!()).as_bytes())
+ .unwrap();
+ }
+ }
+ }
+
+ if to_write {
+ if let Ok(ref mut file) = file.borrow_mut() {
+                file.write_all(b"end\n").unwrap();
+ }
+ }
+
+ Ok(result)
+ }
+}
diff --git a/chain/src/item/thoughts-on-reducer.org b/chain/src/item/thoughts-on-reducer.org
new file mode 100644
index 0000000..39a799a
--- /dev/null
+++ b/chain/src/item/thoughts-on-reducer.org
@@ -0,0 +1,121 @@
+#+TITLE: Thoughts on the reducer
+#+AUTHOR: Durand
+#+DATE: <2023-06-05 Lun 22:08>
+
+This module implements a data type for storing reduction
+information.
+
+* Questions
+
+What is reduction information, and why is it necessary to store it?
+
+* Nullable-closure process
+
+In the process of running the chain-rule machine, we will collapse
+edges, in the sense that, if there is an edge from node a to node b
+and another from node b to node c, and if the edge from node a to node
+b is "nullable", that is if the edge corresponds to a position in the
+atomic language that is deemed nullable by the atomic language, then
+we also make an edge from node a to node c.
+
+The purpose of this process of forming the nullable closure is to
+ensure that the chain-rule machine does not have to traverse the
+entire machine later on to find the nullable closure. A side-effect
+of this process, however, is that reductions are not explicitly
+marked.
+
+To explain this in detail, we first investigate what reduction is and
+what it means in the chain-rule machine, and then explain the problem.
+
+* Three types of actions
+
+We can imagine a "traditional parser generator" as a stack machine:
+there are three types of actions associated with it, depending on the
+current state and the current input token. The first is expansion: it
+means that we are expanding from a non-terminal, by some rule. The
+second is a normal push: we just continue parsing according to the
+current rule. The final one is reduction: it means the current
+expansion from a non-terminal has terminated and we go back to the
+previous level of expansion.
+
+* Relation to the chain-rule machine
+
+For our chain-rule machine, expansion means to create a new node
+pointing at the current node, forming a path that is one edge longer.
+A normal push means to create a new node that points to the
+target of an edge going out from the current node, which was not
+created by the process of forming nullable closures. And the
+reduction means to create a new node that points to the target of an
+edge going out from the current node, which *was* created by the
+process of forming nullable closures.
+
+* Problem
+
+As can be seen from the previous paragraph, the distinction between a
+normal push and a reduction in the chain-rule machine is simply
+whether or not the original edge was created by the process of forming
+nullable closures. For the chain-rule machine, this does not matter:
+it can function well. For the formation of the derivation forest,
+however, this does not work so well: we cannot immediately read off
+from the chain-rule machine whether or not to perform reductions.
+
+* Solution
+
+Since we cannot read-off the reduction information directly from the
+chain-rule machine, we have to store that information somehow.
+
+** Digression: past experiences
+
+When developing this part, I did not have a clear picture of the
+situation in mind: I was experimenting with the ways to construct the
+derivation forest from the chain-rule machine, as the paper describing
+the algorithm does not construct the derivation forest directly: it
+constructs an alternative format. A consequence is that I spent quite
+some time in figuring out how to construct derivation forests
+correctly.
+
+During the experiments, I tried various ideas, including "filling in"
+the reductions after parsing the entire input. This seemed a good
+idea at first, as I seemed to be able to "read off" the reductions
+from the resulting partially complete derivation forests.
+
+As it turned out, this was actually a bad idea. In fact, I can now
+prove that this cannot work, by exhibiting a specific grammar on
+which this approach definitely fails. This led me to believe that
+the only way is to store the needed reduction information in order to
+fill in this gap.
+
+** Storing reduction information
+
+Now we want to store the reduction information, so we need to be clear
+about what we are storing.
+
+In the derivation forest, a reduction happens when we reach the
+right-most descendant of a subtree, which marks the end of an
+expansion from a non-terminal. Moreover, our derivation forests
+usually contain half-open nodes, which means that they are not
+completed yet and can keep growing, until a reduction happens when we
+"close" the half-open node. Thus, what we really need is the list of
+nodes to close.
+
+This information is readily available when we form nullable closures:
+the skipped nodes are the nodes we need to close later on. To be more
+exact, when we skip nodes, we have two nodes: the top node and the
+bottom node, and we want to store the middle node that is skipped.
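+
+As a minimal sketch in Rust (mirroring the reducer module added in
+this commit, with the surrounding machinery stripped away), the store
+and its two operations could look like this:
+
+#+begin_src rust
+use std::collections::{HashMap, HashSet};
+
+/// Map a pair of (bottom, top) nodes to the set of middle nodes
+/// skipped between them.
+type ReductionInfo = HashMap<(usize, usize), HashSet<usize>>;
+
+/// Record `middle` as a node skipped between `bottom` and `top`.
+fn save(info: &mut ReductionInfo, bottom: usize, top: usize, middle: usize) {
+    info.entry((bottom, top)).or_default().insert(middle);
+}
+
+/// Query the middle nodes recorded for `bottom` and `top`.
+fn query(info: &ReductionInfo, bottom: usize, top: usize) -> impl Iterator<Item = usize> + '_ {
+    info.get(&(bottom, top)).into_iter().flatten().copied()
+}
+#+end_src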
+
+This naturally led to the structure of a nondeterministic finite
+automaton: when we want to close nodes, we start from the bottom-most
+node, and query the nodes upward by the top node, until we reach the
+top node or until we have no more nodes to visit.
+
+The special characteristic of this structure is that we need to
+label both the vertices and the edges. Since we already have a
+structure that labels edges, we can simply extend that structure by
+adding an array of labels and possibly a map from labels to nodes, to
+make sure the labels of vertices are unique and can be queried
+quickly.
+
+One thing to note is that we actually only need the ability to query
+children by the labels, and do not need to query labels by the target.
+We need experiments to see whether this will be a bottleneck that
+needs to be optimized away.
diff --git a/chain/src/reducer.rs b/chain/src/reducer.rs
new file mode 100644
index 0000000..7ff1858
--- /dev/null
+++ b/chain/src/reducer.rs
@@ -0,0 +1,191 @@
+//! This module implements a data type for storing reduction
+//! information.
+//!
+//! # Questions
+//!
+//! What is reduction information, and why is it necessary to store
+//! it?
+//!
+//! # Nullable-closure process
+//!
+//! In the process of running the chain-rule machine, we will collapse
+//! edges, in the sense that, if there is an edge from node a to node
+//! b and another from node b to node c, and if the edge from node a
+//! to node b is "nullable", that is if the edge corresponds to a
+//! position in the atomic language that is deemed nullable by the
+//! atomic language, then we also make an edge from node a to node c.
+//!
+//! The purpose of this process of forming the nullable closure is to
+//! ensure that the chain-rule machine does not have to traverse the
+//! entire machine later on to find the nullable closure. A
+//! side-effect of this process, however, is that reductions are not
+//! explicitly marked.
+//!
+//! To explain this in detail, we first investigate what reduction is
+//! and what it means in the chain-rule machine, and then explain the
+//! problem.
+//!
+//! # Three types of actions
+//!
+//! We can imagine a "traditional parser generator" as a stack
+//! machine: there are three types of actions associated with it,
+//! depending on the current state and the current input token. The
+//! first is expansion: it means that we are expanding from a
+//! non-terminal, by some rule. The second is a normal push: we just
+//! continue parsing according to the current rule. The final one is
+//! reduction: it means the current expansion from a non-terminal has
+//! terminated and we go back to the previous level of expansion.
+//!
+//! # Relation to the chain-rule machine
+//!
+//! For our chain-rule machine, expansion means to create a new node
+//! pointing at the current node, forming a path that is one edge
+//! longer. A normal push means to create a new node that points to
+//! the target of an edge going out from the current node, which was
+//! not created by the process of forming nullable closures. And the
+//! reduction means to create a new node that points to the target of
+//! an edge going out from the current node, which *was* created by
+//! the process of forming nullable closures.
+//!
+//! # Problem
+//!
+//! As can be seen from the previous paragraph, the distinction
+//! between a normal push and a reduction in the chain-rule machine is
+//! simply whether or not the original edge was created by the process
+//! of forming nullable closures. For the chain-rule machine, this
+//! does not matter: it can function well. For the formation of the
+//! derivation forest, however, this does not work so well: we cannot
+//! immediately read off from the chain-rule machine whether or not to
+//! perform reductions.
+//!
+//! # Solution
+//!
+//! Since we cannot read-off the reduction information directly from
+//! the chain-rule machine, we have to store that information somehow.
+//!
+//! ## Digression: past experiences
+//!
+//! When developing this part, I did not have a clear picture of the
+//! situation in mind: I was experimenting with the ways to construct
+//! the derivation forest from the chain-rule machine, as the paper
+//! describing the algorithm does not construct the derivation forest
+//! directly: it constructs an alternative format. A consequence is
+//! that I spent quite some time in figuring out how to construct
+//! derivation forests correctly.
+//!
+//! During the experiments, I tried various ideas, including "filling
+//! in" the reductions after parsing the entire input. This seemed a
+//! good idea at first, as I seemed to be able to "read off" the
+//! reductions from the resulting partially complete derivation
+//! forests.
+//!
+//! As it turned out, this was actually a bad idea. In fact, I can
+//! now prove that this cannot work, by exhibiting a specific grammar
+//! on which this approach definitely fails. This led me to believe
+//! that the only way is to store the needed reduction information in
+//! order to fill in this gap.
+//!
+//! ## Storing reduction information
+//!
+//! Now we want to store the reduction information, so we need to be
+//! clear about what we are storing.
+//!
+//! In the derivation forest, a reduction happens when we reach the
+//! right-most descendant of a subtree, which marks the end of an
+//! expansion from a non-terminal. Moreover, our derivation forests
+//! usually contain half-open nodes, which means that they are not
+//! completed yet and can keep growing, until a reduction happens when
+//! we "close" the half-open node. Thus, what we really need is the
+//! list of nodes to close.
+//!
+//! This information is readily available when we form nullable
+//! closures: the skipped nodes are the nodes we need to close later
+//! on. To be more exact, when we skip nodes, we have two nodes: the
+//! top node and the bottom node, and we want to store the middle node
+//! that is skipped.
+//!
+//! This naturally led to the structure of a nondeterministic finite
+//! automaton: when we want to close nodes, we start from the
+//! bottom-most node, and query the nodes upward by the top node,
+//! until we reach the top node or until we have no more nodes to visit.
+//!
+//! The special characteristic of this structure is that we need to
+//! label both the vertices and the edges. Since we already have a
+//! structure that labels edges, we can simply extend that structure
+//! by adding an array of labels and possibly a map from labels to
+//! nodes, to make sure the labels of vertices are unique and can be
+//! queried quickly.
+//!
+//! One thing to note is that we actually only need the ability to
+//! query children by the labels, and do not need to query labels by
+//! the target. So we can represent this nondeterministic finite
+//! automaton by a plain hashmap.
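+//!
+//! As a usage sketch (the node numbers are made up for
+//! illustration):
+//!
+//! ```ignore
+//! let mut reducer = Reducer::default();
+//!
+//! // When the nullable-closure process skips node 1 between the
+//! // bottom node 3 and the top node 0, record it.
+//! reducer.save(3, 0, 1);
+//!
+//! // Later, when closing nodes, query the skipped middle nodes.
+//! assert!(reducer.query(3, 0).any(|middle| middle == 1));
+//! ```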
+
+#![allow(unused)]
+
+use std::collections::{hash_set::Iter, HashMap, HashSet};
+
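+/// A map from a pair of (bottom, top) nodes to the set of middle
+/// nodes that were skipped by the nullable-closure process.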
+#[derive(Debug, Default, Clone)]
+pub(crate) struct Reducer(HashMap<(usize, usize), HashSet<usize>>);
+
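+/// An iterator over the middle nodes recorded for a queried pair of
+/// nodes; it is empty if the pair has no record.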
+#[derive(Debug, Default)]
+pub(crate) enum ReducerIter<'a> {
+ #[default]
+ Empty,
+ NonEmpty(Iter<'a, usize>),
+}
+
+impl<'a> Iterator for ReducerIter<'a> {
+ type Item = usize;
+
+ #[inline]
+ fn next(&mut self) -> Option<Self::Item> {
+ match self {
+ ReducerIter::Empty => None,
+ ReducerIter::NonEmpty(iter) => iter.next().copied(),
+ }
+ }
+
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ match self {
+ ReducerIter::Empty => (0, Some(0)),
+ ReducerIter::NonEmpty(iter) => iter.size_hint(),
+ }
+ }
+}
+
+impl<'a> ExactSizeIterator for ReducerIter<'a> {
+ #[inline]
+ fn len(&self) -> usize {
+ match self {
+ ReducerIter::Empty => 0,
+ ReducerIter::NonEmpty(iter) => iter.len(),
+ }
+ }
+}
+
+impl<'a> ReducerIter<'a> {
+ #[inline]
+ pub(crate) fn new(iter: Option<Iter<'a, usize>>) -> Self {
+ match iter {
+ Some(iter) => Self::NonEmpty(iter),
+ None => Self::default(),
+ }
+ }
+}
+
+impl Reducer {
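+    /// Iterate over the middle nodes recorded for the pair of
+    /// `bottom` and `top` nodes, if any.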
+ #[inline]
+ pub(crate) fn query(&self, bottom: usize, top: usize) -> ReducerIter<'_> {
+ ReducerIter::new(self.0.get(&(bottom, top)).map(|set| set.iter()))
+ }
+
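+    /// Record `middle` as a node skipped between `bottom` and `top`.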
+ #[inline]
+ pub(crate) fn save(&mut self, bottom: usize, top: usize, middle: usize) {
+        self.0.entry((bottom, top)).or_default().insert(middle);
+ }
+}
diff --git a/grammar/src/abnf.rs b/grammar/src/abnf.rs
index 00bc0b2..eb56819 100644
--- a/grammar/src/abnf.rs
+++ b/grammar/src/abnf.rs
@@ -1,7 +1,7 @@
//! This file implements the function to read a grammar from a string,
//! in the [augmented Backus Naur
-//! form][https://en.wikipedia.org/wiki/Augmented_Backus–Naur_form].
-//! See [RFC5234][https://www.rfc-editor.org/rfc/rfc5234] for its
+//! form](https://en.wikipedia.org/wiki/Augmented_Backus–Naur_form).
+//! See [RFC5234](https://www.rfc-editor.org/rfc/rfc5234) for its
//! specifications.
//!
//! In fact, the rules of ABNF are considered to be too strict. For
diff --git a/nfa/src/default/nfa.rs b/nfa/src/default/nfa.rs
index d2fe88e..e38a1a9 100644
--- a/nfa/src/default/nfa.rs
+++ b/nfa/src/default/nfa.rs
@@ -20,7 +20,7 @@ pub struct DefaultNFA<T: GraphLabel> {
graph: DLGraph<T>,
}
-impl<T: GraphLabel + Display> Default for DefaultNFA<T> {
+impl<T: GraphLabel> Default for DefaultNFA<T> {
fn default() -> Self {
Self {
graph: Default::default(),
@@ -85,10 +85,9 @@ impl<T: GraphLabel> LabelExtGraph<T> for DefaultNFA<T> {
}
impl<T: GraphLabel> Nfa<T> for DefaultNFA<T> {
- type FromRegex<S: GraphLabel + Display + Default> = DefaultNFA<S>;
+ type FromRegex<S: GraphLabel + Default> = DefaultNFA<S>;
- // Reminder: This is an adopted version of Thompson's
- // construction.
+    // NOTE: This is an adapted version of Thompson's construction.
fn to_nfa(
regexps: &[impl Regex<RegexType<T>>],
sub_pred: impl Fn(T) -> Result<SoC<T>, Error>,
@@ -107,10 +106,6 @@ impl<T: GraphLabel> Nfa<T> for DefaultNFA<T> {
builder.add_vertices(nfa_len);
- // for _ in 0..nfa_len {
- // builder.add_vertex();
- // }
-
let default = LabelType::new(DOption(default), total_regexps_len, false);
// add a default edge
@@ -122,7 +117,7 @@ impl<T: GraphLabel> Nfa<T> for DefaultNFA<T> {
result.push(0);
for regexp in regexps.iter().take(regexps.len() - 1) {
- result.push(result.last().unwrap() + regexp.nodes_len() * 2);
+ result.push(*result.last().unwrap() + regexp.nodes_len() * 2);
}
result
@@ -137,14 +132,14 @@ impl<T: GraphLabel> Nfa<T> for DefaultNFA<T> {
}
);
- /// Get offset from `accumulators` without checking bounds.
- macro_rules! get_offset_unsafe (
- ($num:expr) => {
- { unsafe { *accumulators.get_unchecked($num) } }
- }
- );
+ // /// Get offset from `accumulators` without checking bounds.
+ // macro_rules! get_offset_unsafe (
+ // ($num:expr) => {
+ // { unsafe { *accumulators.get_unchecked($num) } }
+ // }
+ // );
- for (index, regex) in regexps.iter().enumerate() {
+ for (regex, offset) in regexps.iter().zip(accumulators.iter().copied()) {
let root = if let Some(root) = regex.root() {
root
} else {
@@ -155,11 +150,6 @@ impl<T: GraphLabel> Nfa<T> for DefaultNFA<T> {
let regex_len = regex.nodes_len();
- // It is safe here to call `get_offset_unsafe`, as `index`
- // is guaranteed to be strictly less than the length of
- // `accumulators` by an assertion above.
- let offset = get_offset_unsafe!(index);
-
let mut stack: Vec<usize> = Vec::with_capacity(regex_len);
stack.push(root);