summaryrefslogtreecommitdiff
path: root/chain
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2023-07-08 12:30:21 +0800
committerJSDurand <mmemmew@gmail.com>2023-07-08 12:31:13 +0800
commit9a317e56f8a6126583f7d0c431bf878d9b1fe7b1 (patch)
tree7bb6004196b38446a5ab0cb3a0ab642d35f113e9 /chain
parent691f969eb104fa3d4c2a1667693fd0382eb9d6b5 (diff)
Finished the Emacs binding.
Now the binding part is finished. What remains is a bug encountered when planting a fragment to the forest which intersects a packed node, which would lead to invalid forests. This will also cause problem when planting a packed fragment, but until now my testing grammars do not produce packed fragments, so this problem is not encountered yet. I am still figuring out efficient ways to solve this problem.
Diffstat (limited to 'chain')
-rw-r--r--chain/src/archive.txt47
-rw-r--r--chain/src/default.rs247
-rw-r--r--chain/src/item/default/extract.rs509
-rw-r--r--chain/src/item/default/mod.rs62
-rw-r--r--chain/src/item/default/splone.rs10
-rw-r--r--chain/src/item/genins.rs18
-rw-r--r--chain/src/item/mod.rs15
-rw-r--r--chain/src/item/reduction.rs85
-rw-r--r--chain/src/lib.rs29
9 files changed, 812 insertions, 210 deletions
diff --git a/chain/src/archive.txt b/chain/src/archive.txt
index 030ccba..7bd6f31 100644
--- a/chain/src/archive.txt
+++ b/chain/src/archive.txt
@@ -493,3 +493,50 @@
// PaVi::Empty => Ok(self.root().ok_or(Error::IndexOutOfBounds(0, 0))?),
// }
// }
+
+// /// Find the last child of the given node whose start and end
+// /// positions contain the given position. If no such child is
+// /// found, return `Ok(None)`.
+// ///
+// /// The returned tuple is of the form (child, index), where
+// /// `child` is the index of the child node in question, and
+// /// `index` means that the child is the `index`-th child of the
+// /// node.
+// #[allow(dead_code)]
+// pub(crate) fn position_search(
+// &self,
+// node: usize,
+// pos: usize,
+// ) -> Result<Option<(usize, usize)>, Error> {
+// fn range_contains(label: GrammarLabel, pos: usize) -> bool {
+// let start = label.start();
+//
+// if let Some(end) = label.end() {
+// (start..end).contains(&pos)
+// } else {
+// (start..).contains(&pos)
+// }
+// }
+//
+// let node_label = self
+// .vertex_label(node)?
+// .ok_or(Error::NodeNoLabel(node))?
+// .label();
+//
+// if !range_contains(node_label, pos) {
+// return Ok(None);
+// }
+//
+// for (index, child) in self.children_of(node)?.enumerate().rev() {
+// let child_label = self
+// .vertex_label(child)?
+// .ok_or(Error::NodeNoLabel(child))?
+// .label();
+//
+// if range_contains(child_label, pos) {
+// return Ok(Some((child, index)));
+// }
+// }
+//
+// Ok(None)
+// }
diff --git a/chain/src/default.rs b/chain/src/default.rs
index ed63ff4..7de720f 100644
--- a/chain/src/default.rs
+++ b/chain/src/default.rs
@@ -11,15 +11,19 @@ use crate::item::{
default::DefaultForest, generate_fragment, genins::index_out_of_bounds_conversion, Forest,
ForestLabel, ForestLabelError,
};
-use core::fmt::Display;
use grammar::{GrammarLabel, GrammarLabelType, START_NONTERMINAL, TNT};
+#[allow(unused_imports)]
use graph::{
labelled::DLGBuilder, Builder, DLGraph, Graph, LabelExtGraph, LabelGraph, ParentsGraph,
};
+use std::fmt::Display;
use graph_macro::Graph;
-use std::collections::{HashMap as Map, HashSet, TryReserveError};
+use std::{
+ borrow::Borrow,
+ collections::{HashMap as Map, HashSet, TryReserveError},
+};
/// The errors related to taking derivatives by chain rule.
#[non_exhaustive]
@@ -399,6 +403,10 @@ impl Chain for DefaultChain {
})
}
+ fn atom(&self) -> &Self::Atom {
+ self.atom.borrow()
+ }
+
fn epsilon(&self) -> Result<bool, Self::Error> {
self.accepting_vec
.get(self.current)
@@ -783,146 +791,119 @@ impl Chain for DefaultChain {
Ok(der_iter)
}
- // FIXME: Do nothing more than to make this at the end of input,
- // actually.
- //
- // This means we only close the expansions at the last accepting
- // sources.
- //
- // As to the remaining still open fragments, they should be
- // considered as bugs and should be fixed in the function
- // `insert_item` instead.
-
- fn end_of_input(&mut self) -> Result<(), Self::Error> {
- if !matches!(self.epsilon(), Ok(true)) {
- return Ok(());
- }
-
- self.forest.remove_node(1)?;
-
- let mut stack = Vec::new();
-
- dbg!(&self.accepting_sources);
+ type Item = DefaultForest<ForestLabel<GrammarLabel>>;
- // self.forest.print_viz("dbg forest before.gv").unwrap();
+ fn end_of_input(&mut self, pos: usize, ter: usize) -> Result<Self::Item, Error> {
+ let mut result = Default::default();
- for pavi in self.accepting_sources.iter() {
- match pavi {
- PaVi::Parent(node, _edge, child) => {
- // REVIEW: This is a garbage node that has to be
- // added when the machine starts. Is there a way
- // to avoid this garbage?
- if *node == 1 {
- continue;
- }
+ let root = if let Some(root) = self.forest.root() {
+ root
+ } else {
+ return Ok(result);
+ };
- stack.push(*child);
- }
- PaVi::Virtual(nt, t, node) => {
- let node_label = self
- .forest
- .vertex_label(*node)?
- .ok_or(Error::NodeNoLabel(*node))?;
+ // The 1-th node is a dummy node that should be removed.
+ assert_ne!(root, 1);
- if node_label.label().end().is_none() {
- let reduction_fragment = self.atom.generate_virtual_frags(*nt, *t, None);
+ self.forest.remove_node(1)?;
- for frag in reduction_fragment.into_iter().flatten() {
- let mut frag = frag.clone();
- frag.set_pos(node_label.label().start(), true)?;
+ let root_degree = self.forest.degree(root)?;
- stack.push(self.forest.plant_at_start(*node, frag)?);
- }
- }
- }
- PaVi::Empty => {}
- }
- }
+ let root_label = self
+ .forest
+ .vertex_label(root)?
+ .ok_or(Error::NodeNoLabel(root))?;
- let forest_nodes_len = self.forest.nodes_len();
-
- let mut node_ends: Vec<Vec<usize>> = std::iter::repeat_with(Default::default)
- .take(forest_nodes_len)
- .collect();
-
- // NOTE: Use iterator to avoid checking bounds when accessing
- // `node_ends`.
- for (node, node_end) in self.forest.nodes().zip(node_ends.iter_mut()) {
- if let Some(end) = self
- .forest
- .vertex_label(node)?
- .ok_or(Error::NodeNoLabel(node))?
- .label()
- .end()
- {
- node_end.push(end);
- }
- }
+ dbg!(root_degree, root_label);
- #[derive(Copy, Clone)]
- enum StackElement {
- Seen(usize),
- Unseen(usize),
- }
+ // REVIEW: Why nodes?
+ let mut nodes = Vec::with_capacity(root_degree);
- use StackElement::{Seen, Unseen};
+ if root_label.is_packed() {
+ let mut all_completed_clones = Vec::with_capacity(root_degree);
- // maybe we do not need so much space?
- let mut stack = Vec::with_capacity(forest_nodes_len);
+ for clone in self.forest.children_of(root)? {
+ let mut all_completed = true;
- stack.push(Unseen(
- self.forest.root().ok_or(Error::IndexOutOfBounds(0, 0))?,
- ));
+ // The last child does not count.
+ let clone_child_degree_minus_one = std::cmp::max(self.forest.degree(clone)?, 1) - 1;
- let mut seen_nodes = HashSet::with_capacity(forest_nodes_len);
+ // The loop to determine whether or not it is all
+ // completed, except for the last child.
+ 'completion_det_loop: for clone_child in self
+ .forest
+ .children_of(clone)?
+ .take(clone_child_degree_minus_one)
+ {
+ let clone_child_label = self
+ .forest
+ .vertex_label(clone_child)?
+ .ok_or(Error::NodeNoLabel(clone_child))?;
- // depth-first search
- while let Some(top) = stack.pop() {
- match top {
- Seen(top) => {
- // Every of its children should be processed
- // already.
+ if clone_child_label.label().end().is_none() {
+ all_completed = false;
- if !node_ends.get(top).unwrap().is_empty() {
- continue;
+ break 'completion_det_loop;
}
-
- let last_index = self.forest.degree(top).unwrap() - 1;
- let last_child = self.forest.nth_child(top, last_index)?;
-
- *node_ends.get_mut(top).unwrap() = node_ends
- .get(last_child)
- .ok_or(Error::IndexOutOfBounds(last_child, forest_nodes_len))?
- .clone();
}
- Unseen(top) => {
- if seen_nodes.contains(&top) {
- continue;
- }
- seen_nodes.insert(top);
- // NOTE: After the above check we can safely unwrap.
+ if all_completed {
+ all_completed_clones.push(clone);
+ }
+ }
- let mut top_no_children = true;
+ if all_completed_clones.is_empty() {
+ // Nothing to do
+ return Ok(result);
+ }
- for child in self.forest.children_of(top).unwrap() {
- if top_no_children {
- stack.push(Seen(top));
- }
+ for clone in all_completed_clones {
+ nodes.push(
+ self.forest
+ .reduction(clone, pos, ter, self.atom.borrow(), true)?,
+ );
+ }
+ } else if root_label.clone_index().is_some() {
+ panic!(
+ "If the root of the forest is cloned, \
+ the root should be changed to the packed node."
+ );
+ } else {
+ // The last child does not count.
+ let root_degree_minus_one = std::cmp::max(root_degree, 1) - 1;
- top_no_children = false;
+ dbg!(root_degree_minus_one);
- stack.push(Unseen(child));
- }
+ // The loop to determine whether or not it is all
+ // completed, except for the last child.
+ for root_child in self.forest.children_of(root)?.take(root_degree_minus_one) {
+ let root_child_label = self
+ .forest
+ .vertex_label(root_child)?
+ .ok_or(Error::NodeNoLabel(root_child))?;
- if !top_no_children {
- continue;
- }
+ if root_child_label.label().end().is_none() {
+ return Ok(result);
}
}
+
+ nodes.push(
+ self.forest
+ .reduction(root, pos, ter, self.atom.borrow(), true)?,
+ );
}
- Ok(())
+ dbg!(&nodes);
+
+ // self.forest
+ // .print_viz("forest before extraction.gv")
+ // .unwrap();
+
+ result = self.forest.extract(pos)?;
+
+ // result.print_viz("extracted forest.gv").unwrap();
+
+ Ok(result)
}
}
@@ -1051,7 +1032,7 @@ mod test_chain {
chain.chain(0, 12, no_item)?;
if !no_item {
- chain.end_of_input()?;
+ let _output = chain.end_of_input(13, 0)?;
chain.forest.print_viz("forest.gv")?;
chain.graph.print_viz("chain.gv")?;
@@ -1079,6 +1060,36 @@ mod test_chain {
}
#[test]
+ fn test_assumption() -> Result<(), Box<dyn std::error::Error>> {
+ use grammar::Grammar;
+
+ dbg!();
+ let grammar_str = std::fs::read_to_string(
+ "/Users/durand/Desktop/Centre/A propos de programmes/Rust/rep/grammar/abnf grammars/test.abnf",
+ )
+ .unwrap();
+ dbg!();
+ let grammar: Grammar = grammar_str.parse()?;
+
+ let atom = DefaultAtom::from_grammar(grammar)?;
+ let mut chain = DefaultChain::unit(atom)?;
+
+ let no_item = false;
+
+ let input: &[usize] = &[3, 0, 2, 2, 2, 1, 1, 1, 0, 1];
+
+ for (pos, t) in input.iter().copied().enumerate().take(2) {
+ chain.chain(t, pos, no_item)?;
+ chain.forest().print_viz(&format!("forest {pos}.gv"))?;
+ dbg!(pos, t);
+ }
+
+ item::default::print_labels(&chain.atom, &chain.forest)?;
+
+ Ok(())
+ }
+
+ #[test]
fn test_ambiguity() -> Result<(), Box<dyn std::error::Error>> {
if std::fs::metadata("debug.log").is_ok() {
std::fs::remove_file("debug.log")?;
@@ -1125,7 +1136,7 @@ mod test_chain {
chain.chain(1, 4, no_item)?;
if !no_item {
- // chain.end_of_input()?;
+ let _output = chain.end_of_input(5, 1)?;
chain.forest.print_viz("forest.gv")?;
// chain.forest.print_closed_viz("closed.gv")?;
}
diff --git a/chain/src/item/default/extract.rs b/chain/src/item/default/extract.rs
new file mode 100644
index 0000000..96f5119
--- /dev/null
+++ b/chain/src/item/default/extract.rs
@@ -0,0 +1,509 @@
+//! This module defines a function for extracting the "completed part"
+//! of a forest.
+//!
+//! # Completed sub-forest
+//!
+//! The completed part of a forest refers to the sub-forest of the
+//! forest that consists entirely of nodes whose ending positions are
+//! set, and whose subtrees also consist of such nodes.
+//!
+//! # Definition
+//!
+//! To be more rigorous, we call a node *n* of a forest *F* complete
+//! if its ending position is set. We call a sub-forest *S* of *F*
+//! completed if the following two conditions are satisfied:
+//!
+//! 1. Every node in *S* is complete.
+//! 2. For every node *n* in *S*, the subtree of *F* rooted at *n*
+//! consists entirely of complete nodes.
+//!
+//! # Uniqueness of the maximal completed sub-forest
+//!
+//! **Lemma**
+//!
+//! For any forest *F*, there is only one maximal completed sub-forest
+//! *S* of *F*. Here the maximality means that if *T* is a completed
+//! sub-forest of *F* which contains *S*, then *S* is equal to *T*.
+//!
+//! Then by **the completed part** of a forest *F* we refer to the
+//! unuique maximal completed sub-forest of *F*.
+//!
+//! **Proof**
+//!
+//! Note that if *S_1* and *S_2* are two completed sub-forests of *F*,
+//! and if *S_1* is not contained in *S_2*, say *n* is a node in *S_1*
+//! but not in *S_2*, then by adjoining the subtree of *S_2* rooted at
+//! *n* to *S_1* we obtain a completed sub-forest *S_3* which contains
+//! *S_1*, contradicting the maximality of *S_1*. Thus there can only
+//! be one maximal completed sub-forest of a forest.
+//!
+//! # Connected component
+//!
+//! The extraction function actually returns the connected component
+//! of the completed part of a forest which contains its root, as that
+//! is what we care about the most.
+
+use super::*;
+
+impl DefaultForest<ForestLabel<GrammarLabel>> {
+ pub(crate) fn extract(&self, pos: usize) -> Result<Self, Error> {
+ // Preparations
+
+ let nodes_len = self.nodes_len();
+
+ let mut graph = PLGraph::default();
+ let mut builder = PLGBuilderMut::from_graph_mut(&mut graph);
+ let root = Some(0);
+
+ // Now find the connected component of the completed part of
+ // `self` and clone that to graph by means of `builder`.
+
+ // REVIEW: Maybe a small_vec can help here.
+
+ // A fixed-size hash table, sort of.
+ let mut validity_array: Vec<bool> = std::iter::repeat(true).take(nodes_len).collect();
+
+ // Calculate the exact length to avoid too many allocations.
+ let mut stack_len = 0usize;
+
+ // Use an iterator to avoid checking bounds in accessing
+ // elements of the array.
+ for (node, validity_ptr) in self.nodes().zip(validity_array.iter_mut()) {
+ if self
+ .vertex_label(node)?
+ .ok_or(Error::NodeNoLabel(node))?
+ .label()
+ .end()
+ .is_none()
+ {
+ *validity_ptr = false;
+
+ stack_len += 1;
+ }
+ }
+
+ dbg!(&validity_array);
+
+ // A stack for propagating the falsehood to parents and
+ // children of incomplete nodes, like a plague. The only
+ // nodes that can stop the spread of this plague are packed
+ // nodes with a child that is not infected (yet) by the
+ // plague.
+
+ let mut stack: Vec<usize> = Vec::with_capacity(stack_len);
+
+ for (n, validity) in validity_array.iter().enumerate() {
+ if !*validity {
+ stack.push(n);
+ }
+ }
+
+ while let Some(top) = stack.pop() {
+ 'parent_loop: for parent in self.parents_of(top)?.map(|p| p.node()) {
+ if !*validity_array
+ .get(parent)
+ .ok_or(Error::IndexOutOfBounds(parent, nodes_len))?
+ {
+ // already infected by the plague
+
+ continue 'parent_loop;
+ }
+
+ let should_spread_p = if self
+ .vertex_label(parent)?
+ .ok_or(Error::NodeNoLabel(parent))?
+ .is_packed()
+ {
+ !self
+ .children_of(parent)?
+ .any(|node| validity_array.get(node).copied() == Some(true))
+ } else {
+ true
+ };
+
+ if should_spread_p {
+ *validity_array
+ .get_mut(parent)
+ .ok_or(Error::IndexOutOfBounds(parent, nodes_len))? = false;
+ stack.push(parent);
+ }
+ }
+ }
+
+ let validity_array = validity_array;
+
+ /// A little macro to produce a vector of valid children.
+ macro_rules! valid_children {
+ ($node:ident) => {
+ self.children_of($node)?
+ .filter(|child| validity_array.get(*child).copied() == Some(true))
+ .collect::<Vec<usize>>()
+ };
+ }
+
+ dbg!(&validity_array);
+
+ if validity_array.iter().all(|validity| !*validity) {
+ // every element is false
+
+ let root = None;
+
+ return Ok(Self { graph, root });
+ }
+
+ // Finally clone the connected component to the new graph.
+
+ let root_label = GrammarLabel::new_closed(TNT::Non(0), 0, pos);
+
+ let packed_label = ForestLabel::new(root_label, ForestLabelType::Packed);
+
+ let plain_label = ForestLabel::new(root_label, ForestLabelType::Plain);
+
+ let original_root_label;
+
+ let original_root = if let Some(packed_node) = self.query_label(packed_label) {
+ original_root_label = packed_label;
+
+ packed_node
+ } else if let Some(plain_node) = self.query_label(plain_label) {
+ original_root_label = plain_label;
+
+ plain_node
+ } else {
+ let root = None;
+ return Ok(Self { graph, root });
+ };
+
+ let mut roots_stack: Vec<usize>;
+
+ if original_root_label.is_packed() {
+ roots_stack = valid_children!(original_root);
+
+ match roots_stack.len() {
+ 0 => {
+ let root = None;
+
+ return Ok(Self { graph, root });
+ }
+ 1 => {
+ let child = *roots_stack.first().unwrap();
+
+ builder.add_vertex(self.vertex_label(child)?.ok_or(Error::NodeNoLabel(child))?);
+ }
+ _ => {
+ let builder_root = builder.add_vertex(original_root_label);
+
+ for child in roots_stack.iter().copied() {
+ let child_node = builder.add_vertex(
+ self.vertex_label(child)?.ok_or(Error::NodeNoLabel(child))?,
+ );
+
+ builder.add_edge(builder_root, child_node, original_root_label)?;
+ }
+ }
+ }
+ } else {
+ builder.add_vertex(original_root_label);
+
+ roots_stack = vec![original_root];
+ }
+
+ while let Some(top) = roots_stack.pop() {
+ let top_label = self.vertex_label(top)?.ok_or(Error::NodeNoLabel(top))?;
+
+ let top_in_builder = match builder.query_label(top_label) {
+ Some(top_node) => top_node,
+ None => {
+ // an old cloned node now becomes a plain node
+ let plain_label = ForestLabel::new(top_label.label(), ForestLabelType::Plain);
+
+ builder
+ .query_label(plain_label)
+ .unwrap_or_else(|| panic!("the top {top} should be planted already"))
+ }
+ };
+
+ 'children_loop: for child in self.children_of(top)? {
+ let child_label = self.vertex_label(child)?.ok_or(Error::NodeNoLabel(child))?;
+
+ // filter out invalid children
+ if validity_array.get(child).copied() != Some(true) {
+ continue 'children_loop;
+ }
+
+ // avoid unnecessary effort
+ if let Some(child_node) = builder.query_label(child_label) {
+ builder.add_edge(top_in_builder, child_node, child_label)?;
+
+ continue 'children_loop;
+ }
+
+ if child_label.is_packed() {
+ let child_valid_children = valid_children!(child);
+
+ match child_valid_children.len() {
+ 0 => {
+ panic!("this case should not happen");
+ }
+ 1 => {
+ // If a packed node only has one valid
+ // child, we clone a plain node instead.
+
+ let child_child = *child_valid_children.first().unwrap();
+
+ let child_plain_label =
+ ForestLabel::new(child_label.label(), ForestLabelType::Plain);
+
+ let child_plain_node = builder.add_vertex(child_plain_label);
+
+ builder.add_edge(
+ top_in_builder,
+ child_plain_node,
+ child_plain_label,
+ )?;
+
+ roots_stack.push(child_child);
+ }
+ _ => {
+ let child_node = builder.add_vertex(child_label);
+
+ builder.add_edge(top_in_builder, child_node, child_label)?;
+
+ roots_stack.push(child);
+ }
+ }
+
+ continue 'children_loop;
+ }
+
+ let child_node = builder.add_vertex(child_label);
+
+ builder.add_edge(top_in_builder, child_node, child_label)?;
+
+ roots_stack.push(child);
+ }
+ }
+
+ Ok(Self { graph, root })
+ }
+}
+
+#[cfg(test)]
+mod extract_tests {
+ use super::*;
+
+ fn construct_test_forest(
+ ) -> Result<DefaultForest<ForestLabel<GrammarLabel>>, Box<dyn std::error::Error>> {
+ // node 0
+ let mut result: DefaultForest<ForestLabel<GrammarLabel>> = DefaultForest::new_leaf(
+ GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Non(0)), 0, 3),
+ );
+
+ // node 1
+ result.plant(
+ 0,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::Rule(15), 0, 3),
+ _
+ ),
+ false,
+ )?;
+
+ result.plant(
+ 1,
+ DefaultForest::new_leaf(GrammarLabel::new_closed(
+ GrammarLabelType::TNT(TNT::Non(0)),
+ 0,
+ 3,
+ )),
+ true,
+ )?;
+
+ // node 2
+ result.plant(
+ 0,
+ leaf!(GrammarLabel::new_closed(GrammarLabelType::Rule(6), 0, 1), _),
+ false,
+ )?;
+
+ // node 3
+ result.plant(
+ 2,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Ter(0)), 0, 1),
+ _
+ ),
+ false,
+ )?;
+
+ // node 4
+ result.plant(
+ 0,
+ leaf!(GrammarLabel::new_closed(GrammarLabelType::Rule(7), 1, 3), _),
+ false,
+ )?;
+
+ // node 5
+ result.plant(
+ 4,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Non(0)), 1, 3),
+ _
+ ),
+ false,
+ )?;
+
+ // node 6
+ result.plant(
+ 5,
+ leaf!(GrammarLabel::new_closed(GrammarLabelType::Rule(3), 1, 2), _),
+ false,
+ )?;
+
+ // node 7
+ result.plant(
+ 6,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Non(1)), 1, 2),
+ _
+ ),
+ false,
+ )?;
+
+ // node 8
+ result.plant(
+ 7,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::Rule(11), 1, 2),
+ _
+ ),
+ false,
+ )?;
+
+ // node 9
+ result.plant(
+ 8,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Ter(2)), 1, 2),
+ _
+ ),
+ false,
+ )?;
+
+ // Clone node 5 to have node 10 and node 11
+ result.clone_node(5, 0, false)?;
+
+ // node 12
+ result.plant(
+ 11,
+ leaf!(GrammarLabel::new_closed(GrammarLabelType::Rule(3), 1, 3), _),
+ false,
+ )?;
+
+ // node 13
+ result.plant(
+ 12,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Non(1)), 1, 3),
+ _
+ ),
+ false,
+ )?;
+
+ result.plant(
+ 13,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::Rule(11), 1, 2),
+ _
+ ),
+ true,
+ )?;
+
+ // node 14
+ result.plant(
+ 13,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::Rule(13), 2, 3),
+ _
+ ),
+ false,
+ )?;
+
+ // node 15
+ result.plant(
+ 14,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Ter(2)), 2, 3),
+ _
+ ),
+ false,
+ )?;
+
+ // node 16
+ result.plant(
+ 5,
+ leaf!(GrammarLabel::new(GrammarLabelType::Rule(4), 2), _),
+ false,
+ )?;
+
+ // node 17
+ result.plant(
+ 16,
+ leaf!(GrammarLabel::new(GrammarLabelType::TNT(TNT::Non(0)), 2), _),
+ false,
+ )?;
+
+ // node 18
+ result.plant(
+ 17,
+ leaf!(GrammarLabel::new_closed(GrammarLabelType::Rule(3), 2, 3), _),
+ false,
+ )?;
+
+ // node 19
+ result.plant(
+ 18,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Non(1)), 2, 3),
+ _
+ ),
+ false,
+ )?;
+
+ // node 20
+ result.plant(
+ 19,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::Rule(11), 2, 3),
+ _
+ ),
+ false,
+ )?;
+
+ result.plant(
+ 20,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Ter(2)), 2, 3),
+ _
+ ),
+ true,
+ )?;
+
+ Ok(result)
+ }
+
+ #[test]
+ fn test_extract() -> Result<(), Box<dyn std::error::Error>> {
+ let forest = construct_test_forest()?;
+
+ assert_eq!(forest.nodes_len(), 21);
+
+ forest.print_viz("forest before extraction.gv")?;
+
+ let extract_result = forest.extract(3)?;
+
+ extract_result.print_viz("extracted forest.gv")?;
+
+ Ok(())
+ }
+}
diff --git a/chain/src/item/default/mod.rs b/chain/src/item/default/mod.rs
index 47e8c90..28bdbee 100644
--- a/chain/src/item/default/mod.rs
+++ b/chain/src/item/default/mod.rs
@@ -12,7 +12,7 @@ use graph_macro::Graph;
use std::collections::HashSet;
-use core::fmt::Display;
+use std::fmt::Display;
/// The type of errors for forest operations.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd)]
@@ -216,6 +216,15 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> {
return Err(Error::IndexOutOfBounds(node_id, self.nodes_len()));
}
+ // FIXME: We should take the presence of packed nodes into
+ // consideration. That is, the fragment might have already
+ // been planted and packed, and we need to account for such
+ // possibilities.
+ //
+ // Moreover, the fragments might also contain packed nodes, in
+ // which case we need to check these multiple possibilities
+ // are properly contained.
+
// We do a depth-first traversal to determine if every node
// encountered has the same set of children (labels taken into
// the consideration).
@@ -450,7 +459,7 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> {
// Make sure it only has one parent, which is the
// representative node.
- assert_eq!(parents.len(), 1);
+ // assert_eq!(parents.len(), 1);
// We checked its length is 1, so it is safe to unwrap
// here.
@@ -670,6 +679,8 @@ impl<T: GraphLabel> DefaultForest<ForestLabel<T>> {
pub mod splone;
+pub mod extract;
+
impl<T: GraphLabel> DefaultForest<T> {
/// Create a forest with only one leaf from a raw label, unlike
/// `new_leaf`, which transforms the label for convenience.
@@ -798,53 +809,6 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
Ok(())
}
- /// Find the last child of the given node whose start and end
- /// positions contain the given position. If no such child is
- /// found, return `Ok(None)`.
- ///
- /// The returned tuple is of the form (child, index), where
- /// `child` is the index of the child node in question, and
- /// `index` means that the child is the `index`-th child of the
- /// node.
- #[allow(dead_code)]
- pub(crate) fn position_search(
- &self,
- node: usize,
- pos: usize,
- ) -> Result<Option<(usize, usize)>, Error> {
- fn range_contains(label: GrammarLabel, pos: usize) -> bool {
- let start = label.start();
-
- if let Some(end) = label.end() {
- (start..end).contains(&pos)
- } else {
- (start..).contains(&pos)
- }
- }
-
- let node_label = self
- .vertex_label(node)?
- .ok_or(Error::NodeNoLabel(node))?
- .label();
-
- if !range_contains(node_label, pos) {
- return Ok(None);
- }
-
- for (index, child) in self.children_of(node)?.enumerate().rev() {
- let child_label = self
- .vertex_label(child)?
- .ok_or(Error::NodeNoLabel(child))?
- .label();
-
- if range_contains(child_label, pos) {
- return Ok(Some((child, index)));
- }
- }
-
- Ok(None)
- }
-
/// Set the position of every node.
///
/// If ALLP is non-nil or if the node is a terminal node, also set
diff --git a/chain/src/item/default/splone.rs b/chain/src/item/default/splone.rs
index 581d1dc..da13c56 100644
--- a/chain/src/item/default/splone.rs
+++ b/chain/src/item/default/splone.rs
@@ -516,7 +516,13 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
if let Some((fragment, _planted)) = fragment {
let modified_degree = std::cmp::max(child_degree, 1) - 1;
- if self.has_same_children(child, node, modified_degree, edge_index)?
+ dbg!(node, end, edge_index, modified_degree);
+
+ dbg!(child, child_degree);
+
+ dbg!(&fragment);
+
+ if self.has_same_children(child, node, modified_degree, edge_index + 1)?
&& child_degree != 0
{
let last_child = self.nth_child(child, child_degree - 1)?;
@@ -651,7 +657,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
} else if labela.clone_index().is_some() {
let mut parentsa = self.parents_of(nodea)?;
- assert_eq!(parentsa.len(), 1);
+ // assert_eq!(parentsa.len(), 1);
let parenta = parentsa.next().unwrap().node();
diff --git a/chain/src/item/genins.rs b/chain/src/item/genins.rs
index 19f835b..e409c3c 100644
--- a/chain/src/item/genins.rs
+++ b/chain/src/item/genins.rs
@@ -32,7 +32,7 @@ pub(crate) fn index_out_of_bounds_conversion(ge: GrammarError) -> Error {
/// Determine if a label is labelled by a terminal.
fn is_labelled_by_terminal(label: GrammarLabelType) -> bool {
- matches!(label.tnt(), Some(t) if matches!(t, TNT::Ter(_)))
+ matches!(label.tnt(), Some(tnt) if matches!(tnt, TNT::Ter(_)))
}
/// A helper function to generate a fragment of forest.
@@ -211,7 +211,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
// Whether or not to print detailed graphs of each step of
// operation for debugging purposes.
- let to_print = false;
+ let to_print = true;
let tnt_string = {
let empty_p = atom_child_iter.len() == 0;
@@ -376,7 +376,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
if let PaVi::Parent(node, edge, _) = pavi {
let nth_child = self.nth_child(node, edge)?;
- let reduced = self.reduction(nth_child, pos, ter, atom.borrow())?;
+ let reduced = self.reduction(nth_child, pos, ter, atom.borrow(), false)?;
if reduced != nth_child && !self.is_empty_node(reduced)? {
parents.clear();
@@ -700,10 +700,14 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
let reduction_fragment = atom.generate_virtual_frags(nt, t, None);
- // NOTE: the case of the root is exceptional
- if reduction_fragment.is_none() && self.root() != Some(node) {
- return Err(Error::CannotClose(nt, t, node, node_label_start));
- }
+ // Maybe we do not have to force the reduciton here?
+
+ // // NOTE: the case of the root is exceptional
+ // if reduction_fragment.is_none() && self.root() != Some(node) {
+ // dbg!(self.root());
+ // self.print_viz("cannot close.gv").unwrap();
+ // return Err(Error::CannotClose(nt, t, node, node_label_start));
+ // }
for frag in reduction_fragment.into_iter().flatten() {
let mut frag = frag.clone();
diff --git a/chain/src/item/mod.rs b/chain/src/item/mod.rs
index 54ca946..d2c3127 100644
--- a/chain/src/item/mod.rs
+++ b/chain/src/item/mod.rs
@@ -7,7 +7,7 @@
use graph::{error::Error as GError, GraphLabel, LabelGraph, Parent, ParentsGraph};
-use core::borrow::Borrow;
+use std::borrow::Borrow;
/// A parent or a virtual segment.
///
@@ -96,7 +96,7 @@ enum ForestLabelType {
Cloned(usize),
}
-impl core::fmt::Display for ForestLabelType {
+impl std::fmt::Display for ForestLabelType {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match self {
Self::Packed => write!(f, "packed"),
@@ -113,8 +113,8 @@ pub struct ForestLabel<T: GraphLabel> {
status: ForestLabelType,
}
-impl<T: GraphLabel> core::fmt::Display for ForestLabel<T> {
- fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+impl<T: GraphLabel> std::fmt::Display for ForestLabel<T> {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
if !matches!(self.status, ForestLabelType::Plain) {
write!(f, "{}, {}", self.label, self.status)
} else {
@@ -132,8 +132,8 @@ pub enum ForestLabelError {
ClonePack,
}
-impl core::fmt::Display for ForestLabelError {
- fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+impl std::fmt::Display for ForestLabelError {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::PackClone => write!(f, "cannot pack a cloned node"),
Self::ClonePack => write!(f, "cannot clone a packed node"),
@@ -230,6 +230,9 @@ impl<T: GraphLabel> From<T> for ForestLabel<T> {
}
}
+// REVIEW: Should we move this trait (and only this trait) to a
+// separate crate, or is it enough to keep it here?
+
/// The expected behaviours of an item derivation forest.
///
/// Note that it requires only a subset of the functionalities of
diff --git a/chain/src/item/reduction.rs b/chain/src/item/reduction.rs
index 89306e6..3c76c1e 100644
--- a/chain/src/item/reduction.rs
+++ b/chain/src/item/reduction.rs
@@ -122,6 +122,9 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
/// The parameter `ter` is used to fill in segments for virtual
/// nodes if they are found along the way.
///
+ /// The parameter `accept_root` controls whether we want to
+ /// perform reduction on the root.
+ ///
/// # Errors
///
/// 1. Index out of bounds: some node number is corrupted.
@@ -132,12 +135,13 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
pos: usize,
ter: usize,
atom: &DefaultAtom,
+ accept_root: bool,
) -> Result<usize, Error> {
let mut result = node;
// step 1: Determine if this needs reductions.
- if self.root() == Some(node) {
+ if !accept_root && self.root() == Some(node) {
return Ok(result);
}
@@ -152,9 +156,30 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
return Ok(result);
}
+ // Data type for representing the status when performing a
+ // search.
+
+ #[derive(PartialEq, Eq, Copy, Clone, Debug)]
+ enum Status {
+ Correct,
+ Incorrect,
+ Visited,
+ }
+
+ impl From<bool> for Status {
+ fn from(value: bool) -> Self {
+ match value {
+ true => Self::Correct,
+ false => Self::Incorrect,
+ }
+ }
+ }
+
+ use Status::*;
+
// step 2: Find descendents that need reductions.
- let mut correct_ends: Map<usize, bool> = Default::default();
+ let mut correct_ends: Map<usize, Status> = Default::default();
let mut order_of_correct_ends: Vec<usize> = Vec::new();
@@ -165,7 +190,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
use std::{borrow::BorrowMut, io::Write};
// Whether or not to write a debug file.
- let to_write = false;
+ let to_write = true;
if to_write {
if let Ok(ref mut file) = file.borrow_mut() {
@@ -181,14 +206,18 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
}
}
- if correct_ends.get(&top).is_some() {
+ let old_value = correct_ends.get(&top).copied();
+
+ if matches!(old_value, Some(Correct) | Some(Incorrect)) {
continue 'stack_loop;
}
+ correct_ends.insert(top, Visited);
+
let top_label = self.vertex_label(top)?.ok_or(Error::NodeNoLabel(top))?;
if let Some(end) = top_label.label().end() {
- correct_ends.insert(top, end == pos);
+ correct_ends.insert(top, (end == pos).into());
continue 'stack_loop;
}
@@ -216,8 +245,8 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
'child_loop: for child in self.children_of(top)? {
match correct_ends.get(&child).copied() {
- Some(true) => {
- correct_ends.insert(top, true);
+ Some(Correct) => {
+ correct_ends.insert(top, Correct);
inserted = true;
}
@@ -232,12 +261,15 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
if has_unexplored_children {
stack.push(top);
- stack.extend(self.children_of(top)?);
+ stack.extend(
+ self.children_of(top)?
+ .filter(|child| correct_ends.get(child).is_none()),
+ );
} else if !inserted {
- correct_ends.insert(top, false);
+ correct_ends.insert(top, Incorrect);
}
} else {
- correct_ends.insert(top, false);
+ correct_ends.insert(top, Incorrect);
}
continue 'stack_loop;
@@ -245,6 +277,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
if top_label.is_packed() {
let mut has_unexplored_children = false;
+ let mut inserted = false;
if to_write {
if let Ok(ref mut file) = file.borrow_mut() {
@@ -255,12 +288,13 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
for child in self.children_of(top)? {
match correct_ends.get(&child).copied() {
- Some(true) => {
+ Some(Correct) => {
// NOTE: This is not recorded in the
// correct orders, as we do not handle
// packed nodes directly.
- correct_ends.insert(top, true);
+ correct_ends.insert(top, Correct);
+ inserted = true;
}
None => {
has_unexplored_children = true;
@@ -280,9 +314,12 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
if has_unexplored_children {
stack.push(top);
- stack.extend(self.children_of(top)?);
- } else if correct_ends.get(&top).is_none() {
- correct_ends.insert(top, false);
+ stack.extend(
+ self.children_of(top)?
+ .filter(|child| correct_ends.get(child).is_none()),
+ );
+ } else if !inserted {
+ correct_ends.insert(top, Incorrect);
}
continue 'stack_loop;
@@ -309,7 +346,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
panic!("a terminal node {top} has no ending position?");
}
Some(TNT::Non(nt)) => {
- correct_ends.insert(top, true);
+ correct_ends.insert(top, Correct);
self.close_pavi(atom.borrow(), PaVi::Virtual(nt, ter, top), pos)?;
@@ -322,7 +359,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
Some(end) => end,
};
- correct_ends.insert(top, end == pos);
+ correct_ends.insert(top, (end == pos).into());
if end == pos {
order_of_correct_ends.push(top);
@@ -341,10 +378,12 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
let last_child = self.nth_child(top, last_index)?;
if let Some(child_correctness_value) = correct_ends.get(&last_child).copied() {
- correct_ends.insert(top, child_correctness_value);
+ if child_correctness_value != Visited {
+ correct_ends.insert(top, child_correctness_value);
- if child_correctness_value {
- order_of_correct_ends.push(top);
+ if child_correctness_value == Correct {
+ order_of_correct_ends.push(top);
+ }
}
} else {
stack.extend([top, last_child]);
@@ -375,12 +414,6 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
#[cfg(debug_assertions)]
{
- let label_label_label = label.label().label();
-
- assert!(label_label_label.rule().is_none());
-
- assert!(matches!(label_label_label.tnt(), Some(TNT::Non(_))));
-
assert!(label.label().end().is_none());
assert_ne!(degree, 0);
diff --git a/chain/src/lib.rs b/chain/src/lib.rs
index 9de1df7..bf3a8b4 100644
--- a/chain/src/lib.rs
+++ b/chain/src/lib.rs
@@ -240,6 +240,10 @@ pub trait Chain: LabelExtGraph<Edge> {
/// language.
fn unit(atom: Self::Atom) -> Result<Self, Self::Error>;
+ /// Produce the underlying atom, so that we can produce an initial
+ /// empty chain from the same atom again.
+ fn atom(&self) -> &Self::Atom;
+
/// Return true if and only if the language contains the empty
/// string.
fn epsilon(&self) -> Result<bool, Self::Error>;
@@ -283,7 +287,8 @@ pub trait Chain: LabelExtGraph<Edge> {
/// The argument `t` is the terminal we computet the derivative
/// with.
///
- /// The argument `pos` is the position within the input.
+ /// The argument `pos` is the zero-based position within the
+ /// input.
///
/// The argument `no_item` determines whether we want the item
/// derivation forest as well.
@@ -307,9 +312,29 @@ pub trait Chain: LabelExtGraph<Edge> {
Ok(())
}
+ // FIXME: I shall probably not use the trait of forests for this
+ // purpose, but I have not figured out yet wha the correct trait
+ // should be.
+ /// The type of output item that will be produced by this machine.
+ type Item: item::Forest<grammar::GrammarLabel>;
+
/// Signal to the parser that the end of the input is reached, so
/// that the parser knows to generate suitable forests.
- fn end_of_input(&mut self) -> Result<(), Self::Error>;
+ ///
+ /// This is not needed when recognizing instead of parsing.
+ ///
+ /// We also pass in the current position `pos` so that we have a
+ /// little flexibility in the position of the end of input. In
+ /// addition, this makes it easier to produce the suitable
+ /// forests.
+ ///
+ /// As a reminder, `pos` should be the position of the last
+ /// terminal, so if there are three terminals in total, the `pos`
+ /// should be `2` , instead of `3`.
+ ///
+ /// Similarly, we pass in the terminal at the position `pos` to
+ /// aid the production of the forest.
+ fn end_of_input(&mut self, pos: usize, ter: usize) -> Result<Self::Item, Self::Error>;
}
pub mod default;