summaryrefslogtreecommitdiff
path: root/chain/src/item/default
diff options
context:
space:
mode:
Diffstat (limited to 'chain/src/item/default')
-rw-r--r--chain/src/item/default/extract.rs509
-rw-r--r--chain/src/item/default/mod.rs62
-rw-r--r--chain/src/item/default/splone.rs10
3 files changed, 530 insertions, 51 deletions
diff --git a/chain/src/item/default/extract.rs b/chain/src/item/default/extract.rs
new file mode 100644
index 0000000..96f5119
--- /dev/null
+++ b/chain/src/item/default/extract.rs
@@ -0,0 +1,509 @@
+//! This module defines a function for extracting the "completed part"
+//! of a forest.
+//!
+//! # Completed sub-forest
+//!
+//! The completed part of a forest refers to the sub-forest of the
+//! forest that consists entirely of nodes whose ending positions are
+//! set, and whose subtrees also consist of such nodes.
+//!
+//! # Definition
+//!
+//! To be more rigorous, we call a node *n* of a forest *F* complete
+//! if its ending position is set. We call a sub-forest *S* of *F*
+//! completed if the following two conditions are satisfied:
+//!
+//! 1. Every node in *S* is complete.
+//! 2. For every node *n* in *S*, the subtree of *F* rooted at *n*
+//! consists entirely of complete nodes.
+//!
+//! # Uniqueness of the maximal completed sub-forest
+//!
+//! **Lemma**
+//!
+//! For any forest *F*, there is only one maximal completed sub-forest
+//! *S* of *F*. Here the maximality means that if *T* is a completed
+//! sub-forest of *F* which contains *S*, then *S* is equal to *T*.
+//!
+//! Then by **the completed part** of a forest *F* we refer to the
+//! unuique maximal completed sub-forest of *F*.
+//!
+//! **Proof**
+//!
+//! Note that if *S_1* and *S_2* are two completed sub-forests of *F*,
+//! and if *S_1* is not contained in *S_2*, say *n* is a node in *S_1*
+//! but not in *S_2*, then by adjoining the subtree of *S_2* rooted at
+//! *n* to *S_1* we obtain a completed sub-forest *S_3* which contains
+//! *S_1*, contradicting the maximality of *S_1*. Thus there can only
+//! be one maximal completed sub-forest of a forest.
+//!
+//! # Connected component
+//!
+//! The extraction function actually returns the connected component
+//! of the completed part of a forest which contains its root, as that
+//! is what we care about the most.
+
+use super::*;
+
+impl DefaultForest<ForestLabel<GrammarLabel>> {
+ pub(crate) fn extract(&self, pos: usize) -> Result<Self, Error> {
+ // Preparations
+
+ let nodes_len = self.nodes_len();
+
+ let mut graph = PLGraph::default();
+ let mut builder = PLGBuilderMut::from_graph_mut(&mut graph);
+ let root = Some(0);
+
+ // Now find the connected component of the completed part of
+ // `self` and clone that to graph by means of `builder`.
+
+ // REVIEW: Maybe a small_vec can help here.
+
+ // A fixed-size hash table, sort of.
+ let mut validity_array: Vec<bool> = std::iter::repeat(true).take(nodes_len).collect();
+
+ // Calculate the exact length to avoid too many allocations.
+ let mut stack_len = 0usize;
+
+ // Use an iterator to avoid checking bounds in accessing
+ // elements of the array.
+ for (node, validity_ptr) in self.nodes().zip(validity_array.iter_mut()) {
+ if self
+ .vertex_label(node)?
+ .ok_or(Error::NodeNoLabel(node))?
+ .label()
+ .end()
+ .is_none()
+ {
+ *validity_ptr = false;
+
+ stack_len += 1;
+ }
+ }
+
+ dbg!(&validity_array);
+
+ // A stack for propagating the falsehood to parents and
+ // children of incomplete nodes, like a plague. The only
+ // nodes that can stop the spread of this plague are packed
+ // nodes with a child that is not infected (yet) by the
+ // plague.
+
+ let mut stack: Vec<usize> = Vec::with_capacity(stack_len);
+
+ for (n, validity) in validity_array.iter().enumerate() {
+ if !*validity {
+ stack.push(n);
+ }
+ }
+
+ while let Some(top) = stack.pop() {
+ 'parent_loop: for parent in self.parents_of(top)?.map(|p| p.node()) {
+ if !*validity_array
+ .get(parent)
+ .ok_or(Error::IndexOutOfBounds(parent, nodes_len))?
+ {
+ // already infected by the plague
+
+ continue 'parent_loop;
+ }
+
+ let should_spread_p = if self
+ .vertex_label(parent)?
+ .ok_or(Error::NodeNoLabel(parent))?
+ .is_packed()
+ {
+ !self
+ .children_of(parent)?
+ .any(|node| validity_array.get(node).copied() == Some(true))
+ } else {
+ true
+ };
+
+ if should_spread_p {
+ *validity_array
+ .get_mut(parent)
+ .ok_or(Error::IndexOutOfBounds(parent, nodes_len))? = false;
+ stack.push(parent);
+ }
+ }
+ }
+
+ let validity_array = validity_array;
+
+ /// A little macro to produce a vector of valid children.
+ macro_rules! valid_children {
+ ($node:ident) => {
+ self.children_of($node)?
+ .filter(|child| validity_array.get(*child).copied() == Some(true))
+ .collect::<Vec<usize>>()
+ };
+ }
+
+ dbg!(&validity_array);
+
+ if validity_array.iter().all(|validity| !*validity) {
+ // every element is false
+
+ let root = None;
+
+ return Ok(Self { graph, root });
+ }
+
+ // Finally clone the connected component to the new graph.
+
+ let root_label = GrammarLabel::new_closed(TNT::Non(0), 0, pos);
+
+ let packed_label = ForestLabel::new(root_label, ForestLabelType::Packed);
+
+ let plain_label = ForestLabel::new(root_label, ForestLabelType::Plain);
+
+ let original_root_label;
+
+ let original_root = if let Some(packed_node) = self.query_label(packed_label) {
+ original_root_label = packed_label;
+
+ packed_node
+ } else if let Some(plain_node) = self.query_label(plain_label) {
+ original_root_label = plain_label;
+
+ plain_node
+ } else {
+ let root = None;
+ return Ok(Self { graph, root });
+ };
+
+ let mut roots_stack: Vec<usize>;
+
+ if original_root_label.is_packed() {
+ roots_stack = valid_children!(original_root);
+
+ match roots_stack.len() {
+ 0 => {
+ let root = None;
+
+ return Ok(Self { graph, root });
+ }
+ 1 => {
+ let child = *roots_stack.first().unwrap();
+
+ builder.add_vertex(self.vertex_label(child)?.ok_or(Error::NodeNoLabel(child))?);
+ }
+ _ => {
+ let builder_root = builder.add_vertex(original_root_label);
+
+ for child in roots_stack.iter().copied() {
+ let child_node = builder.add_vertex(
+ self.vertex_label(child)?.ok_or(Error::NodeNoLabel(child))?,
+ );
+
+ builder.add_edge(builder_root, child_node, original_root_label)?;
+ }
+ }
+ }
+ } else {
+ builder.add_vertex(original_root_label);
+
+ roots_stack = vec![original_root];
+ }
+
+ while let Some(top) = roots_stack.pop() {
+ let top_label = self.vertex_label(top)?.ok_or(Error::NodeNoLabel(top))?;
+
+ let top_in_builder = match builder.query_label(top_label) {
+ Some(top_node) => top_node,
+ None => {
+ // an old cloned node now becomes a plain node
+ let plain_label = ForestLabel::new(top_label.label(), ForestLabelType::Plain);
+
+ builder
+ .query_label(plain_label)
+ .unwrap_or_else(|| panic!("the top {top} should be planted already"))
+ }
+ };
+
+ 'children_loop: for child in self.children_of(top)? {
+ let child_label = self.vertex_label(child)?.ok_or(Error::NodeNoLabel(child))?;
+
+ // filter out invalid children
+ if validity_array.get(child).copied() != Some(true) {
+ continue 'children_loop;
+ }
+
+ // avoid unnecessary effort
+ if let Some(child_node) = builder.query_label(child_label) {
+ builder.add_edge(top_in_builder, child_node, child_label)?;
+
+ continue 'children_loop;
+ }
+
+ if child_label.is_packed() {
+ let child_valid_children = valid_children!(child);
+
+ match child_valid_children.len() {
+ 0 => {
+ panic!("this case should not happen");
+ }
+ 1 => {
+ // If a packed node only has one valid
+ // child, we clone a plain node instead.
+
+ let child_child = *child_valid_children.first().unwrap();
+
+ let child_plain_label =
+ ForestLabel::new(child_label.label(), ForestLabelType::Plain);
+
+ let child_plain_node = builder.add_vertex(child_plain_label);
+
+ builder.add_edge(
+ top_in_builder,
+ child_plain_node,
+ child_plain_label,
+ )?;
+
+ roots_stack.push(child_child);
+ }
+ _ => {
+ let child_node = builder.add_vertex(child_label);
+
+ builder.add_edge(top_in_builder, child_node, child_label)?;
+
+ roots_stack.push(child);
+ }
+ }
+
+ continue 'children_loop;
+ }
+
+ let child_node = builder.add_vertex(child_label);
+
+ builder.add_edge(top_in_builder, child_node, child_label)?;
+
+ roots_stack.push(child);
+ }
+ }
+
+ Ok(Self { graph, root })
+ }
+}
+
+#[cfg(test)]
+mod extract_tests {
+ use super::*;
+
+ fn construct_test_forest(
+ ) -> Result<DefaultForest<ForestLabel<GrammarLabel>>, Box<dyn std::error::Error>> {
+ // node 0
+ let mut result: DefaultForest<ForestLabel<GrammarLabel>> = DefaultForest::new_leaf(
+ GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Non(0)), 0, 3),
+ );
+
+ // node 1
+ result.plant(
+ 0,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::Rule(15), 0, 3),
+ _
+ ),
+ false,
+ )?;
+
+ result.plant(
+ 1,
+ DefaultForest::new_leaf(GrammarLabel::new_closed(
+ GrammarLabelType::TNT(TNT::Non(0)),
+ 0,
+ 3,
+ )),
+ true,
+ )?;
+
+ // node 2
+ result.plant(
+ 0,
+ leaf!(GrammarLabel::new_closed(GrammarLabelType::Rule(6), 0, 1), _),
+ false,
+ )?;
+
+ // node 3
+ result.plant(
+ 2,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Ter(0)), 0, 1),
+ _
+ ),
+ false,
+ )?;
+
+ // node 4
+ result.plant(
+ 0,
+ leaf!(GrammarLabel::new_closed(GrammarLabelType::Rule(7), 1, 3), _),
+ false,
+ )?;
+
+ // node 5
+ result.plant(
+ 4,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Non(0)), 1, 3),
+ _
+ ),
+ false,
+ )?;
+
+ // node 6
+ result.plant(
+ 5,
+ leaf!(GrammarLabel::new_closed(GrammarLabelType::Rule(3), 1, 2), _),
+ false,
+ )?;
+
+ // node 7
+ result.plant(
+ 6,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Non(1)), 1, 2),
+ _
+ ),
+ false,
+ )?;
+
+ // node 8
+ result.plant(
+ 7,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::Rule(11), 1, 2),
+ _
+ ),
+ false,
+ )?;
+
+ // node 9
+ result.plant(
+ 8,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Ter(2)), 1, 2),
+ _
+ ),
+ false,
+ )?;
+
+ // Clone node 5 to have node 10 and node 11
+ result.clone_node(5, 0, false)?;
+
+ // node 12
+ result.plant(
+ 11,
+ leaf!(GrammarLabel::new_closed(GrammarLabelType::Rule(3), 1, 3), _),
+ false,
+ )?;
+
+ // node 13
+ result.plant(
+ 12,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Non(1)), 1, 3),
+ _
+ ),
+ false,
+ )?;
+
+ result.plant(
+ 13,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::Rule(11), 1, 2),
+ _
+ ),
+ true,
+ )?;
+
+ // node 14
+ result.plant(
+ 13,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::Rule(13), 2, 3),
+ _
+ ),
+ false,
+ )?;
+
+ // node 15
+ result.plant(
+ 14,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Ter(2)), 2, 3),
+ _
+ ),
+ false,
+ )?;
+
+ // node 16
+ result.plant(
+ 5,
+ leaf!(GrammarLabel::new(GrammarLabelType::Rule(4), 2), _),
+ false,
+ )?;
+
+ // node 17
+ result.plant(
+ 16,
+ leaf!(GrammarLabel::new(GrammarLabelType::TNT(TNT::Non(0)), 2), _),
+ false,
+ )?;
+
+ // node 18
+ result.plant(
+ 17,
+ leaf!(GrammarLabel::new_closed(GrammarLabelType::Rule(3), 2, 3), _),
+ false,
+ )?;
+
+ // node 19
+ result.plant(
+ 18,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Non(1)), 2, 3),
+ _
+ ),
+ false,
+ )?;
+
+ // node 20
+ result.plant(
+ 19,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::Rule(11), 2, 3),
+ _
+ ),
+ false,
+ )?;
+
+ result.plant(
+ 20,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Ter(2)), 2, 3),
+ _
+ ),
+ true,
+ )?;
+
+ Ok(result)
+ }
+
+ #[test]
+ fn test_extract() -> Result<(), Box<dyn std::error::Error>> {
+ let forest = construct_test_forest()?;
+
+ assert_eq!(forest.nodes_len(), 21);
+
+ forest.print_viz("forest before extraction.gv")?;
+
+ let extract_result = forest.extract(3)?;
+
+ extract_result.print_viz("extracted forest.gv")?;
+
+ Ok(())
+ }
+}
diff --git a/chain/src/item/default/mod.rs b/chain/src/item/default/mod.rs
index 47e8c90..28bdbee 100644
--- a/chain/src/item/default/mod.rs
+++ b/chain/src/item/default/mod.rs
@@ -12,7 +12,7 @@ use graph_macro::Graph;
use std::collections::HashSet;
-use core::fmt::Display;
+use std::fmt::Display;
/// The type of errors for forest operations.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd)]
@@ -216,6 +216,15 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> {
return Err(Error::IndexOutOfBounds(node_id, self.nodes_len()));
}
+ // FIXME: We should take the presence of packed nodes into
+ // consideration. That is, the fragment might have already
+ // been planted and packed, and we need to account for such
+ // possibilities.
+ //
+ // Moreover, the fragments might also contain packed nodes, in
+ // which case we need to check these multiple possibilities
+ // are properly contained.
+
// We do a depth-first traversal to determine if every node
// encountered has the same set of children (labels taken into
// the consideration).
@@ -450,7 +459,7 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> {
// Make sure it only has one parent, which is the
// representative node.
- assert_eq!(parents.len(), 1);
+ // assert_eq!(parents.len(), 1);
// We checked its length is 1, so it is safe to unwrap
// here.
@@ -670,6 +679,8 @@ impl<T: GraphLabel> DefaultForest<ForestLabel<T>> {
pub mod splone;
+pub mod extract;
+
impl<T: GraphLabel> DefaultForest<T> {
/// Create a forest with only one leaf from a raw label, unlike
/// `new_leaf`, which transforms the label for convenience.
@@ -798,53 +809,6 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
Ok(())
}
- /// Find the last child of the given node whose start and end
- /// positions contain the given position. If no such child is
- /// found, return `Ok(None)`.
- ///
- /// The returned tuple is of the form (child, index), where
- /// `child` is the index of the child node in question, and
- /// `index` means that the child is the `index`-th child of the
- /// node.
- #[allow(dead_code)]
- pub(crate) fn position_search(
- &self,
- node: usize,
- pos: usize,
- ) -> Result<Option<(usize, usize)>, Error> {
- fn range_contains(label: GrammarLabel, pos: usize) -> bool {
- let start = label.start();
-
- if let Some(end) = label.end() {
- (start..end).contains(&pos)
- } else {
- (start..).contains(&pos)
- }
- }
-
- let node_label = self
- .vertex_label(node)?
- .ok_or(Error::NodeNoLabel(node))?
- .label();
-
- if !range_contains(node_label, pos) {
- return Ok(None);
- }
-
- for (index, child) in self.children_of(node)?.enumerate().rev() {
- let child_label = self
- .vertex_label(child)?
- .ok_or(Error::NodeNoLabel(child))?
- .label();
-
- if range_contains(child_label, pos) {
- return Ok(Some((child, index)));
- }
- }
-
- Ok(None)
- }
-
/// Set the position of every node.
///
/// If ALLP is non-nil or if the node is a terminal node, also set
diff --git a/chain/src/item/default/splone.rs b/chain/src/item/default/splone.rs
index 581d1dc..da13c56 100644
--- a/chain/src/item/default/splone.rs
+++ b/chain/src/item/default/splone.rs
@@ -516,7 +516,13 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
if let Some((fragment, _planted)) = fragment {
let modified_degree = std::cmp::max(child_degree, 1) - 1;
- if self.has_same_children(child, node, modified_degree, edge_index)?
+ dbg!(node, end, edge_index, modified_degree);
+
+ dbg!(child, child_degree);
+
+ dbg!(&fragment);
+
+ if self.has_same_children(child, node, modified_degree, edge_index + 1)?
&& child_degree != 0
{
let last_child = self.nth_child(child, child_degree - 1)?;
@@ -651,7 +657,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
} else if labela.clone_index().is_some() {
let mut parentsa = self.parents_of(nodea)?;
- assert_eq!(parentsa.len(), 1);
+ // assert_eq!(parentsa.len(), 1);
let parenta = parentsa.next().unwrap().node();