summaryrefslogtreecommitdiff
path: root/chain/src/item/default/extract.rs
diff options
context:
space:
mode:
Diffstat (limited to 'chain/src/item/default/extract.rs')
-rw-r--r--chain/src/item/default/extract.rs509
1 files changed, 509 insertions, 0 deletions
diff --git a/chain/src/item/default/extract.rs b/chain/src/item/default/extract.rs
new file mode 100644
index 0000000..96f5119
--- /dev/null
+++ b/chain/src/item/default/extract.rs
@@ -0,0 +1,509 @@
+//! This module defines a function for extracting the "completed part"
+//! of a forest.
+//!
+//! # Completed sub-forest
+//!
+//! The completed part of a forest refers to the sub-forest of the
+//! forest that consists entirely of nodes whose ending positions are
+//! set, and whose subtrees also consist of such nodes.
+//!
+//! # Definition
+//!
+//! To be more rigorous, we call a node *n* of a forest *F* complete
+//! if its ending position is set. We call a sub-forest *S* of *F*
+//! completed if the following two conditions are satisfied:
+//!
+//! 1. Every node in *S* is complete.
+//! 2. For every node *n* in *S*, the subtree of *F* rooted at *n*
+//! consists entirely of complete nodes.
+//!
+//! # Uniqueness of the maximal completed sub-forest
+//!
+//! **Lemma**
+//!
+//! For any forest *F*, there is only one maximal completed sub-forest
+//! *S* of *F*. Here the maximality means that if *T* is a completed
+//! sub-forest of *F* which contains *S*, then *S* is equal to *T*.
+//!
+//! Then by **the completed part** of a forest *F* we refer to the
+//! unuique maximal completed sub-forest of *F*.
+//!
+//! **Proof**
+//!
+//! Note that if *S_1* and *S_2* are two completed sub-forests of *F*,
+//! and if *S_1* is not contained in *S_2*, say *n* is a node in *S_1*
+//! but not in *S_2*, then by adjoining the subtree of *S_2* rooted at
+//! *n* to *S_1* we obtain a completed sub-forest *S_3* which contains
+//! *S_1*, contradicting the maximality of *S_1*. Thus there can only
+//! be one maximal completed sub-forest of a forest.
+//!
+//! # Connected component
+//!
+//! The extraction function actually returns the connected component
+//! of the completed part of a forest which contains its root, as that
+//! is what we care about the most.
+
+use super::*;
+
+impl DefaultForest<ForestLabel<GrammarLabel>> {
+ pub(crate) fn extract(&self, pos: usize) -> Result<Self, Error> {
+ // Preparations
+
+ let nodes_len = self.nodes_len();
+
+ let mut graph = PLGraph::default();
+ let mut builder = PLGBuilderMut::from_graph_mut(&mut graph);
+ let root = Some(0);
+
+ // Now find the connected component of the completed part of
+ // `self` and clone that to graph by means of `builder`.
+
+ // REVIEW: Maybe a small_vec can help here.
+
+ // A fixed-size hash table, sort of.
+ let mut validity_array: Vec<bool> = std::iter::repeat(true).take(nodes_len).collect();
+
+ // Calculate the exact length to avoid too many allocations.
+ let mut stack_len = 0usize;
+
+ // Use an iterator to avoid checking bounds in accessing
+ // elements of the array.
+ for (node, validity_ptr) in self.nodes().zip(validity_array.iter_mut()) {
+ if self
+ .vertex_label(node)?
+ .ok_or(Error::NodeNoLabel(node))?
+ .label()
+ .end()
+ .is_none()
+ {
+ *validity_ptr = false;
+
+ stack_len += 1;
+ }
+ }
+
+ dbg!(&validity_array);
+
+ // A stack for propagating the falsehood to parents and
+ // children of incomplete nodes, like a plague. The only
+ // nodes that can stop the spread of this plague are packed
+ // nodes with a child that is not infected (yet) by the
+ // plague.
+
+ let mut stack: Vec<usize> = Vec::with_capacity(stack_len);
+
+ for (n, validity) in validity_array.iter().enumerate() {
+ if !*validity {
+ stack.push(n);
+ }
+ }
+
+ while let Some(top) = stack.pop() {
+ 'parent_loop: for parent in self.parents_of(top)?.map(|p| p.node()) {
+ if !*validity_array
+ .get(parent)
+ .ok_or(Error::IndexOutOfBounds(parent, nodes_len))?
+ {
+ // already infected by the plague
+
+ continue 'parent_loop;
+ }
+
+ let should_spread_p = if self
+ .vertex_label(parent)?
+ .ok_or(Error::NodeNoLabel(parent))?
+ .is_packed()
+ {
+ !self
+ .children_of(parent)?
+ .any(|node| validity_array.get(node).copied() == Some(true))
+ } else {
+ true
+ };
+
+ if should_spread_p {
+ *validity_array
+ .get_mut(parent)
+ .ok_or(Error::IndexOutOfBounds(parent, nodes_len))? = false;
+ stack.push(parent);
+ }
+ }
+ }
+
+ let validity_array = validity_array;
+
+ /// A little macro to produce a vector of valid children.
+ macro_rules! valid_children {
+ ($node:ident) => {
+ self.children_of($node)?
+ .filter(|child| validity_array.get(*child).copied() == Some(true))
+ .collect::<Vec<usize>>()
+ };
+ }
+
+ dbg!(&validity_array);
+
+ if validity_array.iter().all(|validity| !*validity) {
+ // every element is false
+
+ let root = None;
+
+ return Ok(Self { graph, root });
+ }
+
+ // Finally clone the connected component to the new graph.
+
+ let root_label = GrammarLabel::new_closed(TNT::Non(0), 0, pos);
+
+ let packed_label = ForestLabel::new(root_label, ForestLabelType::Packed);
+
+ let plain_label = ForestLabel::new(root_label, ForestLabelType::Plain);
+
+ let original_root_label;
+
+ let original_root = if let Some(packed_node) = self.query_label(packed_label) {
+ original_root_label = packed_label;
+
+ packed_node
+ } else if let Some(plain_node) = self.query_label(plain_label) {
+ original_root_label = plain_label;
+
+ plain_node
+ } else {
+ let root = None;
+ return Ok(Self { graph, root });
+ };
+
+ let mut roots_stack: Vec<usize>;
+
+ if original_root_label.is_packed() {
+ roots_stack = valid_children!(original_root);
+
+ match roots_stack.len() {
+ 0 => {
+ let root = None;
+
+ return Ok(Self { graph, root });
+ }
+ 1 => {
+ let child = *roots_stack.first().unwrap();
+
+ builder.add_vertex(self.vertex_label(child)?.ok_or(Error::NodeNoLabel(child))?);
+ }
+ _ => {
+ let builder_root = builder.add_vertex(original_root_label);
+
+ for child in roots_stack.iter().copied() {
+ let child_node = builder.add_vertex(
+ self.vertex_label(child)?.ok_or(Error::NodeNoLabel(child))?,
+ );
+
+ builder.add_edge(builder_root, child_node, original_root_label)?;
+ }
+ }
+ }
+ } else {
+ builder.add_vertex(original_root_label);
+
+ roots_stack = vec![original_root];
+ }
+
+ while let Some(top) = roots_stack.pop() {
+ let top_label = self.vertex_label(top)?.ok_or(Error::NodeNoLabel(top))?;
+
+ let top_in_builder = match builder.query_label(top_label) {
+ Some(top_node) => top_node,
+ None => {
+ // an old cloned node now becomes a plain node
+ let plain_label = ForestLabel::new(top_label.label(), ForestLabelType::Plain);
+
+ builder
+ .query_label(plain_label)
+ .unwrap_or_else(|| panic!("the top {top} should be planted already"))
+ }
+ };
+
+ 'children_loop: for child in self.children_of(top)? {
+ let child_label = self.vertex_label(child)?.ok_or(Error::NodeNoLabel(child))?;
+
+ // filter out invalid children
+ if validity_array.get(child).copied() != Some(true) {
+ continue 'children_loop;
+ }
+
+ // avoid unnecessary effort
+ if let Some(child_node) = builder.query_label(child_label) {
+ builder.add_edge(top_in_builder, child_node, child_label)?;
+
+ continue 'children_loop;
+ }
+
+ if child_label.is_packed() {
+ let child_valid_children = valid_children!(child);
+
+ match child_valid_children.len() {
+ 0 => {
+ panic!("this case should not happen");
+ }
+ 1 => {
+ // If a packed node only has one valid
+ // child, we clone a plain node instead.
+
+ let child_child = *child_valid_children.first().unwrap();
+
+ let child_plain_label =
+ ForestLabel::new(child_label.label(), ForestLabelType::Plain);
+
+ let child_plain_node = builder.add_vertex(child_plain_label);
+
+ builder.add_edge(
+ top_in_builder,
+ child_plain_node,
+ child_plain_label,
+ )?;
+
+ roots_stack.push(child_child);
+ }
+ _ => {
+ let child_node = builder.add_vertex(child_label);
+
+ builder.add_edge(top_in_builder, child_node, child_label)?;
+
+ roots_stack.push(child);
+ }
+ }
+
+ continue 'children_loop;
+ }
+
+ let child_node = builder.add_vertex(child_label);
+
+ builder.add_edge(top_in_builder, child_node, child_label)?;
+
+ roots_stack.push(child);
+ }
+ }
+
+ Ok(Self { graph, root })
+ }
+}
+
+#[cfg(test)]
+mod extract_tests {
+ use super::*;
+
+ fn construct_test_forest(
+ ) -> Result<DefaultForest<ForestLabel<GrammarLabel>>, Box<dyn std::error::Error>> {
+ // node 0
+ let mut result: DefaultForest<ForestLabel<GrammarLabel>> = DefaultForest::new_leaf(
+ GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Non(0)), 0, 3),
+ );
+
+ // node 1
+ result.plant(
+ 0,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::Rule(15), 0, 3),
+ _
+ ),
+ false,
+ )?;
+
+ result.plant(
+ 1,
+ DefaultForest::new_leaf(GrammarLabel::new_closed(
+ GrammarLabelType::TNT(TNT::Non(0)),
+ 0,
+ 3,
+ )),
+ true,
+ )?;
+
+ // node 2
+ result.plant(
+ 0,
+ leaf!(GrammarLabel::new_closed(GrammarLabelType::Rule(6), 0, 1), _),
+ false,
+ )?;
+
+ // node 3
+ result.plant(
+ 2,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Ter(0)), 0, 1),
+ _
+ ),
+ false,
+ )?;
+
+ // node 4
+ result.plant(
+ 0,
+ leaf!(GrammarLabel::new_closed(GrammarLabelType::Rule(7), 1, 3), _),
+ false,
+ )?;
+
+ // node 5
+ result.plant(
+ 4,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Non(0)), 1, 3),
+ _
+ ),
+ false,
+ )?;
+
+ // node 6
+ result.plant(
+ 5,
+ leaf!(GrammarLabel::new_closed(GrammarLabelType::Rule(3), 1, 2), _),
+ false,
+ )?;
+
+ // node 7
+ result.plant(
+ 6,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Non(1)), 1, 2),
+ _
+ ),
+ false,
+ )?;
+
+ // node 8
+ result.plant(
+ 7,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::Rule(11), 1, 2),
+ _
+ ),
+ false,
+ )?;
+
+ // node 9
+ result.plant(
+ 8,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Ter(2)), 1, 2),
+ _
+ ),
+ false,
+ )?;
+
+ // Clone node 5 to have node 10 and node 11
+ result.clone_node(5, 0, false)?;
+
+ // node 12
+ result.plant(
+ 11,
+ leaf!(GrammarLabel::new_closed(GrammarLabelType::Rule(3), 1, 3), _),
+ false,
+ )?;
+
+ // node 13
+ result.plant(
+ 12,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Non(1)), 1, 3),
+ _
+ ),
+ false,
+ )?;
+
+ result.plant(
+ 13,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::Rule(11), 1, 2),
+ _
+ ),
+ true,
+ )?;
+
+ // node 14
+ result.plant(
+ 13,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::Rule(13), 2, 3),
+ _
+ ),
+ false,
+ )?;
+
+ // node 15
+ result.plant(
+ 14,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Ter(2)), 2, 3),
+ _
+ ),
+ false,
+ )?;
+
+ // node 16
+ result.plant(
+ 5,
+ leaf!(GrammarLabel::new(GrammarLabelType::Rule(4), 2), _),
+ false,
+ )?;
+
+ // node 17
+ result.plant(
+ 16,
+ leaf!(GrammarLabel::new(GrammarLabelType::TNT(TNT::Non(0)), 2), _),
+ false,
+ )?;
+
+ // node 18
+ result.plant(
+ 17,
+ leaf!(GrammarLabel::new_closed(GrammarLabelType::Rule(3), 2, 3), _),
+ false,
+ )?;
+
+ // node 19
+ result.plant(
+ 18,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Non(1)), 2, 3),
+ _
+ ),
+ false,
+ )?;
+
+ // node 20
+ result.plant(
+ 19,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::Rule(11), 2, 3),
+ _
+ ),
+ false,
+ )?;
+
+ result.plant(
+ 20,
+ leaf!(
+ GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Ter(2)), 2, 3),
+ _
+ ),
+ true,
+ )?;
+
+ Ok(result)
+ }
+
+ #[test]
+ fn test_extract() -> Result<(), Box<dyn std::error::Error>> {
+ let forest = construct_test_forest()?;
+
+ assert_eq!(forest.nodes_len(), 21);
+
+ forest.print_viz("forest before extraction.gv")?;
+
+ let extract_result = forest.extract(3)?;
+
+ extract_result.print_viz("extracted forest.gv")?;
+
+ Ok(())
+ }
+}