From 9a317e56f8a6126583f7d0c431bf878d9b1fe7b1 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Sat, 8 Jul 2023 12:30:21 +0800 Subject: Finished the Emacs binding. Now the binding part is finished. What remains is a bug encountered when planting a fragment to the forest which intersects a packed node, which would lead to invalid forests. This will also cause problem when planting a packed fragment, but until now my testing grammars do not produce packed fragments, so this problem is not encountered yet. I am still figuring out efficient ways to solve this problem. --- chain/src/item/default/extract.rs | 509 ++++++++++++++++++++++++++++++++++++++ chain/src/item/default/mod.rs | 62 +---- chain/src/item/default/splone.rs | 10 +- 3 files changed, 530 insertions(+), 51 deletions(-) create mode 100644 chain/src/item/default/extract.rs (limited to 'chain/src/item/default') diff --git a/chain/src/item/default/extract.rs b/chain/src/item/default/extract.rs new file mode 100644 index 0000000..96f5119 --- /dev/null +++ b/chain/src/item/default/extract.rs @@ -0,0 +1,509 @@ +//! This module defines a function for extracting the "completed part" +//! of a forest. +//! +//! # Completed sub-forest +//! +//! The completed part of a forest refers to the sub-forest of the +//! forest that consists entirely of nodes whose ending positions are +//! set, and whose subtrees also consist of such nodes. +//! +//! # Definition +//! +//! To be more rigorous, we call a node *n* of a forest *F* complete +//! if its ending position is set. We call a sub-forest *S* of *F* +//! completed if the following two conditions are satisfied: +//! +//! 1. Every node in *S* is complete. +//! 2. For every node *n* in *S*, the subtree of *F* rooted at *n* +//! consists entirely of complete nodes. +//! +//! # Uniqueness of the maximal completed sub-forest +//! +//! **Lemma** +//! +//! For any forest *F*, there is only one maximal completed sub-forest +//! *S* of *F*. Here the maximality means that if *T* is a completed +//! sub-forest of *F* which contains *S*, then *S* is equal to *T*. +//! +//! Then by **the completed part** of a forest *F* we refer to the +//! unuique maximal completed sub-forest of *F*. +//! +//! **Proof** +//! +//! Note that if *S_1* and *S_2* are two completed sub-forests of *F*, +//! and if *S_1* is not contained in *S_2*, say *n* is a node in *S_1* +//! but not in *S_2*, then by adjoining the subtree of *S_2* rooted at +//! *n* to *S_1* we obtain a completed sub-forest *S_3* which contains +//! *S_1*, contradicting the maximality of *S_1*. Thus there can only +//! be one maximal completed sub-forest of a forest. +//! +//! # Connected component +//! +//! The extraction function actually returns the connected component +//! of the completed part of a forest which contains its root, as that +//! is what we care about the most. + +use super::*; + +impl DefaultForest> { + pub(crate) fn extract(&self, pos: usize) -> Result { + // Preparations + + let nodes_len = self.nodes_len(); + + let mut graph = PLGraph::default(); + let mut builder = PLGBuilderMut::from_graph_mut(&mut graph); + let root = Some(0); + + // Now find the connected component of the completed part of + // `self` and clone that to graph by means of `builder`. + + // REVIEW: Maybe a small_vec can help here. + + // A fixed-size hash table, sort of. + let mut validity_array: Vec = std::iter::repeat(true).take(nodes_len).collect(); + + // Calculate the exact length to avoid too many allocations. + let mut stack_len = 0usize; + + // Use an iterator to avoid checking bounds in accessing + // elements of the array. + for (node, validity_ptr) in self.nodes().zip(validity_array.iter_mut()) { + if self + .vertex_label(node)? + .ok_or(Error::NodeNoLabel(node))? + .label() + .end() + .is_none() + { + *validity_ptr = false; + + stack_len += 1; + } + } + + dbg!(&validity_array); + + // A stack for propagating the falsehood to parents and + // children of incomplete nodes, like a plague. The only + // nodes that can stop the spread of this plague are packed + // nodes with a child that is not infected (yet) by the + // plague. + + let mut stack: Vec = Vec::with_capacity(stack_len); + + for (n, validity) in validity_array.iter().enumerate() { + if !*validity { + stack.push(n); + } + } + + while let Some(top) = stack.pop() { + 'parent_loop: for parent in self.parents_of(top)?.map(|p| p.node()) { + if !*validity_array + .get(parent) + .ok_or(Error::IndexOutOfBounds(parent, nodes_len))? + { + // already infected by the plague + + continue 'parent_loop; + } + + let should_spread_p = if self + .vertex_label(parent)? + .ok_or(Error::NodeNoLabel(parent))? + .is_packed() + { + !self + .children_of(parent)? + .any(|node| validity_array.get(node).copied() == Some(true)) + } else { + true + }; + + if should_spread_p { + *validity_array + .get_mut(parent) + .ok_or(Error::IndexOutOfBounds(parent, nodes_len))? = false; + stack.push(parent); + } + } + } + + let validity_array = validity_array; + + /// A little macro to produce a vector of valid children. + macro_rules! valid_children { + ($node:ident) => { + self.children_of($node)? + .filter(|child| validity_array.get(*child).copied() == Some(true)) + .collect::>() + }; + } + + dbg!(&validity_array); + + if validity_array.iter().all(|validity| !*validity) { + // every element is false + + let root = None; + + return Ok(Self { graph, root }); + } + + // Finally clone the connected component to the new graph. + + let root_label = GrammarLabel::new_closed(TNT::Non(0), 0, pos); + + let packed_label = ForestLabel::new(root_label, ForestLabelType::Packed); + + let plain_label = ForestLabel::new(root_label, ForestLabelType::Plain); + + let original_root_label; + + let original_root = if let Some(packed_node) = self.query_label(packed_label) { + original_root_label = packed_label; + + packed_node + } else if let Some(plain_node) = self.query_label(plain_label) { + original_root_label = plain_label; + + plain_node + } else { + let root = None; + return Ok(Self { graph, root }); + }; + + let mut roots_stack: Vec; + + if original_root_label.is_packed() { + roots_stack = valid_children!(original_root); + + match roots_stack.len() { + 0 => { + let root = None; + + return Ok(Self { graph, root }); + } + 1 => { + let child = *roots_stack.first().unwrap(); + + builder.add_vertex(self.vertex_label(child)?.ok_or(Error::NodeNoLabel(child))?); + } + _ => { + let builder_root = builder.add_vertex(original_root_label); + + for child in roots_stack.iter().copied() { + let child_node = builder.add_vertex( + self.vertex_label(child)?.ok_or(Error::NodeNoLabel(child))?, + ); + + builder.add_edge(builder_root, child_node, original_root_label)?; + } + } + } + } else { + builder.add_vertex(original_root_label); + + roots_stack = vec![original_root]; + } + + while let Some(top) = roots_stack.pop() { + let top_label = self.vertex_label(top)?.ok_or(Error::NodeNoLabel(top))?; + + let top_in_builder = match builder.query_label(top_label) { + Some(top_node) => top_node, + None => { + // an old cloned node now becomes a plain node + let plain_label = ForestLabel::new(top_label.label(), ForestLabelType::Plain); + + builder + .query_label(plain_label) + .unwrap_or_else(|| panic!("the top {top} should be planted already")) + } + }; + + 'children_loop: for child in self.children_of(top)? { + let child_label = self.vertex_label(child)?.ok_or(Error::NodeNoLabel(child))?; + + // filter out invalid children + if validity_array.get(child).copied() != Some(true) { + continue 'children_loop; + } + + // avoid unnecessary effort + if let Some(child_node) = builder.query_label(child_label) { + builder.add_edge(top_in_builder, child_node, child_label)?; + + continue 'children_loop; + } + + if child_label.is_packed() { + let child_valid_children = valid_children!(child); + + match child_valid_children.len() { + 0 => { + panic!("this case should not happen"); + } + 1 => { + // If a packed node only has one valid + // child, we clone a plain node instead. + + let child_child = *child_valid_children.first().unwrap(); + + let child_plain_label = + ForestLabel::new(child_label.label(), ForestLabelType::Plain); + + let child_plain_node = builder.add_vertex(child_plain_label); + + builder.add_edge( + top_in_builder, + child_plain_node, + child_plain_label, + )?; + + roots_stack.push(child_child); + } + _ => { + let child_node = builder.add_vertex(child_label); + + builder.add_edge(top_in_builder, child_node, child_label)?; + + roots_stack.push(child); + } + } + + continue 'children_loop; + } + + let child_node = builder.add_vertex(child_label); + + builder.add_edge(top_in_builder, child_node, child_label)?; + + roots_stack.push(child); + } + } + + Ok(Self { graph, root }) + } +} + +#[cfg(test)] +mod extract_tests { + use super::*; + + fn construct_test_forest( + ) -> Result>, Box> { + // node 0 + let mut result: DefaultForest> = DefaultForest::new_leaf( + GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Non(0)), 0, 3), + ); + + // node 1 + result.plant( + 0, + leaf!( + GrammarLabel::new_closed(GrammarLabelType::Rule(15), 0, 3), + _ + ), + false, + )?; + + result.plant( + 1, + DefaultForest::new_leaf(GrammarLabel::new_closed( + GrammarLabelType::TNT(TNT::Non(0)), + 0, + 3, + )), + true, + )?; + + // node 2 + result.plant( + 0, + leaf!(GrammarLabel::new_closed(GrammarLabelType::Rule(6), 0, 1), _), + false, + )?; + + // node 3 + result.plant( + 2, + leaf!( + GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Ter(0)), 0, 1), + _ + ), + false, + )?; + + // node 4 + result.plant( + 0, + leaf!(GrammarLabel::new_closed(GrammarLabelType::Rule(7), 1, 3), _), + false, + )?; + + // node 5 + result.plant( + 4, + leaf!( + GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Non(0)), 1, 3), + _ + ), + false, + )?; + + // node 6 + result.plant( + 5, + leaf!(GrammarLabel::new_closed(GrammarLabelType::Rule(3), 1, 2), _), + false, + )?; + + // node 7 + result.plant( + 6, + leaf!( + GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Non(1)), 1, 2), + _ + ), + false, + )?; + + // node 8 + result.plant( + 7, + leaf!( + GrammarLabel::new_closed(GrammarLabelType::Rule(11), 1, 2), + _ + ), + false, + )?; + + // node 9 + result.plant( + 8, + leaf!( + GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Ter(2)), 1, 2), + _ + ), + false, + )?; + + // Clone node 5 to have node 10 and node 11 + result.clone_node(5, 0, false)?; + + // node 12 + result.plant( + 11, + leaf!(GrammarLabel::new_closed(GrammarLabelType::Rule(3), 1, 3), _), + false, + )?; + + // node 13 + result.plant( + 12, + leaf!( + GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Non(1)), 1, 3), + _ + ), + false, + )?; + + result.plant( + 13, + leaf!( + GrammarLabel::new_closed(GrammarLabelType::Rule(11), 1, 2), + _ + ), + true, + )?; + + // node 14 + result.plant( + 13, + leaf!( + GrammarLabel::new_closed(GrammarLabelType::Rule(13), 2, 3), + _ + ), + false, + )?; + + // node 15 + result.plant( + 14, + leaf!( + GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Ter(2)), 2, 3), + _ + ), + false, + )?; + + // node 16 + result.plant( + 5, + leaf!(GrammarLabel::new(GrammarLabelType::Rule(4), 2), _), + false, + )?; + + // node 17 + result.plant( + 16, + leaf!(GrammarLabel::new(GrammarLabelType::TNT(TNT::Non(0)), 2), _), + false, + )?; + + // node 18 + result.plant( + 17, + leaf!(GrammarLabel::new_closed(GrammarLabelType::Rule(3), 2, 3), _), + false, + )?; + + // node 19 + result.plant( + 18, + leaf!( + GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Non(1)), 2, 3), + _ + ), + false, + )?; + + // node 20 + result.plant( + 19, + leaf!( + GrammarLabel::new_closed(GrammarLabelType::Rule(11), 2, 3), + _ + ), + false, + )?; + + result.plant( + 20, + leaf!( + GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Ter(2)), 2, 3), + _ + ), + true, + )?; + + Ok(result) + } + + #[test] + fn test_extract() -> Result<(), Box> { + let forest = construct_test_forest()?; + + assert_eq!(forest.nodes_len(), 21); + + forest.print_viz("forest before extraction.gv")?; + + let extract_result = forest.extract(3)?; + + extract_result.print_viz("extracted forest.gv")?; + + Ok(()) + } +} diff --git a/chain/src/item/default/mod.rs b/chain/src/item/default/mod.rs index 47e8c90..28bdbee 100644 --- a/chain/src/item/default/mod.rs +++ b/chain/src/item/default/mod.rs @@ -12,7 +12,7 @@ use graph_macro::Graph; use std::collections::HashSet; -use core::fmt::Display; +use std::fmt::Display; /// The type of errors for forest operations. #[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd)] @@ -216,6 +216,15 @@ impl Forest for DefaultForest> { return Err(Error::IndexOutOfBounds(node_id, self.nodes_len())); } + // FIXME: We should take the presence of packed nodes into + // consideration. That is, the fragment might have already + // been planted and packed, and we need to account for such + // possibilities. + // + // Moreover, the fragments might also contain packed nodes, in + // which case we need to check these multiple possibilities + // are properly contained. + // We do a depth-first traversal to determine if every node // encountered has the same set of children (labels taken into // the consideration). @@ -450,7 +459,7 @@ impl Forest for DefaultForest> { // Make sure it only has one parent, which is the // representative node. - assert_eq!(parents.len(), 1); + // assert_eq!(parents.len(), 1); // We checked its length is 1, so it is safe to unwrap // here. @@ -670,6 +679,8 @@ impl DefaultForest> { pub mod splone; +pub mod extract; + impl DefaultForest { /// Create a forest with only one leaf from a raw label, unlike /// `new_leaf`, which transforms the label for convenience. @@ -798,53 +809,6 @@ impl DefaultForest> { Ok(()) } - /// Find the last child of the given node whose start and end - /// positions contain the given position. If no such child is - /// found, return `Ok(None)`. - /// - /// The returned tuple is of the form (child, index), where - /// `child` is the index of the child node in question, and - /// `index` means that the child is the `index`-th child of the - /// node. - #[allow(dead_code)] - pub(crate) fn position_search( - &self, - node: usize, - pos: usize, - ) -> Result, Error> { - fn range_contains(label: GrammarLabel, pos: usize) -> bool { - let start = label.start(); - - if let Some(end) = label.end() { - (start..end).contains(&pos) - } else { - (start..).contains(&pos) - } - } - - let node_label = self - .vertex_label(node)? - .ok_or(Error::NodeNoLabel(node))? - .label(); - - if !range_contains(node_label, pos) { - return Ok(None); - } - - for (index, child) in self.children_of(node)?.enumerate().rev() { - let child_label = self - .vertex_label(child)? - .ok_or(Error::NodeNoLabel(child))? - .label(); - - if range_contains(child_label, pos) { - return Ok(Some((child, index))); - } - } - - Ok(None) - } - /// Set the position of every node. /// /// If ALLP is non-nil or if the node is a terminal node, also set diff --git a/chain/src/item/default/splone.rs b/chain/src/item/default/splone.rs index 581d1dc..da13c56 100644 --- a/chain/src/item/default/splone.rs +++ b/chain/src/item/default/splone.rs @@ -516,7 +516,13 @@ impl DefaultForest> { if let Some((fragment, _planted)) = fragment { let modified_degree = std::cmp::max(child_degree, 1) - 1; - if self.has_same_children(child, node, modified_degree, edge_index)? + dbg!(node, end, edge_index, modified_degree); + + dbg!(child, child_degree); + + dbg!(&fragment); + + if self.has_same_children(child, node, modified_degree, edge_index + 1)? && child_degree != 0 { let last_child = self.nth_child(child, child_degree - 1)?; @@ -651,7 +657,7 @@ impl DefaultForest> { } else if labela.clone_index().is_some() { let mut parentsa = self.parents_of(nodea)?; - assert_eq!(parentsa.len(), 1); + // assert_eq!(parentsa.len(), 1); let parenta = parentsa.next().unwrap().node(); -- cgit v1.2.3-18-g5258