diff options
author | JSDurand <mmemmew@gmail.com> | 2023-06-02 15:00:48 +0800 |
---|---|---|
committer | JSDurand <mmemmew@gmail.com> | 2023-06-02 15:00:48 +0800 |
commit | 8486474f377faf2d800d79166a7abe6b975e3e50 (patch) | |
tree | f06baa22bafebf9393869664067e9cf308c92634 /chain/src/item/default | |
parent | 1455da10f943e2aa1bdf26fb2697dafccc61e073 (diff) |
Fix a bug of duplication from planting after sploing
I should have staged and committed these changes separately, but I am
too lazy to deal with that.
The main changes in this commit are that I added the derive macro that
automates the delegation of the Graph trait. This saves a lot of
boiler-plate codes.
The second main change, perhaps the most important one, is that I
found and tried to fix a bug that caused duplication of nodes. The
bug arises from splitting or cloning a node multiple times, and
immediately planting the same fragment under the new "sploned" node.
That is, when we try to splone the node again, we found that we need
to splone, because the node that was created by the same sploning
process now has a different label because of the planting of the
fragment. Then after the sploning, we plant the fragment again. This
makes the newly sploned node have the same label (except for the clone
index) and the same children as the node that was sploned and planted
in the previous rounds.
The fix is to check for the existence of a node that has the same set
of children as the about-to-be-sploned node, except for the last one,
which contains the about-to-be-planted fragment as a prefix. If that
is the case, treat it as an already existing node, so that we do not
have to splone the node again.
This is consistent with the principle to not create what we do not
need.
Diffstat (limited to 'chain/src/item/default')
-rw-r--r-- | chain/src/item/default/mod.rs | 57 | ||||
-rw-r--r-- | chain/src/item/default/splone.rs | 189 |
2 files changed, 185 insertions, 61 deletions
diff --git a/chain/src/item/default/mod.rs b/chain/src/item/default/mod.rs index 3903162..6d28956 100644 --- a/chain/src/item/default/mod.rs +++ b/chain/src/item/default/mod.rs @@ -8,6 +8,8 @@ use graph::{ builder::BuilderMut, labelled::binary::PLGBuilderMut, Graph, LabelGraph, PLGraph, RedirectGraph, }; +use graph_macro::Graph; + use std::collections::HashSet; use core::fmt::Display; @@ -73,7 +75,7 @@ impl From<ForestLabelError> for Error { } /// A default implementation of forest. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Graph)] pub struct DefaultForest<T: GraphLabel> { pub(crate) graph: PLGraph<T>, root: Option<usize>, @@ -94,56 +96,6 @@ impl<T: GraphLabel> AsRef<DefaultForest<T>> for DefaultForest<T> { } } -impl<T: GraphLabel> Graph for DefaultForest<T> { - type Iter<'a> = <PLGraph<T> as Graph>::Iter<'a> - where - Self: 'a; - - #[inline] - fn is_empty(&self) -> bool { - self.graph.is_empty() - } - - #[inline] - fn nodes_len(&self) -> usize { - self.graph.nodes_len() - } - - #[inline] - fn edges_len(&self) -> Option<usize> { - self.graph.edges_len() - } - - #[inline] - fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, GError> { - self.graph.children_of(node_id) - } - - #[inline] - fn degree(&self, node_id: usize) -> Result<usize, GError> { - self.graph.degree(node_id) - } - - #[inline] - fn is_empty_node(&self, node_id: usize) -> Result<bool, GError> { - self.graph.is_empty_node(node_id) - } - - #[inline] - fn has_edge(&self, source: usize, target: usize) -> Result<bool, GError> { - self.graph.has_edge(source, target) - } - - fn replace_by_builder(&mut self, _builder: impl graph::Builder<Result = Self>) { - unimplemented!() - } - - #[inline] - fn print_viz(&self, filename: &str) -> Result<(), std::io::Error> { - self.graph.print_viz(filename) - } -} - impl<T: GraphLabel> ParentsGraph for DefaultForest<T> { type Iter<'a>= <PLGraph<T> as ParentsGraph>::Iter<'a> where @@ -1081,6 +1033,9 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { // } /// Set the position of every node. + /// + /// If ALLP is non-nil or if the node is a terminal node, also set + /// the ending position. pub(crate) fn set_pos(&mut self, pos: usize, allp: bool) -> Result<(), Error> { let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); diff --git a/chain/src/item/default/splone.rs b/chain/src/item/default/splone.rs index 4cd11b9..d77686e 100644 --- a/chain/src/item/default/splone.rs +++ b/chain/src/item/default/splone.rs @@ -123,7 +123,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { return Ok(cloned); } - let new_label = self.create_new_label(node, end, edge_index)?; + let new_label = self.create_new_label(node, end, edge_index, None)?; let new_label = match new_label { Eon::Nex(label) => label, @@ -140,7 +140,126 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { return self.split_node(new_label, node, edge_index, completingp); } - // replace the label directly + self.replace_label(node, new_label)?; + + Ok(node) + } + + /// Split or clone, and then plant. + /// + /// # Splone + /// + /// This function is similar to + /// [`splone`][DefaultForest::<ForestLabel<GrammarLabel>>::splone], + /// but this is specialized for planting a fragment under the + /// newly sploned node. See the above-mentionned function for + /// details on how to splone. + /// + /// # Parameter `planted` + /// + /// The function to plant a fragment takes a parameter `planted`, + /// which indicates whether or not the fragment had already been + /// planted before. This is used to avoid re-inserting fragments + /// into the forest. One can just add an edge and be done. + /// + /// # Special treatment + /// + /// This function is aimed at solving a specific problem that the + /// function + /// [`splone`][DefaultForest::<ForestLabel<GrammarLabel>>::splone] + /// faces: duplication of nodes after planting a fragment under + /// the newly sploned node. That is, if we splone a node and then + /// immediately plant a fragment under it, then that new node will + /// become a different node from the original sploned node, so if + /// we splone another node that ends up creating the same sploned + /// node, and if we plant the same fragment under it, we will end + /// up creating duplicated cloned nodes, which later mess up the + /// forests. + /// + /// So this function first checks if the would-be-sploned node + /// with the fragment planted already exists; if so, then just + /// return that node, otherwise we perform the usual sploning and + /// plant the fragment after the splone. + /// + /// # Special values of two parameters to `splone` + /// + /// Since we want to plant a fragment under the splone, the + /// parameter `end` to the splone function is set to `None` + /// automatically. + /// + /// Moreover, the parameter `completingp` is for completing the + /// forest at the end, while we are definitely not at the end if + /// we are going to plant a fragment after sploning, so that + /// parameter is automatically set to `false` as well. + pub(crate) fn splant( + &mut self, + node: usize, + edge_index: usize, + fragment: &DefaultForest<ForestLabel<GrammarLabel>>, + planted: bool, + ) -> Result<usize, Error> { + let node_label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?; + + assert!(get_nt_label(node_label.label()).is_some()); + + if node_label.is_packed() { + self.print_viz("dbg forest.gv").unwrap(); + dbg!(self.vertex_label(node)?); + return Err(Error::SplitPack(node)); + } + + let node_end = node_label.label().end(); + let node_degree = self.degree(node)?; + + // We can check the end to know whether the new label is equal + // to the old label. + if node_end.is_none() { + if node_degree == edge_index + 2 { + let last_child = self.nth_child(node, node_degree - 1)?; + + if self.is_prefix(last_child, fragment.borrow())? { + return Ok(node); + } + } + + if node_degree <= edge_index + 1 { + self.plant(node, fragment, planted)?; + + return Ok(node); + } + + let cloned = self.clone_node(node, edge_index + 1, false)?; + + self.plant(cloned, fragment, planted)?; + + return Ok(cloned); + } + + let new_label = self.create_new_label(node, None, edge_index, Some((fragment, planted)))?; + + let new_label = match new_label { + Eon::Nex(label) => label, + Eon::Ex(existing) => { + return Ok(existing); + } + }; + + let splitted = self.split_node(new_label, node, edge_index, false)?; + + self.plant(splitted, fragment, planted)?; + + Ok(splitted) + } + + /// Replace the label of a node by a new label. + /// + /// This also handles the labels of parents of the node. + fn replace_label( + &mut self, + node: usize, + new_label: ForestLabel<GrammarLabel>, + ) -> Result<(), Error> { + let end = new_label.label().end(); let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); @@ -162,7 +281,6 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { panic!("assumption fails"); } - assert!(get_rule_label(parent_label).is_some()); assert_eq!(builder.degree(parent)?, 1); if let Some(pos) = end { @@ -176,7 +294,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { builder.set_label(parent, parent_label)?; } - Ok(node) + Ok(()) } /// Procedure to split the node: @@ -352,11 +470,23 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { /// existing label, and return the clone of the cloned label. /// /// 7. Else return the plain label. + /// + /// # Fragment planting + /// + /// If the parameter `fragment` contains a fragment, that means we + /// also check if an existing label is what we want by checking + /// whether it has the same children except for the last one, + /// whereas its last child contains the fragment as a prefix. + /// + /// Also, if an existing label is found to have exactly the same + /// children, then for the sake of consistency, we plant the + /// fragment under that existing node. fn create_new_label( &mut self, node: usize, end: Option<usize>, edge_index: usize, + fragment: Option<(&DefaultForest<ForestLabel<GrammarLabel>>, bool)>, ) -> Result<Eon, Error> { let mut copied_label = self .vertex_label(node)? @@ -373,9 +503,29 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { if let Some(packed) = self.query_label(label) { for child in self.children_of(packed)? { - if self.has_same_children(child, node, self.degree(child)?, edge_index + 1)? { + let child_degree = self.degree(child)?; + + if self.has_same_children(child, node, child_degree, edge_index + 1)? { + if let Some((fragment, planted)) = fragment { + self.plant(child, fragment, planted)?; + } + return Ok(Eon::Ex(child)); } + + if let Some((fragment, _planted)) = fragment { + let modified_degree = std::cmp::max(child_degree, 1) - 1; + + if self.has_same_children(child, node, modified_degree, edge_index)? + && child_degree != 0 + { + let last_child = self.nth_child(child, child_degree - 1)?; + + if self.is_prefix(last_child, fragment)? { + return Ok(Eon::Ex(child)); + } + } + } } let mut packed_children = self.children_of(packed)?; @@ -384,18 +534,37 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { let clone_index = self.clone_node(first_child, 0, true)?; - Ok(Eon::Nex(ForestLabel::new( - copied_label, - ForestLabelType::Cloned(clone_index), - ))) + let cloned_label = ForestLabel::new(copied_label, ForestLabelType::Cloned(clone_index)); + + Ok(Eon::Nex(cloned_label)) } else { let plain_label = ForestLabel::new(copied_label, ForestLabelType::Plain); if let Some(existing) = self.query_label(plain_label) { - if self.has_same_children(existing, node, self.degree(existing)?, edge_index + 1)? { + let existing_degree = self.degree(existing)?; + + if self.has_same_children(existing, node, existing_degree, edge_index + 1)? { + if let Some((fragment, planted)) = fragment { + self.plant(existing, fragment, planted)?; + } + return Ok(Eon::Ex(existing)); } + if let Some((fragment, _planted)) = fragment { + let modified_degree = std::cmp::max(existing_degree, 1) - 1; + + if existing_degree != 0 + && self.has_same_children(existing, node, modified_degree, edge_index)? + { + let last_child = self.nth_child(existing, existing_degree - 1)?; + + if self.is_prefix(last_child, fragment)? { + return Ok(Eon::Ex(existing)); + } + } + } + let clone_index = self.clone_node(existing, 0, true)?; Ok(Eon::Nex(ForestLabel::new( |