diff options
author | JSDurand <mmemmew@gmail.com> | 2023-06-02 15:00:48 +0800 |
---|---|---|
committer | JSDurand <mmemmew@gmail.com> | 2023-06-02 15:00:48 +0800 |
commit | 8486474f377faf2d800d79166a7abe6b975e3e50 (patch) | |
tree | f06baa22bafebf9393869664067e9cf308c92634 /chain/src/item | |
parent | 1455da10f943e2aa1bdf26fb2697dafccc61e073 (diff) |
Fix a bug of duplication from planting after sploing
I should have staged and committed these changes separately, but I am
too lazy to deal with that.
The main changes in this commit are that I added the derive macro that
automates the delegation of the Graph trait. This saves a lot of
boiler-plate codes.
The second main change, perhaps the most important one, is that I
found and tried to fix a bug that caused duplication of nodes. The
bug arises from splitting or cloning a node multiple times, and
immediately planting the same fragment under the new "sploned" node.
That is, when we try to splone the node again, we found that we need
to splone, because the node that was created by the same sploning
process now has a different label because of the planting of the
fragment. Then after the sploning, we plant the fragment again. This
makes the newly sploned node have the same label (except for the clone
index) and the same children as the node that was sploned and planted
in the previous rounds.
The fix is to check for the existence of a node that has the same set
of children as the about-to-be-sploned node, except for the last one,
which contains the about-to-be-planted fragment as a prefix. If that
is the case, treat it as an already existing node, so that we do not
have to splone the node again.
This is consistent with the principle to not create what we do not
need.
Diffstat (limited to 'chain/src/item')
-rw-r--r-- | chain/src/item/default/mod.rs | 57 | ||||
-rw-r--r-- | chain/src/item/default/splone.rs | 189 | ||||
-rw-r--r-- | chain/src/item/genins.rs | 103 | ||||
-rw-r--r-- | chain/src/item/mod.rs | 12 |
4 files changed, 274 insertions, 87 deletions
diff --git a/chain/src/item/default/mod.rs b/chain/src/item/default/mod.rs index 3903162..6d28956 100644 --- a/chain/src/item/default/mod.rs +++ b/chain/src/item/default/mod.rs @@ -8,6 +8,8 @@ use graph::{ builder::BuilderMut, labelled::binary::PLGBuilderMut, Graph, LabelGraph, PLGraph, RedirectGraph, }; +use graph_macro::Graph; + use std::collections::HashSet; use core::fmt::Display; @@ -73,7 +75,7 @@ impl From<ForestLabelError> for Error { } /// A default implementation of forest. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Graph)] pub struct DefaultForest<T: GraphLabel> { pub(crate) graph: PLGraph<T>, root: Option<usize>, @@ -94,56 +96,6 @@ impl<T: GraphLabel> AsRef<DefaultForest<T>> for DefaultForest<T> { } } -impl<T: GraphLabel> Graph for DefaultForest<T> { - type Iter<'a> = <PLGraph<T> as Graph>::Iter<'a> - where - Self: 'a; - - #[inline] - fn is_empty(&self) -> bool { - self.graph.is_empty() - } - - #[inline] - fn nodes_len(&self) -> usize { - self.graph.nodes_len() - } - - #[inline] - fn edges_len(&self) -> Option<usize> { - self.graph.edges_len() - } - - #[inline] - fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, GError> { - self.graph.children_of(node_id) - } - - #[inline] - fn degree(&self, node_id: usize) -> Result<usize, GError> { - self.graph.degree(node_id) - } - - #[inline] - fn is_empty_node(&self, node_id: usize) -> Result<bool, GError> { - self.graph.is_empty_node(node_id) - } - - #[inline] - fn has_edge(&self, source: usize, target: usize) -> Result<bool, GError> { - self.graph.has_edge(source, target) - } - - fn replace_by_builder(&mut self, _builder: impl graph::Builder<Result = Self>) { - unimplemented!() - } - - #[inline] - fn print_viz(&self, filename: &str) -> Result<(), std::io::Error> { - self.graph.print_viz(filename) - } -} - impl<T: GraphLabel> ParentsGraph for DefaultForest<T> { type Iter<'a>= <PLGraph<T> as ParentsGraph>::Iter<'a> where @@ -1081,6 +1033,9 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { // } /// Set the position of every node. + /// + /// If ALLP is non-nil or if the node is a terminal node, also set + /// the ending position. pub(crate) fn set_pos(&mut self, pos: usize, allp: bool) -> Result<(), Error> { let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); diff --git a/chain/src/item/default/splone.rs b/chain/src/item/default/splone.rs index 4cd11b9..d77686e 100644 --- a/chain/src/item/default/splone.rs +++ b/chain/src/item/default/splone.rs @@ -123,7 +123,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { return Ok(cloned); } - let new_label = self.create_new_label(node, end, edge_index)?; + let new_label = self.create_new_label(node, end, edge_index, None)?; let new_label = match new_label { Eon::Nex(label) => label, @@ -140,7 +140,126 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { return self.split_node(new_label, node, edge_index, completingp); } - // replace the label directly + self.replace_label(node, new_label)?; + + Ok(node) + } + + /// Split or clone, and then plant. + /// + /// # Splone + /// + /// This function is similar to + /// [`splone`][DefaultForest::<ForestLabel<GrammarLabel>>::splone], + /// but this is specialized for planting a fragment under the + /// newly sploned node. See the above-mentionned function for + /// details on how to splone. + /// + /// # Parameter `planted` + /// + /// The function to plant a fragment takes a parameter `planted`, + /// which indicates whether or not the fragment had already been + /// planted before. This is used to avoid re-inserting fragments + /// into the forest. One can just add an edge and be done. + /// + /// # Special treatment + /// + /// This function is aimed at solving a specific problem that the + /// function + /// [`splone`][DefaultForest::<ForestLabel<GrammarLabel>>::splone] + /// faces: duplication of nodes after planting a fragment under + /// the newly sploned node. That is, if we splone a node and then + /// immediately plant a fragment under it, then that new node will + /// become a different node from the original sploned node, so if + /// we splone another node that ends up creating the same sploned + /// node, and if we plant the same fragment under it, we will end + /// up creating duplicated cloned nodes, which later mess up the + /// forests. + /// + /// So this function first checks if the would-be-sploned node + /// with the fragment planted already exists; if so, then just + /// return that node, otherwise we perform the usual sploning and + /// plant the fragment after the splone. + /// + /// # Special values of two parameters to `splone` + /// + /// Since we want to plant a fragment under the splone, the + /// parameter `end` to the splone function is set to `None` + /// automatically. + /// + /// Moreover, the parameter `completingp` is for completing the + /// forest at the end, while we are definitely not at the end if + /// we are going to plant a fragment after sploning, so that + /// parameter is automatically set to `false` as well. + pub(crate) fn splant( + &mut self, + node: usize, + edge_index: usize, + fragment: &DefaultForest<ForestLabel<GrammarLabel>>, + planted: bool, + ) -> Result<usize, Error> { + let node_label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?; + + assert!(get_nt_label(node_label.label()).is_some()); + + if node_label.is_packed() { + self.print_viz("dbg forest.gv").unwrap(); + dbg!(self.vertex_label(node)?); + return Err(Error::SplitPack(node)); + } + + let node_end = node_label.label().end(); + let node_degree = self.degree(node)?; + + // We can check the end to know whether the new label is equal + // to the old label. + if node_end.is_none() { + if node_degree == edge_index + 2 { + let last_child = self.nth_child(node, node_degree - 1)?; + + if self.is_prefix(last_child, fragment.borrow())? { + return Ok(node); + } + } + + if node_degree <= edge_index + 1 { + self.plant(node, fragment, planted)?; + + return Ok(node); + } + + let cloned = self.clone_node(node, edge_index + 1, false)?; + + self.plant(cloned, fragment, planted)?; + + return Ok(cloned); + } + + let new_label = self.create_new_label(node, None, edge_index, Some((fragment, planted)))?; + + let new_label = match new_label { + Eon::Nex(label) => label, + Eon::Ex(existing) => { + return Ok(existing); + } + }; + + let splitted = self.split_node(new_label, node, edge_index, false)?; + + self.plant(splitted, fragment, planted)?; + + Ok(splitted) + } + + /// Replace the label of a node by a new label. + /// + /// This also handles the labels of parents of the node. + fn replace_label( + &mut self, + node: usize, + new_label: ForestLabel<GrammarLabel>, + ) -> Result<(), Error> { + let end = new_label.label().end(); let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); @@ -162,7 +281,6 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { panic!("assumption fails"); } - assert!(get_rule_label(parent_label).is_some()); assert_eq!(builder.degree(parent)?, 1); if let Some(pos) = end { @@ -176,7 +294,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { builder.set_label(parent, parent_label)?; } - Ok(node) + Ok(()) } /// Procedure to split the node: @@ -352,11 +470,23 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { /// existing label, and return the clone of the cloned label. /// /// 7. Else return the plain label. + /// + /// # Fragment planting + /// + /// If the parameter `fragment` contains a fragment, that means we + /// also check if an existing label is what we want by checking + /// whether it has the same children except for the last one, + /// whereas its last child contains the fragment as a prefix. + /// + /// Also, if an existing label is found to have exactly the same + /// children, then for the sake of consistency, we plant the + /// fragment under that existing node. fn create_new_label( &mut self, node: usize, end: Option<usize>, edge_index: usize, + fragment: Option<(&DefaultForest<ForestLabel<GrammarLabel>>, bool)>, ) -> Result<Eon, Error> { let mut copied_label = self .vertex_label(node)? @@ -373,9 +503,29 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { if let Some(packed) = self.query_label(label) { for child in self.children_of(packed)? { - if self.has_same_children(child, node, self.degree(child)?, edge_index + 1)? { + let child_degree = self.degree(child)?; + + if self.has_same_children(child, node, child_degree, edge_index + 1)? { + if let Some((fragment, planted)) = fragment { + self.plant(child, fragment, planted)?; + } + return Ok(Eon::Ex(child)); } + + if let Some((fragment, _planted)) = fragment { + let modified_degree = std::cmp::max(child_degree, 1) - 1; + + if self.has_same_children(child, node, modified_degree, edge_index)? + && child_degree != 0 + { + let last_child = self.nth_child(child, child_degree - 1)?; + + if self.is_prefix(last_child, fragment)? { + return Ok(Eon::Ex(child)); + } + } + } } let mut packed_children = self.children_of(packed)?; @@ -384,18 +534,37 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { let clone_index = self.clone_node(first_child, 0, true)?; - Ok(Eon::Nex(ForestLabel::new( - copied_label, - ForestLabelType::Cloned(clone_index), - ))) + let cloned_label = ForestLabel::new(copied_label, ForestLabelType::Cloned(clone_index)); + + Ok(Eon::Nex(cloned_label)) } else { let plain_label = ForestLabel::new(copied_label, ForestLabelType::Plain); if let Some(existing) = self.query_label(plain_label) { - if self.has_same_children(existing, node, self.degree(existing)?, edge_index + 1)? { + let existing_degree = self.degree(existing)?; + + if self.has_same_children(existing, node, existing_degree, edge_index + 1)? { + if let Some((fragment, planted)) = fragment { + self.plant(existing, fragment, planted)?; + } + return Ok(Eon::Ex(existing)); } + if let Some((fragment, _planted)) = fragment { + let modified_degree = std::cmp::max(existing_degree, 1) - 1; + + if existing_degree != 0 + && self.has_same_children(existing, node, modified_degree, edge_index)? + { + let last_child = self.nth_child(existing, existing_degree - 1)?; + + if self.is_prefix(last_child, fragment)? { + return Ok(Eon::Ex(existing)); + } + } + } + let clone_index = self.clone_node(existing, 0, true)?; Ok(Eon::Nex(ForestLabel::new( diff --git a/chain/src/item/genins.rs b/chain/src/item/genins.rs index 0c3f616..97d7ba9 100644 --- a/chain/src/item/genins.rs +++ b/chain/src/item/genins.rs @@ -190,7 +190,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { let root = if let Some(root) = self.root() { root } else { - panic!("the forest must be non-empty when we insert items"); + unreachable!("the forest must be non-empty when we insert items"); }; let pavi = label.forest_source(); @@ -211,13 +211,50 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { let pos = fragment_root_label.label().start(); + dbg!((pos, label)); + + let tnt_string = { + let empty_p = atom_child_iter.len() == 0; + let label = fragment.vertex_label(fragment.nodes_len() - 1)?.unwrap(); + + match label.label().label() { + GrammarLabelType::TNT(TNT::Ter(t)) => { + format!("t {t}") + } + GrammarLabelType::TNT(TNT::Non(n)) => { + format!("n {n} {}", if empty_p { "second" } else { "first" }) + } + _ => "error".to_string(), + } + }; + + let num = { + let mut repetition = 0; + + while std::fs::metadata(format!( + "/Users/durand/Desktop/Centre/A propos de programmes/Rust/rep/chain/output/pos {pos} {tnt_string} {repetition}.gv" + )) + .is_ok() + { + repetition += 1; + } + + repetition + }; + + self.print_viz(&format!("pos {pos} {tnt_string} {num}.gv")) + .unwrap(); + self.extra_reductions( - BoTop::new(pavi, true_source), + BoTop::new(true_source, pavi), pos, reducer.borrow(), atom.borrow(), )?; + self.print_viz(&format!("pos {pos} {tnt_string} {num} stage 1.gv")) + .unwrap(); + // Ensure the last node in the PaVi is a terminal or a // non-terminal node, as an extra safety guard during // development. @@ -259,6 +296,8 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { } } + // TODO: Print each and every step. + // TODO: Refactor this. let is_empty_segment = pavi.is_empty(); @@ -290,15 +329,19 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { .vertex_label(frag_nodes_len - 2)? .ok_or(Error::NodeNoLabel(frag_nodes_len - 2))?; - // NOTE: The function - // `plant_if_needed` assumes that we - // want to plant the fragment as the - // first child of the node. This - // assumption holds in this case, but - // not in general. + // NOTE: The function `plant_at_start` + // assumes that we want to plant the + // fragment as the first child of the + // node. This assumption holds in + // this case, but not in general. self.plant_at_start(node, frag)?; + self.print_viz(&format!( + "pos {pos} {tnt_string} {num} stage 1.5 {node}.gv" + )) + .unwrap(); + let rule_label_pos = self .query_label(last_but_one_label) .expect("the forest was wrongly planted"); @@ -370,6 +413,13 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { let sploned_node = self.splone(node.node(), Some(pos), node.edge(), false)?; + self.print_viz(&format!( + "pos {pos} {tnt_string} {num} stage 2 {} {}.gv", + node.node(), + node.edge(), + )) + .unwrap(); + node_label = self .vertex_label(sploned_node)? .ok_or(Error::NodeNoLabel(sploned_node))?; @@ -394,9 +444,11 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { let parents_iter = self.parents_of(node.node())?; for parent in parents_iter { + let parent_node = parent.node(); + let parent_label = self - .vertex_label(parent.node())? - .ok_or_else(|| Error::NodeNoLabel(parent.node()))? + .vertex_label(parent_node)? + .ok_or(Error::NodeNoLabel(parent_node))? .label(); if parent_label.label().rule().is_none() { @@ -448,15 +500,15 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { return Err(Error::CannotPlant); } - if pos == 4 && matches!(true_source, PaVi::Virtual(_, _, _)) { - dbg!(&stack, reduction_info, true_source, pavi); - self.print_viz("pos4ib.gv").unwrap(); - } - for parent in stack { - let sploned_node = self.splone(parent.node(), None, parent.edge(), false)?; + let splanted = self.splant(parent.node(), parent.edge(), fragment, non_empty)?; - self.plant(sploned_node, fragment, non_empty)?; + self.print_viz(&format!( + "pos {pos} {tnt_string} {num} stage 3 {} {} {splanted}.gv", + parent.node(), + parent.edge(), + )) + .unwrap(); non_empty = true; } @@ -533,12 +585,14 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { } }; - let t = match root_label.label().label().tnt().unwrap() { + let error_str = "a virtual fragment should consist of a single terminal node"; + + let t = match root_label.label().label().tnt().expect(error_str) { TNT::Ter(t) => t, _ => { dbg!(root_label); - panic!("a virtual fragment should consist of a single terminal node") + panic!("{error_str}") } }; @@ -550,7 +604,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { /// Perform extra reductions. /// - /// To be precise, this functions first splones the bottom node, + /// To be precise, this function first splones the bottom node, /// and then queries the reducer to find the next nodes to splone, /// and repeats this process until either we find the top node, or /// the reducer says to stop. @@ -583,6 +637,9 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { let mut stack = vec![bottom_node]; + // Exclude duplicate nodes to ensure that no infinite + // recursion occurs. In the future I shall experiment if this + // is necessary, and get rid of this if possible. let mut seen_nodes: HashSet<usize> = Default::default(); let mut result = Vec::new(); @@ -683,6 +740,8 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { // REVIEW: is this really correct? dbg!("this should not really happen?"); + // SUMMARY: splone every child of nth_child + let mut result: usize = nth_child; for node in self.children_of(nth_child)?.collect::<Vec<_>>() { @@ -806,8 +865,8 @@ mod genins_test { assert!(matches!(atom.query_reduction(17, 9), Ok(Some(&[1])))); - assert!(matches!(atom.query_reduction(35, 9), Ok(Some(&[1, 2])))); - assert!(matches!(atom.query_reduction(35, 25), Ok(Some(&[2])))); + // assert!(matches!(atom.query_reduction(35, 9), Ok(Some(&[1, 2])))); + // assert!(matches!(atom.query_reduction(35, 25), Ok(Some(&[2])))); Ok(()) } diff --git a/chain/src/item/mod.rs b/chain/src/item/mod.rs index f8e0aa0..5efa710 100644 --- a/chain/src/item/mod.rs +++ b/chain/src/item/mod.rs @@ -21,11 +21,15 @@ use core::borrow::Borrow; /// A virtual segment represents an expansion from a non-terminal by a /// terminal. We do not directly add this segment when we encounter /// this expansion at the start because this expansion might contain -/// multiple derivations some of which will not be used. +/// multiple derivations some of which might not be used. /// /// If we add the expansion immediately when we encounter it, we have /// to later discern and delete those unwanted derivations. This is -/// asking for trouble, in my experiences. +/// asking for trouble, as I learned from experiences. +/// +/// # Empty +/// +/// Also this might be empty if it represents the root node. #[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Default)] pub enum PaVi { /// An edge from a node, as the n-th child. @@ -35,8 +39,8 @@ pub enum PaVi { /// /// # Tuple elements /// - /// The contained tuple is of the form (nt, t, node), which means - /// a virtually added node at the `node` representing the + /// The contained tuple is of the form `(nt, t, node)` , which + /// means a virtually added node at the `node` representing the /// expansion from the non-terminal `nt` by the terminal `t`. Virtual(usize, usize, usize), /// This is an empty segment that represents the root node. This |