From fbaa420ed550e9c3e7cdc09d4a8ec22bfbd782a6 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Mon, 27 Feb 2023 12:36:41 +0800 Subject: before a major refactor I decide to adopt a new approach of recording and updating item derivation forests. Since this affects a lot of things, I decide to commit before the refactor, so that I can create a branch for that refactor. --- chain/src/item/default/mod.rs | 469 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 442 insertions(+), 27 deletions(-) (limited to 'chain/src/item/default/mod.rs') diff --git a/chain/src/item/default/mod.rs b/chain/src/item/default/mod.rs index 1a00368..22069d2 100644 --- a/chain/src/item/default/mod.rs +++ b/chain/src/item/default/mod.rs @@ -3,11 +3,13 @@ use super::*; use crate::atom::default::DefaultAtom; -use grammar::{GrammarLabel, GrammarLabelType}; +use grammar::{GrammarLabel, GrammarLabelType, TNT}; use graph::{ builder::BuilderMut, labelled::binary::PLGBuilderMut, Graph, LabelGraph, PLGraph, RedirectGraph, }; +use std::collections::HashSet; + use core::fmt::Display; /// The type of errors for forest operations. @@ -30,6 +32,8 @@ pub enum Error { LabelConversion(ForestLabelError), /// Trying to split a packed node. SplitPack(usize), + /// A cloned node should have exactly one parent. + InvalidClone(usize), } impl Display for Error { @@ -43,6 +47,9 @@ impl Display for Error { Error::NodeNoLabel(n) => write!(f, "node {n} has no labels, but it should have one"), Error::LabelConversion(le) => write!(f, "fail to convert labels: {le}"), Error::SplitPack(n) => write!(f, "cannot split the packed node {n}"), + Error::InvalidClone(n) => { + write!(f, "the cloned node {n} should have exactly one parent") + } } } } @@ -68,7 +75,7 @@ impl From for Error { /// A default implementation of forest. #[derive(Debug, Clone)] pub struct DefaultForest { - graph: PLGraph, + pub(crate) graph: PLGraph, root: Option, } @@ -262,15 +269,14 @@ impl Forest for DefaultForest> { // the consideration). let fragment = fragment.borrow(); + let fragment_nodes_len = fragment.nodes_len(); - let mut frag_stack = Vec::with_capacity(fragment.nodes_len()); + let mut frag_stack = Vec::with_capacity(fragment_nodes_len); - let mut self_stack = Vec::with_capacity(fragment.nodes_len()); + let mut self_stack = Vec::with_capacity(fragment_nodes_len); - use std::collections::HashSet; - - let mut frag_seen: HashSet = HashSet::with_capacity(fragment.nodes_len()); - let mut self_seen: HashSet = HashSet::with_capacity(fragment.nodes_len()); + let mut frag_seen: HashSet = HashSet::with_capacity(fragment_nodes_len); + let mut self_seen: HashSet = HashSet::with_capacity(fragment_nodes_len); let frag_root = if let Some(root) = fragment.root() { root @@ -294,25 +300,25 @@ impl Forest for DefaultForest> { if fragment.vertex_label(frag_top)? != self.vertex_label(self_top)? { // not a prefix - dbg!( - frag_top, - self_top, - fragment.vertex_label(frag_top)?, - self.vertex_label(self_top)? - ); + // dbg!( + // frag_top, + // self_top, + // fragment.vertex_label(frag_top)?, + // self.vertex_label(self_top)? + // ); return Ok(false); } let self_children = self.children_of(self_top)?; let frag_children = fragment.children_of(frag_top)?; - if frag_children.len() != self_children.len() { - dbg!( - frag_top, - self_top, - fragment.vertex_label(frag_top)?, - self.vertex_label(self_top)? - ); + if frag_children.len() > self_children.len() { + // dbg!( + // frag_top, + // self_top, + // fragment.vertex_label(frag_top)?, + // self.vertex_label(self_top)? + // ); return Ok(false); } @@ -320,7 +326,7 @@ impl Forest for DefaultForest> { if frag_seen.contains(&frag_child) && self_seen.contains(&self_child) { continue; } else if frag_seen.contains(&frag_child) || self_seen.contains(&self_child) { - dbg!(); + // dbg!(); return Ok(false); } @@ -352,7 +358,7 @@ impl Forest for DefaultForest> { let root = if let Some(root) = fragment.root() { root } else { - // Nothing to do to plant an empty forest. + // Nothing to plant. return Ok(()); }; @@ -393,9 +399,27 @@ impl Forest for DefaultForest> { return Ok(()); } - // First adjoin those nodes and join the edges later. + // First adjoin the relevant nodes and join the edges later. + + let mut used_nodes: Vec = std::iter::repeat(false).take(nodes_len).collect(); + + let mut stack = vec![root]; + + while let Some(top) = stack.pop() { + if used_nodes.get(top).copied() == Some(true) { + continue; + } + + *used_nodes + .get_mut(top) + .ok_or(Error::IndexOutOfBounds(top, nodes_len))? = true; + + stack.extend(fragment.children_of(top)?); + } + + let used_nodes = used_nodes; - for node in 0..nodes_len { + for node in (0..nodes_len).filter(|node| used_nodes.get(*node).copied() == Some(true)) { let label = fragment .vertex_label(node)? .ok_or(Error::NodeNoLabel(node))?; @@ -582,6 +606,93 @@ impl PartialEq for DefaultForest> { } } +impl Eq for DefaultForest> {} + +impl DefaultForest> { + /// A convenient helper function to plant a fragment under a + /// node, if it has not been planted yet. + /// + /// To be precise, this function first checks if the node is + /// packed; if so, then it checks every of the cloned children, to + /// see if the fragment has already been planted. If none of the + /// clones have the fragment as a prefix, we make a new clone and + /// plant the fragment there. + /// + /// On the other hand, if the node is a plain node, then it checks + /// if the fragment is a prefix of the node. If so, do nothing, + /// else clone the node and plant the fragment there, unless the + /// node has no children yet, in which case just plant the node + /// there. + /// + /// A special case occurs when the node is the root node, in which + /// case we clone it only when its degree is larger than one. + /// + /// Return either the original node or the cloned node at the end. + pub(crate) fn plant_if_needed( + &mut self, + node: usize, + mut fragment: Self, + ) -> Result { + if fragment.is_empty() { + return Ok(node); + } + + let node_label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?; + + if node_label.is_packed() || node_label.clone_index().is_some() { + let mut planted = false; + + let mut node = node; + + if node_label.clone_index().is_some() { + let mut parents = self.parents_of(node)?; + + assert_eq!(parents.len(), 1); + + node = parents.next().unwrap().node(); + } + + for clone in self.children_of(node)? { + node = clone; + + if self.is_prefix(clone, &fragment)? { + planted = true; + + break; + } + } + + if !planted { + let clone = self.clone_node(node, 0, false)?; + + fragment.set_root(1)?; + + self.plant(clone, fragment, false)?; + + return Ok(clone); + } + } else { + let clone_threshold = if self.root == Some(node) { 1 } else { 0 }; + + let planted = self.is_prefix(node, &fragment)?; + + fragment.set_root(1)?; + + if !planted && self.degree(node)? > clone_threshold { + let clone = self.clone_node(node, 0, false)?; + + self.plant(clone, fragment, false)?; + + return Ok(clone); + } else if !planted { + self.plant(node, fragment, false)?; + } + } + + Ok(node) + } +} + pub mod splone; impl DefaultForest { @@ -596,9 +707,18 @@ impl DefaultForest { Self { graph, root } } -} -impl Eq for DefaultForest> {} + /// Set the root to the given node. + pub fn set_root(&mut self, root: usize) -> Result<(), Error> { + if root >= self.nodes_len() { + return Err(Error::IndexOutOfBounds(root, self.nodes_len())); + } + + self.root = Some(root); + + Ok(()) + } +} impl DefaultForest> { /// Prints the forest without nodes that do not have ending @@ -613,6 +733,8 @@ impl DefaultForest> { Ok(label) => { if let Some(label) = label { label.label().end().is_none() + || (matches!(self.degree(*node), Ok(0)) + && matches!(self.parents_of(*node).map(|iter| iter.len()), Ok(0))) } else { true } @@ -674,6 +796,299 @@ impl DefaultForest> { file.write_all(result.as_bytes()) } + + /// Remove a node by removing all edges connecting with it. + pub fn remove_node(&mut self, node_id: usize) -> Result<(), Error> { + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let mut to_remove = + Vec::with_capacity(builder.degree(node_id)? + builder.parents_of(node_id)?.len()); + + to_remove.extend( + builder + .children_of(node_id)? + .map(|target| (node_id, target)), + ); + + to_remove.extend( + builder + .parents_of(node_id)? + .map(|parent| (parent.node(), node_id)), + ); + + for (source, target) in to_remove { + builder.remove_edge(source, target, |_| true)?; + } + + Ok(()) + } + + /// Find the last child of the given node whose start and end + /// positions contain the given position. If no such child is + /// found, return `Ok(None)`. + /// + /// The returned tuple is of the form (child, index), where + /// `child` is the index of the child node in question, and + /// `index` means that the child is the `index`-th child of the + /// node. + pub(crate) fn position_search( + &self, + node: usize, + pos: usize, + ) -> Result, Error> { + fn range_contains(label: GrammarLabel, pos: usize) -> bool { + let start = label.start(); + + if let Some(end) = label.end() { + (start..end).contains(&pos) + } else { + (start..).contains(&pos) + } + } + + let node_label = self + .vertex_label(node)? + .ok_or(Error::NodeNoLabel(node))? + .label(); + + if !range_contains(node_label, pos) { + return Ok(None); + } + + for (index, child) in self.children_of(node)?.enumerate().rev() { + let child_label = self + .vertex_label(child)? + .ok_or(Error::NodeNoLabel(child))? + .label(); + + if range_contains(child_label, pos) { + return Ok(Some((child, index))); + } + } + + Ok(None) + } + + // /// Check if every child already has an end. + // fn every_child_is_completed(&self, node_id: usize, atom: &DefaultAtom) -> Result { + // let children = self.children_of(node_id)?; + + // if children.len() == 0 { + // return Ok(true); + // } + + // let mut pos = self + // .vertex_label(node_id)? + // .ok_or(Error::NodeNoLabel(node_id))? + // .label() + // .start(); + + // let mut last_child_label = None; + + // for child in children { + // let child_label = self + // .vertex_label(child)? + // .ok_or(Error::NodeNoLabel(child))? + // .label(); + + // last_child_label = Some(child_label); + + // if child_label.start() != pos || child_label.end().is_none() { + // return Ok(false); + // } + + // pos = child_label.end().unwrap(); + // } + + // if let Some(label) = last_child_label { + // if let Some(rule) = label.label().rule() { + // if !atom + // .is_accepting(2 * rule + 1) + // .map_err(|_| Error::IndexOutOfBounds(2 * rule + 1, atom.nodes_len()))? + // { + // return Ok(false); + // } + // } + // } + + // Ok(true) + // } + + // /// Complete the forest by trying to set the ending position of + // /// every node that does not have an end, by the ending position + // /// of its last child. + // pub fn complete(&mut self, atom: &DefaultAtom) -> Result<(), Error> { + // let mut stack: Vec<_> = self + // .nodes() + // .filter(|node| { + // let label = self.vertex_label(*node).unwrap().unwrap().label(); + + // label.label().rule().is_some() && label.end().is_some() + // }) + // .collect(); + + // let mut second_stack: Vec = Vec::new(); + + // let mut pending_candidates: Vec = Vec::new(); + + // let nodes_len = self.nodes_len(); + + // let mut seen_nodes: HashSet = HashSet::with_capacity(nodes_len); + + // while !stack.is_empty() { + // while let Some(mut top) = stack.pop() { + // if seen_nodes.contains(&top) { + // continue; + // } + + // seen_nodes.insert(top); + + // let top_label = self.vertex_label(top)?.unwrap(); + + // if top_label.clone_index().is_some() { + // let mut top_parents = self.parents_of(top)?; + + // if top_parents.len() != 1 { + // return Err(Error::InvalidClone(top)); + // } + + // top = top_parents.next().unwrap().node(); + // } + + // let rule_parents = self.parents_of(top)?; + + // let candidates = { + // let mut result = Vec::with_capacity(rule_parents.len()); + + // for parent in rule_parents { + // // for parent in self.parents_of(parent.node())? { + // // if self.degree(parent.node())? <= parent.edge() + 1 { + // result.push(parent); + // // } + // // } + // } + + // result + // }; + + // for candidate in candidates { + // if matches!(self.vertex_label(candidate.node())?, Some(label) if label.label().end().is_none()) + // { + // if self.every_child_is_completed(candidate.node(), atom)? { + // let last_child = self + // .nth_child(candidate.node(), self.degree(candidate.node())? - 1)?; + // let end = self + // .vertex_label(last_child)? + // .ok_or(Error::NodeNoLabel(last_child))? + // .label() + // .end(); + + // let sploned_node = self.splone( + // candidate.node(), + // end, + // self.degree(candidate.node())? - 1, + // true, + // )?; + + // second_stack.push(sploned_node); + // } else { + // pending_candidates.push(candidate.node()); + // } + // } else { + // second_stack.push(candidate.node()); + // } + // } + + // let mut new_pending = Vec::with_capacity(pending_candidates.len()); + + // while let Some(node) = pending_candidates.pop() { + // if self.every_child_is_completed(node, atom)? { + // let last_edge = self.degree(node)? - 1; + // let last_child = self.nth_child(node, last_edge)?; + // let end = self + // .vertex_label(last_child)? + // .ok_or(Error::NodeNoLabel(last_child))? + // .label() + // .end(); + + // let sploned_node = self.splone(node, end, last_edge, true)?; + + // second_stack.push(sploned_node); + // } else { + // new_pending.push(node); + // } + // } + + // std::mem::swap(&mut pending_candidates, &mut new_pending); + // } + + // std::mem::swap(&mut stack, &mut second_stack); + // } + + // Ok(()) + // } + + // /// Unconditionally set the label of the node to be the new label. + // /// + // /// # Warning + // /// + // /// Use with caution: it does not do anything special, so it is + // /// the responsibility of the caller to ensure this operation is + // /// legal. + // #[allow(dead_code)] + // pub(crate) fn set_label(&mut self, node: usize, label: GrammarLabel) -> Result<(), Error> { + // let status = self + // .vertex_label(node)? + // .ok_or(Error::NodeNoLabel(node))? + // .status; + + // let label = ForestLabel::new(label, status); + + // let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + // builder.set_label(node, label)?; + + // Ok(()) + // } + + /// Set the position of every node. + pub(crate) fn set_pos(&mut self, pos: usize) -> Result<(), Error> { + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + for node in 0..builder.nodes_len() { + let label = builder + .vertex_label(node)? + .ok_or(Error::NodeNoLabel(node))?; + + let mut label_label = label.label(); + + label_label.set_start(pos); + + if matches!(label_label.label().tnt(), Some(TNT::Ter(_))) { + label_label.set_end(pos + 1); + } else if builder.degree(node)? == 1 && label_label.label().rule().is_some() { + let child = builder.children_of(node)?.next().unwrap(); + + if matches!( + builder + .vertex_label(child)? + .ok_or(Error::NodeNoLabel(child))? + .label() + .label() + .tnt(), + Some(TNT::Ter(_)) + ) { + label_label.set_end(pos + 1); + } + } + + let new_label = ForestLabel::new(label_label, label.status); + + builder.set_label(node, new_label)?; + } + + Ok(()) + } } /// Print the labels found in the forest, so that we can easily -- cgit v1.2.3-18-g5258