summaryrefslogtreecommitdiff
path: root/chain/src/item/default/mod.rs
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2023-02-27 12:36:41 +0800
committerJSDurand <mmemmew@gmail.com>2023-02-27 12:36:41 +0800
commitfbaa420ed550e9c3e7cdc09d4a8ec22bfbd782a6 (patch)
treefad9722825bb3fa796dd52c3fd4a8bf46b958cf9 /chain/src/item/default/mod.rs
parentafad02bdff111ecccb0077b9c989e869723c231c (diff)
before a major refactor
I decide to adopt a new approach of recording and updating item derivation forests. Since this affects a lot of things, I decide to commit before the refactor, so that I can create a branch for that refactor.
Diffstat (limited to 'chain/src/item/default/mod.rs')
-rw-r--r--chain/src/item/default/mod.rs469
1 files changed, 442 insertions, 27 deletions
diff --git a/chain/src/item/default/mod.rs b/chain/src/item/default/mod.rs
index 1a00368..22069d2 100644
--- a/chain/src/item/default/mod.rs
+++ b/chain/src/item/default/mod.rs
@@ -3,11 +3,13 @@
use super::*;
use crate::atom::default::DefaultAtom;
-use grammar::{GrammarLabel, GrammarLabelType};
+use grammar::{GrammarLabel, GrammarLabelType, TNT};
use graph::{
builder::BuilderMut, labelled::binary::PLGBuilderMut, Graph, LabelGraph, PLGraph, RedirectGraph,
};
+use std::collections::HashSet;
+
use core::fmt::Display;
/// The type of errors for forest operations.
@@ -30,6 +32,8 @@ pub enum Error {
LabelConversion(ForestLabelError),
/// Trying to split a packed node.
SplitPack(usize),
+ /// A cloned node should have exactly one parent.
+ InvalidClone(usize),
}
impl Display for Error {
@@ -43,6 +47,9 @@ impl Display for Error {
Error::NodeNoLabel(n) => write!(f, "node {n} has no labels, but it should have one"),
Error::LabelConversion(le) => write!(f, "fail to convert labels: {le}"),
Error::SplitPack(n) => write!(f, "cannot split the packed node {n}"),
+ Error::InvalidClone(n) => {
+ write!(f, "the cloned node {n} should have exactly one parent")
+ }
}
}
}
@@ -68,7 +75,7 @@ impl From<ForestLabelError> for Error {
/// A default implementation of forest.
#[derive(Debug, Clone)]
pub struct DefaultForest<T: GraphLabel> {
- graph: PLGraph<T>,
+ pub(crate) graph: PLGraph<T>,
root: Option<usize>,
}
@@ -262,15 +269,14 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> {
// the consideration).
let fragment = fragment.borrow();
+ let fragment_nodes_len = fragment.nodes_len();
- let mut frag_stack = Vec::with_capacity(fragment.nodes_len());
+ let mut frag_stack = Vec::with_capacity(fragment_nodes_len);
- let mut self_stack = Vec::with_capacity(fragment.nodes_len());
+ let mut self_stack = Vec::with_capacity(fragment_nodes_len);
- use std::collections::HashSet;
-
- let mut frag_seen: HashSet<usize> = HashSet::with_capacity(fragment.nodes_len());
- let mut self_seen: HashSet<usize> = HashSet::with_capacity(fragment.nodes_len());
+ let mut frag_seen: HashSet<usize> = HashSet::with_capacity(fragment_nodes_len);
+ let mut self_seen: HashSet<usize> = HashSet::with_capacity(fragment_nodes_len);
let frag_root = if let Some(root) = fragment.root() {
root
@@ -294,25 +300,25 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> {
if fragment.vertex_label(frag_top)? != self.vertex_label(self_top)? {
// not a prefix
- dbg!(
- frag_top,
- self_top,
- fragment.vertex_label(frag_top)?,
- self.vertex_label(self_top)?
- );
+ // dbg!(
+ // frag_top,
+ // self_top,
+ // fragment.vertex_label(frag_top)?,
+ // self.vertex_label(self_top)?
+ // );
return Ok(false);
}
let self_children = self.children_of(self_top)?;
let frag_children = fragment.children_of(frag_top)?;
- if frag_children.len() != self_children.len() {
- dbg!(
- frag_top,
- self_top,
- fragment.vertex_label(frag_top)?,
- self.vertex_label(self_top)?
- );
+ if frag_children.len() > self_children.len() {
+ // dbg!(
+ // frag_top,
+ // self_top,
+ // fragment.vertex_label(frag_top)?,
+ // self.vertex_label(self_top)?
+ // );
return Ok(false);
}
@@ -320,7 +326,7 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> {
if frag_seen.contains(&frag_child) && self_seen.contains(&self_child) {
continue;
} else if frag_seen.contains(&frag_child) || self_seen.contains(&self_child) {
- dbg!();
+ // dbg!();
return Ok(false);
}
@@ -352,7 +358,7 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> {
let root = if let Some(root) = fragment.root() {
root
} else {
- // Nothing to do to plant an empty forest.
+ // Nothing to plant.
return Ok(());
};
@@ -393,9 +399,27 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> {
return Ok(());
}
- // First adjoin those nodes and join the edges later.
+ // First adjoin the relevant nodes and join the edges later.
+
+ let mut used_nodes: Vec<bool> = std::iter::repeat(false).take(nodes_len).collect();
+
+ let mut stack = vec![root];
+
+ while let Some(top) = stack.pop() {
+ if used_nodes.get(top).copied() == Some(true) {
+ continue;
+ }
+
+ *used_nodes
+ .get_mut(top)
+ .ok_or(Error::IndexOutOfBounds(top, nodes_len))? = true;
+
+ stack.extend(fragment.children_of(top)?);
+ }
+
+ let used_nodes = used_nodes;
- for node in 0..nodes_len {
+ for node in (0..nodes_len).filter(|node| used_nodes.get(*node).copied() == Some(true)) {
let label = fragment
.vertex_label(node)?
.ok_or(Error::NodeNoLabel(node))?;
@@ -582,6 +606,93 @@ impl<T: GraphLabel> PartialEq for DefaultForest<ForestLabel<T>> {
}
}
+impl<T: GraphLabel> Eq for DefaultForest<ForestLabel<T>> {}
+
+impl<T: GraphLabel> DefaultForest<ForestLabel<T>> {
+ /// A convenient helper function to plant a fragment under a
+ /// node, if it has not been planted yet.
+ ///
+ /// To be precise, this function first checks if the node is
+ /// packed; if so, then it checks every of the cloned children, to
+ /// see if the fragment has already been planted. If none of the
+ /// clones have the fragment as a prefix, we make a new clone and
+ /// plant the fragment there.
+ ///
+ /// On the other hand, if the node is a plain node, then it checks
+ /// if the fragment is a prefix of the node. If so, do nothing,
+ /// else clone the node and plant the fragment there, unless the
+ /// node has no children yet, in which case just plant the node
+ /// there.
+ ///
+ /// A special case occurs when the node is the root node, in which
+ /// case we clone it only when its degree is larger than one.
+ ///
+ /// Return either the original node or the cloned node at the end.
+ pub(crate) fn plant_if_needed(
+ &mut self,
+ node: usize,
+ mut fragment: Self,
+ ) -> Result<usize, Error> {
+ if fragment.is_empty() {
+ return Ok(node);
+ }
+
+ let node_label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?;
+
+ if node_label.is_packed() || node_label.clone_index().is_some() {
+ let mut planted = false;
+
+ let mut node = node;
+
+ if node_label.clone_index().is_some() {
+ let mut parents = self.parents_of(node)?;
+
+ assert_eq!(parents.len(), 1);
+
+ node = parents.next().unwrap().node();
+ }
+
+ for clone in self.children_of(node)? {
+ node = clone;
+
+ if self.is_prefix(clone, &fragment)? {
+ planted = true;
+
+ break;
+ }
+ }
+
+ if !planted {
+ let clone = self.clone_node(node, 0, false)?;
+
+ fragment.set_root(1)?;
+
+ self.plant(clone, fragment, false)?;
+
+ return Ok(clone);
+ }
+ } else {
+ let clone_threshold = if self.root == Some(node) { 1 } else { 0 };
+
+ let planted = self.is_prefix(node, &fragment)?;
+
+ fragment.set_root(1)?;
+
+ if !planted && self.degree(node)? > clone_threshold {
+ let clone = self.clone_node(node, 0, false)?;
+
+ self.plant(clone, fragment, false)?;
+
+ return Ok(clone);
+ } else if !planted {
+ self.plant(node, fragment, false)?;
+ }
+ }
+
+ Ok(node)
+ }
+}
+
pub mod splone;
impl<T: GraphLabel> DefaultForest<T> {
@@ -596,9 +707,18 @@ impl<T: GraphLabel> DefaultForest<T> {
Self { graph, root }
}
-}
-impl<T: GraphLabel> Eq for DefaultForest<ForestLabel<T>> {}
+ /// Set the root to the given node.
+ pub fn set_root(&mut self, root: usize) -> Result<(), Error> {
+ if root >= self.nodes_len() {
+ return Err(Error::IndexOutOfBounds(root, self.nodes_len()));
+ }
+
+ self.root = Some(root);
+
+ Ok(())
+ }
+}
impl DefaultForest<ForestLabel<GrammarLabel>> {
/// Prints the forest without nodes that do not have ending
@@ -613,6 +733,8 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
Ok(label) => {
if let Some(label) = label {
label.label().end().is_none()
+ || (matches!(self.degree(*node), Ok(0))
+ && matches!(self.parents_of(*node).map(|iter| iter.len()), Ok(0)))
} else {
true
}
@@ -674,6 +796,299 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
file.write_all(result.as_bytes())
}
+
+ /// Remove a node by removing all edges connecting with it.
+ pub fn remove_node(&mut self, node_id: usize) -> Result<(), Error> {
+ let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph);
+
+ let mut to_remove =
+ Vec::with_capacity(builder.degree(node_id)? + builder.parents_of(node_id)?.len());
+
+ to_remove.extend(
+ builder
+ .children_of(node_id)?
+ .map(|target| (node_id, target)),
+ );
+
+ to_remove.extend(
+ builder
+ .parents_of(node_id)?
+ .map(|parent| (parent.node(), node_id)),
+ );
+
+ for (source, target) in to_remove {
+ builder.remove_edge(source, target, |_| true)?;
+ }
+
+ Ok(())
+ }
+
+ /// Find the last child of the given node whose start and end
+ /// positions contain the given position. If no such child is
+ /// found, return `Ok(None)`.
+ ///
+ /// The returned tuple is of the form (child, index), where
+ /// `child` is the index of the child node in question, and
+ /// `index` means that the child is the `index`-th child of the
+ /// node.
+ pub(crate) fn position_search(
+ &self,
+ node: usize,
+ pos: usize,
+ ) -> Result<Option<(usize, usize)>, Error> {
+ fn range_contains(label: GrammarLabel, pos: usize) -> bool {
+ let start = label.start();
+
+ if let Some(end) = label.end() {
+ (start..end).contains(&pos)
+ } else {
+ (start..).contains(&pos)
+ }
+ }
+
+ let node_label = self
+ .vertex_label(node)?
+ .ok_or(Error::NodeNoLabel(node))?
+ .label();
+
+ if !range_contains(node_label, pos) {
+ return Ok(None);
+ }
+
+ for (index, child) in self.children_of(node)?.enumerate().rev() {
+ let child_label = self
+ .vertex_label(child)?
+ .ok_or(Error::NodeNoLabel(child))?
+ .label();
+
+ if range_contains(child_label, pos) {
+ return Ok(Some((child, index)));
+ }
+ }
+
+ Ok(None)
+ }
+
+ // /// Check if every child already has an end.
+ // fn every_child_is_completed(&self, node_id: usize, atom: &DefaultAtom) -> Result<bool, Error> {
+ // let children = self.children_of(node_id)?;
+
+ // if children.len() == 0 {
+ // return Ok(true);
+ // }
+
+ // let mut pos = self
+ // .vertex_label(node_id)?
+ // .ok_or(Error::NodeNoLabel(node_id))?
+ // .label()
+ // .start();
+
+ // let mut last_child_label = None;
+
+ // for child in children {
+ // let child_label = self
+ // .vertex_label(child)?
+ // .ok_or(Error::NodeNoLabel(child))?
+ // .label();
+
+ // last_child_label = Some(child_label);
+
+ // if child_label.start() != pos || child_label.end().is_none() {
+ // return Ok(false);
+ // }
+
+ // pos = child_label.end().unwrap();
+ // }
+
+ // if let Some(label) = last_child_label {
+ // if let Some(rule) = label.label().rule() {
+ // if !atom
+ // .is_accepting(2 * rule + 1)
+ // .map_err(|_| Error::IndexOutOfBounds(2 * rule + 1, atom.nodes_len()))?
+ // {
+ // return Ok(false);
+ // }
+ // }
+ // }
+
+ // Ok(true)
+ // }
+
+ // /// Complete the forest by trying to set the ending position of
+ // /// every node that does not have an end, by the ending position
+ // /// of its last child.
+ // pub fn complete(&mut self, atom: &DefaultAtom) -> Result<(), Error> {
+ // let mut stack: Vec<_> = self
+ // .nodes()
+ // .filter(|node| {
+ // let label = self.vertex_label(*node).unwrap().unwrap().label();
+
+ // label.label().rule().is_some() && label.end().is_some()
+ // })
+ // .collect();
+
+ // let mut second_stack: Vec<usize> = Vec::new();
+
+ // let mut pending_candidates: Vec<usize> = Vec::new();
+
+ // let nodes_len = self.nodes_len();
+
+ // let mut seen_nodes: HashSet<usize> = HashSet::with_capacity(nodes_len);
+
+ // while !stack.is_empty() {
+ // while let Some(mut top) = stack.pop() {
+ // if seen_nodes.contains(&top) {
+ // continue;
+ // }
+
+ // seen_nodes.insert(top);
+
+ // let top_label = self.vertex_label(top)?.unwrap();
+
+ // if top_label.clone_index().is_some() {
+ // let mut top_parents = self.parents_of(top)?;
+
+ // if top_parents.len() != 1 {
+ // return Err(Error::InvalidClone(top));
+ // }
+
+ // top = top_parents.next().unwrap().node();
+ // }
+
+ // let rule_parents = self.parents_of(top)?;
+
+ // let candidates = {
+ // let mut result = Vec::with_capacity(rule_parents.len());
+
+ // for parent in rule_parents {
+ // // for parent in self.parents_of(parent.node())? {
+ // // if self.degree(parent.node())? <= parent.edge() + 1 {
+ // result.push(parent);
+ // // }
+ // // }
+ // }
+
+ // result
+ // };
+
+ // for candidate in candidates {
+ // if matches!(self.vertex_label(candidate.node())?, Some(label) if label.label().end().is_none())
+ // {
+ // if self.every_child_is_completed(candidate.node(), atom)? {
+ // let last_child = self
+ // .nth_child(candidate.node(), self.degree(candidate.node())? - 1)?;
+ // let end = self
+ // .vertex_label(last_child)?
+ // .ok_or(Error::NodeNoLabel(last_child))?
+ // .label()
+ // .end();
+
+ // let sploned_node = self.splone(
+ // candidate.node(),
+ // end,
+ // self.degree(candidate.node())? - 1,
+ // true,
+ // )?;
+
+ // second_stack.push(sploned_node);
+ // } else {
+ // pending_candidates.push(candidate.node());
+ // }
+ // } else {
+ // second_stack.push(candidate.node());
+ // }
+ // }
+
+ // let mut new_pending = Vec::with_capacity(pending_candidates.len());
+
+ // while let Some(node) = pending_candidates.pop() {
+ // if self.every_child_is_completed(node, atom)? {
+ // let last_edge = self.degree(node)? - 1;
+ // let last_child = self.nth_child(node, last_edge)?;
+ // let end = self
+ // .vertex_label(last_child)?
+ // .ok_or(Error::NodeNoLabel(last_child))?
+ // .label()
+ // .end();
+
+ // let sploned_node = self.splone(node, end, last_edge, true)?;
+
+ // second_stack.push(sploned_node);
+ // } else {
+ // new_pending.push(node);
+ // }
+ // }
+
+ // std::mem::swap(&mut pending_candidates, &mut new_pending);
+ // }
+
+ // std::mem::swap(&mut stack, &mut second_stack);
+ // }
+
+ // Ok(())
+ // }
+
+ // /// Unconditionally set the label of the node to be the new label.
+ // ///
+ // /// # Warning
+ // ///
+ // /// Use with caution: it does not do anything special, so it is
+ // /// the responsibility of the caller to ensure this operation is
+ // /// legal.
+ // #[allow(dead_code)]
+ // pub(crate) fn set_label(&mut self, node: usize, label: GrammarLabel) -> Result<(), Error> {
+ // let status = self
+ // .vertex_label(node)?
+ // .ok_or(Error::NodeNoLabel(node))?
+ // .status;
+
+ // let label = ForestLabel::new(label, status);
+
+ // let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph);
+
+ // builder.set_label(node, label)?;
+
+ // Ok(())
+ // }
+
+ /// Set the position of every node.
+ pub(crate) fn set_pos(&mut self, pos: usize) -> Result<(), Error> {
+ let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph);
+
+ for node in 0..builder.nodes_len() {
+ let label = builder
+ .vertex_label(node)?
+ .ok_or(Error::NodeNoLabel(node))?;
+
+ let mut label_label = label.label();
+
+ label_label.set_start(pos);
+
+ if matches!(label_label.label().tnt(), Some(TNT::Ter(_))) {
+ label_label.set_end(pos + 1);
+ } else if builder.degree(node)? == 1 && label_label.label().rule().is_some() {
+ let child = builder.children_of(node)?.next().unwrap();
+
+ if matches!(
+ builder
+ .vertex_label(child)?
+ .ok_or(Error::NodeNoLabel(child))?
+ .label()
+ .label()
+ .tnt(),
+ Some(TNT::Ter(_))
+ ) {
+ label_label.set_end(pos + 1);
+ }
+ }
+
+ let new_label = ForestLabel::new(label_label, label.status);
+
+ builder.set_label(node, new_label)?;
+ }
+
+ Ok(())
+ }
}
/// Print the labels found in the forest, so that we can easily