diff options
Diffstat (limited to 'chain/src/item')
-rw-r--r-- | chain/src/item/default/mod.rs | 469 | ||||
-rw-r--r-- | chain/src/item/default/splone.rs | 187 | ||||
-rw-r--r-- | chain/src/item/forest-format.org | 10 | ||||
-rw-r--r-- | chain/src/item/genins.rs | 494 | ||||
-rw-r--r-- | chain/src/item/mod.rs | 85 |
5 files changed, 1039 insertions, 206 deletions
diff --git a/chain/src/item/default/mod.rs b/chain/src/item/default/mod.rs index 1a00368..22069d2 100644 --- a/chain/src/item/default/mod.rs +++ b/chain/src/item/default/mod.rs @@ -3,11 +3,13 @@ use super::*; use crate::atom::default::DefaultAtom; -use grammar::{GrammarLabel, GrammarLabelType}; +use grammar::{GrammarLabel, GrammarLabelType, TNT}; use graph::{ builder::BuilderMut, labelled::binary::PLGBuilderMut, Graph, LabelGraph, PLGraph, RedirectGraph, }; +use std::collections::HashSet; + use core::fmt::Display; /// The type of errors for forest operations. @@ -30,6 +32,8 @@ pub enum Error { LabelConversion(ForestLabelError), /// Trying to split a packed node. SplitPack(usize), + /// A cloned node should have exactly one parent. + InvalidClone(usize), } impl Display for Error { @@ -43,6 +47,9 @@ impl Display for Error { Error::NodeNoLabel(n) => write!(f, "node {n} has no labels, but it should have one"), Error::LabelConversion(le) => write!(f, "fail to convert labels: {le}"), Error::SplitPack(n) => write!(f, "cannot split the packed node {n}"), + Error::InvalidClone(n) => { + write!(f, "the cloned node {n} should have exactly one parent") + } } } } @@ -68,7 +75,7 @@ impl From<ForestLabelError> for Error { /// A default implementation of forest. #[derive(Debug, Clone)] pub struct DefaultForest<T: GraphLabel> { - graph: PLGraph<T>, + pub(crate) graph: PLGraph<T>, root: Option<usize>, } @@ -262,15 +269,14 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> { // the consideration). let fragment = fragment.borrow(); + let fragment_nodes_len = fragment.nodes_len(); - let mut frag_stack = Vec::with_capacity(fragment.nodes_len()); + let mut frag_stack = Vec::with_capacity(fragment_nodes_len); - let mut self_stack = Vec::with_capacity(fragment.nodes_len()); + let mut self_stack = Vec::with_capacity(fragment_nodes_len); - use std::collections::HashSet; - - let mut frag_seen: HashSet<usize> = HashSet::with_capacity(fragment.nodes_len()); - let mut self_seen: HashSet<usize> = HashSet::with_capacity(fragment.nodes_len()); + let mut frag_seen: HashSet<usize> = HashSet::with_capacity(fragment_nodes_len); + let mut self_seen: HashSet<usize> = HashSet::with_capacity(fragment_nodes_len); let frag_root = if let Some(root) = fragment.root() { root @@ -294,25 +300,25 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> { if fragment.vertex_label(frag_top)? != self.vertex_label(self_top)? { // not a prefix - dbg!( - frag_top, - self_top, - fragment.vertex_label(frag_top)?, - self.vertex_label(self_top)? - ); + // dbg!( + // frag_top, + // self_top, + // fragment.vertex_label(frag_top)?, + // self.vertex_label(self_top)? + // ); return Ok(false); } let self_children = self.children_of(self_top)?; let frag_children = fragment.children_of(frag_top)?; - if frag_children.len() != self_children.len() { - dbg!( - frag_top, - self_top, - fragment.vertex_label(frag_top)?, - self.vertex_label(self_top)? - ); + if frag_children.len() > self_children.len() { + // dbg!( + // frag_top, + // self_top, + // fragment.vertex_label(frag_top)?, + // self.vertex_label(self_top)? + // ); return Ok(false); } @@ -320,7 +326,7 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> { if frag_seen.contains(&frag_child) && self_seen.contains(&self_child) { continue; } else if frag_seen.contains(&frag_child) || self_seen.contains(&self_child) { - dbg!(); + // dbg!(); return Ok(false); } @@ -352,7 +358,7 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> { let root = if let Some(root) = fragment.root() { root } else { - // Nothing to do to plant an empty forest. + // Nothing to plant. return Ok(()); }; @@ -393,9 +399,27 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> { return Ok(()); } - // First adjoin those nodes and join the edges later. + // First adjoin the relevant nodes and join the edges later. + + let mut used_nodes: Vec<bool> = std::iter::repeat(false).take(nodes_len).collect(); + + let mut stack = vec![root]; + + while let Some(top) = stack.pop() { + if used_nodes.get(top).copied() == Some(true) { + continue; + } + + *used_nodes + .get_mut(top) + .ok_or(Error::IndexOutOfBounds(top, nodes_len))? = true; + + stack.extend(fragment.children_of(top)?); + } + + let used_nodes = used_nodes; - for node in 0..nodes_len { + for node in (0..nodes_len).filter(|node| used_nodes.get(*node).copied() == Some(true)) { let label = fragment .vertex_label(node)? .ok_or(Error::NodeNoLabel(node))?; @@ -582,6 +606,93 @@ impl<T: GraphLabel> PartialEq for DefaultForest<ForestLabel<T>> { } } +impl<T: GraphLabel> Eq for DefaultForest<ForestLabel<T>> {} + +impl<T: GraphLabel> DefaultForest<ForestLabel<T>> { + /// A convenient helper function to plant a fragment under a + /// node, if it has not been planted yet. + /// + /// To be precise, this function first checks if the node is + /// packed; if so, then it checks every of the cloned children, to + /// see if the fragment has already been planted. If none of the + /// clones have the fragment as a prefix, we make a new clone and + /// plant the fragment there. + /// + /// On the other hand, if the node is a plain node, then it checks + /// if the fragment is a prefix of the node. If so, do nothing, + /// else clone the node and plant the fragment there, unless the + /// node has no children yet, in which case just plant the node + /// there. + /// + /// A special case occurs when the node is the root node, in which + /// case we clone it only when its degree is larger than one. + /// + /// Return either the original node or the cloned node at the end. + pub(crate) fn plant_if_needed( + &mut self, + node: usize, + mut fragment: Self, + ) -> Result<usize, Error> { + if fragment.is_empty() { + return Ok(node); + } + + let node_label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?; + + if node_label.is_packed() || node_label.clone_index().is_some() { + let mut planted = false; + + let mut node = node; + + if node_label.clone_index().is_some() { + let mut parents = self.parents_of(node)?; + + assert_eq!(parents.len(), 1); + + node = parents.next().unwrap().node(); + } + + for clone in self.children_of(node)? { + node = clone; + + if self.is_prefix(clone, &fragment)? { + planted = true; + + break; + } + } + + if !planted { + let clone = self.clone_node(node, 0, false)?; + + fragment.set_root(1)?; + + self.plant(clone, fragment, false)?; + + return Ok(clone); + } + } else { + let clone_threshold = if self.root == Some(node) { 1 } else { 0 }; + + let planted = self.is_prefix(node, &fragment)?; + + fragment.set_root(1)?; + + if !planted && self.degree(node)? > clone_threshold { + let clone = self.clone_node(node, 0, false)?; + + self.plant(clone, fragment, false)?; + + return Ok(clone); + } else if !planted { + self.plant(node, fragment, false)?; + } + } + + Ok(node) + } +} + pub mod splone; impl<T: GraphLabel> DefaultForest<T> { @@ -596,9 +707,18 @@ impl<T: GraphLabel> DefaultForest<T> { Self { graph, root } } -} -impl<T: GraphLabel> Eq for DefaultForest<ForestLabel<T>> {} + /// Set the root to the given node. + pub fn set_root(&mut self, root: usize) -> Result<(), Error> { + if root >= self.nodes_len() { + return Err(Error::IndexOutOfBounds(root, self.nodes_len())); + } + + self.root = Some(root); + + Ok(()) + } +} impl DefaultForest<ForestLabel<GrammarLabel>> { /// Prints the forest without nodes that do not have ending @@ -613,6 +733,8 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { Ok(label) => { if let Some(label) = label { label.label().end().is_none() + || (matches!(self.degree(*node), Ok(0)) + && matches!(self.parents_of(*node).map(|iter| iter.len()), Ok(0))) } else { true } @@ -674,6 +796,299 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { file.write_all(result.as_bytes()) } + + /// Remove a node by removing all edges connecting with it. + pub fn remove_node(&mut self, node_id: usize) -> Result<(), Error> { + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let mut to_remove = + Vec::with_capacity(builder.degree(node_id)? + builder.parents_of(node_id)?.len()); + + to_remove.extend( + builder + .children_of(node_id)? + .map(|target| (node_id, target)), + ); + + to_remove.extend( + builder + .parents_of(node_id)? + .map(|parent| (parent.node(), node_id)), + ); + + for (source, target) in to_remove { + builder.remove_edge(source, target, |_| true)?; + } + + Ok(()) + } + + /// Find the last child of the given node whose start and end + /// positions contain the given position. If no such child is + /// found, return `Ok(None)`. + /// + /// The returned tuple is of the form (child, index), where + /// `child` is the index of the child node in question, and + /// `index` means that the child is the `index`-th child of the + /// node. + pub(crate) fn position_search( + &self, + node: usize, + pos: usize, + ) -> Result<Option<(usize, usize)>, Error> { + fn range_contains(label: GrammarLabel, pos: usize) -> bool { + let start = label.start(); + + if let Some(end) = label.end() { + (start..end).contains(&pos) + } else { + (start..).contains(&pos) + } + } + + let node_label = self + .vertex_label(node)? + .ok_or(Error::NodeNoLabel(node))? + .label(); + + if !range_contains(node_label, pos) { + return Ok(None); + } + + for (index, child) in self.children_of(node)?.enumerate().rev() { + let child_label = self + .vertex_label(child)? + .ok_or(Error::NodeNoLabel(child))? + .label(); + + if range_contains(child_label, pos) { + return Ok(Some((child, index))); + } + } + + Ok(None) + } + + // /// Check if every child already has an end. + // fn every_child_is_completed(&self, node_id: usize, atom: &DefaultAtom) -> Result<bool, Error> { + // let children = self.children_of(node_id)?; + + // if children.len() == 0 { + // return Ok(true); + // } + + // let mut pos = self + // .vertex_label(node_id)? + // .ok_or(Error::NodeNoLabel(node_id))? + // .label() + // .start(); + + // let mut last_child_label = None; + + // for child in children { + // let child_label = self + // .vertex_label(child)? + // .ok_or(Error::NodeNoLabel(child))? + // .label(); + + // last_child_label = Some(child_label); + + // if child_label.start() != pos || child_label.end().is_none() { + // return Ok(false); + // } + + // pos = child_label.end().unwrap(); + // } + + // if let Some(label) = last_child_label { + // if let Some(rule) = label.label().rule() { + // if !atom + // .is_accepting(2 * rule + 1) + // .map_err(|_| Error::IndexOutOfBounds(2 * rule + 1, atom.nodes_len()))? + // { + // return Ok(false); + // } + // } + // } + + // Ok(true) + // } + + // /// Complete the forest by trying to set the ending position of + // /// every node that does not have an end, by the ending position + // /// of its last child. + // pub fn complete(&mut self, atom: &DefaultAtom) -> Result<(), Error> { + // let mut stack: Vec<_> = self + // .nodes() + // .filter(|node| { + // let label = self.vertex_label(*node).unwrap().unwrap().label(); + + // label.label().rule().is_some() && label.end().is_some() + // }) + // .collect(); + + // let mut second_stack: Vec<usize> = Vec::new(); + + // let mut pending_candidates: Vec<usize> = Vec::new(); + + // let nodes_len = self.nodes_len(); + + // let mut seen_nodes: HashSet<usize> = HashSet::with_capacity(nodes_len); + + // while !stack.is_empty() { + // while let Some(mut top) = stack.pop() { + // if seen_nodes.contains(&top) { + // continue; + // } + + // seen_nodes.insert(top); + + // let top_label = self.vertex_label(top)?.unwrap(); + + // if top_label.clone_index().is_some() { + // let mut top_parents = self.parents_of(top)?; + + // if top_parents.len() != 1 { + // return Err(Error::InvalidClone(top)); + // } + + // top = top_parents.next().unwrap().node(); + // } + + // let rule_parents = self.parents_of(top)?; + + // let candidates = { + // let mut result = Vec::with_capacity(rule_parents.len()); + + // for parent in rule_parents { + // // for parent in self.parents_of(parent.node())? { + // // if self.degree(parent.node())? <= parent.edge() + 1 { + // result.push(parent); + // // } + // // } + // } + + // result + // }; + + // for candidate in candidates { + // if matches!(self.vertex_label(candidate.node())?, Some(label) if label.label().end().is_none()) + // { + // if self.every_child_is_completed(candidate.node(), atom)? { + // let last_child = self + // .nth_child(candidate.node(), self.degree(candidate.node())? - 1)?; + // let end = self + // .vertex_label(last_child)? + // .ok_or(Error::NodeNoLabel(last_child))? + // .label() + // .end(); + + // let sploned_node = self.splone( + // candidate.node(), + // end, + // self.degree(candidate.node())? - 1, + // true, + // )?; + + // second_stack.push(sploned_node); + // } else { + // pending_candidates.push(candidate.node()); + // } + // } else { + // second_stack.push(candidate.node()); + // } + // } + + // let mut new_pending = Vec::with_capacity(pending_candidates.len()); + + // while let Some(node) = pending_candidates.pop() { + // if self.every_child_is_completed(node, atom)? { + // let last_edge = self.degree(node)? - 1; + // let last_child = self.nth_child(node, last_edge)?; + // let end = self + // .vertex_label(last_child)? + // .ok_or(Error::NodeNoLabel(last_child))? + // .label() + // .end(); + + // let sploned_node = self.splone(node, end, last_edge, true)?; + + // second_stack.push(sploned_node); + // } else { + // new_pending.push(node); + // } + // } + + // std::mem::swap(&mut pending_candidates, &mut new_pending); + // } + + // std::mem::swap(&mut stack, &mut second_stack); + // } + + // Ok(()) + // } + + // /// Unconditionally set the label of the node to be the new label. + // /// + // /// # Warning + // /// + // /// Use with caution: it does not do anything special, so it is + // /// the responsibility of the caller to ensure this operation is + // /// legal. + // #[allow(dead_code)] + // pub(crate) fn set_label(&mut self, node: usize, label: GrammarLabel) -> Result<(), Error> { + // let status = self + // .vertex_label(node)? + // .ok_or(Error::NodeNoLabel(node))? + // .status; + + // let label = ForestLabel::new(label, status); + + // let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + // builder.set_label(node, label)?; + + // Ok(()) + // } + + /// Set the position of every node. + pub(crate) fn set_pos(&mut self, pos: usize) -> Result<(), Error> { + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + for node in 0..builder.nodes_len() { + let label = builder + .vertex_label(node)? + .ok_or(Error::NodeNoLabel(node))?; + + let mut label_label = label.label(); + + label_label.set_start(pos); + + if matches!(label_label.label().tnt(), Some(TNT::Ter(_))) { + label_label.set_end(pos + 1); + } else if builder.degree(node)? == 1 && label_label.label().rule().is_some() { + let child = builder.children_of(node)?.next().unwrap(); + + if matches!( + builder + .vertex_label(child)? + .ok_or(Error::NodeNoLabel(child))? + .label() + .label() + .tnt(), + Some(TNT::Ter(_)) + ) { + label_label.set_end(pos + 1); + } + } + + let new_label = ForestLabel::new(label_label, label.status); + + builder.set_label(node, new_label)?; + } + + Ok(()) + } } /// Print the labels found in the forest, so that we can easily diff --git a/chain/src/item/default/splone.rs b/chain/src/item/default/splone.rs index 851968c..237b92a 100644 --- a/chain/src/item/default/splone.rs +++ b/chain/src/item/default/splone.rs @@ -1,4 +1,3 @@ -#![allow(dead_code)] //! This module implements a function for "closing" and "splitting" a //! node in a forest, and hence the name. @@ -20,6 +19,7 @@ fn get_rule_label(label: GrammarLabel) -> Option<usize> { } /// Existing or non-existing label. +#[derive(Debug, Copy, Clone)] enum Eon { Ex(usize), Nex(ForestLabel<GrammarLabel>), @@ -62,6 +62,12 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { /// [`split_node`][DefaultForest::<ForestLabel<GrammarLabel>>::split_node] /// for details. /// + /// # `completingp` + /// + /// When we are completing the forest at the end, we do not wish + /// to keep the nodes at the end, so we pass a flag to inform the + /// function of this intention. + /// /// # Return /// /// The function returns the new, splitted or cloned node, or the @@ -90,6 +96,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { node: usize, end: Option<usize>, edge_index: usize, + completingp: bool, ) -> Result<usize, Error> { let node_label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?; @@ -130,7 +137,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { || node_label.clone_index().is_some() || new_label.clone_index().is_some() { - return self.split_node(new_label, node, edge_index); + return self.split_node(new_label, node, edge_index, completingp); } // replace the label directly @@ -151,6 +158,11 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { .ok_or(Error::NodeNoLabel(parent))? .label(); + if get_rule_label(parent_label).is_none() { + self.print_viz("dbg forest.gv").unwrap(); + panic!("assumption fails"); + } + assert!(get_rule_label(parent_label).is_some()); assert_eq!(builder.degree(parent)?, 1); @@ -207,12 +219,13 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { new_label: ForestLabel<GrammarLabel>, mut node: usize, edge_index: usize, + completingp: bool, ) -> Result<usize, Error> { let end = new_label.label().end(); let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); - let mut new_node = builder.add_vertex(new_label); + let new_node = builder.add_vertex(new_label); let new_packed = if new_label.clone_index().is_some() { let packed = builder @@ -261,7 +274,13 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { .label(); assert!(get_rule_label(parent_label).is_some()); - assert_eq!(self.degree(parent.node())?, 1); + + if self.degree(parent.node())? != 1 { + dbg!(parent); + self.print_viz("dbg forest.gv").unwrap(); + + panic!("assumption fails"); + } if let Some(pos) = end { parent_label.set_end(pos); @@ -276,11 +295,11 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { let new_parent = builder.add_vertex(parent_label); if let Some(packed) = new_packed { - new_node = packed; + builder.add_edge(new_parent, packed, new_label)?; + } else { + builder.add_edge(new_parent, new_node, new_label)?; } - builder.add_edge(new_parent, new_node, new_label)?; - result.extend( self.parents_of(parent.node())? .map(|parent_parent| (parent_parent, new_parent)), @@ -291,21 +310,48 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { }; for (parent, new_child) in parents { - if self.has_same_children_until( - parent.node(), - parent.node(), - parent.edge(), - new_child, - )? { - continue; - } + if !completingp { + if self.has_same_children_until( + parent.node(), + parent.node(), + parent.edge(), + new_child, + )? { + continue; + } - // we don't add a child to parent.edge() here. - let cloned = self.clone_node(parent.node(), parent.edge(), false)?; + // we don't add a child to parent.edge() here. + let cloned = self.clone_node(parent.node(), parent.edge(), false)?; - let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); - builder.add_edge(cloned, new_child, new_label)?; + builder.add_edge(cloned, new_child, new_label)?; + } else { + if self.has_same_children_except( + parent.node(), + parent.node(), + parent.edge(), + new_child, + )? { + continue; + } + + // we don't add a child to parent.edge() here. + let cloned = self.clone_node(parent.node(), parent.edge(), false)?; + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + builder.add_edge(cloned, new_child, new_label)?; + + let children_to_add: Vec<_> = builder + .children_of(parent.node())? + .skip(parent.edge() + 1) + .collect(); + + for child in children_to_add { + builder.add_edge(cloned, child, new_label)?; + } + } } Ok(new_node) @@ -486,6 +532,85 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { unreachable!("should not examine children of a packed node") } } + + /// Detect if a node has a branch that has the same children as + /// another node, except for a given index, where it points to another + /// given node. + /// + /// # Clones + /// + /// If `nodea` is a clone, it checks every clone to see if the + /// condition is satisfied for some clone. + fn has_same_children_except( + &self, + nodea: usize, + nodeb: usize, + edge_index: usize, + alternative: usize, + ) -> Result<bool, Error> { + let labela = self.vertex_label(nodea)?.ok_or(Error::NodeNoLabel(nodea))?; + let children_b = self.children_of(nodeb)?; + + if children_b.len() < edge_index + 1 { + return Err(Error::IndexOutOfBounds(edge_index, children_b.len() - 1)); + } + + if labela.is_plain() { + let children_a = self.children_of(nodea)?; + + if children_a.len() != children_b.len() { + return Ok(false); + } + + for (index, (childa, childb)) in + children_a.zip(children_b).take(edge_index + 1).enumerate() + { + if index != edge_index { + if childa != childb { + return Ok(false); + } + } else if childa != alternative { + return Ok(false); + } + } + + Ok(true) + } else if labela.clone_index().is_some() { + let mut parentsa = self.parents_of(nodea)?; + + assert_eq!(parentsa.len(), 1); + + let parenta = parentsa.next().unwrap().node(); + + 'branch_loop: for branch in self.children_of(parenta)? { + let children_a = self.children_of(branch)?; + let children_b = children_b.clone(); + + if children_a.len() < children_b.len() { + continue 'branch_loop; + } + + for (index, (childa, childb)) in + children_a.zip(children_b).take(edge_index + 1).enumerate() + { + if index != edge_index { + if childa != childb { + continue 'branch_loop; + } + } else if childa != alternative { + continue 'branch_loop; + } + } + + return Ok(true); + } + + Ok(false) + } else { + // a packed node; this should not happen + unreachable!("should not examine children of a packed node") + } + } } #[cfg(test)] @@ -688,7 +813,7 @@ mod test_splone { fn test() -> Result<(), Box<dyn std::error::Error>> { let mut test_forest = create_test_forest()?; - test_forest.splone(6, Some(3), 1)?; + test_forest.splone(6, Some(3), 1, false)?; let answer = splone_6_3_1()?; @@ -698,7 +823,7 @@ mod test_splone { panic!("splone(6, Some(3), 1) produced wrong forest"); } - test_forest.splone(6, Some(3), 1)?; + test_forest.splone(6, Some(3), 1, false)?; if test_forest != answer { test_forest.print_viz("sploned forest.gv")?; @@ -706,7 +831,7 @@ mod test_splone { panic!("repeated splone(6, Some(3), 1) produced wrong forest"); } - test_forest.splone(6, Some(2), 0)?; + test_forest.splone(6, Some(2), 0, false)?; let answer = splone_6_2_0()?; @@ -716,7 +841,7 @@ mod test_splone { panic!("splone(6, Some(2), 0) produced wrong forest"); } - test_forest.splone(6, Some(2), 0)?; + test_forest.splone(6, Some(2), 0, false)?; if test_forest != answer { test_forest.print_viz("sploned forest.gv")?; @@ -724,7 +849,7 @@ mod test_splone { panic!("repeated splone(6, Some(2), 0) produced wrong forest"); } - test_forest.splone(6, None, 0)?; + test_forest.splone(6, None, 0, false)?; let answer = splone_6_n_0()?; @@ -734,7 +859,7 @@ mod test_splone { panic!("splone(6, None, 0) produced wrong forest"); } - test_forest.splone(14, None, 0)?; + test_forest.splone(14, None, 0, false)?; if test_forest != answer { test_forest.print_viz("sploned forest.gv")?; @@ -742,7 +867,7 @@ mod test_splone { panic!("repeated splone(6, None, 0) produced wrong forest"); } - test_forest.splone(4, Some(3), 0)?; + test_forest.splone(4, Some(3), 0, false)?; let answer = splone_4_3_0()?; @@ -752,7 +877,7 @@ mod test_splone { panic!("splone(4, Some(3), 0) produced wrong forest"); } - test_forest.splone(4, Some(3), 0)?; + test_forest.splone(4, Some(3), 0, false)?; if test_forest != answer { test_forest.print_viz("sploned forest.gv")?; @@ -760,8 +885,8 @@ mod test_splone { panic!("repeated splone(4, Some(3), 0) produced wrong forest"); } - test_forest.splone(17, Some(3), 0)?; - test_forest.splone(15, Some(3), 0)?; + test_forest.splone(17, Some(3), 0, false)?; + test_forest.splone(15, Some(3), 0, false)?; let answer = splone_17_3_0_15_3_0()?; @@ -771,8 +896,8 @@ mod test_splone { panic!("splone(17, Some(3), 0) - splone(15, Some(3), 0) produced wrong forest"); } - test_forest.splone(17, Some(3), 0)?; - test_forest.splone(15, Some(3), 0)?; + test_forest.splone(17, Some(3), 0, false)?; + test_forest.splone(15, Some(3), 0, false)?; if test_forest != answer { test_forest.print_viz("sploned forest.gv")?; diff --git a/chain/src/item/forest-format.org b/chain/src/item/forest-format.org new file mode 100644 index 0000000..eb0f150 --- /dev/null +++ b/chain/src/item/forest-format.org @@ -0,0 +1,10 @@ +#+TITLE: Format of forests +#+DATE: [2023-02-13 Lun 22:05] +#+AUTHOR: Durand + +In this document I try to explain the format of the forests returned +by the parser generator. + +* Basic terms + +# TODO: Explain diff --git a/chain/src/item/genins.rs b/chain/src/item/genins.rs index 4c51d9a..feb45c6 100644 --- a/chain/src/item/genins.rs +++ b/chain/src/item/genins.rs @@ -7,19 +7,24 @@ //! semirings later on. use super::*; -use crate::{atom::DefaultAtom, default::Error, item::default::DefaultForest, Edge}; +use crate::{ + atom::{Atom, DefaultAtom}, + default::Error, + item::default::DefaultForest, + Edge, +}; use grammar::{Error as GrammarError, GrammarLabel, GrammarLabelType, TNT}; -use graph::Graph; +#[allow(unused_imports)] +use graph::{builder::BuilderMut, labelled::binary::PLGBuilderMut, Graph, RedirectGraph}; use core::borrow::Borrow; -use std::collections::HashSet as Set; /// Convert an error telling us that an index is out of bounds. /// /// # Panics /// /// The function panics if the error is not of the expected kind. -fn index_out_of_bounds_conversion(ge: GrammarError) -> Error { +pub(crate) fn index_out_of_bounds_conversion(ge: GrammarError) -> Error { match ge { GrammarError::IndexOutOfBounds(index, bound) => Error::IndexOutOfBounds(index, bound), _ => Error::Invalid, @@ -105,11 +110,6 @@ pub fn virtual_generate_fragment( Some(TNT::Ter(ter)) if ter == t) { for child in child_iter { - // #[allow(unused_must_use)] - // { - // dbg!(non_start, child, atom.query_expansion(non_start, child)); - // } - let line: Vec<GrammarLabelType> = atom .query_expansion(non_start, child) .map_err(index_out_of_bounds_conversion)? @@ -124,7 +124,15 @@ pub fn virtual_generate_fragment( if result.is_empty() { result = generate_fragment(line, pos)?; } else { - result.plant(0, generate_fragment(line, pos)?, false)?; + let mut new_fragment = generate_fragment(line, pos)?; + + new_fragment.remove_node(0)?; + + new_fragment.set_root(1)?; + + let cloned = result.clone_node(0, 0, false)?; + + result.plant(cloned, new_fragment, false)?; } } } @@ -133,90 +141,313 @@ pub fn virtual_generate_fragment( Ok(result) } +// TODO: Examine `insert_item` again. + impl DefaultForest<ForestLabel<GrammarLabel>> { /// Insert an item derivation forest into a recording forest. /// /// We need the help of other things just for finding the correct /// places to insert these item fragments. - pub fn insert_item( + pub(crate) fn insert_item( &mut self, label: Edge, fragment: impl Borrow<DefaultForest<ForestLabel<GrammarLabel>>>, - atom_child_iter: impl Iterator<Item = usize> + ExactSizeIterator, + atom_child_iter: impl Iterator<Item = usize> + ExactSizeIterator + Clone, atom: &DefaultAtom, - ) -> Result<PaSe, Error> { - let fragment = fragment.borrow(); + ) -> Result<PaVi, Error> { + let root = if let Some(root) = self.root() { + root + } else { + panic!("the forest must be non-empty when we insert items"); + }; + + let pavi = label.forest_source(); - let pase = label.forest_source(); + let true_source = label.true_source(); + + let fragment = fragment.borrow(); let fragment_root = if let Some(root) = fragment.root() { root } else { - return Err(Error::EmptyItem); + unreachable!("empty item"); }; - let pos = fragment + let fragment_root_label = fragment .vertex_label(fragment_root)? - .ok_or(Error::NodeNoLabel(fragment_root))? - .label() - .start(); + .ok_or(Error::NodeNoLabel(fragment_root))?; + + let pos = fragment_root_label.label().start(); + + if pavi != true_source { + dbg!(label, pos); + + // Completing from true_source up to the pavi + let true_source_node = match true_source { + PaVi::Parent(node, edge) => { + let nth_child = self.nth_child(node, edge)?; + let nth_child_label = self + .vertex_label(nth_child)? + .ok_or(Error::NodeNoLabel(nth_child))?; + + let nth_child_degree = self.degree(nth_child)?; + let nth_child_last = if nth_child_degree > 0 { + nth_child_degree - 1 + } else { + 0 + }; + + if matches!(nth_child_label.label().label().tnt(), Some(TNT::Non(_))) + && !nth_child_label.is_packed() + { + let sploned = self.splone(nth_child, Some(pos), nth_child_last, false)?; + + sploned + } else { + nth_child + } + } + PaVi::Virtual(nt, t, mut node) => { + let node_label_start = self + .vertex_label(node)? + .ok_or(Error::NodeNoLabel(node))? + .label() + .start(); + + let reduction_fragment = atom.generate_virtual_frags(nt, t, None); + + // FIXME: We shall "close" this fragment as well. + + for frag in reduction_fragment.into_iter().flatten() { + let mut frag = frag.clone(); + frag.set_pos(node_label_start)?; + + // NOTE: The function `plant_if_needed` + // assumes that we want to plant the fragment + // as the first child of the node. This + // assumption holds in this case, but not in + // general. + + node = self.plant_if_needed(node, frag)?; + } + + node + } + PaVi::Empty => { + dbg!(); + root + } + }; + + let true_source_pos = self + .vertex_label(true_source_node)? + .ok_or(Error::NodeNoLabel(true_source_node))? + .label() + .start(); + + let top_node = match pavi { + PaVi::Parent(node, edge) => self.nth_child(node, edge)?, + PaVi::Virtual(_nt, _t, _node) => { + dbg!(label); + + self.print_viz("dbg forest.gv").unwrap(); + fragment.print_viz("dbg fragment.gv").unwrap(); + + unreachable!("I do not think this is possible") + } + PaVi::Empty => { + unreachable!("The empty segment should not need to be reduced") + } + }; + + let mut stack = vec![vec![(top_node, 0)]]; + let mut result_stack = Vec::new(); + + while let Some(mut top_stack) = stack.pop() { + let (node, _) = top_stack.pop().unwrap(); + + let node_label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?; + + if node_label.is_packed() { + for node in self.children_of(node)? { + let search_child = self.position_search(node, true_source_pos)?; + + if let Some((child, index)) = search_child { + let mut top_stack = top_stack.clone(); + // Fix the previous element + top_stack.push((node, index)); + + if child == true_source_node { + result_stack.push(top_stack); + } else { + top_stack.push((child, 0)); + + stack.push(top_stack); + } + } + } + } else { + let search_child = self.position_search(node, true_source_pos)?; + + if let Some((child, index)) = search_child { + top_stack.push((node, index)); + + if child == true_source_node { + result_stack.push(top_stack); + } else { + top_stack.push((child, 0)); + + stack.push(top_stack); + } + } + } + } + + // FIXME: We have to change the pavi as well, otherwise we + // are going to use the wrong branch for planting later. + + for stack in result_stack { + // dbg!(&stack); + // self.print_viz("dbg forest.gv").unwrap(); + // panic!("test"); + + let mut first_time = true; + + for (node, index) in stack.into_iter().rev() { + if matches!( + self.vertex_label(node)? + .ok_or(Error::NodeNoLabel(node))? + .label() + .label() + .tnt(), + Some(TNT::Non(_)) + ) { + let sploned = self.splone(node, Some(pos), index, false)?; + + if !first_time { + let last_index = self.degree(sploned)? - 1; + + let last_child = self.nth_child(sploned, last_index)?; + + let last_child_label = self + .vertex_label(last_child)? + .ok_or(Error::NodeNoLabel(last_child))? + .label(); + + if last_child_label.end() != Some(pos) { + let closed_label = GrammarLabel::new_closed( + last_child_label.label(), + last_child_label.start(), + pos, + ); + + let closed_label_id = self + .query_label(closed_label.into()) + .expect("last child closed label not found"); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); - // Ensure the last node in the PaSe is a terminal or a + builder.redirect(sploned, last_index, closed_label_id)?; + } + } + + first_time = false; + } + } + } + } + + // Ensure the last node in the PaVi is a terminal or a // non-terminal node, as an extra safety guard during // development. #[cfg(debug_assertions)] { - if let Some(parent) = pase.parent() { - assert!(matches!( - self.vertex_label(self.nth_child(parent.node(), parent.edge())?), - Ok(Some(label)) - if label.label().label().tnt().is_some())); - } else if let Some((_, leaf)) = pase.segment() { - assert!(matches!( - self.vertex_label(leaf), - Ok(Some(label)) - if label.label().label().tnt().is_some())); - } else { - unreachable!("a pase is neither a parent nor a segment"); + match pavi { + PaVi::Parent(node, edge) => { + assert!(matches!( + self.vertex_label(self.nth_child(node, edge)?), + Ok(Some(label)) + if label.label().label().tnt().is_some())); + } + PaVi::Virtual(nt, t, node) => { + if !matches!( + self.vertex_label(node), + Ok(Some(label)) + if matches!( + label.label().label().tnt(), + Some(TNT::Non(_)))) + { + dbg!(node, self.vertex_label(node)?, pavi); + + self.print_viz("dbg forest.gv").unwrap(); + + panic!("assumption fails"); + } + + if nt >= atom.non_num() { + dbg!(); + return Err(Error::IndexOutOfBounds(nt, atom.non_num())); + } + + if t >= atom.ter_num() { + dbg!(); + return Err(Error::IndexOutOfBounds(t, atom.ter_num())); + } + } + PaVi::Empty => {} } } - let mut is_empty_segment = false; + let is_empty_segment = pavi.is_empty(); let mut parents: Vec<Parent> = { let mut result = Vec::new(); - if let Some(parent) = pase.parent() { - result.push(parent); - } else { - let (root, leaf) = pase.segment().unwrap(); - let mut seen_nodes = Set::new(); + match pavi { + PaVi::Parent(node, edge) => { + result.push(Parent::new(node, edge)); + } + PaVi::Virtual(nt, t, node) => { + let node_label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?; - let mut stack = if root == leaf { - // an empty segment means the root - is_empty_segment = true; + for atom_child in atom_child_iter.clone() { + for rule in atom.trace(nt, t, atom_child).into_iter().flatten() { + let virtual_frag = atom.generate_virtual_frags(nt, t, Some(rule)); - result.push(Parent::new(root, 0)); - vec![] - } else { - vec![root] - }; + if let Some(frag) = virtual_frag { + let mut frag = (*frag.get(0).unwrap()).clone(); - while let Some(top) = stack.pop() { - if seen_nodes.contains(&top) { - continue; - } + frag.set_pos(node_label.label().start())?; - seen_nodes.insert(top); + let frag_nodes_len = frag.nodes_len(); - for (index, child) in self.children_of(top)?.enumerate() { - if child == leaf { - result.push(Parent::new(top, index)); - } else { - stack.push(child); + assert!(frag_nodes_len > 1); + + let last_but_one_label = frag + .vertex_label(frag_nodes_len - 2)? + .ok_or(Error::NodeNoLabel(frag_nodes_len - 2))?; + + // NOTE: The function + // `plant_if_needed` assumes that we + // want to plant the fragment as the + // first child of the node. This + // assumption holds in this case, but + // not in general. + + self.plant_if_needed(node, frag)?; + + let rule_label_pos = self + .query_label(last_but_one_label) + .expect("the forest was wrongly planted"); + + result.push(Parent::new(rule_label_pos, 0)); + } } } } + PaVi::Empty => { + result.push(Parent::new(root, 0)); + } } result @@ -251,6 +482,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { for atom_child in atom_child_iter { // dbg!(label.label(), atom_child); + // Find reduction information. let reduction_info = atom .query_reduction(label.label(), atom_child) @@ -272,7 +504,8 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { .label(), GrammarLabelType::TNT(TNT::Non(nt)) if nt == *reduction_nt ) { - let sploned_node = self.splone(node.node(), Some(pos), node.edge())?; + let sploned_node = + self.splone(node.node(), Some(pos), node.edge(), false)?; node_label = self .vertex_label(sploned_node)? @@ -282,16 +515,15 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { let mut parent_iter = self.parents_of(sploned_node)?; #[cfg(debug_assertions)] - { - assert_eq!(parent_iter.len(), 1); - - assert!(self - .vertex_label(sploned_node)? - .ok_or(Error::NodeNoLabel(sploned_node))? - .is_packed()); - } + assert_eq!(parent_iter.len(), 1); node = parent_iter.next().unwrap(); + + #[cfg(debug_assertions)] + assert!(self + .vertex_label(node.node())? + .ok_or(Error::NodeNoLabel(node.node()))? + .is_packed()); } else { node = Parent::new(sploned_node, node.edge()); } @@ -337,19 +569,29 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { atom_child, parents, reduction_info, - atom.query_reduction(label.label(), atom_child).unwrap() + atom.query_reduction(label.label(), atom_child).unwrap(), + is_empty_segment, + atom.trace(0, 3, atom_child) + .into_iter() + .flatten() + .collect::<Vec<_>>(), ); self.print_viz("dbg forest.gv").unwrap(); #[cfg(test)] - genins_test::print_labels(atom, self.borrow()).unwrap(); + crate::item::default::print_labels(atom, self.borrow()).unwrap(); return Err(Error::CannotPlant); } - for parent in stack.into_iter() { - let sploned_node = self.splone(parent.node(), None, parent.edge())?; + if pos == 4 && matches!(true_source, PaVi::Virtual(_, _, _)) { + dbg!(&stack, reduction_info, true_source, pavi); + self.print_viz(&format!("pos4ib.gv")).unwrap(); + } + + for parent in stack { + let sploned_node = self.splone(parent.node(), None, parent.edge(), false)?; self.plant(sploned_node, fragment, non_empty)?; @@ -357,24 +599,22 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { } } - // If the iterator is empty, just plant at the specified - // child. + // If the iterator is empty, assert the fragment has length + // one, and do not plant anything. if !non_empty { - for parent in parents.into_iter() { - let nth_child = self.nth_child(parent.node(), parent.edge())?; - - self.plant(nth_child, fragment, non_empty)?; - - non_empty = true; - } + assert_eq!(fragment.nodes_len(), 1); } let result = if fragment.nodes_len() == 2 { - let root_label = fragment.vertex_label(0)?.unwrap(); - let leaf_label = fragment.vertex_label(1)?.unwrap(); + let root_label = fragment_root_label; + let leaf_label = fragment + .vertex_label(1 - fragment_root)? + .ok_or(Error::NodeNoLabel(1 - fragment_root))?; // it has been planted, so should be safe. - let node = self.query_label(root_label).unwrap(); + let node = self + .query_label(root_label) + .expect("root label was not found"); let edge = { let mut result = None; @@ -394,16 +634,52 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { } }; // dbg!(node, edge, root_label, leaf_label); - PaSe::Parent(node, edge) + PaVi::Parent(node, edge) } else { - let root_label = fragment.vertex_label(0)?.unwrap(); - let leaf_label = fragment.vertex_label(fragment.nodes_len() - 1)?.unwrap(); - // dbg!(root_label, leaf_label); - - PaSe::Segment( - self.query_label(root_label).unwrap(), - self.query_label(leaf_label).unwrap(), - ) + assert_eq!( + fragment.nodes_len(), + 1, + "a virtual fragment should consist of a single terminal node." + ); + + let root_label = fragment_root_label; + + let pavi_parent = pavi.parent().expect( + "When we insert a virtual fragment, the forest_source of + the label must be a parent.", + ); + + let nth_child = self.nth_child(pavi_parent.node(), pavi_parent.edge())?; + + let nth_child_label = self + .vertex_label(nth_child)? + .ok_or(Error::NodeNoLabel(nth_child))? + .label() + .label(); + + let error_str = "When we insert a virtual fragment, the \ + forest source of the label must point to \ + a non-terminal node"; + + let nt = match nth_child_label.tnt().expect(error_str) { + TNT::Non(nt) => nt, + _ => { + dbg!(nth_child, nth_child_label); + + panic!("{error_str}"); + } + }; + + let t = match root_label.label().label().tnt().unwrap() { + TNT::Ter(t) => t, + _ => { + dbg!(root_label); + + panic!("a virtual fragment should consist of a single terminal node") + } + }; + + PaVi::Virtual(nt, t, nth_child) }; Ok(result) @@ -413,40 +689,10 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { #[cfg(test)] mod genins_test { use super::*; - #[allow(unused_imports)] - use crate::{default::DefaultChain, item::default::leaf, Chain}; + use crate::item::default::leaf; use grammar::test_grammar_helper::*; - pub fn print_labels( - atom: impl Borrow<DefaultAtom>, - forest: impl Borrow<DefaultForest<ForestLabel<GrammarLabel>>>, - ) -> Result<(), Box<dyn std::error::Error>> { - let forest = forest.borrow(); - let atom = atom.borrow(); - - for node in forest.nodes() { - let label = forest.vertex_label(node)?; - - if let Some(label) = label { - let label = label.label.label(); - - match label { - GrammarLabelType::TNT(tnt) => { - println!("{tnt} = {}", atom.name_of_tnt(tnt)?); - } - GrammarLabelType::Rule(pos) => { - println!("pos {pos} = {}", atom.rule_pos_string(pos)?); - } - } - } else { - return Err(Error::NodeNoLabel(node).into()); - } - } - - Ok(()) - } - #[test] fn test_generate_fragment() -> Result<(), Box<dyn std::error::Error>> { let grammar = new_notes_grammar()?; @@ -497,7 +743,7 @@ mod genins_test { 0, )?; - print_labels(&atom, &virtual_fragment)?; + crate::item::default::print_labels(&atom, &virtual_fragment)?; assert_eq!(virtual_fragment, test_fragment); @@ -511,7 +757,7 @@ mod genins_test { let test_fragment = generate_fragment([TNT::Non(3).into(), 38.into(), TNT::Ter(2).into()], 1)?; - print_labels(&atom, &virtual_fragment)?; + crate::item::default::print_labels(&atom, &virtual_fragment)?; assert_eq!(virtual_fragment, test_fragment); diff --git a/chain/src/item/mod.rs b/chain/src/item/mod.rs index 284d640..39d04c7 100644 --- a/chain/src/item/mod.rs +++ b/chain/src/item/mod.rs @@ -9,43 +9,64 @@ use graph::{error::Error as GError, GraphLabel, LabelGraph, Parent, ParentsGraph use core::borrow::Borrow; -/// A parent or a segment. +/// A parent or a virtual segment. +/// +/// # Parent /// /// A parent is a node with an edge index, which represents a certain /// edge. /// -/// A segment represents every edge from the root node to the single -/// terminating node. -#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] -pub enum PaSe { +/// # Virtual Segment +/// +/// A virtual segment represents an expansion from a non-terminal by a +/// terminal. We do not directly add this segment when we encounter +/// this expansion at the start because this expansion might contain +/// multiple derivations some of which will not be used. +/// +/// If we add the expansion immediately when we encounter it, we have +/// to later discern and delete those unwanted derivations. This is +/// asking for trouble, in my experiences. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Default)] +pub enum PaVi { /// An edge from a node, as the n-th child. Parent(usize, usize), - /// A segment from a root to the single terminating node. - Segment(usize, usize), + /// A virtual segment from a non-terminal by a terminal, rooted at + /// a node. + /// + /// # Tuple elements + /// + /// The contained tuple is of the form (nt, t, node), which means + /// a virtually added node at the `node` representing the + /// expansion from the non-terminal `nt` by the terminal `t`. + Virtual(usize, usize, usize), + /// This is an empty segment that represents the root node. This + /// is a special case for the unit state of the chain-rule + /// machine. + #[default] + Empty, } -impl From<Parent> for PaSe { +impl From<Parent> for PaVi { fn from(value: Parent) -> Self { Self::Parent(value.node(), value.edge()) } } -impl Default for PaSe { - fn default() -> Self { - Self::Segment(0, 0) - } -} - -impl core::fmt::Display for PaSe { +impl core::fmt::Display for PaVi { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::Parent(node, edge) => write!(f, "the {edge}-th edge from {node}"), - Self::Segment(root, leaf) => write!(f, "a segment from {root} to {leaf}"), + Self::Virtual(nt, t, node) => write!( + f, + "a virtual node for non-terminal {nt} and terminal {t} at node {node}" + ), + Self::Empty => write!(f, "empty segment at root"), } } } -impl PaSe { +impl PaVi { + /// Get the Parent variant. fn parent(self) -> Option<Parent> { if let Self::Parent(node, edge) = self { Some(Parent::new(node, edge)) @@ -54,12 +75,29 @@ impl PaSe { } } - fn segment(self) -> Option<(usize, usize)> { - if let Self::Segment(root, leaf) = self { - Some((root, leaf)) - } else { - None - } + // /// Get the "Virtual" variant. + // /// + // /// # Name + // /// + // /// We cannot use the name "virtual" since it is a reserved + // /// keyword in Rust, so we use its French name. + // /// + // /// # Tuple elements + // /// + // /// The returned tuple is of the form (nt, t, node), which means a + // /// virtually added node at the `node` representing the expansion + // /// from the non-terminal `nt` by the terminal `t`. + // fn virtuel(self) -> Option<(usize, usize, usize)> { + // if let Self::Virtual(nt, t, node) = self { + // Some((nt, t, node)) + // } else { + // None + // } + // } + + /// Is this an empty segment? + fn is_empty(self) -> bool { + self == Self::Empty } } @@ -101,7 +139,6 @@ impl<T: GraphLabel> core::fmt::Display for ForestLabel<T> { } /// The type of erros for converting forest labels. -#[non_exhaustive] #[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] pub enum ForestLabelError { /// Try to pack a cloned node. |