diff options
author | JSDurand <mmemmew@gmail.com> | 2023-06-18 15:03:34 +0800 |
---|---|---|
committer | JSDurand <mmemmew@gmail.com> | 2023-06-18 15:03:34 +0800 |
commit | a80db17473ff09cc72acba2c1975101e6dbedf39 (patch) | |
tree | 4d5dfcdfcd1537b51b92349d9a5274aa90a6d110 /chain/src/item/default/mod.rs | |
parent | 6ce44bb3bdb79e8e727ee6fc7f5c6cd7fa0bb51e (diff) |
fixed the bugs of node duplications and left-open nodes
There were two main issues in the previous version.
One is that there are lots of duplications of nodes when manipulating
the forest. This does not mean that labels repeat: by the use of the
data type this cannot happen. What happened is that there were cloned
nodes whose children are exactly equal. In this case there is no need
to clone that node in the first place. This is now fixed by checking
carefully before cloning, so that we do not clone unnecessary nodes.
The other issue, which is perhaps more important, is that there are
nodes which are not closed. This means that when there should be a
reuction of grammar rules, the forest does not mark the corresponding
node as already reduced. The incorrect forests thus caused is hard to
fix: I tried several different approaches to fix it afterwards, but
all to no avail. I also tried to record enough information to fix
these nodes during the manipulations. It turned out that recording
nodes is a dead end, as I cannot properly syncronize the information
in the forest and the information in the chain-rule machine. Any
inconsistencies will result in incorrect operations later on.
The approach I finally adapt is to perform every possible reduction at
each step. This might lead to some more nodes than what we need. But
those are technically expected to be there after all, and it is easy
to filter them out, so it is fine, from my point of view at the
moment.
Therefore, what remains is to filter those nodes out and connect it to
the holy Emacs. :D
Diffstat (limited to 'chain/src/item/default/mod.rs')
-rw-r--r-- | chain/src/item/default/mod.rs | 257 |
1 files changed, 35 insertions, 222 deletions
diff --git a/chain/src/item/default/mod.rs b/chain/src/item/default/mod.rs index 6d28956..47e8c90 100644 --- a/chain/src/item/default/mod.rs +++ b/chain/src/item/default/mod.rs @@ -250,7 +250,12 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> { frag_stack.pop(); self_stack.pop(); - if fragment.vertex_label(frag_top)? != self.vertex_label(self_top)? { + // NOTE: The labels might defer by the status. We test + // for the equalities of inner labels only. + + if fragment.vertex_label(frag_top)?.map(|label| label.label()) + != self.vertex_label(self_top)?.map(|label| label.label()) + { // not a prefix // dbg!( // frag_top, @@ -589,11 +594,15 @@ impl<T: GraphLabel> DefaultForest<ForestLabel<T>> { return Ok(node); } + fragment.set_root(1)?; + let node_label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?; if node_label.is_packed() || node_label.clone_index().is_some() { let mut planted = false; + let mut to_plant: Option<usize> = None; + let mut node = node; if node_label.clone_index().is_some() { @@ -607,17 +616,25 @@ impl<T: GraphLabel> DefaultForest<ForestLabel<T>> { for clone in self.children_of(node)? { node = clone; - if self.is_prefix(clone, &fragment)? { - planted = true; + if !self.is_empty_node(clone)? { + let first_child = self.nth_child(clone, 0)?; + + if self.is_prefix(first_child, &fragment)? { + planted = true; - break; + break; + } + } else if to_plant.is_none() { + to_plant = Some(clone); } } if !planted { - let clone = self.clone_node(node, 0, false)?; - - fragment.set_root(1)?; + let clone = if let Some(cloned_node) = to_plant { + cloned_node + } else { + self.clone_node(node, 0, false)? + }; self.plant(clone, fragment, false)?; @@ -626,51 +643,29 @@ impl<T: GraphLabel> DefaultForest<ForestLabel<T>> { } else { let clone_threshold = if self.root == Some(node) { 1 } else { 0 }; - let planted = self.is_prefix(node, &fragment)?; + let satisfy_threshold = self.degree(node)? > clone_threshold; - fragment.set_root(1)?; + if !satisfy_threshold { + self.plant(node, fragment, false)?; - if !planted && self.degree(node)? > clone_threshold { + return Ok(node); + } + + let first_child = self.nth_child(node, clone_threshold)?; + + let planted = self.is_prefix(first_child, &fragment)?; + + if !planted { let clone = self.clone_node(node, 0, false)?; self.plant(clone, fragment, false)?; return Ok(clone); - } else if !planted { - self.plant(node, fragment, false)?; } } Ok(node) } - - /// Extract the node from `pavi`. - /// - /// If pavi is a parent, this returns the child pointed to by the - /// represented edge. - /// - /// If pavi is a virtual node, this simply returns the virtual - /// node. - /// - /// If pavi is empty, this returns the root, unless the forest is - /// empty. - /// - /// # Guarantee - /// - /// The returned node is guaranteed to be a valid node. - pub(crate) fn node_from_pavi(&self, pavi: PaVi) -> Result<usize, Error> { - match pavi { - PaVi::Parent(node, edge) => Ok(self.nth_child(node, edge)?), - PaVi::Virtual(_, _, node) => { - if node >= self.nodes_len() { - return Err(Error::IndexOutOfBounds(node, self.nodes_len())); - } - - Ok(node) - } - PaVi::Empty => Ok(self.root().ok_or(Error::IndexOutOfBounds(0, 0))?), - } - } } pub mod splone; @@ -850,188 +845,6 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { Ok(None) } - // /// Check if every child already has an end. - // fn every_child_is_completed(&self, node_id: usize, atom: &DefaultAtom) -> Result<bool, Error> { - // let children = self.children_of(node_id)?; - - // if children.len() == 0 { - // return Ok(true); - // } - - // let mut pos = self - // .vertex_label(node_id)? - // .ok_or(Error::NodeNoLabel(node_id))? - // .label() - // .start(); - - // let mut last_child_label = None; - - // for child in children { - // let child_label = self - // .vertex_label(child)? - // .ok_or(Error::NodeNoLabel(child))? - // .label(); - - // last_child_label = Some(child_label); - - // if child_label.start() != pos || child_label.end().is_none() { - // return Ok(false); - // } - - // pos = child_label.end().unwrap(); - // } - - // if let Some(label) = last_child_label { - // if let Some(rule) = label.label().rule() { - // if !atom - // .is_accepting(2 * rule + 1) - // .map_err(|_| Error::IndexOutOfBounds(2 * rule + 1, atom.nodes_len()))? - // { - // return Ok(false); - // } - // } - // } - - // Ok(true) - // } - - // /// Complete the forest by trying to set the ending position of - // /// every node that does not have an end, by the ending position - // /// of its last child. - // pub fn complete(&mut self, atom: &DefaultAtom) -> Result<(), Error> { - // let mut stack: Vec<_> = self - // .nodes() - // .filter(|node| { - // let label = self.vertex_label(*node).unwrap().unwrap().label(); - - // label.label().rule().is_some() && label.end().is_some() - // }) - // .collect(); - - // let mut second_stack: Vec<usize> = Vec::new(); - - // let mut pending_candidates: Vec<usize> = Vec::new(); - - // let nodes_len = self.nodes_len(); - - // let mut seen_nodes: HashSet<usize> = HashSet::with_capacity(nodes_len); - - // while !stack.is_empty() { - // while let Some(mut top) = stack.pop() { - // if seen_nodes.contains(&top) { - // continue; - // } - - // seen_nodes.insert(top); - - // let top_label = self.vertex_label(top)?.unwrap(); - - // if top_label.clone_index().is_some() { - // let mut top_parents = self.parents_of(top)?; - - // if top_parents.len() != 1 { - // return Err(Error::InvalidClone(top)); - // } - - // top = top_parents.next().unwrap().node(); - // } - - // let rule_parents = self.parents_of(top)?; - - // let candidates = { - // let mut result = Vec::with_capacity(rule_parents.len()); - - // for parent in rule_parents { - // // for parent in self.parents_of(parent.node())? { - // // if self.degree(parent.node())? <= parent.edge() + 1 { - // result.push(parent); - // // } - // // } - // } - - // result - // }; - - // for candidate in candidates { - // if matches!(self.vertex_label(candidate.node())?, Some(label) if label.label().end().is_none()) - // { - // if self.every_child_is_completed(candidate.node(), atom)? { - // let last_child = self - // .nth_child(candidate.node(), self.degree(candidate.node())? - 1)?; - // let end = self - // .vertex_label(last_child)? - // .ok_or(Error::NodeNoLabel(last_child))? - // .label() - // .end(); - - // let sploned_node = self.splone( - // candidate.node(), - // end, - // self.degree(candidate.node())? - 1, - // true, - // )?; - - // second_stack.push(sploned_node); - // } else { - // pending_candidates.push(candidate.node()); - // } - // } else { - // second_stack.push(candidate.node()); - // } - // } - - // let mut new_pending = Vec::with_capacity(pending_candidates.len()); - - // while let Some(node) = pending_candidates.pop() { - // if self.every_child_is_completed(node, atom)? { - // let last_edge = self.degree(node)? - 1; - // let last_child = self.nth_child(node, last_edge)?; - // let end = self - // .vertex_label(last_child)? - // .ok_or(Error::NodeNoLabel(last_child))? - // .label() - // .end(); - - // let sploned_node = self.splone(node, end, last_edge, true)?; - - // second_stack.push(sploned_node); - // } else { - // new_pending.push(node); - // } - // } - - // std::mem::swap(&mut pending_candidates, &mut new_pending); - // } - - // std::mem::swap(&mut stack, &mut second_stack); - // } - - // Ok(()) - // } - - // /// Unconditionally set the label of the node to be the new label. - // /// - // /// # Warning - // /// - // /// Use with caution: it does not do anything special, so it is - // /// the responsibility of the caller to ensure this operation is - // /// legal. - // #[allow(dead_code)] - // pub(crate) fn set_label(&mut self, node: usize, label: GrammarLabel) -> Result<(), Error> { - // let status = self - // .vertex_label(node)? - // .ok_or(Error::NodeNoLabel(node))? - // .status; - - // let label = ForestLabel::new(label, status); - - // let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); - - // builder.set_label(node, label)?; - - // Ok(()) - // } - /// Set the position of every node. /// /// If ALLP is non-nil or if the node is a terminal node, also set |