summaryrefslogtreecommitdiff
path: root/chain/src/item/genins.rs
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2023-06-18 15:03:34 +0800
committerJSDurand <mmemmew@gmail.com>2023-06-18 15:03:34 +0800
commita80db17473ff09cc72acba2c1975101e6dbedf39 (patch)
tree4d5dfcdfcd1537b51b92349d9a5274aa90a6d110 /chain/src/item/genins.rs
parent6ce44bb3bdb79e8e727ee6fc7f5c6cd7fa0bb51e (diff)
fixed the bugs of node duplications and left-open nodes
There were two main issues in the previous version. One is that there are lots of duplications of nodes when manipulating the forest. This does not mean that labels repeat: by the use of the data type this cannot happen. What happened is that there were cloned nodes whose children are exactly equal. In this case there is no need to clone that node in the first place. This is now fixed by checking carefully before cloning, so that we do not clone unnecessary nodes. The other issue, which is perhaps more important, is that there are nodes which are not closed. This means that when there should be a reuction of grammar rules, the forest does not mark the corresponding node as already reduced. The incorrect forests thus caused is hard to fix: I tried several different approaches to fix it afterwards, but all to no avail. I also tried to record enough information to fix these nodes during the manipulations. It turned out that recording nodes is a dead end, as I cannot properly syncronize the information in the forest and the information in the chain-rule machine. Any inconsistencies will result in incorrect operations later on. The approach I finally adapt is to perform every possible reduction at each step. This might lead to some more nodes than what we need. But those are technically expected to be there after all, and it is easy to filter them out, so it is fine, from my point of view at the moment. Therefore, what remains is to filter those nodes out and connect it to the holy Emacs. :D
Diffstat (limited to 'chain/src/item/genins.rs')
-rw-r--r--chain/src/item/genins.rs288
1 files changed, 106 insertions, 182 deletions
diff --git a/chain/src/item/genins.rs b/chain/src/item/genins.rs
index a2bb22c..19f835b 100644
--- a/chain/src/item/genins.rs
+++ b/chain/src/item/genins.rs
@@ -9,16 +9,14 @@
use super::*;
use crate::{
atom::{Atom, DefaultAtom},
- default::{BoTop, Error, Reducer},
+ default::Error,
item::default::DefaultForest,
Edge,
};
use grammar::{Error as GrammarError, GrammarLabel, GrammarLabelType, TNT};
use graph::Graph;
-use core::borrow::Borrow;
-
-use std::collections::HashSet;
+use std::borrow::Borrow;
/// Convert an error telling us that an index is out of bounds.
///
@@ -179,8 +177,8 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
/// fragment under the result splone.
pub(crate) fn insert_item(
&mut self,
- reducer: &Reducer,
label: Edge,
+ ter: usize,
fragment: impl Borrow<DefaultForest<ForestLabel<GrammarLabel>>>,
atom_child_iter: impl Iterator<Item = usize> + ExactSizeIterator + Clone,
atom: &DefaultAtom,
@@ -200,7 +198,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
let fragment_root = if let Some(root) = fragment.root() {
root
} else {
- unreachable!("empty item");
+ panic!("empty item");
};
let fragment_root_label = fragment
@@ -209,7 +207,11 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
let pos = fragment_root_label.label().start();
- dbg!((pos, label));
+ // dbg!((pos, label));
+
+ // Whether or not to print detailed graphs of each step of
+ // operation for debugging purposes.
+ let to_print = false;
let tnt_string = {
let empty_p = atom_child_iter.len() == 0;
@@ -217,10 +219,10 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
match label.label().label() {
GrammarLabelType::TNT(TNT::Ter(t)) => {
- format!("t {t}")
+ format!("t {t}{}", if empty_p { " second" } else { "" })
}
GrammarLabelType::TNT(TNT::Non(n)) => {
- format!("n {n} {}", if empty_p { "second" } else { "first" })
+ format!("n {n}")
}
_ => "error".to_string(),
}
@@ -230,7 +232,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
let mut repetition = 0;
while std::fs::metadata(format!(
- "/Users/durand/Desktop/Centre/A propos de programmes/Rust/rep/chain/output/pos {pos} {tnt_string} {repetition}.gv"
+ "/Users/durand/Desktop/Centre/A propos de programmes/Rust/rep/chain/output/pos {pos} - {repetition}.gv"
))
.is_ok()
{
@@ -240,44 +242,32 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
repetition
};
- self.print_viz(&format!("pos {pos} {tnt_string} {num}.gv"))
- .unwrap();
-
- self.extra_reductions(
- BoTop::new(true_source, pavi),
- pos,
- reducer.borrow(),
- atom.borrow(),
- )?;
+ if to_print {
+ self.print_viz(&format!("pos {pos} - {num}.gv")).unwrap();
+ }
/// A cute little macro to produce compact representations
/// of Parents, Virtual nodes, or empty.
+ #[allow(unused_macros)]
macro_rules! pavi_to_short_str {
($pavi:ident) => {
match $pavi {
- PaVi::Parent(node, edge) => format!("{node} {edge}"),
- PaVi::Virtual(nt, t, node) => format!("{nt} {t} {node}"),
+ PaVi::Parent(node, edge, child) => format!("p{node} {edge} {child}"),
+ PaVi::Virtual(nt, t, node) => format!("v{nt} {t} {node}"),
PaVi::Empty => "ε".to_string(),
}
};
}
- self.print_viz(&format!(
- "pos {pos} {tnt_string} {num} stage 1 {}, {}.gv",
- pavi_to_short_str!(true_source),
- pavi_to_short_str!(pavi)
- ))
- .unwrap();
-
// Ensure the last node in the PaVi is a terminal or a
// non-terminal node, as an extra safety guard during
// development.
#[cfg(debug_assertions)]
{
match pavi {
- PaVi::Parent(node, edge) => {
+ PaVi::Parent(_node, _edge, child) => {
assert!(matches!(
- self.vertex_label(self.nth_child(node, edge)?),
+ self.vertex_label(child),
Ok(Some(label))
if label.label().label().tnt().is_some()));
}
@@ -312,11 +302,23 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
let is_empty_segment = pavi.is_empty();
+ if true_source.is_virtual() {
+ self.close_pavi(atom.borrow(), true_source, pos)?;
+
+ if to_print {
+ self.print_viz(&format!(
+ "pos {pos} - {num} {tnt_string} stage 0.1 {}.gv",
+ pavi_to_short_str!(true_source)
+ ))
+ .unwrap();
+ }
+ }
+
let mut parents: Vec<Parent> = {
let mut result = Vec::new();
match pavi {
- PaVi::Parent(node, edge) => {
+ PaVi::Parent(node, edge, _) => {
result.push(Parent::new(node, edge));
}
PaVi::Virtual(nt, t, node) => {
@@ -347,10 +349,12 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
self.plant_at_start(node, frag)?;
- self.print_viz(&format!(
- "pos {pos} {tnt_string} {num} stage 1.5 {node}.gv"
- ))
- .unwrap();
+ if to_print {
+ self.print_viz(&format!(
+ "pos {pos} - {num} {tnt_string} stage 0.2 {node}.gv"
+ ))
+ .unwrap();
+ }
let rule_label_pos = self
.query_label(last_but_one_label)
@@ -369,6 +373,24 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
result
};
+ if let PaVi::Parent(node, edge, _) = pavi {
+ let nth_child = self.nth_child(node, edge)?;
+
+ let reduced = self.reduction(nth_child, pos, ter, atom.borrow())?;
+
+ if reduced != nth_child && !self.is_empty_node(reduced)? {
+ parents.clear();
+ parents.extend(self.parents_of(reduced)?);
+ }
+
+ if to_print {
+ self.print_viz(&format!(
+ "pos {pos} - {num} {tnt_string} stage 0.3 {nth_child}.gv"
+ ))
+ .unwrap();
+ }
+ }
+
for parent in parents.iter() {
if !self.has_node(parent.node()) {
return Err(Error::IndexOutOfBounds(parent.node(), self.nodes_len()));
@@ -423,12 +445,14 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
let sploned_node =
self.splone(node.node(), Some(pos), node.edge(), false)?;
- self.print_viz(&format!(
- "pos {pos} {tnt_string} {num} stage 2 {} {}.gv",
- node.node(),
- node.edge(),
- ))
- .unwrap();
+ if to_print {
+ self.print_viz(&format!(
+ "pos {pos} - {num} {tnt_string} stage 1 {} {}.gv",
+ node.node(),
+ node.edge(),
+ ))
+ .unwrap();
+ }
node_label = self
.vertex_label(sploned_node)?
@@ -513,12 +537,16 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
for parent in stack {
let splanted = self.splant(parent.node(), parent.edge(), fragment, non_empty)?;
- self.print_viz(&format!(
- "pos {pos} {tnt_string} {num} stage 3 {} {} {splanted}.gv",
- parent.node(),
- parent.edge(),
- ))
- .unwrap();
+ let _splanted_child = self.nth_child(splanted, self.degree(splanted)? - 1)?;
+
+ if to_print {
+ self.print_viz(&format!(
+ "pos {pos} - {num} {tnt_string} stage 2 {} {} {splanted}.gv",
+ parent.node(),
+ parent.edge(),
+ ))
+ .unwrap();
+ }
non_empty = true;
}
@@ -541,25 +569,28 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
.query_label(root_label)
.expect("root label was not found");
- let edge = {
- let mut result = None;
+ let edge: usize;
+ let child: usize;
- for (index, child) in self.children_of(node)?.enumerate() {
- if matches!(self.vertex_label(child)?, Some(child_label) if child_label == leaf_label)
- {
- result = Some(index);
- break;
- }
- }
+ let mut result = None;
- if let Some(result) = result {
- result
- } else {
- unreachable!("the forest is wrongly planted");
+ for (index, child) in self.children_of(node)?.enumerate() {
+ if matches!(self.vertex_label(child)?, Some(child_label) if child_label == leaf_label)
+ {
+ result = Some((index, child));
+ break;
}
- };
+ }
+
+ if let Some((index, edge_child)) = result {
+ edge = index;
+ child = edge_child;
+ } else {
+ unreachable!("the forest is wrongly planted");
+ }
+
// dbg!(node, edge, root_label, leaf_label);
- PaVi::Parent(node, edge)
+ PaVi::Parent(node, edge, child)
} else {
assert_eq!(
fragment.nodes_len(),
@@ -612,131 +643,19 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
Ok(result)
}
- // REVIEW: Is this function really correct?
-
- /// Perform extra reductions.
- ///
- /// To be precise, this function first splones the bottom node,
- /// and then queries the reducer to find the next nodes to splone,
- /// and repeats this process until either we find the top node, or
- /// the reducer says to stop.
- ///
- /// One nuance to note is that after we splone a node, if we split
- /// that node, then the splitted node will have a cloned parent,
- /// see
- /// [`split_node`][DefaultForest::<ForestLabel<GrammarLabel>>::split_node]
- /// for details. So for each parent pointed out by the reducer,
- /// we need to find its clone that has the splitted node as a
- /// child.
- fn extra_reductions(
- &mut self,
- botop: BoTop,
- pos: usize,
- reducer: &Reducer,
- atom: &DefaultAtom,
- ) -> Result<Vec<usize>, Error> {
- if botop.bottom() == botop.top() {
- return Ok(vec![self.node_from_pavi(botop.top())?]);
- }
-
- let top = botop.top();
- let bottom = botop.bottom();
-
- let bottom_closed = self.close_pavi(atom.borrow(), bottom, pos)?;
-
- let top_node = self.node_from_pavi(top)?;
- let bottom_node = self.node_from_pavi(bottom)?;
-
- let mut stack = vec![bottom_node];
-
- // Exclude duplicate nodes to ensure that no infinite
- // recursion occurs. In the future I shall experiment if this
- // is necessary, and get rid of this if possible.
- let mut seen_nodes: HashSet<usize> = Default::default();
-
- let mut result = Vec::new();
-
- while let Some(bottom) = stack.pop() {
- if seen_nodes.contains(&bottom) {
- continue;
- }
-
- seen_nodes.insert(bottom);
-
- if let Some(set) = reducer.get(&(top_node, bottom)) {
- // First splone the bottom, except for the bottom one.
-
- let mut sploned = if bottom == bottom_node {
- bottom_closed
- } else {
- let degree = self.degree(bottom)?;
- let last_index = core::cmp::max(degree, 1) - 1;
-
- self.splone(bottom, Some(pos), last_index, false)?
- };
-
- // Then add parents whose labels are what we want into
- // the stack.
-
- let mut label_set: HashSet<GrammarLabel> = HashSet::with_capacity(set.len());
-
- for node in set.iter().copied() {
- label_set.insert(
- self.vertex_label(node)?
- .ok_or(Error::NodeNoLabel(node))?
- .label(),
- );
- }
-
- let sploned_label = self
- .vertex_label(sploned)?
- .ok_or(Error::NodeNoLabel(sploned))?;
-
- if sploned_label.clone_index().is_some() {
- let mut parents = self.parents_of(sploned)?;
-
- #[cfg(debug_assertions)]
- if parents.len() != 1 {
- panic!("assumption fails");
- }
-
- sploned = parents.next().unwrap().node();
- }
-
- for rule_parent in self.parents_of(sploned)? {
- if self.degree(rule_parent.node())? != 1 {
- panic!("assumption fails");
- }
-
- for parent in self.parents_of(rule_parent.node())? {
- let parent_node = parent.node();
-
- let parent_label = self
- .vertex_label(parent_node)?
- .ok_or(Error::NodeNoLabel(parent_node))?
- .label();
-
- if label_set.contains(&parent_label) {
- stack.push(parent_node);
- }
- }
- }
- } else {
- result.push(bottom);
- }
- }
-
- Ok(result)
- }
-
/// Set the end position of the node associated with `pavi` to be `pos`.
///
/// The parameter `atom` is used to query the reduction fragment
/// if `pavi` is a virtual node.
- fn close_pavi(&mut self, atom: &DefaultAtom, pavi: PaVi, pos: usize) -> Result<usize, Error> {
+ pub(crate) fn close_pavi(
+ &mut self,
+ atom: &DefaultAtom,
+ pavi: PaVi,
+ pos: usize,
+ ) -> Result<usize, Error> {
match pavi {
- PaVi::Parent(node, edge) => {
- let nth_child = self.nth_child(node, edge)?;
+ PaVi::Parent(_node, _edge, child) => {
+ let nth_child = child;
let nth_child_label = self
.vertex_label(nth_child)?
.ok_or(Error::NodeNoLabel(nth_child))?;
@@ -781,6 +700,11 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
let reduction_fragment = atom.generate_virtual_frags(nt, t, None);
+ // NOTE: the case of the root is exceptional
+ if reduction_fragment.is_none() && self.root() != Some(node) {
+ return Err(Error::CannotClose(nt, t, node, node_label_start));
+ }
+
for frag in reduction_fragment.into_iter().flatten() {
let mut frag = frag.clone();
frag.set_pos(node_label_start, true)?;