diff options
author | JSDurand <mmemmew@gmail.com> | 2023-07-08 12:30:21 +0800 |
---|---|---|
committer | JSDurand <mmemmew@gmail.com> | 2023-07-08 12:31:13 +0800 |
commit | 9a317e56f8a6126583f7d0c431bf878d9b1fe7b1 (patch) | |
tree | 7bb6004196b38446a5ab0cb3a0ab642d35f113e9 /chain/src | |
parent | 691f969eb104fa3d4c2a1667693fd0382eb9d6b5 (diff) |
Finished the Emacs binding.
Now the binding part is finished.
What remains is a bug encountered when planting a fragment to the
forest which intersects a packed node, which would lead to invalid
forests. This will also cause problem when planting a packed
fragment, but until now my testing grammars do not produce packed
fragments, so this problem is not encountered yet.
I am still figuring out efficient ways to solve this problem.
Diffstat (limited to 'chain/src')
-rw-r--r-- | chain/src/archive.txt | 47 | ||||
-rw-r--r-- | chain/src/default.rs | 247 | ||||
-rw-r--r-- | chain/src/item/default/extract.rs | 509 | ||||
-rw-r--r-- | chain/src/item/default/mod.rs | 62 | ||||
-rw-r--r-- | chain/src/item/default/splone.rs | 10 | ||||
-rw-r--r-- | chain/src/item/genins.rs | 18 | ||||
-rw-r--r-- | chain/src/item/mod.rs | 15 | ||||
-rw-r--r-- | chain/src/item/reduction.rs | 85 | ||||
-rw-r--r-- | chain/src/lib.rs | 29 |
9 files changed, 812 insertions, 210 deletions
diff --git a/chain/src/archive.txt b/chain/src/archive.txt index 030ccba..7bd6f31 100644 --- a/chain/src/archive.txt +++ b/chain/src/archive.txt @@ -493,3 +493,50 @@ // PaVi::Empty => Ok(self.root().ok_or(Error::IndexOutOfBounds(0, 0))?), // } // } + +// /// Find the last child of the given node whose start and end +// /// positions contain the given position. If no such child is +// /// found, return `Ok(None)`. +// /// +// /// The returned tuple is of the form (child, index), where +// /// `child` is the index of the child node in question, and +// /// `index` means that the child is the `index`-th child of the +// /// node. +// #[allow(dead_code)] +// pub(crate) fn position_search( +// &self, +// node: usize, +// pos: usize, +// ) -> Result<Option<(usize, usize)>, Error> { +// fn range_contains(label: GrammarLabel, pos: usize) -> bool { +// let start = label.start(); +// +// if let Some(end) = label.end() { +// (start..end).contains(&pos) +// } else { +// (start..).contains(&pos) +// } +// } +// +// let node_label = self +// .vertex_label(node)? +// .ok_or(Error::NodeNoLabel(node))? +// .label(); +// +// if !range_contains(node_label, pos) { +// return Ok(None); +// } +// +// for (index, child) in self.children_of(node)?.enumerate().rev() { +// let child_label = self +// .vertex_label(child)? +// .ok_or(Error::NodeNoLabel(child))? +// .label(); +// +// if range_contains(child_label, pos) { +// return Ok(Some((child, index))); +// } +// } +// +// Ok(None) +// } diff --git a/chain/src/default.rs b/chain/src/default.rs index ed63ff4..7de720f 100644 --- a/chain/src/default.rs +++ b/chain/src/default.rs @@ -11,15 +11,19 @@ use crate::item::{ default::DefaultForest, generate_fragment, genins::index_out_of_bounds_conversion, Forest, ForestLabel, ForestLabelError, }; -use core::fmt::Display; use grammar::{GrammarLabel, GrammarLabelType, START_NONTERMINAL, TNT}; +#[allow(unused_imports)] use graph::{ labelled::DLGBuilder, Builder, DLGraph, Graph, LabelExtGraph, LabelGraph, ParentsGraph, }; +use std::fmt::Display; use graph_macro::Graph; -use std::collections::{HashMap as Map, HashSet, TryReserveError}; +use std::{ + borrow::Borrow, + collections::{HashMap as Map, HashSet, TryReserveError}, +}; /// The errors related to taking derivatives by chain rule. #[non_exhaustive] @@ -399,6 +403,10 @@ impl Chain for DefaultChain { }) } + fn atom(&self) -> &Self::Atom { + self.atom.borrow() + } + fn epsilon(&self) -> Result<bool, Self::Error> { self.accepting_vec .get(self.current) @@ -783,146 +791,119 @@ impl Chain for DefaultChain { Ok(der_iter) } - // FIXME: Do nothing more than to make this at the end of input, - // actually. - // - // This means we only close the expansions at the last accepting - // sources. - // - // As to the remaining still open fragments, they should be - // considered as bugs and should be fixed in the function - // `insert_item` instead. - - fn end_of_input(&mut self) -> Result<(), Self::Error> { - if !matches!(self.epsilon(), Ok(true)) { - return Ok(()); - } - - self.forest.remove_node(1)?; - - let mut stack = Vec::new(); - - dbg!(&self.accepting_sources); + type Item = DefaultForest<ForestLabel<GrammarLabel>>; - // self.forest.print_viz("dbg forest before.gv").unwrap(); + fn end_of_input(&mut self, pos: usize, ter: usize) -> Result<Self::Item, Error> { + let mut result = Default::default(); - for pavi in self.accepting_sources.iter() { - match pavi { - PaVi::Parent(node, _edge, child) => { - // REVIEW: This is a garbage node that has to be - // added when the machine starts. Is there a way - // to avoid this garbage? - if *node == 1 { - continue; - } + let root = if let Some(root) = self.forest.root() { + root + } else { + return Ok(result); + }; - stack.push(*child); - } - PaVi::Virtual(nt, t, node) => { - let node_label = self - .forest - .vertex_label(*node)? - .ok_or(Error::NodeNoLabel(*node))?; + // The 1-th node is a dummy node that should be removed. + assert_ne!(root, 1); - if node_label.label().end().is_none() { - let reduction_fragment = self.atom.generate_virtual_frags(*nt, *t, None); + self.forest.remove_node(1)?; - for frag in reduction_fragment.into_iter().flatten() { - let mut frag = frag.clone(); - frag.set_pos(node_label.label().start(), true)?; + let root_degree = self.forest.degree(root)?; - stack.push(self.forest.plant_at_start(*node, frag)?); - } - } - } - PaVi::Empty => {} - } - } + let root_label = self + .forest + .vertex_label(root)? + .ok_or(Error::NodeNoLabel(root))?; - let forest_nodes_len = self.forest.nodes_len(); - - let mut node_ends: Vec<Vec<usize>> = std::iter::repeat_with(Default::default) - .take(forest_nodes_len) - .collect(); - - // NOTE: Use iterator to avoid checking bounds when accessing - // `node_ends`. - for (node, node_end) in self.forest.nodes().zip(node_ends.iter_mut()) { - if let Some(end) = self - .forest - .vertex_label(node)? - .ok_or(Error::NodeNoLabel(node))? - .label() - .end() - { - node_end.push(end); - } - } + dbg!(root_degree, root_label); - #[derive(Copy, Clone)] - enum StackElement { - Seen(usize), - Unseen(usize), - } + // REVIEW: Why nodes? + let mut nodes = Vec::with_capacity(root_degree); - use StackElement::{Seen, Unseen}; + if root_label.is_packed() { + let mut all_completed_clones = Vec::with_capacity(root_degree); - // maybe we do not need so much space? - let mut stack = Vec::with_capacity(forest_nodes_len); + for clone in self.forest.children_of(root)? { + let mut all_completed = true; - stack.push(Unseen( - self.forest.root().ok_or(Error::IndexOutOfBounds(0, 0))?, - )); + // The last child does not count. + let clone_child_degree_minus_one = std::cmp::max(self.forest.degree(clone)?, 1) - 1; - let mut seen_nodes = HashSet::with_capacity(forest_nodes_len); + // The loop to determine whether or not it is all + // completed, except for the last child. + 'completion_det_loop: for clone_child in self + .forest + .children_of(clone)? + .take(clone_child_degree_minus_one) + { + let clone_child_label = self + .forest + .vertex_label(clone_child)? + .ok_or(Error::NodeNoLabel(clone_child))?; - // depth-first search - while let Some(top) = stack.pop() { - match top { - Seen(top) => { - // Every of its children should be processed - // already. + if clone_child_label.label().end().is_none() { + all_completed = false; - if !node_ends.get(top).unwrap().is_empty() { - continue; + break 'completion_det_loop; } - - let last_index = self.forest.degree(top).unwrap() - 1; - let last_child = self.forest.nth_child(top, last_index)?; - - *node_ends.get_mut(top).unwrap() = node_ends - .get(last_child) - .ok_or(Error::IndexOutOfBounds(last_child, forest_nodes_len))? - .clone(); } - Unseen(top) => { - if seen_nodes.contains(&top) { - continue; - } - seen_nodes.insert(top); - // NOTE: After the above check we can safely unwrap. + if all_completed { + all_completed_clones.push(clone); + } + } - let mut top_no_children = true; + if all_completed_clones.is_empty() { + // Nothing to do + return Ok(result); + } - for child in self.forest.children_of(top).unwrap() { - if top_no_children { - stack.push(Seen(top)); - } + for clone in all_completed_clones { + nodes.push( + self.forest + .reduction(clone, pos, ter, self.atom.borrow(), true)?, + ); + } + } else if root_label.clone_index().is_some() { + panic!( + "If the root of the forest is cloned, \ + the root should be changed to the packed node." + ); + } else { + // The last child does not count. + let root_degree_minus_one = std::cmp::max(root_degree, 1) - 1; - top_no_children = false; + dbg!(root_degree_minus_one); - stack.push(Unseen(child)); - } + // The loop to determine whether or not it is all + // completed, except for the last child. + for root_child in self.forest.children_of(root)?.take(root_degree_minus_one) { + let root_child_label = self + .forest + .vertex_label(root_child)? + .ok_or(Error::NodeNoLabel(root_child))?; - if !top_no_children { - continue; - } + if root_child_label.label().end().is_none() { + return Ok(result); } } + + nodes.push( + self.forest + .reduction(root, pos, ter, self.atom.borrow(), true)?, + ); } - Ok(()) + dbg!(&nodes); + + // self.forest + // .print_viz("forest before extraction.gv") + // .unwrap(); + + result = self.forest.extract(pos)?; + + // result.print_viz("extracted forest.gv").unwrap(); + + Ok(result) } } @@ -1051,7 +1032,7 @@ mod test_chain { chain.chain(0, 12, no_item)?; if !no_item { - chain.end_of_input()?; + let _output = chain.end_of_input(13, 0)?; chain.forest.print_viz("forest.gv")?; chain.graph.print_viz("chain.gv")?; @@ -1079,6 +1060,36 @@ mod test_chain { } #[test] + fn test_assumption() -> Result<(), Box<dyn std::error::Error>> { + use grammar::Grammar; + + dbg!(); + let grammar_str = std::fs::read_to_string( + "/Users/durand/Desktop/Centre/A propos de programmes/Rust/rep/grammar/abnf grammars/test.abnf", + ) + .unwrap(); + dbg!(); + let grammar: Grammar = grammar_str.parse()?; + + let atom = DefaultAtom::from_grammar(grammar)?; + let mut chain = DefaultChain::unit(atom)?; + + let no_item = false; + + let input: &[usize] = &[3, 0, 2, 2, 2, 1, 1, 1, 0, 1]; + + for (pos, t) in input.iter().copied().enumerate().take(2) { + chain.chain(t, pos, no_item)?; + chain.forest().print_viz(&format!("forest {pos}.gv"))?; + dbg!(pos, t); + } + + item::default::print_labels(&chain.atom, &chain.forest)?; + + Ok(()) + } + + #[test] fn test_ambiguity() -> Result<(), Box<dyn std::error::Error>> { if std::fs::metadata("debug.log").is_ok() { std::fs::remove_file("debug.log")?; @@ -1125,7 +1136,7 @@ mod test_chain { chain.chain(1, 4, no_item)?; if !no_item { - // chain.end_of_input()?; + let _output = chain.end_of_input(5, 1)?; chain.forest.print_viz("forest.gv")?; // chain.forest.print_closed_viz("closed.gv")?; } diff --git a/chain/src/item/default/extract.rs b/chain/src/item/default/extract.rs new file mode 100644 index 0000000..96f5119 --- /dev/null +++ b/chain/src/item/default/extract.rs @@ -0,0 +1,509 @@ +//! This module defines a function for extracting the "completed part" +//! of a forest. +//! +//! # Completed sub-forest +//! +//! The completed part of a forest refers to the sub-forest of the +//! forest that consists entirely of nodes whose ending positions are +//! set, and whose subtrees also consist of such nodes. +//! +//! # Definition +//! +//! To be more rigorous, we call a node *n* of a forest *F* complete +//! if its ending position is set. We call a sub-forest *S* of *F* +//! completed if the following two conditions are satisfied: +//! +//! 1. Every node in *S* is complete. +//! 2. For every node *n* in *S*, the subtree of *F* rooted at *n* +//! consists entirely of complete nodes. +//! +//! # Uniqueness of the maximal completed sub-forest +//! +//! **Lemma** +//! +//! For any forest *F*, there is only one maximal completed sub-forest +//! *S* of *F*. Here the maximality means that if *T* is a completed +//! sub-forest of *F* which contains *S*, then *S* is equal to *T*. +//! +//! Then by **the completed part** of a forest *F* we refer to the +//! unuique maximal completed sub-forest of *F*. +//! +//! **Proof** +//! +//! Note that if *S_1* and *S_2* are two completed sub-forests of *F*, +//! and if *S_1* is not contained in *S_2*, say *n* is a node in *S_1* +//! but not in *S_2*, then by adjoining the subtree of *S_2* rooted at +//! *n* to *S_1* we obtain a completed sub-forest *S_3* which contains +//! *S_1*, contradicting the maximality of *S_1*. Thus there can only +//! be one maximal completed sub-forest of a forest. +//! +//! # Connected component +//! +//! The extraction function actually returns the connected component +//! of the completed part of a forest which contains its root, as that +//! is what we care about the most. + +use super::*; + +impl DefaultForest<ForestLabel<GrammarLabel>> { + pub(crate) fn extract(&self, pos: usize) -> Result<Self, Error> { + // Preparations + + let nodes_len = self.nodes_len(); + + let mut graph = PLGraph::default(); + let mut builder = PLGBuilderMut::from_graph_mut(&mut graph); + let root = Some(0); + + // Now find the connected component of the completed part of + // `self` and clone that to graph by means of `builder`. + + // REVIEW: Maybe a small_vec can help here. + + // A fixed-size hash table, sort of. + let mut validity_array: Vec<bool> = std::iter::repeat(true).take(nodes_len).collect(); + + // Calculate the exact length to avoid too many allocations. + let mut stack_len = 0usize; + + // Use an iterator to avoid checking bounds in accessing + // elements of the array. + for (node, validity_ptr) in self.nodes().zip(validity_array.iter_mut()) { + if self + .vertex_label(node)? + .ok_or(Error::NodeNoLabel(node))? + .label() + .end() + .is_none() + { + *validity_ptr = false; + + stack_len += 1; + } + } + + dbg!(&validity_array); + + // A stack for propagating the falsehood to parents and + // children of incomplete nodes, like a plague. The only + // nodes that can stop the spread of this plague are packed + // nodes with a child that is not infected (yet) by the + // plague. + + let mut stack: Vec<usize> = Vec::with_capacity(stack_len); + + for (n, validity) in validity_array.iter().enumerate() { + if !*validity { + stack.push(n); + } + } + + while let Some(top) = stack.pop() { + 'parent_loop: for parent in self.parents_of(top)?.map(|p| p.node()) { + if !*validity_array + .get(parent) + .ok_or(Error::IndexOutOfBounds(parent, nodes_len))? + { + // already infected by the plague + + continue 'parent_loop; + } + + let should_spread_p = if self + .vertex_label(parent)? + .ok_or(Error::NodeNoLabel(parent))? + .is_packed() + { + !self + .children_of(parent)? + .any(|node| validity_array.get(node).copied() == Some(true)) + } else { + true + }; + + if should_spread_p { + *validity_array + .get_mut(parent) + .ok_or(Error::IndexOutOfBounds(parent, nodes_len))? = false; + stack.push(parent); + } + } + } + + let validity_array = validity_array; + + /// A little macro to produce a vector of valid children. + macro_rules! valid_children { + ($node:ident) => { + self.children_of($node)? + .filter(|child| validity_array.get(*child).copied() == Some(true)) + .collect::<Vec<usize>>() + }; + } + + dbg!(&validity_array); + + if validity_array.iter().all(|validity| !*validity) { + // every element is false + + let root = None; + + return Ok(Self { graph, root }); + } + + // Finally clone the connected component to the new graph. + + let root_label = GrammarLabel::new_closed(TNT::Non(0), 0, pos); + + let packed_label = ForestLabel::new(root_label, ForestLabelType::Packed); + + let plain_label = ForestLabel::new(root_label, ForestLabelType::Plain); + + let original_root_label; + + let original_root = if let Some(packed_node) = self.query_label(packed_label) { + original_root_label = packed_label; + + packed_node + } else if let Some(plain_node) = self.query_label(plain_label) { + original_root_label = plain_label; + + plain_node + } else { + let root = None; + return Ok(Self { graph, root }); + }; + + let mut roots_stack: Vec<usize>; + + if original_root_label.is_packed() { + roots_stack = valid_children!(original_root); + + match roots_stack.len() { + 0 => { + let root = None; + + return Ok(Self { graph, root }); + } + 1 => { + let child = *roots_stack.first().unwrap(); + + builder.add_vertex(self.vertex_label(child)?.ok_or(Error::NodeNoLabel(child))?); + } + _ => { + let builder_root = builder.add_vertex(original_root_label); + + for child in roots_stack.iter().copied() { + let child_node = builder.add_vertex( + self.vertex_label(child)?.ok_or(Error::NodeNoLabel(child))?, + ); + + builder.add_edge(builder_root, child_node, original_root_label)?; + } + } + } + } else { + builder.add_vertex(original_root_label); + + roots_stack = vec![original_root]; + } + + while let Some(top) = roots_stack.pop() { + let top_label = self.vertex_label(top)?.ok_or(Error::NodeNoLabel(top))?; + + let top_in_builder = match builder.query_label(top_label) { + Some(top_node) => top_node, + None => { + // an old cloned node now becomes a plain node + let plain_label = ForestLabel::new(top_label.label(), ForestLabelType::Plain); + + builder + .query_label(plain_label) + .unwrap_or_else(|| panic!("the top {top} should be planted already")) + } + }; + + 'children_loop: for child in self.children_of(top)? { + let child_label = self.vertex_label(child)?.ok_or(Error::NodeNoLabel(child))?; + + // filter out invalid children + if validity_array.get(child).copied() != Some(true) { + continue 'children_loop; + } + + // avoid unnecessary effort + if let Some(child_node) = builder.query_label(child_label) { + builder.add_edge(top_in_builder, child_node, child_label)?; + + continue 'children_loop; + } + + if child_label.is_packed() { + let child_valid_children = valid_children!(child); + + match child_valid_children.len() { + 0 => { + panic!("this case should not happen"); + } + 1 => { + // If a packed node only has one valid + // child, we clone a plain node instead. + + let child_child = *child_valid_children.first().unwrap(); + + let child_plain_label = + ForestLabel::new(child_label.label(), ForestLabelType::Plain); + + let child_plain_node = builder.add_vertex(child_plain_label); + + builder.add_edge( + top_in_builder, + child_plain_node, + child_plain_label, + )?; + + roots_stack.push(child_child); + } + _ => { + let child_node = builder.add_vertex(child_label); + + builder.add_edge(top_in_builder, child_node, child_label)?; + + roots_stack.push(child); + } + } + + continue 'children_loop; + } + + let child_node = builder.add_vertex(child_label); + + builder.add_edge(top_in_builder, child_node, child_label)?; + + roots_stack.push(child); + } + } + + Ok(Self { graph, root }) + } +} + +#[cfg(test)] +mod extract_tests { + use super::*; + + fn construct_test_forest( + ) -> Result<DefaultForest<ForestLabel<GrammarLabel>>, Box<dyn std::error::Error>> { + // node 0 + let mut result: DefaultForest<ForestLabel<GrammarLabel>> = DefaultForest::new_leaf( + GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Non(0)), 0, 3), + ); + + // node 1 + result.plant( + 0, + leaf!( + GrammarLabel::new_closed(GrammarLabelType::Rule(15), 0, 3), + _ + ), + false, + )?; + + result.plant( + 1, + DefaultForest::new_leaf(GrammarLabel::new_closed( + GrammarLabelType::TNT(TNT::Non(0)), + 0, + 3, + )), + true, + )?; + + // node 2 + result.plant( + 0, + leaf!(GrammarLabel::new_closed(GrammarLabelType::Rule(6), 0, 1), _), + false, + )?; + + // node 3 + result.plant( + 2, + leaf!( + GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Ter(0)), 0, 1), + _ + ), + false, + )?; + + // node 4 + result.plant( + 0, + leaf!(GrammarLabel::new_closed(GrammarLabelType::Rule(7), 1, 3), _), + false, + )?; + + // node 5 + result.plant( + 4, + leaf!( + GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Non(0)), 1, 3), + _ + ), + false, + )?; + + // node 6 + result.plant( + 5, + leaf!(GrammarLabel::new_closed(GrammarLabelType::Rule(3), 1, 2), _), + false, + )?; + + // node 7 + result.plant( + 6, + leaf!( + GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Non(1)), 1, 2), + _ + ), + false, + )?; + + // node 8 + result.plant( + 7, + leaf!( + GrammarLabel::new_closed(GrammarLabelType::Rule(11), 1, 2), + _ + ), + false, + )?; + + // node 9 + result.plant( + 8, + leaf!( + GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Ter(2)), 1, 2), + _ + ), + false, + )?; + + // Clone node 5 to have node 10 and node 11 + result.clone_node(5, 0, false)?; + + // node 12 + result.plant( + 11, + leaf!(GrammarLabel::new_closed(GrammarLabelType::Rule(3), 1, 3), _), + false, + )?; + + // node 13 + result.plant( + 12, + leaf!( + GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Non(1)), 1, 3), + _ + ), + false, + )?; + + result.plant( + 13, + leaf!( + GrammarLabel::new_closed(GrammarLabelType::Rule(11), 1, 2), + _ + ), + true, + )?; + + // node 14 + result.plant( + 13, + leaf!( + GrammarLabel::new_closed(GrammarLabelType::Rule(13), 2, 3), + _ + ), + false, + )?; + + // node 15 + result.plant( + 14, + leaf!( + GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Ter(2)), 2, 3), + _ + ), + false, + )?; + + // node 16 + result.plant( + 5, + leaf!(GrammarLabel::new(GrammarLabelType::Rule(4), 2), _), + false, + )?; + + // node 17 + result.plant( + 16, + leaf!(GrammarLabel::new(GrammarLabelType::TNT(TNT::Non(0)), 2), _), + false, + )?; + + // node 18 + result.plant( + 17, + leaf!(GrammarLabel::new_closed(GrammarLabelType::Rule(3), 2, 3), _), + false, + )?; + + // node 19 + result.plant( + 18, + leaf!( + GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Non(1)), 2, 3), + _ + ), + false, + )?; + + // node 20 + result.plant( + 19, + leaf!( + GrammarLabel::new_closed(GrammarLabelType::Rule(11), 2, 3), + _ + ), + false, + )?; + + result.plant( + 20, + leaf!( + GrammarLabel::new_closed(GrammarLabelType::TNT(TNT::Ter(2)), 2, 3), + _ + ), + true, + )?; + + Ok(result) + } + + #[test] + fn test_extract() -> Result<(), Box<dyn std::error::Error>> { + let forest = construct_test_forest()?; + + assert_eq!(forest.nodes_len(), 21); + + forest.print_viz("forest before extraction.gv")?; + + let extract_result = forest.extract(3)?; + + extract_result.print_viz("extracted forest.gv")?; + + Ok(()) + } +} diff --git a/chain/src/item/default/mod.rs b/chain/src/item/default/mod.rs index 47e8c90..28bdbee 100644 --- a/chain/src/item/default/mod.rs +++ b/chain/src/item/default/mod.rs @@ -12,7 +12,7 @@ use graph_macro::Graph; use std::collections::HashSet; -use core::fmt::Display; +use std::fmt::Display; /// The type of errors for forest operations. #[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd)] @@ -216,6 +216,15 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> { return Err(Error::IndexOutOfBounds(node_id, self.nodes_len())); } + // FIXME: We should take the presence of packed nodes into + // consideration. That is, the fragment might have already + // been planted and packed, and we need to account for such + // possibilities. + // + // Moreover, the fragments might also contain packed nodes, in + // which case we need to check these multiple possibilities + // are properly contained. + // We do a depth-first traversal to determine if every node // encountered has the same set of children (labels taken into // the consideration). @@ -450,7 +459,7 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> { // Make sure it only has one parent, which is the // representative node. - assert_eq!(parents.len(), 1); + // assert_eq!(parents.len(), 1); // We checked its length is 1, so it is safe to unwrap // here. @@ -670,6 +679,8 @@ impl<T: GraphLabel> DefaultForest<ForestLabel<T>> { pub mod splone; +pub mod extract; + impl<T: GraphLabel> DefaultForest<T> { /// Create a forest with only one leaf from a raw label, unlike /// `new_leaf`, which transforms the label for convenience. @@ -798,53 +809,6 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { Ok(()) } - /// Find the last child of the given node whose start and end - /// positions contain the given position. If no such child is - /// found, return `Ok(None)`. - /// - /// The returned tuple is of the form (child, index), where - /// `child` is the index of the child node in question, and - /// `index` means that the child is the `index`-th child of the - /// node. - #[allow(dead_code)] - pub(crate) fn position_search( - &self, - node: usize, - pos: usize, - ) -> Result<Option<(usize, usize)>, Error> { - fn range_contains(label: GrammarLabel, pos: usize) -> bool { - let start = label.start(); - - if let Some(end) = label.end() { - (start..end).contains(&pos) - } else { - (start..).contains(&pos) - } - } - - let node_label = self - .vertex_label(node)? - .ok_or(Error::NodeNoLabel(node))? - .label(); - - if !range_contains(node_label, pos) { - return Ok(None); - } - - for (index, child) in self.children_of(node)?.enumerate().rev() { - let child_label = self - .vertex_label(child)? - .ok_or(Error::NodeNoLabel(child))? - .label(); - - if range_contains(child_label, pos) { - return Ok(Some((child, index))); - } - } - - Ok(None) - } - /// Set the position of every node. /// /// If ALLP is non-nil or if the node is a terminal node, also set diff --git a/chain/src/item/default/splone.rs b/chain/src/item/default/splone.rs index 581d1dc..da13c56 100644 --- a/chain/src/item/default/splone.rs +++ b/chain/src/item/default/splone.rs @@ -516,7 +516,13 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { if let Some((fragment, _planted)) = fragment { let modified_degree = std::cmp::max(child_degree, 1) - 1; - if self.has_same_children(child, node, modified_degree, edge_index)? + dbg!(node, end, edge_index, modified_degree); + + dbg!(child, child_degree); + + dbg!(&fragment); + + if self.has_same_children(child, node, modified_degree, edge_index + 1)? && child_degree != 0 { let last_child = self.nth_child(child, child_degree - 1)?; @@ -651,7 +657,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { } else if labela.clone_index().is_some() { let mut parentsa = self.parents_of(nodea)?; - assert_eq!(parentsa.len(), 1); + // assert_eq!(parentsa.len(), 1); let parenta = parentsa.next().unwrap().node(); diff --git a/chain/src/item/genins.rs b/chain/src/item/genins.rs index 19f835b..e409c3c 100644 --- a/chain/src/item/genins.rs +++ b/chain/src/item/genins.rs @@ -32,7 +32,7 @@ pub(crate) fn index_out_of_bounds_conversion(ge: GrammarError) -> Error { /// Determine if a label is labelled by a terminal. fn is_labelled_by_terminal(label: GrammarLabelType) -> bool { - matches!(label.tnt(), Some(t) if matches!(t, TNT::Ter(_))) + matches!(label.tnt(), Some(tnt) if matches!(tnt, TNT::Ter(_))) } /// A helper function to generate a fragment of forest. @@ -211,7 +211,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { // Whether or not to print detailed graphs of each step of // operation for debugging purposes. - let to_print = false; + let to_print = true; let tnt_string = { let empty_p = atom_child_iter.len() == 0; @@ -376,7 +376,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { if let PaVi::Parent(node, edge, _) = pavi { let nth_child = self.nth_child(node, edge)?; - let reduced = self.reduction(nth_child, pos, ter, atom.borrow())?; + let reduced = self.reduction(nth_child, pos, ter, atom.borrow(), false)?; if reduced != nth_child && !self.is_empty_node(reduced)? { parents.clear(); @@ -700,10 +700,14 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { let reduction_fragment = atom.generate_virtual_frags(nt, t, None); - // NOTE: the case of the root is exceptional - if reduction_fragment.is_none() && self.root() != Some(node) { - return Err(Error::CannotClose(nt, t, node, node_label_start)); - } + // Maybe we do not have to force the reduciton here? + + // // NOTE: the case of the root is exceptional + // if reduction_fragment.is_none() && self.root() != Some(node) { + // dbg!(self.root()); + // self.print_viz("cannot close.gv").unwrap(); + // return Err(Error::CannotClose(nt, t, node, node_label_start)); + // } for frag in reduction_fragment.into_iter().flatten() { let mut frag = frag.clone(); diff --git a/chain/src/item/mod.rs b/chain/src/item/mod.rs index 54ca946..d2c3127 100644 --- a/chain/src/item/mod.rs +++ b/chain/src/item/mod.rs @@ -7,7 +7,7 @@ use graph::{error::Error as GError, GraphLabel, LabelGraph, Parent, ParentsGraph}; -use core::borrow::Borrow; +use std::borrow::Borrow; /// A parent or a virtual segment. /// @@ -96,7 +96,7 @@ enum ForestLabelType { Cloned(usize), } -impl core::fmt::Display for ForestLabelType { +impl std::fmt::Display for ForestLabelType { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { match self { Self::Packed => write!(f, "packed"), @@ -113,8 +113,8 @@ pub struct ForestLabel<T: GraphLabel> { status: ForestLabelType, } -impl<T: GraphLabel> core::fmt::Display for ForestLabel<T> { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { +impl<T: GraphLabel> std::fmt::Display for ForestLabel<T> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { if !matches!(self.status, ForestLabelType::Plain) { write!(f, "{}, {}", self.label, self.status) } else { @@ -132,8 +132,8 @@ pub enum ForestLabelError { ClonePack, } -impl core::fmt::Display for ForestLabelError { - fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { +impl std::fmt::Display for ForestLabelError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::PackClone => write!(f, "cannot pack a cloned node"), Self::ClonePack => write!(f, "cannot clone a packed node"), @@ -230,6 +230,9 @@ impl<T: GraphLabel> From<T> for ForestLabel<T> { } } +// REVIEW: Should we move this trait (and only this trait) to a +// separate crate, or is it enough to keep it here? + /// The expected behaviours of an item derivation forest. /// /// Note that it requires only a subset of the functionalities of diff --git a/chain/src/item/reduction.rs b/chain/src/item/reduction.rs index 89306e6..3c76c1e 100644 --- a/chain/src/item/reduction.rs +++ b/chain/src/item/reduction.rs @@ -122,6 +122,9 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { /// The parameter `ter` is used to fill in segments for virtual /// nodes if they are found along the way. /// + /// The parameter `accept_root` controls whether we want to + /// perform reduction on the root. + /// /// # Errors /// /// 1. Index out of bounds: some node number is corrupted. @@ -132,12 +135,13 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { pos: usize, ter: usize, atom: &DefaultAtom, + accept_root: bool, ) -> Result<usize, Error> { let mut result = node; // step 1: Determine if this needs reductions. - if self.root() == Some(node) { + if !accept_root && self.root() == Some(node) { return Ok(result); } @@ -152,9 +156,30 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { return Ok(result); } + // Data type for representing the status when performing a + // search. + + #[derive(PartialEq, Eq, Copy, Clone, Debug)] + enum Status { + Correct, + Incorrect, + Visited, + } + + impl From<bool> for Status { + fn from(value: bool) -> Self { + match value { + true => Self::Correct, + false => Self::Incorrect, + } + } + } + + use Status::*; + // step 2: Find descendents that need reductions. - let mut correct_ends: Map<usize, bool> = Default::default(); + let mut correct_ends: Map<usize, Status> = Default::default(); let mut order_of_correct_ends: Vec<usize> = Vec::new(); @@ -165,7 +190,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { use std::{borrow::BorrowMut, io::Write}; // Whether or not to write a debug file. - let to_write = false; + let to_write = true; if to_write { if let Ok(ref mut file) = file.borrow_mut() { @@ -181,14 +206,18 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { } } - if correct_ends.get(&top).is_some() { + let old_value = correct_ends.get(&top).copied(); + + if matches!(old_value, Some(Correct) | Some(Incorrect)) { continue 'stack_loop; } + correct_ends.insert(top, Visited); + let top_label = self.vertex_label(top)?.ok_or(Error::NodeNoLabel(top))?; if let Some(end) = top_label.label().end() { - correct_ends.insert(top, end == pos); + correct_ends.insert(top, (end == pos).into()); continue 'stack_loop; } @@ -216,8 +245,8 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { 'child_loop: for child in self.children_of(top)? { match correct_ends.get(&child).copied() { - Some(true) => { - correct_ends.insert(top, true); + Some(Correct) => { + correct_ends.insert(top, Correct); inserted = true; } @@ -232,12 +261,15 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { if has_unexplored_children { stack.push(top); - stack.extend(self.children_of(top)?); + stack.extend( + self.children_of(top)? + .filter(|child| correct_ends.get(child).is_none()), + ); } else if !inserted { - correct_ends.insert(top, false); + correct_ends.insert(top, Incorrect); } } else { - correct_ends.insert(top, false); + correct_ends.insert(top, Incorrect); } continue 'stack_loop; @@ -245,6 +277,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { if top_label.is_packed() { let mut has_unexplored_children = false; + let mut inserted = false; if to_write { if let Ok(ref mut file) = file.borrow_mut() { @@ -255,12 +288,13 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { for child in self.children_of(top)? { match correct_ends.get(&child).copied() { - Some(true) => { + Some(Correct) => { // NOTE: This is not recorded in the // correct orders, as we do not handle // packed nodes directly. - correct_ends.insert(top, true); + correct_ends.insert(top, Correct); + inserted = true; } None => { has_unexplored_children = true; @@ -280,9 +314,12 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { if has_unexplored_children { stack.push(top); - stack.extend(self.children_of(top)?); - } else if correct_ends.get(&top).is_none() { - correct_ends.insert(top, false); + stack.extend( + self.children_of(top)? + .filter(|child| correct_ends.get(child).is_none()), + ); + } else if !inserted { + correct_ends.insert(top, Incorrect); } continue 'stack_loop; @@ -309,7 +346,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { panic!("a terminal node {top} has no ending position?"); } Some(TNT::Non(nt)) => { - correct_ends.insert(top, true); + correct_ends.insert(top, Correct); self.close_pavi(atom.borrow(), PaVi::Virtual(nt, ter, top), pos)?; @@ -322,7 +359,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { Some(end) => end, }; - correct_ends.insert(top, end == pos); + correct_ends.insert(top, (end == pos).into()); if end == pos { order_of_correct_ends.push(top); @@ -341,10 +378,12 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { let last_child = self.nth_child(top, last_index)?; if let Some(child_correctness_value) = correct_ends.get(&last_child).copied() { - correct_ends.insert(top, child_correctness_value); + if child_correctness_value != Visited { + correct_ends.insert(top, child_correctness_value); - if child_correctness_value { - order_of_correct_ends.push(top); + if child_correctness_value == Correct { + order_of_correct_ends.push(top); + } } } else { stack.extend([top, last_child]); @@ -375,12 +414,6 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { #[cfg(debug_assertions)] { - let label_label_label = label.label().label(); - - assert!(label_label_label.rule().is_none()); - - assert!(matches!(label_label_label.tnt(), Some(TNT::Non(_)))); - assert!(label.label().end().is_none()); assert_ne!(degree, 0); diff --git a/chain/src/lib.rs b/chain/src/lib.rs index 9de1df7..bf3a8b4 100644 --- a/chain/src/lib.rs +++ b/chain/src/lib.rs @@ -240,6 +240,10 @@ pub trait Chain: LabelExtGraph<Edge> { /// language. fn unit(atom: Self::Atom) -> Result<Self, Self::Error>; + /// Produce the underlying atom, so that we can produce an initial + /// empty chain from the same atom again. + fn atom(&self) -> &Self::Atom; + /// Return true if and only if the language contains the empty /// string. fn epsilon(&self) -> Result<bool, Self::Error>; @@ -283,7 +287,8 @@ pub trait Chain: LabelExtGraph<Edge> { /// The argument `t` is the terminal we computet the derivative /// with. /// - /// The argument `pos` is the position within the input. + /// The argument `pos` is the zero-based position within the + /// input. /// /// The argument `no_item` determines whether we want the item /// derivation forest as well. @@ -307,9 +312,29 @@ pub trait Chain: LabelExtGraph<Edge> { Ok(()) } + // FIXME: I shall probably not use the trait of forests for this + // purpose, but I have not figured out yet wha the correct trait + // should be. + /// The type of output item that will be produced by this machine. + type Item: item::Forest<grammar::GrammarLabel>; + /// Signal to the parser that the end of the input is reached, so /// that the parser knows to generate suitable forests. - fn end_of_input(&mut self) -> Result<(), Self::Error>; + /// + /// This is not needed when recognizing instead of parsing. + /// + /// We also pass in the current position `pos` so that we have a + /// little flexibility in the position of the end of input. In + /// addition, this makes it easier to produce the suitable + /// forests. + /// + /// As a reminder, `pos` should be the position of the last + /// terminal, so if there are three terminals in total, the `pos` + /// should be `2` , instead of `3`. + /// + /// Similarly, we pass in the terminal at the position `pos` to + /// aid the production of the forest. + fn end_of_input(&mut self, pos: usize, ter: usize) -> Result<Self::Item, Self::Error>; } pub mod default; |