summaryrefslogtreecommitdiff
path: root/chain/src/default.rs
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2023-06-18 15:03:34 +0800
committerJSDurand <mmemmew@gmail.com>2023-06-18 15:03:34 +0800
commita80db17473ff09cc72acba2c1975101e6dbedf39 (patch)
tree4d5dfcdfcd1537b51b92349d9a5274aa90a6d110 /chain/src/default.rs
parent6ce44bb3bdb79e8e727ee6fc7f5c6cd7fa0bb51e (diff)
fixed the bugs of node duplications and left-open nodes
There were two main issues in the previous version. One is that there are lots of duplications of nodes when manipulating the forest. This does not mean that labels repeat: by the use of the data type this cannot happen. What happened is that there were cloned nodes whose children are exactly equal. In this case there is no need to clone that node in the first place. This is now fixed by checking carefully before cloning, so that we do not clone unnecessary nodes. The other issue, which is perhaps more important, is that there are nodes which are not closed. This means that when there should be a reuction of grammar rules, the forest does not mark the corresponding node as already reduced. The incorrect forests thus caused is hard to fix: I tried several different approaches to fix it afterwards, but all to no avail. I also tried to record enough information to fix these nodes during the manipulations. It turned out that recording nodes is a dead end, as I cannot properly syncronize the information in the forest and the information in the chain-rule machine. Any inconsistencies will result in incorrect operations later on. The approach I finally adapt is to perform every possible reduction at each step. This might lead to some more nodes than what we need. But those are technically expected to be there after all, and it is easy to filter them out, so it is fine, from my point of view at the moment. Therefore, what remains is to filter those nodes out and connect it to the holy Emacs. :D
Diffstat (limited to 'chain/src/default.rs')
-rw-r--r--chain/src/default.rs353
1 files changed, 155 insertions, 198 deletions
diff --git a/chain/src/default.rs b/chain/src/default.rs
index e61df41..ed63ff4 100644
--- a/chain/src/default.rs
+++ b/chain/src/default.rs
@@ -38,6 +38,10 @@ pub enum Error {
CannotReserve(TryReserveError),
/// Cannot create label for cloning nodes.
CannotClone(ForestLabelError),
+ /// Cannot close the virtual node.
+ ///
+ /// The format is (nt, t, node, pos).
+ CannotClose(usize, usize, usize, usize),
/// Cannot find a suitable node to plant the new forest fragment.
CannotPlant,
/// Cannot split a packed node.
@@ -60,6 +64,12 @@ impl Display for Error {
Self::NodeNoLabel(n) => write!(f, "node {n} has no labels while it should have one"),
Self::CannotReserve(e) => write!(f, "cannot reserve memory: {e}"),
Self::CannotClone(fe) => write!(f, "cannot clone due to {fe}"),
+ Self::CannotClose(nt, t, node, pos) => write!(
+ f,
+ "cannot close the virtual node {node} \
+ with the expansion from nt {nt} by t \
+ {t} at pos {pos}"
+ ),
Self::CannotPlant => write!(f, "cannot plant a new forest fragment"),
Self::SplitPack(n) => write!(f, "cannot split the packed node {n}"),
Self::InvalidClone(n) => {
@@ -196,58 +206,6 @@ impl Iterator for DerIter {
}
}
-// TODO: Add a hashmap mapping tuples of forest positions (top,
-// bottom) to vectors of tuples of unsigned integers. Each tuple
-// represents a non-terminal and a rule. The tuple means that we need
-// to close the expansion of the non-terminal by the rule position, if
-// we want to move from bottom to top.
-//
-// After we move up one layer, we are now in a new forest position, so
-// we just query again with the new bottom and top. This means we do
-// not need to clear the map.
-
-// REVIEW: Why is the above needed?
-
-/// This represents a tuple of bottom and top forest positions.
-///
-/// It is supposed to be used as keys to query the reduction
-/// information. See the type [`Reducer`] for more details.
-#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Default)]
-pub(crate) struct BoTop {
- bottom: PaVi,
- top: PaVi,
-}
-
-impl BoTop {
- pub(crate) fn new(bottom: PaVi, top: PaVi) -> Self {
- Self { bottom, top }
- }
-
- pub(crate) fn top(&self) -> PaVi {
- self.top
- }
-
- pub(crate) fn bottom(&self) -> PaVi {
- self.bottom
- }
-}
-
-/// This hashmap records information for performing extra reductions
-/// when we insert items.
-///
-/// When we want to *jump* from the bottom to the top positions in the
-/// forest, we need to perform a set of reductions, in order to jump
-/// one layer upwards. After one step, we query again for the next
-/// step of reductions to perform.
-///
-/// # Reduction format
-///
-/// The value records a set of tuples of the form `(non-terminal,
-/// starting_position)`. Such a tuple means we want to reduce from
-/// the expansion of the given `non-terminal`, which expands from the
-/// given starting position. We need to restrict the
-pub(crate) type Reducer = Map<(usize, usize), HashSet<usize>>;
-
/// A default implementation for the [`Chain`] trait.
#[derive(Debug, Clone, Default, Graph)]
pub struct DefaultChain {
@@ -259,7 +217,6 @@ pub struct DefaultChain {
forest: DefaultForest<ForestLabel<GrammarLabel>>,
accepting_vec: Vec<bool>,
accepting_sources: Vec<PaVi>,
- reducer: Reducer,
}
impl DefaultChain {
@@ -431,8 +388,6 @@ impl Chain for DefaultChain {
let accepting_sources = Vec::new();
- let reducer = Default::default();
-
Ok(Self {
graph,
atom,
@@ -441,7 +396,6 @@ impl Chain for DefaultChain {
forest,
accepting_vec,
accepting_sources,
- reducer,
})
}
@@ -486,19 +440,6 @@ impl Chain for DefaultChain {
type DerIter = DerIter;
- // EXPERIMENT: Try the approach of keeping an additional vector of
- // vectors of unsigned integers. Each element of the vector
- // corresponds to an edge of the current node. Each element is a
- // vector. This vector represents the list of reductions that are
- // implied due to skipping edges without children.
- //
- // Then when we insert an item, we can use this list to perform
- // additional reductions. This can avoid the ugly and inefficient
- // position_search method currently adopted.
- //
- // Of course we can use an optional vector to prevent allocating
- // too much memory for edges whose corresponding vector is empty.
-
fn derive(
&mut self,
t: usize,
@@ -536,12 +477,11 @@ impl Chain for DefaultChain {
input: GeneratorInput<'_>,
mut child_iter: impl Iterator<Item = usize> + ExactSizeIterator + Clone,
atom_child_iter: impl Iterator<Item = usize> + Clone,
- reducer: &mut Reducer,
mut output: impl AsMut<Vec<(Roi, usize)>>,
) -> Result<bool, Error> {
let mut accepting = false;
- let (graph, atom, forest, accepting_vec, pavi, true_pavi) = input;
+ let (graph, atom, _forest, accepting_vec, pavi, true_pavi) = input;
// First check the values from iterators are all valid.
let graph_len = graph.nodes_len();
@@ -606,19 +546,8 @@ impl Chain for DefaultChain {
for child in child_iter.clone() {
for (child_label, child_child) in graph.labels_of(child).unwrap() {
let mut new_label = *child_label;
- new_label.set_true_source(true_pavi);
-
- let botop = BoTop::new(true_pavi, new_label.forest_source());
- let key = (
- forest.node_from_pavi(botop.top())?,
- forest.node_from_pavi(botop.bottom)?,
- );
-
- reducer
- .entry(key)
- .or_insert_with(Default::default)
- .insert(forest.node_from_pavi(new_label.true_source())?);
+ new_label.set_true_source(true_pavi);
output.extend(
std::iter::repeat(new_label.into())
@@ -658,8 +587,8 @@ impl Chain for DefaultChain {
generate_fragment([atom_moved.into(), Ter(ter).into()], pos)?;
self.forest.insert_item(
- &self.reducer,
*label,
+ t,
fragment,
atom_child_iter.clone(),
&self.atom,
@@ -681,7 +610,6 @@ impl Chain for DefaultChain {
generator_input,
child_iter.clone(),
atom_child_iter.clone(),
- &mut self.reducer,
&mut der_iter.singles,
)?;
@@ -717,8 +645,8 @@ impl Chain for DefaultChain {
generate_fragment([atom_moved.into(), Non(non).into()], pos)?;
first_segment_pavi = self.forest.insert_item(
- &self.reducer,
*label,
+ t,
first_fragment,
atom_child_iter.clone(),
&self.atom,
@@ -732,8 +660,8 @@ impl Chain for DefaultChain {
// iterator, in which case the passed
// label is only used for the PaVi.
virtual_pavi = self.forest.insert_item(
- &self.reducer,
Edge::new(0, first_segment_pavi, accepting),
+ t,
virtual_fragment,
std::iter::empty(),
&self.atom,
@@ -758,7 +686,6 @@ impl Chain for DefaultChain {
generator_input,
child_iter.clone(),
atom_child_iter.clone(),
- &mut self.reducer,
&mut new_edges,
)?;
@@ -773,27 +700,8 @@ impl Chain for DefaultChain {
match roi {
Roi::Re(edge) => {
let mut edge = *edge;
- edge.set_true_source(virtual_pavi);
-
- // FIXME: Modify reducer after first experiments.
-
- // let botop = BoTop::new(virtual_pavi, edge.forest_source());
- // let rule_pos = atom_child.div_euclid(2);
-
- // let rule_nt = atom
- // .get_rule_num(rule_pos)
- // .map_err(index_out_of_bounds_conversion)?;
-
- // let rule_pos = rule_pos
- // - atom
- // .nth_accumulator(rule_nt)
- // .map_err(index_out_of_bounds_conversion)?;
-
- // reducer
- // .entry(botop)
- // .or_insert_with(Default::default)
- // .insert((rule_nt, rule_pos));
+ edge.set_true_source(virtual_pavi);
der_iter.singles.push((Roi::Re(edge), *target));
}
@@ -831,23 +739,32 @@ impl Chain for DefaultChain {
// this has been checked in
// `generate_edges`
if self.atom.is_empty_node(atom_child).unwrap() {
- // REVIEW: flat flat map,
- // hmm...
-
- // FIXME: Set true_source as well.
-
- // FIXME: Modify two layers of reducer.
- der_iter.singles.extend(child_iter.clone().flat_map(
- |child| {
- self.graph.labels_of(child).unwrap().flat_map(
- |(child_label, child_child_iter)| {
- child_child_iter.map(|child_child| {
- ((*child_label).into(), child_child)
- })
- },
- )
- },
- ));
+ let mut extra_len = 0usize;
+
+ for child in child_iter.clone() {
+ extra_len += self
+ .graph
+ .labels_of(child)
+ .unwrap()
+ .map(|(_, child_child_iter)| child_child_iter.len())
+ .sum::<usize>();
+ }
+
+ der_iter.singles.try_reserve(extra_len)?;
+
+ for child in child_iter.clone() {
+ for (child_label, child_child_iter) in
+ self.graph.labels_of(child).unwrap()
+ {
+ let mut child_label = *child_label;
+
+ child_label.set_true_source(virtual_pavi);
+
+ der_iter.singles.extend(child_child_iter.map(
+ |child_child| (child_label.into(), child_child),
+ ));
+ }
+ }
}
}
}
@@ -866,8 +783,21 @@ impl Chain for DefaultChain {
Ok(der_iter)
}
- // FIXME: Refactor this.
+ // FIXME: Do nothing more than to make this at the end of input,
+ // actually.
+ //
+ // This means we only close the expansions at the last accepting
+ // sources.
+ //
+ // As to the remaining still open fragments, they should be
+ // considered as bugs and should be fixed in the function
+ // `insert_item` instead.
+
fn end_of_input(&mut self) -> Result<(), Self::Error> {
+ if !matches!(self.epsilon(), Ok(true)) {
+ return Ok(());
+ }
+
self.forest.remove_node(1)?;
let mut stack = Vec::new();
@@ -878,7 +808,7 @@ impl Chain for DefaultChain {
for pavi in self.accepting_sources.iter() {
match pavi {
- PaVi::Parent(node, edge) => {
+ PaVi::Parent(node, _edge, child) => {
// REVIEW: This is a garbage node that has to be
// added when the machine starts. Is there a way
// to avoid this garbage?
@@ -886,9 +816,7 @@ impl Chain for DefaultChain {
continue;
}
- let nth_child = self.forest.nth_child(*node, *edge)?;
-
- stack.push(nth_child);
+ stack.push(*child);
}
PaVi::Virtual(nt, t, node) => {
let node_label = self
@@ -911,83 +839,87 @@ impl Chain for DefaultChain {
}
}
- let mut seen_nodes: HashSet<usize> = HashSet::with_capacity(stack.len());
-
- // dbg!(&stack);
+ let forest_nodes_len = self.forest.nodes_len();
- // self.forest.print_viz("dbg forest.gv").unwrap();
+ let mut node_ends: Vec<Vec<usize>> = std::iter::repeat_with(Default::default)
+ .take(forest_nodes_len)
+ .collect();
- while let Some(mut top) = stack.pop() {
- if seen_nodes.contains(&top) {
- continue;
+ // NOTE: Use iterator to avoid checking bounds when accessing
+ // `node_ends`.
+ for (node, node_end) in self.forest.nodes().zip(node_ends.iter_mut()) {
+ if let Some(end) = self
+ .forest
+ .vertex_label(node)?
+ .ok_or(Error::NodeNoLabel(node))?
+ .label()
+ .end()
+ {
+ node_end.push(end);
}
+ }
- seen_nodes.insert(top);
+ #[derive(Copy, Clone)]
+ enum StackElement {
+ Seen(usize),
+ Unseen(usize),
+ }
- let mut top_label = self
- .forest
- .vertex_label(top)?
- .ok_or(Error::NodeNoLabel(top))?;
+ use StackElement::{Seen, Unseen};
- if !top_label.is_packed()
- && matches!(top_label.label().label().tnt(), Some(TNT::Non(_)))
- && top_label.label().end().is_none()
- {
- let degree = self.forest.degree(top)?;
- let last_index = if degree > 0 { degree - 1 } else { 0 };
+ // maybe we do not need so much space?
+ let mut stack = Vec::with_capacity(forest_nodes_len);
- let pos = if degree > 0 {
- let last_child = self.forest.nth_child(top, last_index)?;
- let last_child_label = self
- .forest
- .vertex_label(last_child)?
- .ok_or(Error::NodeNoLabel(last_child))?
- .label();
- let last_child_end = last_child_label.end();
-
- if !matches!(last_child_label.label().rule(),
- Some(rule) if
- self
- .atom
- .is_accepting(2*rule+1)
- .map_err(index_out_of_bounds_conversion)?)
- {
+ stack.push(Unseen(
+ self.forest.root().ok_or(Error::IndexOutOfBounds(0, 0))?,
+ ));
+
+ let mut seen_nodes = HashSet::with_capacity(forest_nodes_len);
+
+ // depth-first search
+ while let Some(top) = stack.pop() {
+ match top {
+ Seen(top) => {
+ // Every of its children should be processed
+ // already.
+
+ if !node_ends.get(top).unwrap().is_empty() {
continue;
}
- if let Some(pos) = last_child_end {
- pos
- } else {
- stack.extend(self.forest.children_of(last_child)?);
+ let last_index = self.forest.degree(top).unwrap() - 1;
+ let last_child = self.forest.nth_child(top, last_index)?;
+
+ *node_ends.get_mut(top).unwrap() = node_ends
+ .get(last_child)
+ .ok_or(Error::IndexOutOfBounds(last_child, forest_nodes_len))?
+ .clone();
+ }
+ Unseen(top) => {
+ if seen_nodes.contains(&top) {
continue;
}
- } else {
- top_label.label().start() + 1
- };
+ seen_nodes.insert(top);
- top = self.forest.splone(top, Some(pos), last_index, true)?;
- top_label = self
- .forest
- .vertex_label(top)?
- .ok_or(Error::NodeNoLabel(top))?;
- } else if top_label.is_packed()
- || top_label.label().label().rule().is_some() && top_label.label().end().is_none()
- {
- stack.extend(self.forest.children_of(top)?);
- }
+ // NOTE: After the above check we can safely unwrap.
- if top_label.clone_index().is_some() {
- let mut parents = self.forest.parents_of(top)?;
- if parents.len() != 1 {
- dbg!(top, top_label, parents.len());
- self.forest.print_viz("dbg forest.gv").unwrap();
- panic!("0 parent?");
- }
+ let mut top_no_children = true;
- top = parents.next().unwrap().node();
- }
+ for child in self.forest.children_of(top).unwrap() {
+ if top_no_children {
+ stack.push(Seen(top));
+ }
+
+ top_no_children = false;
+
+ stack.push(Unseen(child));
+ }
- stack.extend(self.forest.parents_of(top)?.map(|parent| parent.node()));
+ if !top_no_children {
+ continue;
+ }
+ }
+ }
}
Ok(())
@@ -1120,6 +1052,11 @@ mod test_chain {
if !no_item {
chain.end_of_input()?;
+ chain.forest.print_viz("forest.gv")?;
+
+ chain.graph.print_viz("chain.gv")?;
+ chain.atom.print_nfa("nfa.gv")?;
+ item::default::print_labels(&chain.atom, &chain.forest)?;
}
for label in chain.labels_of(chain.current())?.map(|(label, _)| label) {
@@ -1143,6 +1080,10 @@ mod test_chain {
#[test]
fn test_ambiguity() -> Result<(), Box<dyn std::error::Error>> {
+ if std::fs::metadata("debug.log").is_ok() {
+ std::fs::remove_file("debug.log")?;
+ }
+
let mut no_item = false;
for arg in std::env::args() {
@@ -1158,21 +1099,37 @@ mod test_chain {
let mut chain = DefaultChain::unit(atom)?;
chain.chain(0, 0, no_item)?;
- chain.forest.print_viz("forest0.gv")?;
+
+ if !no_item {
+ chain.forest.print_viz("forest0.gv")?;
+ }
+
chain.chain(2, 1, no_item)?;
- chain.forest.print_viz("forest1.gv")?;
+
+ if !no_item {
+ chain.forest.print_viz("forest1.gv")?;
+ }
+
chain.chain(2, 2, no_item)?;
- chain.forest.print_viz("forest2.gv")?;
+
+ if !no_item {
+ chain.forest.print_viz("forest2.gv")?;
+ }
+
chain.chain(2, 3, no_item)?;
- chain.forest.print_viz("forest3.gv")?;
+
+ if !no_item {
+ chain.forest.print_viz("forest3.gv")?;
+ }
+
chain.chain(1, 4, no_item)?;
- chain.forest.print_viz("forest4.gv")?;
if !no_item {
- chain.end_of_input()?;
+ // chain.end_of_input()?;
chain.forest.print_viz("forest.gv")?;
- chain.forest.print_closed_viz("closed.gv")?;
+ // chain.forest.print_closed_viz("closed.gv")?;
}
+
chain.graph.print_viz("chain.gv")?;
chain.atom.print_nfa("nfa.gv")?;
item::default::print_labels(&chain.atom, &chain.forest)?;