summaryrefslogtreecommitdiff
path: root/chain/src/default.rs
diff options
context:
space:
mode:
Diffstat (limited to 'chain/src/default.rs')
-rw-r--r--chain/src/default.rs353
1 files changed, 155 insertions, 198 deletions
diff --git a/chain/src/default.rs b/chain/src/default.rs
index e61df41..ed63ff4 100644
--- a/chain/src/default.rs
+++ b/chain/src/default.rs
@@ -38,6 +38,10 @@ pub enum Error {
CannotReserve(TryReserveError),
/// Cannot create label for cloning nodes.
CannotClone(ForestLabelError),
+ /// Cannot close the virtual node.
+ ///
+ /// The format is (nt, t, node, pos).
+ CannotClose(usize, usize, usize, usize),
/// Cannot find a suitable node to plant the new forest fragment.
CannotPlant,
/// Cannot split a packed node.
@@ -60,6 +64,12 @@ impl Display for Error {
Self::NodeNoLabel(n) => write!(f, "node {n} has no labels while it should have one"),
Self::CannotReserve(e) => write!(f, "cannot reserve memory: {e}"),
Self::CannotClone(fe) => write!(f, "cannot clone due to {fe}"),
+ Self::CannotClose(nt, t, node, pos) => write!(
+ f,
+ "cannot close the virtual node {node} \
+ with the expansion from nt {nt} by t \
+ {t} at pos {pos}"
+ ),
Self::CannotPlant => write!(f, "cannot plant a new forest fragment"),
Self::SplitPack(n) => write!(f, "cannot split the packed node {n}"),
Self::InvalidClone(n) => {
@@ -196,58 +206,6 @@ impl Iterator for DerIter {
}
}
-// TODO: Add a hashmap mapping tuples of forest positions (top,
-// bottom) to vectors of tuples of unsigned integers. Each tuple
-// represents a non-terminal and a rule. The tuple means that we need
-// to close the expansion of the non-terminal by the rule position, if
-// we want to move from bottom to top.
-//
-// After we move up one layer, we are now in a new forest position, so
-// we just query again with the new bottom and top. This means we do
-// not need to clear the map.
-
-// REVIEW: Why is the above needed?
-
-/// This represents a tuple of bottom and top forest positions.
-///
-/// It is supposed to be used as keys to query the reduction
-/// information. See the type [`Reducer`] for more details.
-#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Default)]
-pub(crate) struct BoTop {
- bottom: PaVi,
- top: PaVi,
-}
-
-impl BoTop {
- pub(crate) fn new(bottom: PaVi, top: PaVi) -> Self {
- Self { bottom, top }
- }
-
- pub(crate) fn top(&self) -> PaVi {
- self.top
- }
-
- pub(crate) fn bottom(&self) -> PaVi {
- self.bottom
- }
-}
-
-/// This hashmap records information for performing extra reductions
-/// when we insert items.
-///
-/// When we want to *jump* from the bottom to the top positions in the
-/// forest, we need to perform a set of reductions, in order to jump
-/// one layer upwards. After one step, we query again for the next
-/// step of reductions to perform.
-///
-/// # Reduction format
-///
-/// The value records a set of tuples of the form `(non-terminal,
-/// starting_position)`. Such a tuple means we want to reduce from
-/// the expansion of the given `non-terminal`, which expands from the
-/// given starting position. We need to restrict the
-pub(crate) type Reducer = Map<(usize, usize), HashSet<usize>>;
-
/// A default implementation for the [`Chain`] trait.
#[derive(Debug, Clone, Default, Graph)]
pub struct DefaultChain {
@@ -259,7 +217,6 @@ pub struct DefaultChain {
forest: DefaultForest<ForestLabel<GrammarLabel>>,
accepting_vec: Vec<bool>,
accepting_sources: Vec<PaVi>,
- reducer: Reducer,
}
impl DefaultChain {
@@ -431,8 +388,6 @@ impl Chain for DefaultChain {
let accepting_sources = Vec::new();
- let reducer = Default::default();
-
Ok(Self {
graph,
atom,
@@ -441,7 +396,6 @@ impl Chain for DefaultChain {
forest,
accepting_vec,
accepting_sources,
- reducer,
})
}
@@ -486,19 +440,6 @@ impl Chain for DefaultChain {
type DerIter = DerIter;
- // EXPERIMENT: Try the approach of keeping an additional vector of
- // vectors of unsigned integers. Each element of the vector
- // corresponds to an edge of the current node. Each element is a
- // vector. This vector represents the list of reductions that are
- // implied due to skipping edges without children.
- //
- // Then when we insert an item, we can use this list to perform
- // additional reductions. This can avoid the ugly and inefficient
- // position_search method currently adopted.
- //
- // Of course we can use an optional vector to prevent allocating
- // too much memory for edges whose corresponding vector is empty.
-
fn derive(
&mut self,
t: usize,
@@ -536,12 +477,11 @@ impl Chain for DefaultChain {
input: GeneratorInput<'_>,
mut child_iter: impl Iterator<Item = usize> + ExactSizeIterator + Clone,
atom_child_iter: impl Iterator<Item = usize> + Clone,
- reducer: &mut Reducer,
mut output: impl AsMut<Vec<(Roi, usize)>>,
) -> Result<bool, Error> {
let mut accepting = false;
- let (graph, atom, forest, accepting_vec, pavi, true_pavi) = input;
+ let (graph, atom, _forest, accepting_vec, pavi, true_pavi) = input;
// First check the values from iterators are all valid.
let graph_len = graph.nodes_len();
@@ -606,19 +546,8 @@ impl Chain for DefaultChain {
for child in child_iter.clone() {
for (child_label, child_child) in graph.labels_of(child).unwrap() {
let mut new_label = *child_label;
- new_label.set_true_source(true_pavi);
-
- let botop = BoTop::new(true_pavi, new_label.forest_source());
- let key = (
- forest.node_from_pavi(botop.top())?,
- forest.node_from_pavi(botop.bottom)?,
- );
-
- reducer
- .entry(key)
- .or_insert_with(Default::default)
- .insert(forest.node_from_pavi(new_label.true_source())?);
+ new_label.set_true_source(true_pavi);
output.extend(
std::iter::repeat(new_label.into())
@@ -658,8 +587,8 @@ impl Chain for DefaultChain {
generate_fragment([atom_moved.into(), Ter(ter).into()], pos)?;
self.forest.insert_item(
- &self.reducer,
*label,
+ t,
fragment,
atom_child_iter.clone(),
&self.atom,
@@ -681,7 +610,6 @@ impl Chain for DefaultChain {
generator_input,
child_iter.clone(),
atom_child_iter.clone(),
- &mut self.reducer,
&mut der_iter.singles,
)?;
@@ -717,8 +645,8 @@ impl Chain for DefaultChain {
generate_fragment([atom_moved.into(), Non(non).into()], pos)?;
first_segment_pavi = self.forest.insert_item(
- &self.reducer,
*label,
+ t,
first_fragment,
atom_child_iter.clone(),
&self.atom,
@@ -732,8 +660,8 @@ impl Chain for DefaultChain {
// iterator, in which case the passed
// label is only used for the PaVi.
virtual_pavi = self.forest.insert_item(
- &self.reducer,
Edge::new(0, first_segment_pavi, accepting),
+ t,
virtual_fragment,
std::iter::empty(),
&self.atom,
@@ -758,7 +686,6 @@ impl Chain for DefaultChain {
generator_input,
child_iter.clone(),
atom_child_iter.clone(),
- &mut self.reducer,
&mut new_edges,
)?;
@@ -773,27 +700,8 @@ impl Chain for DefaultChain {
match roi {
Roi::Re(edge) => {
let mut edge = *edge;
- edge.set_true_source(virtual_pavi);
-
- // FIXME: Modify reducer after first experiments.
-
- // let botop = BoTop::new(virtual_pavi, edge.forest_source());
- // let rule_pos = atom_child.div_euclid(2);
-
- // let rule_nt = atom
- // .get_rule_num(rule_pos)
- // .map_err(index_out_of_bounds_conversion)?;
-
- // let rule_pos = rule_pos
- // - atom
- // .nth_accumulator(rule_nt)
- // .map_err(index_out_of_bounds_conversion)?;
-
- // reducer
- // .entry(botop)
- // .or_insert_with(Default::default)
- // .insert((rule_nt, rule_pos));
+ edge.set_true_source(virtual_pavi);
der_iter.singles.push((Roi::Re(edge), *target));
}
@@ -831,23 +739,32 @@ impl Chain for DefaultChain {
// this has been checked in
// `generate_edges`
if self.atom.is_empty_node(atom_child).unwrap() {
- // REVIEW: flat flat map,
- // hmm...
-
- // FIXME: Set true_source as well.
-
- // FIXME: Modify two layers of reducer.
- der_iter.singles.extend(child_iter.clone().flat_map(
- |child| {
- self.graph.labels_of(child).unwrap().flat_map(
- |(child_label, child_child_iter)| {
- child_child_iter.map(|child_child| {
- ((*child_label).into(), child_child)
- })
- },
- )
- },
- ));
+ let mut extra_len = 0usize;
+
+ for child in child_iter.clone() {
+ extra_len += self
+ .graph
+ .labels_of(child)
+ .unwrap()
+ .map(|(_, child_child_iter)| child_child_iter.len())
+ .sum::<usize>();
+ }
+
+ der_iter.singles.try_reserve(extra_len)?;
+
+ for child in child_iter.clone() {
+ for (child_label, child_child_iter) in
+ self.graph.labels_of(child).unwrap()
+ {
+ let mut child_label = *child_label;
+
+ child_label.set_true_source(virtual_pavi);
+
+ der_iter.singles.extend(child_child_iter.map(
+ |child_child| (child_label.into(), child_child),
+ ));
+ }
+ }
}
}
}
@@ -866,8 +783,21 @@ impl Chain for DefaultChain {
Ok(der_iter)
}
- // FIXME: Refactor this.
+ // FIXME: Do nothing more than to make this at the end of input,
+ // actually.
+ //
+ // This means we only close the expansions at the last accepting
+ // sources.
+ //
+ // As to the remaining still open fragments, they should be
+ // considered as bugs and should be fixed in the function
+ // `insert_item` instead.
+
fn end_of_input(&mut self) -> Result<(), Self::Error> {
+ if !matches!(self.epsilon(), Ok(true)) {
+ return Ok(());
+ }
+
self.forest.remove_node(1)?;
let mut stack = Vec::new();
@@ -878,7 +808,7 @@ impl Chain for DefaultChain {
for pavi in self.accepting_sources.iter() {
match pavi {
- PaVi::Parent(node, edge) => {
+ PaVi::Parent(node, _edge, child) => {
// REVIEW: This is a garbage node that has to be
// added when the machine starts. Is there a way
// to avoid this garbage?
@@ -886,9 +816,7 @@ impl Chain for DefaultChain {
continue;
}
- let nth_child = self.forest.nth_child(*node, *edge)?;
-
- stack.push(nth_child);
+ stack.push(*child);
}
PaVi::Virtual(nt, t, node) => {
let node_label = self
@@ -911,83 +839,87 @@ impl Chain for DefaultChain {
}
}
- let mut seen_nodes: HashSet<usize> = HashSet::with_capacity(stack.len());
-
- // dbg!(&stack);
+ let forest_nodes_len = self.forest.nodes_len();
- // self.forest.print_viz("dbg forest.gv").unwrap();
+ let mut node_ends: Vec<Vec<usize>> = std::iter::repeat_with(Default::default)
+ .take(forest_nodes_len)
+ .collect();
- while let Some(mut top) = stack.pop() {
- if seen_nodes.contains(&top) {
- continue;
+ // NOTE: Use iterator to avoid checking bounds when accessing
+ // `node_ends`.
+ for (node, node_end) in self.forest.nodes().zip(node_ends.iter_mut()) {
+ if let Some(end) = self
+ .forest
+ .vertex_label(node)?
+ .ok_or(Error::NodeNoLabel(node))?
+ .label()
+ .end()
+ {
+ node_end.push(end);
}
+ }
- seen_nodes.insert(top);
+ #[derive(Copy, Clone)]
+ enum StackElement {
+ Seen(usize),
+ Unseen(usize),
+ }
- let mut top_label = self
- .forest
- .vertex_label(top)?
- .ok_or(Error::NodeNoLabel(top))?;
+ use StackElement::{Seen, Unseen};
- if !top_label.is_packed()
- && matches!(top_label.label().label().tnt(), Some(TNT::Non(_)))
- && top_label.label().end().is_none()
- {
- let degree = self.forest.degree(top)?;
- let last_index = if degree > 0 { degree - 1 } else { 0 };
+ // maybe we do not need so much space?
+ let mut stack = Vec::with_capacity(forest_nodes_len);
- let pos = if degree > 0 {
- let last_child = self.forest.nth_child(top, last_index)?;
- let last_child_label = self
- .forest
- .vertex_label(last_child)?
- .ok_or(Error::NodeNoLabel(last_child))?
- .label();
- let last_child_end = last_child_label.end();
-
- if !matches!(last_child_label.label().rule(),
- Some(rule) if
- self
- .atom
- .is_accepting(2*rule+1)
- .map_err(index_out_of_bounds_conversion)?)
- {
+ stack.push(Unseen(
+ self.forest.root().ok_or(Error::IndexOutOfBounds(0, 0))?,
+ ));
+
+ let mut seen_nodes = HashSet::with_capacity(forest_nodes_len);
+
+ // depth-first search
+ while let Some(top) = stack.pop() {
+ match top {
+ Seen(top) => {
+ // Every of its children should be processed
+ // already.
+
+ if !node_ends.get(top).unwrap().is_empty() {
continue;
}
- if let Some(pos) = last_child_end {
- pos
- } else {
- stack.extend(self.forest.children_of(last_child)?);
+ let last_index = self.forest.degree(top).unwrap() - 1;
+ let last_child = self.forest.nth_child(top, last_index)?;
+
+ *node_ends.get_mut(top).unwrap() = node_ends
+ .get(last_child)
+ .ok_or(Error::IndexOutOfBounds(last_child, forest_nodes_len))?
+ .clone();
+ }
+ Unseen(top) => {
+ if seen_nodes.contains(&top) {
continue;
}
- } else {
- top_label.label().start() + 1
- };
+ seen_nodes.insert(top);
- top = self.forest.splone(top, Some(pos), last_index, true)?;
- top_label = self
- .forest
- .vertex_label(top)?
- .ok_or(Error::NodeNoLabel(top))?;
- } else if top_label.is_packed()
- || top_label.label().label().rule().is_some() && top_label.label().end().is_none()
- {
- stack.extend(self.forest.children_of(top)?);
- }
+ // NOTE: After the above check we can safely unwrap.
- if top_label.clone_index().is_some() {
- let mut parents = self.forest.parents_of(top)?;
- if parents.len() != 1 {
- dbg!(top, top_label, parents.len());
- self.forest.print_viz("dbg forest.gv").unwrap();
- panic!("0 parent?");
- }
+ let mut top_no_children = true;
- top = parents.next().unwrap().node();
- }
+ for child in self.forest.children_of(top).unwrap() {
+ if top_no_children {
+ stack.push(Seen(top));
+ }
+
+ top_no_children = false;
+
+ stack.push(Unseen(child));
+ }
- stack.extend(self.forest.parents_of(top)?.map(|parent| parent.node()));
+ if !top_no_children {
+ continue;
+ }
+ }
+ }
}
Ok(())
@@ -1120,6 +1052,11 @@ mod test_chain {
if !no_item {
chain.end_of_input()?;
+ chain.forest.print_viz("forest.gv")?;
+
+ chain.graph.print_viz("chain.gv")?;
+ chain.atom.print_nfa("nfa.gv")?;
+ item::default::print_labels(&chain.atom, &chain.forest)?;
}
for label in chain.labels_of(chain.current())?.map(|(label, _)| label) {
@@ -1143,6 +1080,10 @@ mod test_chain {
#[test]
fn test_ambiguity() -> Result<(), Box<dyn std::error::Error>> {
+ if std::fs::metadata("debug.log").is_ok() {
+ std::fs::remove_file("debug.log")?;
+ }
+
let mut no_item = false;
for arg in std::env::args() {
@@ -1158,21 +1099,37 @@ mod test_chain {
let mut chain = DefaultChain::unit(atom)?;
chain.chain(0, 0, no_item)?;
- chain.forest.print_viz("forest0.gv")?;
+
+ if !no_item {
+ chain.forest.print_viz("forest0.gv")?;
+ }
+
chain.chain(2, 1, no_item)?;
- chain.forest.print_viz("forest1.gv")?;
+
+ if !no_item {
+ chain.forest.print_viz("forest1.gv")?;
+ }
+
chain.chain(2, 2, no_item)?;
- chain.forest.print_viz("forest2.gv")?;
+
+ if !no_item {
+ chain.forest.print_viz("forest2.gv")?;
+ }
+
chain.chain(2, 3, no_item)?;
- chain.forest.print_viz("forest3.gv")?;
+
+ if !no_item {
+ chain.forest.print_viz("forest3.gv")?;
+ }
+
chain.chain(1, 4, no_item)?;
- chain.forest.print_viz("forest4.gv")?;
if !no_item {
- chain.end_of_input()?;
+ // chain.end_of_input()?;
chain.forest.print_viz("forest.gv")?;
- chain.forest.print_closed_viz("closed.gv")?;
+ // chain.forest.print_closed_viz("closed.gv")?;
}
+
chain.graph.print_viz("chain.gv")?;
chain.atom.print_nfa("nfa.gv")?;
item::default::print_labels(&chain.atom, &chain.forest)?;