summaryrefslogtreecommitdiff
path: root/chain/src/default.rs
diff options
context:
space:
mode:
Diffstat (limited to 'chain/src/default.rs')
-rw-r--r--chain/src/default.rs405
1 files changed, 307 insertions, 98 deletions
diff --git a/chain/src/default.rs b/chain/src/default.rs
index c873de7..08e29ce 100644
--- a/chain/src/default.rs
+++ b/chain/src/default.rs
@@ -8,14 +8,16 @@
use super::*;
use crate::atom::{Atom, DefaultAtom};
use crate::item::{
- default::DefaultForest, generate_fragment, genins::virtual_generate_fragment, Forest,
+ default::DefaultForest, generate_fragment, genins::index_out_of_bounds_conversion, Forest,
ForestLabel, ForestLabelError,
};
use core::fmt::Display;
-use grammar::{Error as GrammarError, GrammarLabel, GrammarLabelType, TNT};
-use graph::{labelled::DLGBuilder, Builder, DLGraph, Graph, LabelExtGraph, LabelGraph};
+use grammar::{GrammarLabel, GrammarLabelType, START_NONTERMINAL, TNT};
+use graph::{
+ labelled::DLGBuilder, Builder, DLGraph, Graph, LabelExtGraph, LabelGraph, ParentsGraph,
+};
-use std::collections::{HashMap as Map, TryReserveError};
+use std::collections::{HashMap as Map, HashSet, TryReserveError};
/// The errors related to taking derivatives by chain rule.
#[non_exhaustive]
@@ -36,10 +38,10 @@ pub enum Error {
CannotClone(ForestLabelError),
/// Cannot find a suitable node to plant the new forest fragment.
CannotPlant,
- /// Trying to insert an empty item.
- EmptyItem,
/// Cannot split a packed node.
SplitPack(usize),
+ /// A cloned node should have exactly one parent.
+ InvalidClone(usize),
/// An invalid situation happens.
Invalid,
}
@@ -57,8 +59,10 @@ impl Display for Error {
Self::CannotReserve(e) => write!(f, "cannot reserve memory: {e}"),
Self::CannotClone(fe) => write!(f, "cannot clone due to {fe}"),
Self::CannotPlant => write!(f, "cannot plant a new forest fragment"),
- Self::EmptyItem => write!(f, "trying to insert an empty item"),
Self::SplitPack(n) => write!(f, "cannot split the packed node {n}"),
+ Self::InvalidClone(n) => {
+ write!(f, "the cloned node {n} should have exactly one parent")
+ }
Self::Invalid => write!(f, "invalid"),
}
}
@@ -86,6 +90,7 @@ impl From<ForestError> for Error {
ForestError::NodeNoLabel(n) => Error::NodeNoLabel(n),
ForestError::LabelConversion(ce) => Error::CannotClone(ce),
ForestError::SplitPack(n) => Error::SplitPack(n),
+ ForestError::InvalidClone(n) => Error::SplitPack(n),
}
}
}
@@ -110,7 +115,7 @@ impl Default for DerIterIndex {
}
/// A complex type used for storing values of edges with two layers.
-type SecondTypeValue = (PaSe, bool, Vec<(Roi, usize)>);
+type SecondTypeValue = (PaVi, bool, Vec<(Roi, usize)>);
/// An iterator of TwoLayers.
#[derive(Debug, Default)]
@@ -137,7 +142,7 @@ impl DerIter {
fn add_second_layer(
&mut self,
label: usize,
- forest_source: PaSe,
+ forest_source: PaVi,
accepting: bool,
edges: Vec<(Roi, usize)>,
) {
@@ -166,7 +171,7 @@ impl Iterator for DerIter {
Some(TwoLayers::Two(*key, forest_source, accepting, edges))
} else {
// this should not happen
- println!("a key does not exist in the hashmap: something is wrong when taking derivatives");
+ dbg!("a key does not exist in the hashmap: something is wrong when taking derivatives");
None
}
} else {
@@ -176,22 +181,15 @@ impl Iterator for DerIter {
// safely set the index to one.
self.index = DerIterIndex::Single(1);
- if let Some((edge, to)) = self.singles.first() {
- Some(TwoLayers::One(*edge, *to))
- } else {
- None
- }
- }
- }
- DerIterIndex::Single(index) => {
- if let Some((edge, to)) = self.singles.get(index) {
- self.index = DerIterIndex::Single(index + 1);
-
- Some(TwoLayers::One(*edge, *to))
- } else {
- None
+ self.singles
+ .first()
+ .map(|(edge, to)| TwoLayers::One(*edge, *to))
}
}
+ DerIterIndex::Single(index) => self.singles.get(index).map(|(edge, to)| {
+ self.index = DerIterIndex::Single(index + 1);
+ TwoLayers::One(*edge, *to)
+ }),
}
}
}
@@ -205,6 +203,7 @@ pub struct DefaultChain {
history: Vec<usize>,
forest: DefaultForest<ForestLabel<GrammarLabel>>,
accepting_vec: Vec<bool>,
+ accepting_sources: Vec<PaVi>,
}
impl DefaultChain {
@@ -217,7 +216,7 @@ impl DefaultChain {
/// Return the complete slice of histories.
#[inline]
pub fn history(&self) -> &[usize] {
- self.history.as_ref()
+ self.history.as_slice()
}
/// Return a reference to the associated forest.
@@ -228,7 +227,7 @@ impl DefaultChain {
/// Print the rule positions of the labels.
pub fn print_rule_positions(&self) -> Result<(), Box<dyn std::error::Error>> {
- let mut labels = std::collections::HashSet::<usize>::default();
+ let mut labels = HashSet::<usize>::default();
for node in 0..self.graph.nodes_len() {
labels.extend(self.graph.labels_of(node)?.map(|(label, _)| label.label));
@@ -395,10 +394,8 @@ impl Chain for DefaultChain {
let empty_state = atom.empty();
- // The zero-th non-terminal is assumed to be the starting
- // non-terminal, by convention.
let initial_nullable = atom
- .is_nullable(0)
+ .is_nullable(START_NONTERMINAL)
.map_err(|_| Error::IndexOutOfBounds(0, atom.non_num()))?;
builder.add_edge(
@@ -421,6 +418,8 @@ impl Chain for DefaultChain {
let accepting_vec = vec![true, initial_nullable];
+ let accepting_sources = Vec::new();
+
Ok(Self {
graph,
atom,
@@ -428,6 +427,7 @@ impl Chain for DefaultChain {
history,
forest,
accepting_vec,
+ accepting_sources,
})
}
@@ -455,18 +455,15 @@ impl Chain for DefaultChain {
edges: impl Iterator<Item = (Roi, usize)>,
) -> Result<(), Self::Error> {
for (roi, _) in edges {
- match roi.imaginary_part() {
- Some(target) => {
- if matches!(self.accepting_vec.get(target).copied(), Some(true)) {
- let accepting_vec_len = self.accepting_vec.len();
-
- *self
- .accepting_vec
- .get_mut(node_id)
- .ok_or(Error::IndexOutOfBounds(node_id, accepting_vec_len))? = true;
- }
+ if let Some(target) = roi.imaginary_part() {
+ if matches!(self.accepting_vec.get(target).copied(), Some(true)) {
+ let accepting_vec_len = self.accepting_vec.len();
+
+ *self
+ .accepting_vec
+ .get_mut(node_id)
+ .ok_or(Error::IndexOutOfBounds(node_id, accepting_vec_len))? = true;
}
- None => {}
}
}
@@ -475,24 +472,22 @@ impl Chain for DefaultChain {
type DerIter = DerIter;
+ // EXPERIMENT: Try the approach of keeping an additional vector of
+ // vectors of unsigned integers. Each element of the vector
+ // corresponds to an edge of the current node. Each element is a
+ // vector. This vector represents the list of reductions that are
+ // implied due to skipping edges without children.
+ //
+ // Then when we insert an item, we can use this list to perform
+ // additional reductions. This can avoid the ugly and inefficient
+ // position_search method currently adopted.
+ //
+ // Of course we can use an optional vector to prevent allocating
+ // too much memory for edges whose corresponding vector is empty.
+
fn derive(&mut self, t: usize, pos: usize) -> Result<Self::DerIter, Self::Error> {
use TNT::*;
- /// Convert an error telling us that an index is out of bounds.
- ///
- /// # Panics
- ///
- /// The function panics if the error is not of the expected
- /// kind.
- fn index_out_of_bounds_conversion(ge: GrammarError) -> Error {
- match ge {
- GrammarError::IndexOutOfBounds(index, bound) => {
- Error::IndexOutOfBounds(index, bound)
- }
- _ => Error::Invalid,
- }
- }
-
/// A helper function to generate edges to join.
///
/// It first checks if the base edge is accepting. If yes,
@@ -505,11 +500,14 @@ impl Chain for DefaultChain {
/// to save some allocations.
fn generate_edges(
chain: &DefaultChain,
- child_iter: impl Iterator<Item = usize> + ExactSizeIterator + Clone,
+ mut child_iter: impl Iterator<Item = usize> + ExactSizeIterator + Clone,
atom_child_iter: impl Iterator<Item = usize> + Clone,
- pase: PaSe,
+ pavi: PaVi,
+ true_pavi: PaVi,
mut output: impl AsMut<Vec<(Roi, usize)>>,
- ) -> Result<(), Error> {
+ ) -> Result<bool, Error> {
+ let mut accepting = false;
+
// First check the values from iterators are all valid.
let graph_len = chain.graph.nodes_len();
let atom_len = chain.atom.nodes_len();
@@ -540,11 +538,11 @@ impl Chain for DefaultChain {
for atom_child in atom_child_iter.clone() {
let atom_child_accepting = chain.atom.is_accepting(atom_child).unwrap();
- // let atom_child_empty_node = chain.atom.is_empty_node(atom_child).unwrap();
num += child_iter.len();
if atom_child_accepting {
+ accepting = true;
num += child_iter_total_degree;
}
}
@@ -561,7 +559,7 @@ impl Chain for DefaultChain {
let atom_child_accepting = chain.atom.is_accepting(atom_child).unwrap();
let atom_child_empty_node = chain.atom.is_empty_node(atom_child).unwrap();
- let roi = Edge::new(atom_child, pase, atom_child_accepting).into();
+ let roi = Edge::new(atom_child, pavi, atom_child_accepting).into();
if atom_child_empty_node {
output.extend(child_iter.clone().map(|child| (child.into(), child)));
@@ -572,18 +570,30 @@ impl Chain for DefaultChain {
if atom_child_accepting {
for child in child_iter.clone() {
for (child_label, child_child) in chain.graph.labels_of(child).unwrap() {
- output
- .extend(child_child.map(|target| ((*child_label).into(), target)));
+ // use the new `pavi` as `true_source`
+ let mut new_label = *child_label;
+ new_label.set_true_source(true_pavi);
+
+ output.extend(
+ std::iter::repeat(new_label.into())
+ .take(child_child.len())
+ .zip(child_child),
+ );
}
}
}
}
- Ok(())
+ accepting =
+ accepting && child_iter.any(|child| *chain.accepting_vec.get(child).unwrap());
+
+ Ok(accepting)
}
let mut der_iter = DerIter::default();
+ let mut accepting_pavi: HashSet<PaVi> = HashSet::new();
+
for (label, child_iter) in self.graph.labels_of(self.current)? {
for (atom_label, atom_child_iter) in self.atom.labels_of(label.label())? {
if atom_label.is_left_p() {
@@ -594,6 +604,10 @@ impl Chain for DefaultChain {
let atom_moved = atom_label.get_moved();
+ if pos == 4 {
+ dbg!(atom_label);
+ }
+
match *atom_label.get_value() {
Some(Ter(ter)) if ter == t => {
// prepare forest fragment
@@ -601,20 +615,44 @@ impl Chain for DefaultChain {
let fragment =
generate_fragment([atom_moved.into(), Ter(ter).into()], pos)?;
- let new_pase = self.forest.insert_item(
+ if pos == 4 {
+ dbg!(atom_moved, label);
+ self.forest
+ .print_viz(&format!(
+ "pos4tb - {atom_moved}-{:?}.gv",
+ label.true_source()
+ ))
+ .unwrap();
+ }
+
+ let new_pavi = self.forest.insert_item(
*label,
fragment,
atom_child_iter.clone(),
&self.atom,
)?;
- generate_edges(
+ if pos == 4 {
+ self.forest
+ .print_viz(&format!(
+ "pos4ta - {atom_moved}-{:?}.gv",
+ label.true_source()
+ ))
+ .unwrap();
+ }
+
+ let accepting = generate_edges(
self,
child_iter.clone(),
atom_child_iter.clone(),
- new_pase,
+ new_pavi,
+ new_pavi,
&mut der_iter.singles,
)?;
+
+ if accepting {
+ accepting_pavi.insert(new_pavi);
+ }
}
Some(Non(non)) => {
if !self
@@ -634,50 +672,75 @@ impl Chain for DefaultChain {
let first_fragment =
generate_fragment([atom_moved.into(), Non(non).into()], pos)?;
- let first_segment_pase = self.forest.insert_item(
+ if pos == 4 {
+ dbg!(atom_moved, label);
+ self.forest
+ .print_viz(&format!("pos4nb - {atom_moved}-{:?}.gv", label))
+ .unwrap();
+ }
+
+ let first_segment_pavi = self.forest.insert_item(
*label,
first_fragment,
atom_child_iter.clone(),
&self.atom,
)?;
+ if pos == 4 {
+ self.forest
+ .print_viz(&format!("pos4na - {atom_moved}-{:?}.gv", label))
+ .unwrap();
+ }
+
let accepting = self
.atom
.is_accepting(virtual_node)
.map_err(index_out_of_bounds_conversion)?;
let virtual_fragment =
- virtual_generate_fragment(&self.atom, non, t, pos)?;
+ DefaultForest::new_leaf(GrammarLabel::new(Ter(t), pos));
- // NOTE: We only need the PaSe from the
+ // NOTE: We only need the PaVi from the
// first segment, so we pass an empty
// iterator, in which case the passed
- // label is only used for the PaSe.
- let virtual_pase = self.forest.insert_item(
- Edge::new(0, first_segment_pase, accepting),
+ // label is only used for the PaVi.
+ let virtual_pavi = self.forest.insert_item(
+ Edge::new(0, first_segment_pavi, accepting),
virtual_fragment,
std::iter::empty(),
&self.atom,
)?;
+ if pos == 4 {
+ self.forest
+ .print_viz(&format!("pos4va - {atom_moved}-{:?}.gv", label))
+ .unwrap();
+ }
+
let mut new_edges = Vec::new();
- generate_edges(
+ let virtual_accepting = generate_edges(
self,
child_iter.clone(),
atom_child_iter.clone(),
- first_segment_pase,
+ first_segment_pavi,
+ virtual_pavi,
&mut new_edges,
)?;
+ if virtual_accepting {
+ accepting_pavi.insert(first_segment_pavi);
+ }
+
if accepting {
+ accepting_pavi.insert(virtual_pavi);
der_iter.singles.extend(new_edges.clone());
}
if !self.atom.is_empty_node(virtual_node).unwrap() {
der_iter.add_second_layer(
virtual_node,
- virtual_pase,
+ virtual_pavi,
accepting,
new_edges,
);
@@ -691,7 +754,7 @@ impl Chain for DefaultChain {
if self.atom.is_empty_node(atom_child).unwrap() {
der_iter.singles.extend(child_iter.clone().map(|child| {
(
- Edge::new(virtual_node, virtual_pase, accepting)
+ Edge::new(virtual_node, virtual_pavi, accepting)
.into(),
child,
)
@@ -728,8 +791,137 @@ impl Chain for DefaultChain {
}
}
+ self.accepting_sources.extend(accepting_pavi);
+
Ok(der_iter)
}
+
+ // FIXME: Refactor this.
+ fn end_of_input(&mut self) -> Result<(), Self::Error> {
+ self.forest.remove_node(1)?;
+
+ let mut stack = Vec::new();
+
+ dbg!(&self.accepting_sources);
+
+ self.forest.print_viz("dbg forest before.gv").unwrap();
+
+ for pavi in self.accepting_sources.iter() {
+ match pavi {
+ PaVi::Parent(node, edge) => {
+ // REVIEW: This is a garbage node that has to be
+ // added when the machine starts. Is there a way
+ // to avoid this garbage?
+ if *node == 1 {
+ continue;
+ }
+
+ let nth_child = self.forest.nth_child(*node, *edge)?;
+
+ stack.push(nth_child);
+ }
+ PaVi::Virtual(nt, t, node) => {
+ let node_label = self
+ .forest
+ .vertex_label(*node)?
+ .ok_or(Error::NodeNoLabel(*node))?;
+
+ if node_label.label().end().is_none() {
+ let reduction_fragment = self.atom.generate_virtual_frags(*nt, *t, None);
+
+ for frag in reduction_fragment.into_iter().flatten() {
+ let mut frag = frag.clone();
+ frag.set_pos(node_label.label().start())?;
+
+ stack.push(self.forest.plant_if_needed(*node, frag)?);
+ }
+ }
+ }
+ PaVi::Empty => {}
+ }
+ }
+
+ let mut seen_nodes: HashSet<usize> = HashSet::with_capacity(stack.len());
+
+ dbg!(&stack);
+
+ self.forest.print_viz("dbg forest.gv").unwrap();
+
+ while let Some(mut top) = stack.pop() {
+ if seen_nodes.contains(&top) {
+ continue;
+ }
+
+ seen_nodes.insert(top);
+
+ let mut top_label = self
+ .forest
+ .vertex_label(top)?
+ .ok_or(Error::NodeNoLabel(top))?;
+
+ if !top_label.is_packed()
+ && matches!(top_label.label().label().tnt(), Some(TNT::Non(_)))
+ && top_label.label().end().is_none()
+ {
+ let degree = self.forest.degree(top)?;
+ let last_index = if degree > 0 { degree - 1 } else { 0 };
+
+ let pos = if degree > 0 {
+ let last_child = self.forest.nth_child(top, last_index)?;
+ let last_child_label = self
+ .forest
+ .vertex_label(last_child)?
+ .ok_or(Error::NodeNoLabel(last_child))?
+ .label();
+ let last_child_end = last_child_label.end();
+
+ if !matches!(last_child_label.label().rule(),
+ Some(rule) if
+ self
+ .atom
+ .is_accepting(2*rule+1)
+ .map_err(index_out_of_bounds_conversion)?)
+ {
+ continue;
+ }
+
+ if let Some(pos) = last_child_end {
+ pos
+ } else {
+ stack.extend(self.forest.children_of(last_child)?);
+ continue;
+ }
+ } else {
+ top_label.label().start() + 1
+ };
+
+ top = self.forest.splone(top, Some(pos), last_index, true)?;
+ top_label = self
+ .forest
+ .vertex_label(top)?
+ .ok_or(Error::NodeNoLabel(top))?;
+ } else if top_label.is_packed()
+ || top_label.label().label().rule().is_some() && top_label.label().end().is_none()
+ {
+ stack.extend(self.forest.children_of(top)?);
+ }
+
+ if top_label.clone_index().is_some() {
+ let mut parents = self.forest.parents_of(top)?;
+ if parents.len() != 1 {
+ dbg!(top, top_label, parents.len());
+ self.forest.print_viz("dbg forest.gv").unwrap();
+ panic!("0 parent?");
+ }
+
+ top = parents.next().unwrap().node();
+ }
+
+ stack.extend(self.forest.parents_of(top)?.map(|parent| parent.node()));
+ }
+
+ Ok(())
+ }
}
#[cfg(test)]
@@ -847,23 +1039,11 @@ mod test_chain {
chain.chain(0, 11)?;
chain.chain(0, 12)?;
- let forest = &mut chain.forest;
-
- let node = forest
- .query_label(ForestLabel::from(GrammarLabel::new(TNT::Non(6), 6)))
- .unwrap();
-
- forest.splone(node, Some(13), forest.degree(node)?)?;
-
- let node = forest
- .query_label(ForestLabel::from(GrammarLabel::new(TNT::Non(1), 0)))
- .unwrap();
-
- forest.splone(node, Some(13), forest.degree(node)?)?;
-
- forest.splone(0, Some(13), forest.degree(0)?)?;
+ chain.end_of_input()?;
- forest.print_viz("forest.gv")?;
+ for label in chain.labels_of(chain.current())?.map(|(label, _)| label) {
+ dbg!(label);
+ }
assert!(matches!(chain.epsilon(), Ok(true)));
@@ -871,7 +1051,9 @@ mod test_chain {
{
chain.graph.print_viz("chain.gv")?;
chain.atom.print_nfa("nfa.gv")?;
+
chain.forest.print_viz("forest.gv")?;
+
item::default::print_labels(&chain.atom, &chain.forest)?;
}
@@ -886,21 +1068,38 @@ mod test_chain {
let mut chain = DefaultChain::unit(atom)?;
chain.chain(0, 0)?;
+ chain.forest.print_viz("forest0.gv")?;
chain.chain(2, 1)?;
+ chain.forest.print_viz("forest1.gv")?;
chain.chain(2, 2)?;
+ chain.forest.print_viz("forest2.gv")?;
chain.chain(2, 3)?;
+ chain.forest.print_viz("forest3.gv")?;
chain.chain(1, 4)?;
-
+ chain.forest.print_viz("forest4.gv")?;
+ chain.end_of_input()?;
chain.forest.print_viz("forest.gv")?;
+ chain.forest.print_closed_viz("closed.gv")?;
- dbg!(chain.current(), chain.history());
+ chain.graph.print_viz("chain.gv")?;
+ chain.atom.print_nfa("nfa.gv")?;
item::default::print_labels(&chain.atom, &chain.forest)?;
+ for label in chain.labels_of(chain.current())?.map(|(label, _)| label) {
+ dbg!(label);
+ }
+
+ dbg!(chain.current(), chain.history());
+
#[cfg(feature = "test-print-viz")]
{
chain.graph.print_viz("chain.gv")?;
chain.atom.print_nfa("nfa.gv")?;
+
chain.forest.print_viz("forest.gv")?;
+
+ chain.forest.print_closed_viz("closed.gv")?;
+
item::default::print_labels(&chain.atom, &chain.forest)?;
}
@@ -950,21 +1149,22 @@ mod test_chain {
let elapsed = start.elapsed();
- // assert!(matches!(chain.epsilon(), Ok(true)));
+ assert!(matches!(chain.epsilon(), Ok(true)));
dbg!(elapsed);
dbg!(chain.current());
assert_eq!(input.len(), chain.history().len());
- if std::fs::metadata("output/history").is_ok() {
- std::fs::remove_file("output/history")?;
+ let history_file_name = "output/history";
+ if std::fs::metadata(history_file_name).is_ok() {
+ std::fs::remove_file(history_file_name)?;
}
let mut history_file = std::fs::OpenOptions::new()
.create(true)
.write(true)
- .open("output/history")?;
+ .open(history_file_name)?;
use std::fmt::Write;
use std::io::Write as IOWrite;
@@ -985,10 +1185,19 @@ mod test_chain {
history_file.write_all(log_string.as_bytes())?;
+ for label in chain.labels_of(chain.current())?.map(|(label, _)| label) {
+ dbg!(label);
+ }
+
#[cfg(feature = "test-print-viz")]
{
chain.graph.print_viz("chain.gv")?;
chain.atom.print_nfa("nfa.gv")?;
+ item::default::print_labels(&chain.atom, &chain.forest)?;
+
+ chain.forest.print_viz("forest.gv")?;
+
+ chain.forest.print_closed_viz("closed.gv")?;
}
Ok(())