summaryrefslogtreecommitdiff
path: root/chain
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2023-02-27 12:36:41 +0800
committerJSDurand <mmemmew@gmail.com>2023-02-27 12:36:41 +0800
commitfbaa420ed550e9c3e7cdc09d4a8ec22bfbd782a6 (patch)
treefad9722825bb3fa796dd52c3fd4a8bf46b958cf9 /chain
parentafad02bdff111ecccb0077b9c989e869723c231c (diff)
before a major refactor
I decide to adopt a new approach of recording and updating item derivation forests. Since this affects a lot of things, I decide to commit before the refactor, so that I can create a branch for that refactor.
Diffstat (limited to 'chain')
-rw-r--r--chain/src/atom/default.rs226
-rw-r--r--chain/src/atom/mod.rs28
-rw-r--r--chain/src/default.rs405
-rw-r--r--chain/src/item/default/mod.rs469
-rw-r--r--chain/src/item/default/splone.rs187
-rw-r--r--chain/src/item/forest-format.org10
-rw-r--r--chain/src/item/genins.rs494
-rw-r--r--chain/src/item/mod.rs85
-rw-r--r--chain/src/lib.rs38
-rw-r--r--chain/src/plan.org16
10 files changed, 1624 insertions, 334 deletions
diff --git a/chain/src/atom/default.rs b/chain/src/atom/default.rs
index ec53596..a55087a 100644
--- a/chain/src/atom/default.rs
+++ b/chain/src/atom/default.rs
@@ -2,18 +2,24 @@
//! [`Atom`][super::Atom] trait.
use super::*;
-use grammar::Grammar;
+use grammar::{Grammar, GrammarLabel, GrammarLabelType};
use graph::{error::Error as GraphError, Graph, LabelExtGraph, LabelGraph};
use nfa::{
default::{nfa::DefaultNFA, regex::DefaultRegex},
+ error::Error as NFAError,
LabelType, NfaLabel,
};
use core::fmt::Display;
-use std::collections::BTreeMap as Map;
+use std::{
+ collections::{hash_set::Iter, BTreeMap as Map, HashMap, HashSet},
+ iter::Copied,
+};
+
+use crate::item::{default::DefaultForest, ForestLabel};
/// A virtual node represents the derivative of a non-terminal symbol
-/// `S` with respect to a terminal symbol `t`.
+/// `s` with respect to a terminal symbol `t`.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Ord, PartialOrd)]
struct VirtualNode {
s: usize,
@@ -34,6 +40,33 @@ impl VirtualNode {
type VirtualMap = Map<VirtualNode, usize>;
+/// A virtual trace stores the rule positions that are responsible for
+/// an edge from the virtual node \[nt\]^s to `target`.
+#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Ord, PartialOrd)]
+struct VirtualTrace {
+ nt: usize,
+ t: usize,
+ target: usize,
+}
+
+impl Display for VirtualTrace {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "VT[{}]^({}) -> {}", self.nt, self.t, self.target)
+ }
+}
+
+impl VirtualTrace {
+ fn new(nt: usize, t: usize, target: usize) -> Self {
+ Self { nt, t, target }
+ }
+}
+
+type VirtualTraceMap = Map<VirtualTrace, HashSet<usize>>;
+
+type VirtualFrag = DefaultForest<ForestLabel<GrammarLabel>>;
+
+type VirtualFragMap = Map<VirtualNode, Map<usize, VirtualFrag>>;
+
/// The type of atomic languages.
#[derive(Debug, Clone, Default)]
pub struct DefaultAtom {
@@ -43,6 +76,8 @@ pub struct DefaultAtom {
// NOTE: This is mostly for printing and debugging
regexp: Vec<DefaultRegex<TNT>>,
virtual_nodes: VirtualMap,
+ virtual_traces: VirtualTraceMap,
+ virtual_frags: VirtualFragMap,
}
impl DefaultAtom {
@@ -260,9 +295,9 @@ impl Nfa<LabelType<TNT>> for DefaultAtom {
#[inline]
fn to_nfa(
_regexps: &[impl nfa::Regex<nfa::default::regex::RegexType<LabelType<TNT>>>],
- _sub_pred: impl Fn(LabelType<TNT>) -> Result<nfa::SoC<LabelType<TNT>>, nfa::error::Error>,
+ _sub_pred: impl Fn(LabelType<TNT>) -> Result<nfa::SoC<LabelType<TNT>>, NFAError>,
_default: Option<LabelType<TNT>>,
- ) -> Result<Self::FromRegex<DOption<DOption<TNT>>>, nfa::error::Error> {
+ ) -> Result<Self::FromRegex<DOption<DOption<TNT>>>, NFAError> {
// NOTE: We cannot construct an atom from a set of regular
// languages alone. So it is appropriate to panic here, if
// one tries to do so, for some reason.
@@ -270,7 +305,7 @@ impl Nfa<LabelType<TNT>> for DefaultAtom {
}
#[inline]
- fn remove_dead(&mut self, reserve: impl FnMut(usize) -> bool) -> Result<(), nfa::error::Error> {
+ fn remove_dead(&mut self, reserve: impl FnMut(usize) -> bool) -> Result<(), NFAError> {
self.nfa.remove_dead(reserve)
}
@@ -281,7 +316,7 @@ impl Nfa<LabelType<TNT>> for DefaultAtom {
remove_after_p: bool,
transform: impl FnMut(nfa::TwoEdges<LabelType<TNT>>) -> LabelType<TNT>,
remove_predicate: impl FnMut(LabelType<TNT>) -> bool,
- ) -> Result<(), nfa::error::Error> {
+ ) -> Result<(), NFAError> {
self.nfa
.closure(predicate, remove_after_p, transform, remove_predicate)
}
@@ -296,8 +331,6 @@ impl DefaultAtom {
let mut nfa = grammar.left_closure_to_nfa(&regexp)?;
- use std::collections::{HashMap, HashSet};
-
let accumulators: Vec<usize> = {
let mut result = Vec::with_capacity(regexp.len() + 1);
result.push(0);
@@ -388,6 +421,12 @@ impl DefaultAtom {
// Now add the virtual nodes.
let mut virtual_nodes: VirtualMap = Default::default();
+ // Record virtual traces.
+ let mut virtual_traces: VirtualTraceMap = Default::default();
+
+ // Record virtual fragments.
+ let mut virtual_frags: VirtualFragMap = Default::default();
+
let nt_num = grammar.non_num();
assert!(nt_num <= accumulators.len());
@@ -403,19 +442,35 @@ impl DefaultAtom {
GraphError::IndexOutOfBounds(index, bound) => {
GrammarError::IndexOutOfBounds(index, bound)
}
- _ => unreachable!(),
+ // This is supposed to be unreachable, but we still
+ // give it a valid value.
+ _ => GrammarError::NFAFail(NFAError::Graph(ge)),
}
}
for nt in 0..nt_num {
+ // This is safe because of the above assertion.
+ let nt_start = *accumulators.get(nt).unwrap();
+
let children: std::collections::HashMap<_, _> = nfa
- // This is safe because of the above assertion.
- .labels_of(*accumulators.get(nt).unwrap())
+ .labels_of(nt_start)
.map_err(index_out_of_bounds_conversion)?
.map(|(label, target_iter)| (*label, target_iter))
.collect();
- type TerminalsValue = (HashSet<(LabelType<TNT>, usize, Option<Vec<usize>>)>, bool);
+ /// The tuples have the following meanings in order:
+ ///
+ /// - `LabelType` => the label for the edge
+ ///
+ /// - `usize` => the target of the edge
+ ///
+ /// - `Option<Vec<usize>>` => reduction information
+ ///
+ /// - `usize` => the rule position that caused this edge
+ type TerminalsValue = (
+ HashSet<(LabelType<TNT>, usize, Option<Vec<usize>>, usize)>,
+ bool,
+ );
let mut terminals_map: HashMap<usize, TerminalsValue> = HashMap::new();
@@ -431,9 +486,72 @@ impl DefaultAtom {
result
};
+ let virtual_trace = label.get_moved();
+
let mut accepting = false;
for child in children_iter {
+ // add a virtual fragment
+
+ let line: Vec<GrammarLabelType> = grammar
+ .query_expansion(nt_start, child)?
+ .iter()
+ .copied()
+ .flatten()
+ .flat_map(|(nt, rule)| [(*rule).into(), TNT::Non(*nt).into()])
+ .rev()
+ .chain(std::iter::once(TNT::Ter(t).into()))
+ .collect();
+
+ assert!(line.len() > 1);
+
+ // by our construction this must be a rule
+ let rule = line.get(line.len() - 2).unwrap().rule().unwrap();
+
+ use crate::default::Error as DError;
+
+ let frag = crate::item::genins::generate_fragment(line, 0).map_err(
+ |fe: DError| -> GrammarError {
+ match fe {
+ DError::IndexOutOfBounds(index, bound) => {
+ GrammarError::IndexOutOfBounds(index, bound)
+ }
+ DError::DuplicateNode(n) => GrammarError::NFAFail(
+ NFAError::Graph(GraphError::DuplicatedNode(n)),
+ ),
+ DError::DuplicateEdge(source, target) => GrammarError::NFAFail(
+ NFAError::Graph(GraphError::DuplicatedEdge(source, target)),
+ ),
+ DError::NodeNoLabel(n) => {
+ panic!("node {n} has no label!")
+ }
+ DError::CannotReserve(_) => unreachable!(
+ "generate_fragment should not signal this error"
+ ),
+ DError::CannotClone(_) => {
+ unreachable!("we are not cloning")
+ }
+ DError::CannotPlant => {
+ unreachable!("why can we not plant?")
+ }
+ DError::SplitPack(_) => {
+ unreachable!("we not not splitting")
+ }
+ DError::InvalidClone(_) => {
+ unreachable!("we are not cloning")
+ }
+ DError::Invalid => {
+ panic!("a label is wrongly planted?")
+ }
+ }
+ },
+ )?;
+
+ virtual_frags
+ .entry(VirtualNode::new(nt, t))
+ .or_insert_with(Default::default)
+ .insert(rule, frag);
+
accepting =
accepting
|| *accepting_vec.get(child).ok_or(
@@ -462,6 +580,7 @@ impl DefaultAtom {
.query_reduction(child, target)
.unwrap()
.map(|slice| slice.to_vec()),
+ virtual_trace,
)
}));
}
@@ -470,8 +589,21 @@ impl DefaultAtom {
}
for (t, (set, accepting)) in terminals_map.into_iter() {
+ // update virtual traces
+
+ for (_, target, _, pos) in set.iter() {
+ let trace = VirtualTrace::new(nt, t, *target);
+
+ virtual_traces
+ .entry(trace)
+ .or_insert_with(Default::default)
+ .insert(*pos);
+ }
+
+ // add a virtual node
+
let new_index = nfa
- .extend(set.iter().map(|(label, target, _)| (*label, *target)))
+ .extend(set.iter().map(|(label, target, _, _)| (*label, *target)))
.map_err(index_out_of_bounds_conversion)?;
if accepting_vec.get(new_index).is_none() {
@@ -486,7 +618,7 @@ impl DefaultAtom {
virtual_nodes.insert(virtual_node, new_index);
// update the reduction information
- for (_, target, info) in set.into_iter() {
+ for (_, target, info, _) in set {
if let Some(info) = info {
if !matches!(
grammar.query_reduction(new_index, target)?,
@@ -507,8 +639,60 @@ impl DefaultAtom {
regexp,
virtual_nodes,
accepting_vec,
+ virtual_traces,
+ virtual_frags,
})
}
+
+ /// Generate a vector of virtual fragments for a non-terminal and
+ /// a terminal.
+ ///
+ /// # RULE
+ ///
+ /// If one passes `Some(rule)` as the paramter, then this returns
+ /// only those fragments that begin with the specified rule.
+ ///
+ /// On the other hand, if one passes `None`, then this returns
+ /// only those fragments that can end the non-terminal expansion.
+ ///
+ /// # Guarantee
+ ///
+ /// It is guaranteed that the 1-th node of each fragment is a rule
+ /// number.
+ pub(crate) fn generate_virtual_frags(
+ &self,
+ nt: usize,
+ t: usize,
+ rule: Option<usize>,
+ ) -> Option<Vec<&VirtualFrag>> {
+ let vn = VirtualNode::new(nt, t);
+
+ if let Some(rule) = rule {
+ self.virtual_frags
+ .get(&vn)
+ .and_then(|map| map.get(&rule))
+ .map(|f| vec![f])
+ } else {
+ let result: Vec<&VirtualFrag> = self
+ .virtual_frags
+ .get(&vn)
+ .iter()
+ .copied()
+ .flatten()
+ .filter_map(|(rule, frag)| {
+ self.is_accepting(rule * 2 + 1)
+ .unwrap_or(false)
+ .then_some(frag)
+ })
+ .collect();
+
+ if result.is_empty() {
+ None
+ } else {
+ Some(result)
+ }
+ }
+ }
}
/// A convenient getter for the map of virtual nodes.
@@ -550,4 +734,16 @@ impl Atom for DefaultAtom {
self.accepting_vec.len(),
))
}
+
+ type Iter<'a> = Copied<Iter<'a, usize>>
+ where
+ Self: 'a;
+
+ fn trace(&self, nt: usize, t: usize, target: usize) -> Option<<Self as Atom>::Iter<'_>> {
+ let trace = VirtualTrace::new(nt, t, target);
+
+ self.virtual_traces
+ .get(&trace)
+ .map(|set| set.iter().copied())
+ }
}
diff --git a/chain/src/atom/mod.rs b/chain/src/atom/mod.rs
index 398edd2..c9dadb2 100644
--- a/chain/src/atom/mod.rs
+++ b/chain/src/atom/mod.rs
@@ -17,6 +17,16 @@ pub trait Atom: Nfa<LabelType<TNT>> + Deref<Target = Grammar> {
/// left-linear null closure of `nt` with respect to `t`.
fn atom(&self, nt: usize, t: usize) -> Result<Option<usize>, GrammarError>;
+ /// A type that iterates through the rule positions.
+ type Iter<'a>: Iterator<Item = usize> + 'a
+ where
+ Self: 'a;
+
+ /// Return an iterator of rule positions responsible for an edge
+ /// from the virtual node corresponding to the non-terminal `nt`
+ /// and terminal `t` to `target`.
+ fn trace(&self, nt: usize, t: usize, target: usize) -> Option<<Self as Atom>::Iter<'_>>;
+
/// Return the index of the empty state.
fn empty(&self) -> usize;
@@ -33,6 +43,9 @@ mod tests {
use super::*;
use grammar::test_grammar_helper::*;
+ #[cfg(feature = "test-print-viz")]
+ use graph::Graph;
+
#[test]
fn atom() -> Result<(), Box<dyn std::error::Error>> {
let grammar = new_notes_grammar()?;
@@ -41,6 +54,21 @@ mod tests {
println!("atom = {atom}");
+ #[cfg(feature = "test-print-viz")]
+ {
+ println!("virtual frag for 1, 3: ");
+
+ for (index, frag) in atom
+ .generate_virtual_frags(1, 3, None)
+ .iter()
+ .flatten()
+ .enumerate()
+ {
+ crate::item::default::print_labels(&atom, *frag)?;
+ frag.print_viz(&format!("frag {index}.gv"))?;
+ }
+ }
+
Ok(())
}
}
diff --git a/chain/src/default.rs b/chain/src/default.rs
index c873de7..08e29ce 100644
--- a/chain/src/default.rs
+++ b/chain/src/default.rs
@@ -8,14 +8,16 @@
use super::*;
use crate::atom::{Atom, DefaultAtom};
use crate::item::{
- default::DefaultForest, generate_fragment, genins::virtual_generate_fragment, Forest,
+ default::DefaultForest, generate_fragment, genins::index_out_of_bounds_conversion, Forest,
ForestLabel, ForestLabelError,
};
use core::fmt::Display;
-use grammar::{Error as GrammarError, GrammarLabel, GrammarLabelType, TNT};
-use graph::{labelled::DLGBuilder, Builder, DLGraph, Graph, LabelExtGraph, LabelGraph};
+use grammar::{GrammarLabel, GrammarLabelType, START_NONTERMINAL, TNT};
+use graph::{
+ labelled::DLGBuilder, Builder, DLGraph, Graph, LabelExtGraph, LabelGraph, ParentsGraph,
+};
-use std::collections::{HashMap as Map, TryReserveError};
+use std::collections::{HashMap as Map, HashSet, TryReserveError};
/// The errors related to taking derivatives by chain rule.
#[non_exhaustive]
@@ -36,10 +38,10 @@ pub enum Error {
CannotClone(ForestLabelError),
/// Cannot find a suitable node to plant the new forest fragment.
CannotPlant,
- /// Trying to insert an empty item.
- EmptyItem,
/// Cannot split a packed node.
SplitPack(usize),
+ /// A cloned node should have exactly one parent.
+ InvalidClone(usize),
/// An invalid situation happens.
Invalid,
}
@@ -57,8 +59,10 @@ impl Display for Error {
Self::CannotReserve(e) => write!(f, "cannot reserve memory: {e}"),
Self::CannotClone(fe) => write!(f, "cannot clone due to {fe}"),
Self::CannotPlant => write!(f, "cannot plant a new forest fragment"),
- Self::EmptyItem => write!(f, "trying to insert an empty item"),
Self::SplitPack(n) => write!(f, "cannot split the packed node {n}"),
+ Self::InvalidClone(n) => {
+ write!(f, "the cloned node {n} should have exactly one parent")
+ }
Self::Invalid => write!(f, "invalid"),
}
}
@@ -86,6 +90,7 @@ impl From<ForestError> for Error {
ForestError::NodeNoLabel(n) => Error::NodeNoLabel(n),
ForestError::LabelConversion(ce) => Error::CannotClone(ce),
ForestError::SplitPack(n) => Error::SplitPack(n),
+ ForestError::InvalidClone(n) => Error::SplitPack(n),
}
}
}
@@ -110,7 +115,7 @@ impl Default for DerIterIndex {
}
/// A complex type used for storing values of edges with two layers.
-type SecondTypeValue = (PaSe, bool, Vec<(Roi, usize)>);
+type SecondTypeValue = (PaVi, bool, Vec<(Roi, usize)>);
/// An iterator of TwoLayers.
#[derive(Debug, Default)]
@@ -137,7 +142,7 @@ impl DerIter {
fn add_second_layer(
&mut self,
label: usize,
- forest_source: PaSe,
+ forest_source: PaVi,
accepting: bool,
edges: Vec<(Roi, usize)>,
) {
@@ -166,7 +171,7 @@ impl Iterator for DerIter {
Some(TwoLayers::Two(*key, forest_source, accepting, edges))
} else {
// this should not happen
- println!("a key does not exist in the hashmap: something is wrong when taking derivatives");
+ dbg!("a key does not exist in the hashmap: something is wrong when taking derivatives");
None
}
} else {
@@ -176,22 +181,15 @@ impl Iterator for DerIter {
// safely set the index to one.
self.index = DerIterIndex::Single(1);
- if let Some((edge, to)) = self.singles.first() {
- Some(TwoLayers::One(*edge, *to))
- } else {
- None
- }
- }
- }
- DerIterIndex::Single(index) => {
- if let Some((edge, to)) = self.singles.get(index) {
- self.index = DerIterIndex::Single(index + 1);
-
- Some(TwoLayers::One(*edge, *to))
- } else {
- None
+ self.singles
+ .first()
+ .map(|(edge, to)| TwoLayers::One(*edge, *to))
}
}
+ DerIterIndex::Single(index) => self.singles.get(index).map(|(edge, to)| {
+ self.index = DerIterIndex::Single(index + 1);
+ TwoLayers::One(*edge, *to)
+ }),
}
}
}
@@ -205,6 +203,7 @@ pub struct DefaultChain {
history: Vec<usize>,
forest: DefaultForest<ForestLabel<GrammarLabel>>,
accepting_vec: Vec<bool>,
+ accepting_sources: Vec<PaVi>,
}
impl DefaultChain {
@@ -217,7 +216,7 @@ impl DefaultChain {
/// Return the complete slice of histories.
#[inline]
pub fn history(&self) -> &[usize] {
- self.history.as_ref()
+ self.history.as_slice()
}
/// Return a reference to the associated forest.
@@ -228,7 +227,7 @@ impl DefaultChain {
/// Print the rule positions of the labels.
pub fn print_rule_positions(&self) -> Result<(), Box<dyn std::error::Error>> {
- let mut labels = std::collections::HashSet::<usize>::default();
+ let mut labels = HashSet::<usize>::default();
for node in 0..self.graph.nodes_len() {
labels.extend(self.graph.labels_of(node)?.map(|(label, _)| label.label));
@@ -395,10 +394,8 @@ impl Chain for DefaultChain {
let empty_state = atom.empty();
- // The zero-th non-terminal is assumed to be the starting
- // non-terminal, by convention.
let initial_nullable = atom
- .is_nullable(0)
+ .is_nullable(START_NONTERMINAL)
.map_err(|_| Error::IndexOutOfBounds(0, atom.non_num()))?;
builder.add_edge(
@@ -421,6 +418,8 @@ impl Chain for DefaultChain {
let accepting_vec = vec![true, initial_nullable];
+ let accepting_sources = Vec::new();
+
Ok(Self {
graph,
atom,
@@ -428,6 +427,7 @@ impl Chain for DefaultChain {
history,
forest,
accepting_vec,
+ accepting_sources,
})
}
@@ -455,18 +455,15 @@ impl Chain for DefaultChain {
edges: impl Iterator<Item = (Roi, usize)>,
) -> Result<(), Self::Error> {
for (roi, _) in edges {
- match roi.imaginary_part() {
- Some(target) => {
- if matches!(self.accepting_vec.get(target).copied(), Some(true)) {
- let accepting_vec_len = self.accepting_vec.len();
-
- *self
- .accepting_vec
- .get_mut(node_id)
- .ok_or(Error::IndexOutOfBounds(node_id, accepting_vec_len))? = true;
- }
+ if let Some(target) = roi.imaginary_part() {
+ if matches!(self.accepting_vec.get(target).copied(), Some(true)) {
+ let accepting_vec_len = self.accepting_vec.len();
+
+ *self
+ .accepting_vec
+ .get_mut(node_id)
+ .ok_or(Error::IndexOutOfBounds(node_id, accepting_vec_len))? = true;
}
- None => {}
}
}
@@ -475,24 +472,22 @@ impl Chain for DefaultChain {
type DerIter = DerIter;
+ // EXPERIMENT: Try the approach of keeping an additional vector of
+ // vectors of unsigned integers. Each element of the vector
+ // corresponds to an edge of the current node. Each element is a
+ // vector. This vector represents the list of reductions that are
+ // implied due to skipping edges without children.
+ //
+ // Then when we insert an item, we can use this list to perform
+ // additional reductions. This can avoid the ugly and inefficient
+ // position_search method currently adopted.
+ //
+ // Of course we can use an optional vector to prevent allocating
+ // too much memory for edges whose corresponding vector is empty.
+
fn derive(&mut self, t: usize, pos: usize) -> Result<Self::DerIter, Self::Error> {
use TNT::*;
- /// Convert an error telling us that an index is out of bounds.
- ///
- /// # Panics
- ///
- /// The function panics if the error is not of the expected
- /// kind.
- fn index_out_of_bounds_conversion(ge: GrammarError) -> Error {
- match ge {
- GrammarError::IndexOutOfBounds(index, bound) => {
- Error::IndexOutOfBounds(index, bound)
- }
- _ => Error::Invalid,
- }
- }
-
/// A helper function to generate edges to join.
///
/// It first checks if the base edge is accepting. If yes,
@@ -505,11 +500,14 @@ impl Chain for DefaultChain {
/// to save some allocations.
fn generate_edges(
chain: &DefaultChain,
- child_iter: impl Iterator<Item = usize> + ExactSizeIterator + Clone,
+ mut child_iter: impl Iterator<Item = usize> + ExactSizeIterator + Clone,
atom_child_iter: impl Iterator<Item = usize> + Clone,
- pase: PaSe,
+ pavi: PaVi,
+ true_pavi: PaVi,
mut output: impl AsMut<Vec<(Roi, usize)>>,
- ) -> Result<(), Error> {
+ ) -> Result<bool, Error> {
+ let mut accepting = false;
+
// First check the values from iterators are all valid.
let graph_len = chain.graph.nodes_len();
let atom_len = chain.atom.nodes_len();
@@ -540,11 +538,11 @@ impl Chain for DefaultChain {
for atom_child in atom_child_iter.clone() {
let atom_child_accepting = chain.atom.is_accepting(atom_child).unwrap();
- // let atom_child_empty_node = chain.atom.is_empty_node(atom_child).unwrap();
num += child_iter.len();
if atom_child_accepting {
+ accepting = true;
num += child_iter_total_degree;
}
}
@@ -561,7 +559,7 @@ impl Chain for DefaultChain {
let atom_child_accepting = chain.atom.is_accepting(atom_child).unwrap();
let atom_child_empty_node = chain.atom.is_empty_node(atom_child).unwrap();
- let roi = Edge::new(atom_child, pase, atom_child_accepting).into();
+ let roi = Edge::new(atom_child, pavi, atom_child_accepting).into();
if atom_child_empty_node {
output.extend(child_iter.clone().map(|child| (child.into(), child)));
@@ -572,18 +570,30 @@ impl Chain for DefaultChain {
if atom_child_accepting {
for child in child_iter.clone() {
for (child_label, child_child) in chain.graph.labels_of(child).unwrap() {
- output
- .extend(child_child.map(|target| ((*child_label).into(), target)));
+ // use the new `pavi` as `true_source`
+ let mut new_label = *child_label;
+ new_label.set_true_source(true_pavi);
+
+ output.extend(
+ std::iter::repeat(new_label.into())
+ .take(child_child.len())
+ .zip(child_child),
+ );
}
}
}
}
- Ok(())
+ accepting =
+ accepting && child_iter.any(|child| *chain.accepting_vec.get(child).unwrap());
+
+ Ok(accepting)
}
let mut der_iter = DerIter::default();
+ let mut accepting_pavi: HashSet<PaVi> = HashSet::new();
+
for (label, child_iter) in self.graph.labels_of(self.current)? {
for (atom_label, atom_child_iter) in self.atom.labels_of(label.label())? {
if atom_label.is_left_p() {
@@ -594,6 +604,10 @@ impl Chain for DefaultChain {
let atom_moved = atom_label.get_moved();
+ if pos == 4 {
+ dbg!(atom_label);
+ }
+
match *atom_label.get_value() {
Some(Ter(ter)) if ter == t => {
// prepare forest fragment
@@ -601,20 +615,44 @@ impl Chain for DefaultChain {
let fragment =
generate_fragment([atom_moved.into(), Ter(ter).into()], pos)?;
- let new_pase = self.forest.insert_item(
+ if pos == 4 {
+ dbg!(atom_moved, label);
+ self.forest
+ .print_viz(&format!(
+ "pos4tb - {atom_moved}-{:?}.gv",
+ label.true_source()
+ ))
+ .unwrap();
+ }
+
+ let new_pavi = self.forest.insert_item(
*label,
fragment,
atom_child_iter.clone(),
&self.atom,
)?;
- generate_edges(
+ if pos == 4 {
+ self.forest
+ .print_viz(&format!(
+ "pos4ta - {atom_moved}-{:?}.gv",
+ label.true_source()
+ ))
+ .unwrap();
+ }
+
+ let accepting = generate_edges(
self,
child_iter.clone(),
atom_child_iter.clone(),
- new_pase,
+ new_pavi,
+ new_pavi,
&mut der_iter.singles,
)?;
+
+ if accepting {
+ accepting_pavi.insert(new_pavi);
+ }
}
Some(Non(non)) => {
if !self
@@ -634,50 +672,75 @@ impl Chain for DefaultChain {
let first_fragment =
generate_fragment([atom_moved.into(), Non(non).into()], pos)?;
- let first_segment_pase = self.forest.insert_item(
+ if pos == 4 {
+ dbg!(atom_moved, label);
+ self.forest
+ .print_viz(&format!("pos4nb - {atom_moved}-{:?}.gv", label))
+ .unwrap();
+ }
+
+ let first_segment_pavi = self.forest.insert_item(
*label,
first_fragment,
atom_child_iter.clone(),
&self.atom,
)?;
+ if pos == 4 {
+ self.forest
+ .print_viz(&format!("pos4na - {atom_moved}-{:?}.gv", label))
+ .unwrap();
+ }
+
let accepting = self
.atom
.is_accepting(virtual_node)
.map_err(index_out_of_bounds_conversion)?;
let virtual_fragment =
- virtual_generate_fragment(&self.atom, non, t, pos)?;
+ DefaultForest::new_leaf(GrammarLabel::new(Ter(t), pos));
- // NOTE: We only need the PaSe from the
+ // NOTE: We only need the PaVi from the
// first segment, so we pass an empty
// iterator, in which case the passed
- // label is only used for the PaSe.
- let virtual_pase = self.forest.insert_item(
- Edge::new(0, first_segment_pase, accepting),
+ // label is only used for the PaVi.
+ let virtual_pavi = self.forest.insert_item(
+ Edge::new(0, first_segment_pavi, accepting),
virtual_fragment,
std::iter::empty(),
&self.atom,
)?;
+ if pos == 4 {
+ self.forest
+ .print_viz(&format!("pos4va - {atom_moved}-{:?}.gv", label))
+ .unwrap();
+ }
+
let mut new_edges = Vec::new();
- generate_edges(
+ let virtual_accepting = generate_edges(
self,
child_iter.clone(),
atom_child_iter.clone(),
- first_segment_pase,
+ first_segment_pavi,
+ virtual_pavi,
&mut new_edges,
)?;
+ if virtual_accepting {
+ accepting_pavi.insert(first_segment_pavi);
+ }
+
if accepting {
+ accepting_pavi.insert(virtual_pavi);
der_iter.singles.extend(new_edges.clone());
}
if !self.atom.is_empty_node(virtual_node).unwrap() {
der_iter.add_second_layer(
virtual_node,
- virtual_pase,
+ virtual_pavi,
accepting,
new_edges,
);
@@ -691,7 +754,7 @@ impl Chain for DefaultChain {
if self.atom.is_empty_node(atom_child).unwrap() {
der_iter.singles.extend(child_iter.clone().map(|child| {
(
- Edge::new(virtual_node, virtual_pase, accepting)
+ Edge::new(virtual_node, virtual_pavi, accepting)
.into(),
child,
)
@@ -728,8 +791,137 @@ impl Chain for DefaultChain {
}
}
+ self.accepting_sources.extend(accepting_pavi);
+
Ok(der_iter)
}
+
+ // FIXME: Refactor this.
+ fn end_of_input(&mut self) -> Result<(), Self::Error> {
+ self.forest.remove_node(1)?;
+
+ let mut stack = Vec::new();
+
+ dbg!(&self.accepting_sources);
+
+ self.forest.print_viz("dbg forest before.gv").unwrap();
+
+ for pavi in self.accepting_sources.iter() {
+ match pavi {
+ PaVi::Parent(node, edge) => {
+ // REVIEW: This is a garbage node that has to be
+ // added when the machine starts. Is there a way
+ // to avoid this garbage?
+ if *node == 1 {
+ continue;
+ }
+
+ let nth_child = self.forest.nth_child(*node, *edge)?;
+
+ stack.push(nth_child);
+ }
+ PaVi::Virtual(nt, t, node) => {
+ let node_label = self
+ .forest
+ .vertex_label(*node)?
+ .ok_or(Error::NodeNoLabel(*node))?;
+
+ if node_label.label().end().is_none() {
+ let reduction_fragment = self.atom.generate_virtual_frags(*nt, *t, None);
+
+ for frag in reduction_fragment.into_iter().flatten() {
+ let mut frag = frag.clone();
+ frag.set_pos(node_label.label().start())?;
+
+ stack.push(self.forest.plant_if_needed(*node, frag)?);
+ }
+ }
+ }
+ PaVi::Empty => {}
+ }
+ }
+
+ let mut seen_nodes: HashSet<usize> = HashSet::with_capacity(stack.len());
+
+ dbg!(&stack);
+
+ self.forest.print_viz("dbg forest.gv").unwrap();
+
+ while let Some(mut top) = stack.pop() {
+ if seen_nodes.contains(&top) {
+ continue;
+ }
+
+ seen_nodes.insert(top);
+
+ let mut top_label = self
+ .forest
+ .vertex_label(top)?
+ .ok_or(Error::NodeNoLabel(top))?;
+
+ if !top_label.is_packed()
+ && matches!(top_label.label().label().tnt(), Some(TNT::Non(_)))
+ && top_label.label().end().is_none()
+ {
+ let degree = self.forest.degree(top)?;
+ let last_index = if degree > 0 { degree - 1 } else { 0 };
+
+ let pos = if degree > 0 {
+ let last_child = self.forest.nth_child(top, last_index)?;
+ let last_child_label = self
+ .forest
+ .vertex_label(last_child)?
+ .ok_or(Error::NodeNoLabel(last_child))?
+ .label();
+ let last_child_end = last_child_label.end();
+
+ if !matches!(last_child_label.label().rule(),
+ Some(rule) if
+ self
+ .atom
+ .is_accepting(2*rule+1)
+ .map_err(index_out_of_bounds_conversion)?)
+ {
+ continue;
+ }
+
+ if let Some(pos) = last_child_end {
+ pos
+ } else {
+ stack.extend(self.forest.children_of(last_child)?);
+ continue;
+ }
+ } else {
+ top_label.label().start() + 1
+ };
+
+ top = self.forest.splone(top, Some(pos), last_index, true)?;
+ top_label = self
+ .forest
+ .vertex_label(top)?
+ .ok_or(Error::NodeNoLabel(top))?;
+ } else if top_label.is_packed()
+ || top_label.label().label().rule().is_some() && top_label.label().end().is_none()
+ {
+ stack.extend(self.forest.children_of(top)?);
+ }
+
+ if top_label.clone_index().is_some() {
+ let mut parents = self.forest.parents_of(top)?;
+ if parents.len() != 1 {
+ dbg!(top, top_label, parents.len());
+ self.forest.print_viz("dbg forest.gv").unwrap();
+ panic!("0 parent?");
+ }
+
+ top = parents.next().unwrap().node();
+ }
+
+ stack.extend(self.forest.parents_of(top)?.map(|parent| parent.node()));
+ }
+
+ Ok(())
+ }
}
#[cfg(test)]
@@ -847,23 +1039,11 @@ mod test_chain {
chain.chain(0, 11)?;
chain.chain(0, 12)?;
- let forest = &mut chain.forest;
-
- let node = forest
- .query_label(ForestLabel::from(GrammarLabel::new(TNT::Non(6), 6)))
- .unwrap();
-
- forest.splone(node, Some(13), forest.degree(node)?)?;
-
- let node = forest
- .query_label(ForestLabel::from(GrammarLabel::new(TNT::Non(1), 0)))
- .unwrap();
-
- forest.splone(node, Some(13), forest.degree(node)?)?;
-
- forest.splone(0, Some(13), forest.degree(0)?)?;
+ chain.end_of_input()?;
- forest.print_viz("forest.gv")?;
+ for label in chain.labels_of(chain.current())?.map(|(label, _)| label) {
+ dbg!(label);
+ }
assert!(matches!(chain.epsilon(), Ok(true)));
@@ -871,7 +1051,9 @@ mod test_chain {
{
chain.graph.print_viz("chain.gv")?;
chain.atom.print_nfa("nfa.gv")?;
+
chain.forest.print_viz("forest.gv")?;
+
item::default::print_labels(&chain.atom, &chain.forest)?;
}
@@ -886,21 +1068,38 @@ mod test_chain {
let mut chain = DefaultChain::unit(atom)?;
chain.chain(0, 0)?;
+ chain.forest.print_viz("forest0.gv")?;
chain.chain(2, 1)?;
+ chain.forest.print_viz("forest1.gv")?;
chain.chain(2, 2)?;
+ chain.forest.print_viz("forest2.gv")?;
chain.chain(2, 3)?;
+ chain.forest.print_viz("forest3.gv")?;
chain.chain(1, 4)?;
-
+ chain.forest.print_viz("forest4.gv")?;
+ chain.end_of_input()?;
chain.forest.print_viz("forest.gv")?;
+ chain.forest.print_closed_viz("closed.gv")?;
- dbg!(chain.current(), chain.history());
+ chain.graph.print_viz("chain.gv")?;
+ chain.atom.print_nfa("nfa.gv")?;
item::default::print_labels(&chain.atom, &chain.forest)?;
+ for label in chain.labels_of(chain.current())?.map(|(label, _)| label) {
+ dbg!(label);
+ }
+
+ dbg!(chain.current(), chain.history());
+
#[cfg(feature = "test-print-viz")]
{
chain.graph.print_viz("chain.gv")?;
chain.atom.print_nfa("nfa.gv")?;
+
chain.forest.print_viz("forest.gv")?;
+
+ chain.forest.print_closed_viz("closed.gv")?;
+
item::default::print_labels(&chain.atom, &chain.forest)?;
}
@@ -950,21 +1149,22 @@ mod test_chain {
let elapsed = start.elapsed();
- // assert!(matches!(chain.epsilon(), Ok(true)));
+ assert!(matches!(chain.epsilon(), Ok(true)));
dbg!(elapsed);
dbg!(chain.current());
assert_eq!(input.len(), chain.history().len());
- if std::fs::metadata("output/history").is_ok() {
- std::fs::remove_file("output/history")?;
+ let history_file_name = "output/history";
+ if std::fs::metadata(history_file_name).is_ok() {
+ std::fs::remove_file(history_file_name)?;
}
let mut history_file = std::fs::OpenOptions::new()
.create(true)
.write(true)
- .open("output/history")?;
+ .open(history_file_name)?;
use std::fmt::Write;
use std::io::Write as IOWrite;
@@ -985,10 +1185,19 @@ mod test_chain {
history_file.write_all(log_string.as_bytes())?;
+ for label in chain.labels_of(chain.current())?.map(|(label, _)| label) {
+ dbg!(label);
+ }
+
#[cfg(feature = "test-print-viz")]
{
chain.graph.print_viz("chain.gv")?;
chain.atom.print_nfa("nfa.gv")?;
+ item::default::print_labels(&chain.atom, &chain.forest)?;
+
+ chain.forest.print_viz("forest.gv")?;
+
+ chain.forest.print_closed_viz("closed.gv")?;
}
Ok(())
diff --git a/chain/src/item/default/mod.rs b/chain/src/item/default/mod.rs
index 1a00368..22069d2 100644
--- a/chain/src/item/default/mod.rs
+++ b/chain/src/item/default/mod.rs
@@ -3,11 +3,13 @@
use super::*;
use crate::atom::default::DefaultAtom;
-use grammar::{GrammarLabel, GrammarLabelType};
+use grammar::{GrammarLabel, GrammarLabelType, TNT};
use graph::{
builder::BuilderMut, labelled::binary::PLGBuilderMut, Graph, LabelGraph, PLGraph, RedirectGraph,
};
+use std::collections::HashSet;
+
use core::fmt::Display;
/// The type of errors for forest operations.
@@ -30,6 +32,8 @@ pub enum Error {
LabelConversion(ForestLabelError),
/// Trying to split a packed node.
SplitPack(usize),
+ /// A cloned node should have exactly one parent.
+ InvalidClone(usize),
}
impl Display for Error {
@@ -43,6 +47,9 @@ impl Display for Error {
Error::NodeNoLabel(n) => write!(f, "node {n} has no labels, but it should have one"),
Error::LabelConversion(le) => write!(f, "fail to convert labels: {le}"),
Error::SplitPack(n) => write!(f, "cannot split the packed node {n}"),
+ Error::InvalidClone(n) => {
+ write!(f, "the cloned node {n} should have exactly one parent")
+ }
}
}
}
@@ -68,7 +75,7 @@ impl From<ForestLabelError> for Error {
/// A default implementation of forest.
#[derive(Debug, Clone)]
pub struct DefaultForest<T: GraphLabel> {
- graph: PLGraph<T>,
+ pub(crate) graph: PLGraph<T>,
root: Option<usize>,
}
@@ -262,15 +269,14 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> {
// the consideration).
let fragment = fragment.borrow();
+ let fragment_nodes_len = fragment.nodes_len();
- let mut frag_stack = Vec::with_capacity(fragment.nodes_len());
+ let mut frag_stack = Vec::with_capacity(fragment_nodes_len);
- let mut self_stack = Vec::with_capacity(fragment.nodes_len());
+ let mut self_stack = Vec::with_capacity(fragment_nodes_len);
- use std::collections::HashSet;
-
- let mut frag_seen: HashSet<usize> = HashSet::with_capacity(fragment.nodes_len());
- let mut self_seen: HashSet<usize> = HashSet::with_capacity(fragment.nodes_len());
+ let mut frag_seen: HashSet<usize> = HashSet::with_capacity(fragment_nodes_len);
+ let mut self_seen: HashSet<usize> = HashSet::with_capacity(fragment_nodes_len);
let frag_root = if let Some(root) = fragment.root() {
root
@@ -294,25 +300,25 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> {
if fragment.vertex_label(frag_top)? != self.vertex_label(self_top)? {
// not a prefix
- dbg!(
- frag_top,
- self_top,
- fragment.vertex_label(frag_top)?,
- self.vertex_label(self_top)?
- );
+ // dbg!(
+ // frag_top,
+ // self_top,
+ // fragment.vertex_label(frag_top)?,
+ // self.vertex_label(self_top)?
+ // );
return Ok(false);
}
let self_children = self.children_of(self_top)?;
let frag_children = fragment.children_of(frag_top)?;
- if frag_children.len() != self_children.len() {
- dbg!(
- frag_top,
- self_top,
- fragment.vertex_label(frag_top)?,
- self.vertex_label(self_top)?
- );
+ if frag_children.len() > self_children.len() {
+ // dbg!(
+ // frag_top,
+ // self_top,
+ // fragment.vertex_label(frag_top)?,
+ // self.vertex_label(self_top)?
+ // );
return Ok(false);
}
@@ -320,7 +326,7 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> {
if frag_seen.contains(&frag_child) && self_seen.contains(&self_child) {
continue;
} else if frag_seen.contains(&frag_child) || self_seen.contains(&self_child) {
- dbg!();
+ // dbg!();
return Ok(false);
}
@@ -352,7 +358,7 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> {
let root = if let Some(root) = fragment.root() {
root
} else {
- // Nothing to do to plant an empty forest.
+ // Nothing to plant.
return Ok(());
};
@@ -393,9 +399,27 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> {
return Ok(());
}
- // First adjoin those nodes and join the edges later.
+ // First adjoin the relevant nodes and join the edges later.
+
+ let mut used_nodes: Vec<bool> = std::iter::repeat(false).take(nodes_len).collect();
+
+ let mut stack = vec![root];
+
+ while let Some(top) = stack.pop() {
+ if used_nodes.get(top).copied() == Some(true) {
+ continue;
+ }
+
+ *used_nodes
+ .get_mut(top)
+ .ok_or(Error::IndexOutOfBounds(top, nodes_len))? = true;
+
+ stack.extend(fragment.children_of(top)?);
+ }
+
+ let used_nodes = used_nodes;
- for node in 0..nodes_len {
+ for node in (0..nodes_len).filter(|node| used_nodes.get(*node).copied() == Some(true)) {
let label = fragment
.vertex_label(node)?
.ok_or(Error::NodeNoLabel(node))?;
@@ -582,6 +606,93 @@ impl<T: GraphLabel> PartialEq for DefaultForest<ForestLabel<T>> {
}
}
+impl<T: GraphLabel> Eq for DefaultForest<ForestLabel<T>> {}
+
+impl<T: GraphLabel> DefaultForest<ForestLabel<T>> {
+ /// A convenient helper function to plant a fragment under a
+ /// node, if it has not been planted yet.
+ ///
+ /// To be precise, this function first checks if the node is
+ /// packed; if so, then it checks every of the cloned children, to
+ /// see if the fragment has already been planted. If none of the
+ /// clones have the fragment as a prefix, we make a new clone and
+ /// plant the fragment there.
+ ///
+ /// On the other hand, if the node is a plain node, then it checks
+ /// if the fragment is a prefix of the node. If so, do nothing,
+ /// else clone the node and plant the fragment there, unless the
+ /// node has no children yet, in which case just plant the node
+ /// there.
+ ///
+ /// A special case occurs when the node is the root node, in which
+ /// case we clone it only when its degree is larger than one.
+ ///
+ /// Return either the original node or the cloned node at the end.
+ pub(crate) fn plant_if_needed(
+ &mut self,
+ node: usize,
+ mut fragment: Self,
+ ) -> Result<usize, Error> {
+ if fragment.is_empty() {
+ return Ok(node);
+ }
+
+ let node_label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?;
+
+ if node_label.is_packed() || node_label.clone_index().is_some() {
+ let mut planted = false;
+
+ let mut node = node;
+
+ if node_label.clone_index().is_some() {
+ let mut parents = self.parents_of(node)?;
+
+ assert_eq!(parents.len(), 1);
+
+ node = parents.next().unwrap().node();
+ }
+
+ for clone in self.children_of(node)? {
+ node = clone;
+
+ if self.is_prefix(clone, &fragment)? {
+ planted = true;
+
+ break;
+ }
+ }
+
+ if !planted {
+ let clone = self.clone_node(node, 0, false)?;
+
+ fragment.set_root(1)?;
+
+ self.plant(clone, fragment, false)?;
+
+ return Ok(clone);
+ }
+ } else {
+ let clone_threshold = if self.root == Some(node) { 1 } else { 0 };
+
+ let planted = self.is_prefix(node, &fragment)?;
+
+ fragment.set_root(1)?;
+
+ if !planted && self.degree(node)? > clone_threshold {
+ let clone = self.clone_node(node, 0, false)?;
+
+ self.plant(clone, fragment, false)?;
+
+ return Ok(clone);
+ } else if !planted {
+ self.plant(node, fragment, false)?;
+ }
+ }
+
+ Ok(node)
+ }
+}
+
pub mod splone;
impl<T: GraphLabel> DefaultForest<T> {
@@ -596,9 +707,18 @@ impl<T: GraphLabel> DefaultForest<T> {
Self { graph, root }
}
-}
-impl<T: GraphLabel> Eq for DefaultForest<ForestLabel<T>> {}
+ /// Set the root to the given node.
+ pub fn set_root(&mut self, root: usize) -> Result<(), Error> {
+ if root >= self.nodes_len() {
+ return Err(Error::IndexOutOfBounds(root, self.nodes_len()));
+ }
+
+ self.root = Some(root);
+
+ Ok(())
+ }
+}
impl DefaultForest<ForestLabel<GrammarLabel>> {
/// Prints the forest without nodes that do not have ending
@@ -613,6 +733,8 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
Ok(label) => {
if let Some(label) = label {
label.label().end().is_none()
+ || (matches!(self.degree(*node), Ok(0))
+ && matches!(self.parents_of(*node).map(|iter| iter.len()), Ok(0)))
} else {
true
}
@@ -674,6 +796,299 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
file.write_all(result.as_bytes())
}
+
+ /// Remove a node by removing all edges connecting with it.
+ pub fn remove_node(&mut self, node_id: usize) -> Result<(), Error> {
+ let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph);
+
+ let mut to_remove =
+ Vec::with_capacity(builder.degree(node_id)? + builder.parents_of(node_id)?.len());
+
+ to_remove.extend(
+ builder
+ .children_of(node_id)?
+ .map(|target| (node_id, target)),
+ );
+
+ to_remove.extend(
+ builder
+ .parents_of(node_id)?
+ .map(|parent| (parent.node(), node_id)),
+ );
+
+ for (source, target) in to_remove {
+ builder.remove_edge(source, target, |_| true)?;
+ }
+
+ Ok(())
+ }
+
+ /// Find the last child of the given node whose start and end
+ /// positions contain the given position. If no such child is
+ /// found, return `Ok(None)`.
+ ///
+ /// The returned tuple is of the form (child, index), where
+ /// `child` is the index of the child node in question, and
+ /// `index` means that the child is the `index`-th child of the
+ /// node.
+ pub(crate) fn position_search(
+ &self,
+ node: usize,
+ pos: usize,
+ ) -> Result<Option<(usize, usize)>, Error> {
+ fn range_contains(label: GrammarLabel, pos: usize) -> bool {
+ let start = label.start();
+
+ if let Some(end) = label.end() {
+ (start..end).contains(&pos)
+ } else {
+ (start..).contains(&pos)
+ }
+ }
+
+ let node_label = self
+ .vertex_label(node)?
+ .ok_or(Error::NodeNoLabel(node))?
+ .label();
+
+ if !range_contains(node_label, pos) {
+ return Ok(None);
+ }
+
+ for (index, child) in self.children_of(node)?.enumerate().rev() {
+ let child_label = self
+ .vertex_label(child)?
+ .ok_or(Error::NodeNoLabel(child))?
+ .label();
+
+ if range_contains(child_label, pos) {
+ return Ok(Some((child, index)));
+ }
+ }
+
+ Ok(None)
+ }
+
+ // /// Check if every child already has an end.
+ // fn every_child_is_completed(&self, node_id: usize, atom: &DefaultAtom) -> Result<bool, Error> {
+ // let children = self.children_of(node_id)?;
+
+ // if children.len() == 0 {
+ // return Ok(true);
+ // }
+
+ // let mut pos = self
+ // .vertex_label(node_id)?
+ // .ok_or(Error::NodeNoLabel(node_id))?
+ // .label()
+ // .start();
+
+ // let mut last_child_label = None;
+
+ // for child in children {
+ // let child_label = self
+ // .vertex_label(child)?
+ // .ok_or(Error::NodeNoLabel(child))?
+ // .label();
+
+ // last_child_label = Some(child_label);
+
+ // if child_label.start() != pos || child_label.end().is_none() {
+ // return Ok(false);
+ // }
+
+ // pos = child_label.end().unwrap();
+ // }
+
+ // if let Some(label) = last_child_label {
+ // if let Some(rule) = label.label().rule() {
+ // if !atom
+ // .is_accepting(2 * rule + 1)
+ // .map_err(|_| Error::IndexOutOfBounds(2 * rule + 1, atom.nodes_len()))?
+ // {
+ // return Ok(false);
+ // }
+ // }
+ // }
+
+ // Ok(true)
+ // }
+
+ // /// Complete the forest by trying to set the ending position of
+ // /// every node that does not have an end, by the ending position
+ // /// of its last child.
+ // pub fn complete(&mut self, atom: &DefaultAtom) -> Result<(), Error> {
+ // let mut stack: Vec<_> = self
+ // .nodes()
+ // .filter(|node| {
+ // let label = self.vertex_label(*node).unwrap().unwrap().label();
+
+ // label.label().rule().is_some() && label.end().is_some()
+ // })
+ // .collect();
+
+ // let mut second_stack: Vec<usize> = Vec::new();
+
+ // let mut pending_candidates: Vec<usize> = Vec::new();
+
+ // let nodes_len = self.nodes_len();
+
+ // let mut seen_nodes: HashSet<usize> = HashSet::with_capacity(nodes_len);
+
+ // while !stack.is_empty() {
+ // while let Some(mut top) = stack.pop() {
+ // if seen_nodes.contains(&top) {
+ // continue;
+ // }
+
+ // seen_nodes.insert(top);
+
+ // let top_label = self.vertex_label(top)?.unwrap();
+
+ // if top_label.clone_index().is_some() {
+ // let mut top_parents = self.parents_of(top)?;
+
+ // if top_parents.len() != 1 {
+ // return Err(Error::InvalidClone(top));
+ // }
+
+ // top = top_parents.next().unwrap().node();
+ // }
+
+ // let rule_parents = self.parents_of(top)?;
+
+ // let candidates = {
+ // let mut result = Vec::with_capacity(rule_parents.len());
+
+ // for parent in rule_parents {
+ // // for parent in self.parents_of(parent.node())? {
+ // // if self.degree(parent.node())? <= parent.edge() + 1 {
+ // result.push(parent);
+ // // }
+ // // }
+ // }
+
+ // result
+ // };
+
+ // for candidate in candidates {
+ // if matches!(self.vertex_label(candidate.node())?, Some(label) if label.label().end().is_none())
+ // {
+ // if self.every_child_is_completed(candidate.node(), atom)? {
+ // let last_child = self
+ // .nth_child(candidate.node(), self.degree(candidate.node())? - 1)?;
+ // let end = self
+ // .vertex_label(last_child)?
+ // .ok_or(Error::NodeNoLabel(last_child))?
+ // .label()
+ // .end();
+
+ // let sploned_node = self.splone(
+ // candidate.node(),
+ // end,
+ // self.degree(candidate.node())? - 1,
+ // true,
+ // )?;
+
+ // second_stack.push(sploned_node);
+ // } else {
+ // pending_candidates.push(candidate.node());
+ // }
+ // } else {
+ // second_stack.push(candidate.node());
+ // }
+ // }
+
+ // let mut new_pending = Vec::with_capacity(pending_candidates.len());
+
+ // while let Some(node) = pending_candidates.pop() {
+ // if self.every_child_is_completed(node, atom)? {
+ // let last_edge = self.degree(node)? - 1;
+ // let last_child = self.nth_child(node, last_edge)?;
+ // let end = self
+ // .vertex_label(last_child)?
+ // .ok_or(Error::NodeNoLabel(last_child))?
+ // .label()
+ // .end();
+
+ // let sploned_node = self.splone(node, end, last_edge, true)?;
+
+ // second_stack.push(sploned_node);
+ // } else {
+ // new_pending.push(node);
+ // }
+ // }
+
+ // std::mem::swap(&mut pending_candidates, &mut new_pending);
+ // }
+
+ // std::mem::swap(&mut stack, &mut second_stack);
+ // }
+
+ // Ok(())
+ // }
+
+ // /// Unconditionally set the label of the node to be the new label.
+ // ///
+ // /// # Warning
+ // ///
+ // /// Use with caution: it does not do anything special, so it is
+ // /// the responsibility of the caller to ensure this operation is
+ // /// legal.
+ // #[allow(dead_code)]
+ // pub(crate) fn set_label(&mut self, node: usize, label: GrammarLabel) -> Result<(), Error> {
+ // let status = self
+ // .vertex_label(node)?
+ // .ok_or(Error::NodeNoLabel(node))?
+ // .status;
+
+ // let label = ForestLabel::new(label, status);
+
+ // let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph);
+
+ // builder.set_label(node, label)?;
+
+ // Ok(())
+ // }
+
+ /// Set the position of every node.
+ pub(crate) fn set_pos(&mut self, pos: usize) -> Result<(), Error> {
+ let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph);
+
+ for node in 0..builder.nodes_len() {
+ let label = builder
+ .vertex_label(node)?
+ .ok_or(Error::NodeNoLabel(node))?;
+
+ let mut label_label = label.label();
+
+ label_label.set_start(pos);
+
+ if matches!(label_label.label().tnt(), Some(TNT::Ter(_))) {
+ label_label.set_end(pos + 1);
+ } else if builder.degree(node)? == 1 && label_label.label().rule().is_some() {
+ let child = builder.children_of(node)?.next().unwrap();
+
+ if matches!(
+ builder
+ .vertex_label(child)?
+ .ok_or(Error::NodeNoLabel(child))?
+ .label()
+ .label()
+ .tnt(),
+ Some(TNT::Ter(_))
+ ) {
+ label_label.set_end(pos + 1);
+ }
+ }
+
+ let new_label = ForestLabel::new(label_label, label.status);
+
+ builder.set_label(node, new_label)?;
+ }
+
+ Ok(())
+ }
}
/// Print the labels found in the forest, so that we can easily
diff --git a/chain/src/item/default/splone.rs b/chain/src/item/default/splone.rs
index 851968c..237b92a 100644
--- a/chain/src/item/default/splone.rs
+++ b/chain/src/item/default/splone.rs
@@ -1,4 +1,3 @@
-#![allow(dead_code)]
//! This module implements a function for "closing" and "splitting" a
//! node in a forest, and hence the name.
@@ -20,6 +19,7 @@ fn get_rule_label(label: GrammarLabel) -> Option<usize> {
}
/// Existing or non-existing label.
+#[derive(Debug, Copy, Clone)]
enum Eon {
Ex(usize),
Nex(ForestLabel<GrammarLabel>),
@@ -62,6 +62,12 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
/// [`split_node`][DefaultForest::<ForestLabel<GrammarLabel>>::split_node]
/// for details.
///
+ /// # `completingp`
+ ///
+ /// When we are completing the forest at the end, we do not wish
+ /// to keep the nodes at the end, so we pass a flag to inform the
+ /// function of this intention.
+ ///
/// # Return
///
/// The function returns the new, splitted or cloned node, or the
@@ -90,6 +96,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
node: usize,
end: Option<usize>,
edge_index: usize,
+ completingp: bool,
) -> Result<usize, Error> {
let node_label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?;
@@ -130,7 +137,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
|| node_label.clone_index().is_some()
|| new_label.clone_index().is_some()
{
- return self.split_node(new_label, node, edge_index);
+ return self.split_node(new_label, node, edge_index, completingp);
}
// replace the label directly
@@ -151,6 +158,11 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
.ok_or(Error::NodeNoLabel(parent))?
.label();
+ if get_rule_label(parent_label).is_none() {
+ self.print_viz("dbg forest.gv").unwrap();
+ panic!("assumption fails");
+ }
+
assert!(get_rule_label(parent_label).is_some());
assert_eq!(builder.degree(parent)?, 1);
@@ -207,12 +219,13 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
new_label: ForestLabel<GrammarLabel>,
mut node: usize,
edge_index: usize,
+ completingp: bool,
) -> Result<usize, Error> {
let end = new_label.label().end();
let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph);
- let mut new_node = builder.add_vertex(new_label);
+ let new_node = builder.add_vertex(new_label);
let new_packed = if new_label.clone_index().is_some() {
let packed = builder
@@ -261,7 +274,13 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
.label();
assert!(get_rule_label(parent_label).is_some());
- assert_eq!(self.degree(parent.node())?, 1);
+
+ if self.degree(parent.node())? != 1 {
+ dbg!(parent);
+ self.print_viz("dbg forest.gv").unwrap();
+
+ panic!("assumption fails");
+ }
if let Some(pos) = end {
parent_label.set_end(pos);
@@ -276,11 +295,11 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
let new_parent = builder.add_vertex(parent_label);
if let Some(packed) = new_packed {
- new_node = packed;
+ builder.add_edge(new_parent, packed, new_label)?;
+ } else {
+ builder.add_edge(new_parent, new_node, new_label)?;
}
- builder.add_edge(new_parent, new_node, new_label)?;
-
result.extend(
self.parents_of(parent.node())?
.map(|parent_parent| (parent_parent, new_parent)),
@@ -291,21 +310,48 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
};
for (parent, new_child) in parents {
- if self.has_same_children_until(
- parent.node(),
- parent.node(),
- parent.edge(),
- new_child,
- )? {
- continue;
- }
+ if !completingp {
+ if self.has_same_children_until(
+ parent.node(),
+ parent.node(),
+ parent.edge(),
+ new_child,
+ )? {
+ continue;
+ }
- // we don't add a child to parent.edge() here.
- let cloned = self.clone_node(parent.node(), parent.edge(), false)?;
+ // we don't add a child to parent.edge() here.
+ let cloned = self.clone_node(parent.node(), parent.edge(), false)?;
- let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph);
+ let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph);
- builder.add_edge(cloned, new_child, new_label)?;
+ builder.add_edge(cloned, new_child, new_label)?;
+ } else {
+ if self.has_same_children_except(
+ parent.node(),
+ parent.node(),
+ parent.edge(),
+ new_child,
+ )? {
+ continue;
+ }
+
+ // we don't add a child to parent.edge() here.
+ let cloned = self.clone_node(parent.node(), parent.edge(), false)?;
+
+ let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph);
+
+ builder.add_edge(cloned, new_child, new_label)?;
+
+ let children_to_add: Vec<_> = builder
+ .children_of(parent.node())?
+ .skip(parent.edge() + 1)
+ .collect();
+
+ for child in children_to_add {
+ builder.add_edge(cloned, child, new_label)?;
+ }
+ }
}
Ok(new_node)
@@ -486,6 +532,85 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
unreachable!("should not examine children of a packed node")
}
}
+
+ /// Detect if a node has a branch that has the same children as
+ /// another node, except for a given index, where it points to another
+ /// given node.
+ ///
+ /// # Clones
+ ///
+ /// If `nodea` is a clone, it checks every clone to see if the
+ /// condition is satisfied for some clone.
+ fn has_same_children_except(
+ &self,
+ nodea: usize,
+ nodeb: usize,
+ edge_index: usize,
+ alternative: usize,
+ ) -> Result<bool, Error> {
+ let labela = self.vertex_label(nodea)?.ok_or(Error::NodeNoLabel(nodea))?;
+ let children_b = self.children_of(nodeb)?;
+
+ if children_b.len() < edge_index + 1 {
+ return Err(Error::IndexOutOfBounds(edge_index, children_b.len() - 1));
+ }
+
+ if labela.is_plain() {
+ let children_a = self.children_of(nodea)?;
+
+ if children_a.len() != children_b.len() {
+ return Ok(false);
+ }
+
+ for (index, (childa, childb)) in
+ children_a.zip(children_b).take(edge_index + 1).enumerate()
+ {
+ if index != edge_index {
+ if childa != childb {
+ return Ok(false);
+ }
+ } else if childa != alternative {
+ return Ok(false);
+ }
+ }
+
+ Ok(true)
+ } else if labela.clone_index().is_some() {
+ let mut parentsa = self.parents_of(nodea)?;
+
+ assert_eq!(parentsa.len(), 1);
+
+ let parenta = parentsa.next().unwrap().node();
+
+ 'branch_loop: for branch in self.children_of(parenta)? {
+ let children_a = self.children_of(branch)?;
+ let children_b = children_b.clone();
+
+ if children_a.len() < children_b.len() {
+ continue 'branch_loop;
+ }
+
+ for (index, (childa, childb)) in
+ children_a.zip(children_b).take(edge_index + 1).enumerate()
+ {
+ if index != edge_index {
+ if childa != childb {
+ continue 'branch_loop;
+ }
+ } else if childa != alternative {
+ continue 'branch_loop;
+ }
+ }
+
+ return Ok(true);
+ }
+
+ Ok(false)
+ } else {
+ // a packed node; this should not happen
+ unreachable!("should not examine children of a packed node")
+ }
+ }
}
#[cfg(test)]
@@ -688,7 +813,7 @@ mod test_splone {
fn test() -> Result<(), Box<dyn std::error::Error>> {
let mut test_forest = create_test_forest()?;
- test_forest.splone(6, Some(3), 1)?;
+ test_forest.splone(6, Some(3), 1, false)?;
let answer = splone_6_3_1()?;
@@ -698,7 +823,7 @@ mod test_splone {
panic!("splone(6, Some(3), 1) produced wrong forest");
}
- test_forest.splone(6, Some(3), 1)?;
+ test_forest.splone(6, Some(3), 1, false)?;
if test_forest != answer {
test_forest.print_viz("sploned forest.gv")?;
@@ -706,7 +831,7 @@ mod test_splone {
panic!("repeated splone(6, Some(3), 1) produced wrong forest");
}
- test_forest.splone(6, Some(2), 0)?;
+ test_forest.splone(6, Some(2), 0, false)?;
let answer = splone_6_2_0()?;
@@ -716,7 +841,7 @@ mod test_splone {
panic!("splone(6, Some(2), 0) produced wrong forest");
}
- test_forest.splone(6, Some(2), 0)?;
+ test_forest.splone(6, Some(2), 0, false)?;
if test_forest != answer {
test_forest.print_viz("sploned forest.gv")?;
@@ -724,7 +849,7 @@ mod test_splone {
panic!("repeated splone(6, Some(2), 0) produced wrong forest");
}
- test_forest.splone(6, None, 0)?;
+ test_forest.splone(6, None, 0, false)?;
let answer = splone_6_n_0()?;
@@ -734,7 +859,7 @@ mod test_splone {
panic!("splone(6, None, 0) produced wrong forest");
}
- test_forest.splone(14, None, 0)?;
+ test_forest.splone(14, None, 0, false)?;
if test_forest != answer {
test_forest.print_viz("sploned forest.gv")?;
@@ -742,7 +867,7 @@ mod test_splone {
panic!("repeated splone(6, None, 0) produced wrong forest");
}
- test_forest.splone(4, Some(3), 0)?;
+ test_forest.splone(4, Some(3), 0, false)?;
let answer = splone_4_3_0()?;
@@ -752,7 +877,7 @@ mod test_splone {
panic!("splone(4, Some(3), 0) produced wrong forest");
}
- test_forest.splone(4, Some(3), 0)?;
+ test_forest.splone(4, Some(3), 0, false)?;
if test_forest != answer {
test_forest.print_viz("sploned forest.gv")?;
@@ -760,8 +885,8 @@ mod test_splone {
panic!("repeated splone(4, Some(3), 0) produced wrong forest");
}
- test_forest.splone(17, Some(3), 0)?;
- test_forest.splone(15, Some(3), 0)?;
+ test_forest.splone(17, Some(3), 0, false)?;
+ test_forest.splone(15, Some(3), 0, false)?;
let answer = splone_17_3_0_15_3_0()?;
@@ -771,8 +896,8 @@ mod test_splone {
panic!("splone(17, Some(3), 0) - splone(15, Some(3), 0) produced wrong forest");
}
- test_forest.splone(17, Some(3), 0)?;
- test_forest.splone(15, Some(3), 0)?;
+ test_forest.splone(17, Some(3), 0, false)?;
+ test_forest.splone(15, Some(3), 0, false)?;
if test_forest != answer {
test_forest.print_viz("sploned forest.gv")?;
diff --git a/chain/src/item/forest-format.org b/chain/src/item/forest-format.org
new file mode 100644
index 0000000..eb0f150
--- /dev/null
+++ b/chain/src/item/forest-format.org
@@ -0,0 +1,10 @@
+#+TITLE: Format of forests
+#+DATE: [2023-02-13 Lun 22:05]
+#+AUTHOR: Durand
+
+In this document I try to explain the format of the forests returned
+by the parser generator.
+
+* Basic terms
+
+# TODO: Explain
diff --git a/chain/src/item/genins.rs b/chain/src/item/genins.rs
index 4c51d9a..feb45c6 100644
--- a/chain/src/item/genins.rs
+++ b/chain/src/item/genins.rs
@@ -7,19 +7,24 @@
//! semirings later on.
use super::*;
-use crate::{atom::DefaultAtom, default::Error, item::default::DefaultForest, Edge};
+use crate::{
+ atom::{Atom, DefaultAtom},
+ default::Error,
+ item::default::DefaultForest,
+ Edge,
+};
use grammar::{Error as GrammarError, GrammarLabel, GrammarLabelType, TNT};
-use graph::Graph;
+#[allow(unused_imports)]
+use graph::{builder::BuilderMut, labelled::binary::PLGBuilderMut, Graph, RedirectGraph};
use core::borrow::Borrow;
-use std::collections::HashSet as Set;
/// Convert an error telling us that an index is out of bounds.
///
/// # Panics
///
/// The function panics if the error is not of the expected kind.
-fn index_out_of_bounds_conversion(ge: GrammarError) -> Error {
+pub(crate) fn index_out_of_bounds_conversion(ge: GrammarError) -> Error {
match ge {
GrammarError::IndexOutOfBounds(index, bound) => Error::IndexOutOfBounds(index, bound),
_ => Error::Invalid,
@@ -105,11 +110,6 @@ pub fn virtual_generate_fragment(
Some(TNT::Ter(ter)) if ter == t)
{
for child in child_iter {
- // #[allow(unused_must_use)]
- // {
- // dbg!(non_start, child, atom.query_expansion(non_start, child));
- // }
-
let line: Vec<GrammarLabelType> = atom
.query_expansion(non_start, child)
.map_err(index_out_of_bounds_conversion)?
@@ -124,7 +124,15 @@ pub fn virtual_generate_fragment(
if result.is_empty() {
result = generate_fragment(line, pos)?;
} else {
- result.plant(0, generate_fragment(line, pos)?, false)?;
+ let mut new_fragment = generate_fragment(line, pos)?;
+
+ new_fragment.remove_node(0)?;
+
+ new_fragment.set_root(1)?;
+
+ let cloned = result.clone_node(0, 0, false)?;
+
+ result.plant(cloned, new_fragment, false)?;
}
}
}
@@ -133,90 +141,313 @@ pub fn virtual_generate_fragment(
Ok(result)
}
+// TODO: Examine `insert_item` again.
+
impl DefaultForest<ForestLabel<GrammarLabel>> {
/// Insert an item derivation forest into a recording forest.
///
/// We need the help of other things just for finding the correct
/// places to insert these item fragments.
- pub fn insert_item(
+ pub(crate) fn insert_item(
&mut self,
label: Edge,
fragment: impl Borrow<DefaultForest<ForestLabel<GrammarLabel>>>,
- atom_child_iter: impl Iterator<Item = usize> + ExactSizeIterator,
+ atom_child_iter: impl Iterator<Item = usize> + ExactSizeIterator + Clone,
atom: &DefaultAtom,
- ) -> Result<PaSe, Error> {
- let fragment = fragment.borrow();
+ ) -> Result<PaVi, Error> {
+ let root = if let Some(root) = self.root() {
+ root
+ } else {
+ panic!("the forest must be non-empty when we insert items");
+ };
+
+ let pavi = label.forest_source();
- let pase = label.forest_source();
+ let true_source = label.true_source();
+
+ let fragment = fragment.borrow();
let fragment_root = if let Some(root) = fragment.root() {
root
} else {
- return Err(Error::EmptyItem);
+ unreachable!("empty item");
};
- let pos = fragment
+ let fragment_root_label = fragment
.vertex_label(fragment_root)?
- .ok_or(Error::NodeNoLabel(fragment_root))?
- .label()
- .start();
+ .ok_or(Error::NodeNoLabel(fragment_root))?;
+
+ let pos = fragment_root_label.label().start();
+
+ if pavi != true_source {
+ dbg!(label, pos);
+
+ // Completing from true_source up to the pavi
+ let true_source_node = match true_source {
+ PaVi::Parent(node, edge) => {
+ let nth_child = self.nth_child(node, edge)?;
+ let nth_child_label = self
+ .vertex_label(nth_child)?
+ .ok_or(Error::NodeNoLabel(nth_child))?;
+
+ let nth_child_degree = self.degree(nth_child)?;
+ let nth_child_last = if nth_child_degree > 0 {
+ nth_child_degree - 1
+ } else {
+ 0
+ };
+
+ if matches!(nth_child_label.label().label().tnt(), Some(TNT::Non(_)))
+ && !nth_child_label.is_packed()
+ {
+ let sploned = self.splone(nth_child, Some(pos), nth_child_last, false)?;
+
+ sploned
+ } else {
+ nth_child
+ }
+ }
+ PaVi::Virtual(nt, t, mut node) => {
+ let node_label_start = self
+ .vertex_label(node)?
+ .ok_or(Error::NodeNoLabel(node))?
+ .label()
+ .start();
+
+ let reduction_fragment = atom.generate_virtual_frags(nt, t, None);
+
+ // FIXME: We shall "close" this fragment as well.
+
+ for frag in reduction_fragment.into_iter().flatten() {
+ let mut frag = frag.clone();
+ frag.set_pos(node_label_start)?;
+
+ // NOTE: The function `plant_if_needed`
+ // assumes that we want to plant the fragment
+ // as the first child of the node. This
+ // assumption holds in this case, but not in
+ // general.
+
+ node = self.plant_if_needed(node, frag)?;
+ }
+
+ node
+ }
+ PaVi::Empty => {
+ dbg!();
+ root
+ }
+ };
+
+ let true_source_pos = self
+ .vertex_label(true_source_node)?
+ .ok_or(Error::NodeNoLabel(true_source_node))?
+ .label()
+ .start();
+
+ let top_node = match pavi {
+ PaVi::Parent(node, edge) => self.nth_child(node, edge)?,
+ PaVi::Virtual(_nt, _t, _node) => {
+ dbg!(label);
+
+ self.print_viz("dbg forest.gv").unwrap();
+ fragment.print_viz("dbg fragment.gv").unwrap();
+
+ unreachable!("I do not think this is possible")
+ }
+ PaVi::Empty => {
+ unreachable!("The empty segment should not need to be reduced")
+ }
+ };
+
+ let mut stack = vec![vec![(top_node, 0)]];
+ let mut result_stack = Vec::new();
+
+ while let Some(mut top_stack) = stack.pop() {
+ let (node, _) = top_stack.pop().unwrap();
+
+ let node_label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?;
+
+ if node_label.is_packed() {
+ for node in self.children_of(node)? {
+ let search_child = self.position_search(node, true_source_pos)?;
+
+ if let Some((child, index)) = search_child {
+ let mut top_stack = top_stack.clone();
+ // Fix the previous element
+ top_stack.push((node, index));
+
+ if child == true_source_node {
+ result_stack.push(top_stack);
+ } else {
+ top_stack.push((child, 0));
+
+ stack.push(top_stack);
+ }
+ }
+ }
+ } else {
+ let search_child = self.position_search(node, true_source_pos)?;
+
+ if let Some((child, index)) = search_child {
+ top_stack.push((node, index));
+
+ if child == true_source_node {
+ result_stack.push(top_stack);
+ } else {
+ top_stack.push((child, 0));
+
+ stack.push(top_stack);
+ }
+ }
+ }
+ }
+
+ // FIXME: We have to change the pavi as well, otherwise we
+ // are going to use the wrong branch for planting later.
+
+ for stack in result_stack {
+ // dbg!(&stack);
+ // self.print_viz("dbg forest.gv").unwrap();
+ // panic!("test");
+
+ let mut first_time = true;
+
+ for (node, index) in stack.into_iter().rev() {
+ if matches!(
+ self.vertex_label(node)?
+ .ok_or(Error::NodeNoLabel(node))?
+ .label()
+ .label()
+ .tnt(),
+ Some(TNT::Non(_))
+ ) {
+ let sploned = self.splone(node, Some(pos), index, false)?;
+
+ if !first_time {
+ let last_index = self.degree(sploned)? - 1;
+
+ let last_child = self.nth_child(sploned, last_index)?;
+
+ let last_child_label = self
+ .vertex_label(last_child)?
+ .ok_or(Error::NodeNoLabel(last_child))?
+ .label();
+
+ if last_child_label.end() != Some(pos) {
+ let closed_label = GrammarLabel::new_closed(
+ last_child_label.label(),
+ last_child_label.start(),
+ pos,
+ );
+
+ let closed_label_id = self
+ .query_label(closed_label.into())
+ .expect("last child closed label not found");
+
+ let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph);
- // Ensure the last node in the PaSe is a terminal or a
+ builder.redirect(sploned, last_index, closed_label_id)?;
+ }
+ }
+
+ first_time = false;
+ }
+ }
+ }
+ }
+
+ // Ensure the last node in the PaVi is a terminal or a
// non-terminal node, as an extra safety guard during
// development.
#[cfg(debug_assertions)]
{
- if let Some(parent) = pase.parent() {
- assert!(matches!(
- self.vertex_label(self.nth_child(parent.node(), parent.edge())?),
- Ok(Some(label))
- if label.label().label().tnt().is_some()));
- } else if let Some((_, leaf)) = pase.segment() {
- assert!(matches!(
- self.vertex_label(leaf),
- Ok(Some(label))
- if label.label().label().tnt().is_some()));
- } else {
- unreachable!("a pase is neither a parent nor a segment");
+ match pavi {
+ PaVi::Parent(node, edge) => {
+ assert!(matches!(
+ self.vertex_label(self.nth_child(node, edge)?),
+ Ok(Some(label))
+ if label.label().label().tnt().is_some()));
+ }
+ PaVi::Virtual(nt, t, node) => {
+ if !matches!(
+ self.vertex_label(node),
+ Ok(Some(label))
+ if matches!(
+ label.label().label().tnt(),
+ Some(TNT::Non(_))))
+ {
+ dbg!(node, self.vertex_label(node)?, pavi);
+
+ self.print_viz("dbg forest.gv").unwrap();
+
+ panic!("assumption fails");
+ }
+
+ if nt >= atom.non_num() {
+ dbg!();
+ return Err(Error::IndexOutOfBounds(nt, atom.non_num()));
+ }
+
+ if t >= atom.ter_num() {
+ dbg!();
+ return Err(Error::IndexOutOfBounds(t, atom.ter_num()));
+ }
+ }
+ PaVi::Empty => {}
}
}
- let mut is_empty_segment = false;
+ let is_empty_segment = pavi.is_empty();
let mut parents: Vec<Parent> = {
let mut result = Vec::new();
- if let Some(parent) = pase.parent() {
- result.push(parent);
- } else {
- let (root, leaf) = pase.segment().unwrap();
- let mut seen_nodes = Set::new();
+ match pavi {
+ PaVi::Parent(node, edge) => {
+ result.push(Parent::new(node, edge));
+ }
+ PaVi::Virtual(nt, t, node) => {
+ let node_label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?;
- let mut stack = if root == leaf {
- // an empty segment means the root
- is_empty_segment = true;
+ for atom_child in atom_child_iter.clone() {
+ for rule in atom.trace(nt, t, atom_child).into_iter().flatten() {
+ let virtual_frag = atom.generate_virtual_frags(nt, t, Some(rule));
- result.push(Parent::new(root, 0));
- vec![]
- } else {
- vec![root]
- };
+ if let Some(frag) = virtual_frag {
+ let mut frag = (*frag.get(0).unwrap()).clone();
- while let Some(top) = stack.pop() {
- if seen_nodes.contains(&top) {
- continue;
- }
+ frag.set_pos(node_label.label().start())?;
- seen_nodes.insert(top);
+ let frag_nodes_len = frag.nodes_len();
- for (index, child) in self.children_of(top)?.enumerate() {
- if child == leaf {
- result.push(Parent::new(top, index));
- } else {
- stack.push(child);
+ assert!(frag_nodes_len > 1);
+
+ let last_but_one_label = frag
+ .vertex_label(frag_nodes_len - 2)?
+ .ok_or(Error::NodeNoLabel(frag_nodes_len - 2))?;
+
+ // NOTE: The function
+ // `plant_if_needed` assumes that we
+ // want to plant the fragment as the
+ // first child of the node. This
+ // assumption holds in this case, but
+ // not in general.
+
+ self.plant_if_needed(node, frag)?;
+
+ let rule_label_pos = self
+ .query_label(last_but_one_label)
+ .expect("the forest was wrongly planted");
+
+ result.push(Parent::new(rule_label_pos, 0));
+ }
}
}
}
+ PaVi::Empty => {
+ result.push(Parent::new(root, 0));
+ }
}
result
@@ -251,6 +482,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
for atom_child in atom_child_iter {
// dbg!(label.label(), atom_child);
+
// Find reduction information.
let reduction_info = atom
.query_reduction(label.label(), atom_child)
@@ -272,7 +504,8 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
.label(),
GrammarLabelType::TNT(TNT::Non(nt)) if nt == *reduction_nt
) {
- let sploned_node = self.splone(node.node(), Some(pos), node.edge())?;
+ let sploned_node =
+ self.splone(node.node(), Some(pos), node.edge(), false)?;
node_label = self
.vertex_label(sploned_node)?
@@ -282,16 +515,15 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
let mut parent_iter = self.parents_of(sploned_node)?;
#[cfg(debug_assertions)]
- {
- assert_eq!(parent_iter.len(), 1);
-
- assert!(self
- .vertex_label(sploned_node)?
- .ok_or(Error::NodeNoLabel(sploned_node))?
- .is_packed());
- }
+ assert_eq!(parent_iter.len(), 1);
node = parent_iter.next().unwrap();
+
+ #[cfg(debug_assertions)]
+ assert!(self
+ .vertex_label(node.node())?
+ .ok_or(Error::NodeNoLabel(node.node()))?
+ .is_packed());
} else {
node = Parent::new(sploned_node, node.edge());
}
@@ -337,19 +569,29 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
atom_child,
parents,
reduction_info,
- atom.query_reduction(label.label(), atom_child).unwrap()
+ atom.query_reduction(label.label(), atom_child).unwrap(),
+ is_empty_segment,
+ atom.trace(0, 3, atom_child)
+ .into_iter()
+ .flatten()
+ .collect::<Vec<_>>(),
);
self.print_viz("dbg forest.gv").unwrap();
#[cfg(test)]
- genins_test::print_labels(atom, self.borrow()).unwrap();
+ crate::item::default::print_labels(atom, self.borrow()).unwrap();
return Err(Error::CannotPlant);
}
- for parent in stack.into_iter() {
- let sploned_node = self.splone(parent.node(), None, parent.edge())?;
+ if pos == 4 && matches!(true_source, PaVi::Virtual(_, _, _)) {
+ dbg!(&stack, reduction_info, true_source, pavi);
+ self.print_viz(&format!("pos4ib.gv")).unwrap();
+ }
+
+ for parent in stack {
+ let sploned_node = self.splone(parent.node(), None, parent.edge(), false)?;
self.plant(sploned_node, fragment, non_empty)?;
@@ -357,24 +599,22 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
}
}
- // If the iterator is empty, just plant at the specified
- // child.
+ // If the iterator is empty, assert the fragment has length
+ // one, and do not plant anything.
if !non_empty {
- for parent in parents.into_iter() {
- let nth_child = self.nth_child(parent.node(), parent.edge())?;
-
- self.plant(nth_child, fragment, non_empty)?;
-
- non_empty = true;
- }
+ assert_eq!(fragment.nodes_len(), 1);
}
let result = if fragment.nodes_len() == 2 {
- let root_label = fragment.vertex_label(0)?.unwrap();
- let leaf_label = fragment.vertex_label(1)?.unwrap();
+ let root_label = fragment_root_label;
+ let leaf_label = fragment
+ .vertex_label(1 - fragment_root)?
+ .ok_or(Error::NodeNoLabel(1 - fragment_root))?;
// it has been planted, so should be safe.
- let node = self.query_label(root_label).unwrap();
+ let node = self
+ .query_label(root_label)
+ .expect("root label was not found");
let edge = {
let mut result = None;
@@ -394,16 +634,52 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
}
};
// dbg!(node, edge, root_label, leaf_label);
- PaSe::Parent(node, edge)
+ PaVi::Parent(node, edge)
} else {
- let root_label = fragment.vertex_label(0)?.unwrap();
- let leaf_label = fragment.vertex_label(fragment.nodes_len() - 1)?.unwrap();
- // dbg!(root_label, leaf_label);
-
- PaSe::Segment(
- self.query_label(root_label).unwrap(),
- self.query_label(leaf_label).unwrap(),
- )
+ assert_eq!(
+ fragment.nodes_len(),
+ 1,
+ "a virtual fragment should consist of a single terminal node."
+ );
+
+ let root_label = fragment_root_label;
+
+ let pavi_parent = pavi.parent().expect(
+ "When we insert a virtual fragment, the forest_source of
+ the label must be a parent.",
+ );
+
+ let nth_child = self.nth_child(pavi_parent.node(), pavi_parent.edge())?;
+
+ let nth_child_label = self
+ .vertex_label(nth_child)?
+ .ok_or(Error::NodeNoLabel(nth_child))?
+ .label()
+ .label();
+
+ let error_str = "When we insert a virtual fragment, the \
+ forest source of the label must point to \
+ a non-terminal node";
+
+ let nt = match nth_child_label.tnt().expect(error_str) {
+ TNT::Non(nt) => nt,
+ _ => {
+ dbg!(nth_child, nth_child_label);
+
+ panic!("{error_str}");
+ }
+ };
+
+ let t = match root_label.label().label().tnt().unwrap() {
+ TNT::Ter(t) => t,
+ _ => {
+ dbg!(root_label);
+
+ panic!("a virtual fragment should consist of a single terminal node")
+ }
+ };
+
+ PaVi::Virtual(nt, t, nth_child)
};
Ok(result)
@@ -413,40 +689,10 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
#[cfg(test)]
mod genins_test {
use super::*;
- #[allow(unused_imports)]
- use crate::{default::DefaultChain, item::default::leaf, Chain};
+ use crate::item::default::leaf;
use grammar::test_grammar_helper::*;
- pub fn print_labels(
- atom: impl Borrow<DefaultAtom>,
- forest: impl Borrow<DefaultForest<ForestLabel<GrammarLabel>>>,
- ) -> Result<(), Box<dyn std::error::Error>> {
- let forest = forest.borrow();
- let atom = atom.borrow();
-
- for node in forest.nodes() {
- let label = forest.vertex_label(node)?;
-
- if let Some(label) = label {
- let label = label.label.label();
-
- match label {
- GrammarLabelType::TNT(tnt) => {
- println!("{tnt} = {}", atom.name_of_tnt(tnt)?);
- }
- GrammarLabelType::Rule(pos) => {
- println!("pos {pos} = {}", atom.rule_pos_string(pos)?);
- }
- }
- } else {
- return Err(Error::NodeNoLabel(node).into());
- }
- }
-
- Ok(())
- }
-
#[test]
fn test_generate_fragment() -> Result<(), Box<dyn std::error::Error>> {
let grammar = new_notes_grammar()?;
@@ -497,7 +743,7 @@ mod genins_test {
0,
)?;
- print_labels(&atom, &virtual_fragment)?;
+ crate::item::default::print_labels(&atom, &virtual_fragment)?;
assert_eq!(virtual_fragment, test_fragment);
@@ -511,7 +757,7 @@ mod genins_test {
let test_fragment =
generate_fragment([TNT::Non(3).into(), 38.into(), TNT::Ter(2).into()], 1)?;
- print_labels(&atom, &virtual_fragment)?;
+ crate::item::default::print_labels(&atom, &virtual_fragment)?;
assert_eq!(virtual_fragment, test_fragment);
diff --git a/chain/src/item/mod.rs b/chain/src/item/mod.rs
index 284d640..39d04c7 100644
--- a/chain/src/item/mod.rs
+++ b/chain/src/item/mod.rs
@@ -9,43 +9,64 @@ use graph::{error::Error as GError, GraphLabel, LabelGraph, Parent, ParentsGraph
use core::borrow::Borrow;
-/// A parent or a segment.
+/// A parent or a virtual segment.
+///
+/// # Parent
///
/// A parent is a node with an edge index, which represents a certain
/// edge.
///
-/// A segment represents every edge from the root node to the single
-/// terminating node.
-#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
-pub enum PaSe {
+/// # Virtual Segment
+///
+/// A virtual segment represents an expansion from a non-terminal by a
+/// terminal. We do not directly add this segment when we encounter
+/// this expansion at the start because this expansion might contain
+/// multiple derivations some of which will not be used.
+///
+/// If we add the expansion immediately when we encounter it, we have
+/// to later discern and delete those unwanted derivations. This is
+/// asking for trouble, in my experiences.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Default)]
+pub enum PaVi {
/// An edge from a node, as the n-th child.
Parent(usize, usize),
- /// A segment from a root to the single terminating node.
- Segment(usize, usize),
+ /// A virtual segment from a non-terminal by a terminal, rooted at
+ /// a node.
+ ///
+ /// # Tuple elements
+ ///
+ /// The contained tuple is of the form (nt, t, node), which means
+ /// a virtually added node at the `node` representing the
+ /// expansion from the non-terminal `nt` by the terminal `t`.
+ Virtual(usize, usize, usize),
+ /// This is an empty segment that represents the root node. This
+ /// is a special case for the unit state of the chain-rule
+ /// machine.
+ #[default]
+ Empty,
}
-impl From<Parent> for PaSe {
+impl From<Parent> for PaVi {
fn from(value: Parent) -> Self {
Self::Parent(value.node(), value.edge())
}
}
-impl Default for PaSe {
- fn default() -> Self {
- Self::Segment(0, 0)
- }
-}
-
-impl core::fmt::Display for PaSe {
+impl core::fmt::Display for PaVi {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Parent(node, edge) => write!(f, "the {edge}-th edge from {node}"),
- Self::Segment(root, leaf) => write!(f, "a segment from {root} to {leaf}"),
+ Self::Virtual(nt, t, node) => write!(
+ f,
+ "a virtual node for non-terminal {nt} and terminal {t} at node {node}"
+ ),
+ Self::Empty => write!(f, "empty segment at root"),
}
}
}
-impl PaSe {
+impl PaVi {
+ /// Get the Parent variant.
fn parent(self) -> Option<Parent> {
if let Self::Parent(node, edge) = self {
Some(Parent::new(node, edge))
@@ -54,12 +75,29 @@ impl PaSe {
}
}
- fn segment(self) -> Option<(usize, usize)> {
- if let Self::Segment(root, leaf) = self {
- Some((root, leaf))
- } else {
- None
- }
+ // /// Get the "Virtual" variant.
+ // ///
+ // /// # Name
+ // ///
+ // /// We cannot use the name "virtual" since it is a reserved
+ // /// keyword in Rust, so we use its French name.
+ // ///
+ // /// # Tuple elements
+ // ///
+ // /// The returned tuple is of the form (nt, t, node), which means a
+ // /// virtually added node at the `node` representing the expansion
+ // /// from the non-terminal `nt` by the terminal `t`.
+ // fn virtuel(self) -> Option<(usize, usize, usize)> {
+ // if let Self::Virtual(nt, t, node) = self {
+ // Some((nt, t, node))
+ // } else {
+ // None
+ // }
+ // }
+
+ /// Is this an empty segment?
+ fn is_empty(self) -> bool {
+ self == Self::Empty
}
}
@@ -101,7 +139,6 @@ impl<T: GraphLabel> core::fmt::Display for ForestLabel<T> {
}
/// The type of erros for converting forest labels.
-#[non_exhaustive]
#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
pub enum ForestLabelError {
/// Try to pack a cloned node.
diff --git a/chain/src/lib.rs b/chain/src/lib.rs
index a7740c2..d7fc519 100644
--- a/chain/src/lib.rs
+++ b/chain/src/lib.rs
@@ -16,7 +16,7 @@ use graph::{error::Error as GError, LabelExtGraph};
use item::default::Error as ForestError;
-use item::PaSe;
+use item::PaVi;
/// An edge in the Chain-Rule machine.
#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
@@ -24,18 +24,25 @@ pub struct Edge {
/// The position in the atomic languages.
label: usize,
/// The source of the associated forest edge.
- forest_source: PaSe,
+ forest_source: PaVi,
+ /// The bottom source on which we shall perform reduction.
+ ///
+ /// If this equals `forest_source`, no extra reduction needs to be
+ /// done.
+ true_source: PaVi,
/// Whether or not this edge is "accepting".
accepting: bool,
}
impl Edge {
/// Construct a new edge.
- pub fn new(label: usize, forest_source: PaSe, accepting: bool) -> Self {
+ pub fn new(label: usize, forest_source: PaVi, accepting: bool) -> Self {
+ let true_source = forest_source;
Self {
label,
forest_source,
accepting,
+ true_source,
}
}
@@ -50,9 +57,21 @@ impl Edge {
}
/// Return the associated forest edge of the edge.
- pub fn forest_source(&self) -> PaSe {
+ pub fn forest_source(&self) -> PaVi {
self.forest_source
}
+
+ /// Return the associated bottom edge of the edge from which
+ /// onwards we shall perform the reduction.
+ pub fn set_true_source(&mut self, true_source: PaVi) {
+ self.true_source = true_source;
+ }
+
+ /// Return the associated bottom edge of the edge from which
+ /// onwards we shall perform the reduction.
+ pub fn true_source(&self) -> PaVi {
+ self.true_source
+ }
}
impl core::fmt::Display for Edge {
@@ -137,7 +156,7 @@ pub enum TwoLayers {
/// the source of the associated forest edge of the second layer,
/// and the third is a list of edges, which are the common first
/// layers.
- Two(usize, PaSe, bool, Vec<(Roi, usize)>),
+ Two(usize, PaVi, bool, Vec<(Roi, usize)>),
}
/// The type of a collapsed iterator.
@@ -239,12 +258,15 @@ pub trait Chain: LabelExtGraph<Edge> {
/// An iterator that iterates all layers that need to be merged.
type DerIter: Iterator<Item = TwoLayers>;
+ // FIXME: Add a parameter to control whether to manipulate the
+ // forests or not.
+
/// Take the derivative by a terminal `t` at position `pos`.
fn derive(&mut self, t: usize, pos: usize) -> Result<Self::DerIter, Self::Error>;
/// Take the union of all derivatives.
fn union(&mut self, der_iter: Self::DerIter) -> Result<Vec<(Roi, usize)>, Self::Error> {
- // REVIEW: Find a way to avoid allocations.
+ // REVIEW: Think about the possibilities to avoid allocations.
Collapsed::<_, Self>::collapse(der_iter, self)
.collect::<Result<Vec<(Roi, usize)>, Self::Error>>()
.map(|mut v| {
@@ -276,6 +298,10 @@ pub trait Chain: LabelExtGraph<Edge> {
Ok(())
}
+
+ /// Signal to the parser that the end of the input is reached, so
+ /// that the parser knows to generate suitable forests.
+ fn end_of_input(&mut self) -> Result<(), Self::Error>;
}
pub mod default;
diff --git a/chain/src/plan.org b/chain/src/plan.org
index 02e14c9..520ba8f 100644
--- a/chain/src/plan.org
+++ b/chain/src/plan.org
@@ -2,18 +2,17 @@
#+AUTHOR: Durand
#+DATE: <2022-11-18 Ven 19:57>
-* Things to do [6/9]
+* Things to do [9/9]
- [X] Implement builders for graphs
- [X] Find sets of the first terminals for each non-terminal, in the
grammar module.
-- [-] Collect various grammars for testing. [5/6]
+- [X] Collect various grammars for testing. [5/5]
+ [X] One simple grammar
+ [X] Notes
+ [X] Parentheses
+ [X] Pathelogical left-recursive
+ [X] Pathelogical right-recursive
- + [ ] Pathelogically ambiguous
# More should be added
- [X] NFA [4/4]
+ [X] Add regular expression into NFA.
@@ -79,14 +78,13 @@
lack of this step might be the root cause of the failure of the
previous version of this project.
+ [X] Test atoms
-- [-] Implement semiring. [2/5]
+- [X] Implement semiring. [4/4]
+ [X] Define the trait.
+ [X] Define items and rules for the chain-rule parser, as an
item-based description.
- + [ ] Implement the boolean semiring.
- + [ ] Implement the natural number semiring.
- + [ ] Implement the free semiring.
-- [-] Implement languages. [5/6]
+ + [X] Implement the natural number semiring.
+ + [X] Implement the free semiring.
+- [X] Implement languages. [6/6]
+ [X] First define a trait with the expected behaviours.
+ [X] Then implement them as doubly labelled graphs.
+ [X] Each edge in the chain-rule machine needs to be labelled also
@@ -94,7 +92,7 @@
of those "plugs"!
+ [X] Then implement finding derivatives by use of the chain rule.
+ [X] Handle Semirings
- + [-] Tests
+ + [X] Tests
- [X] Miscellaneous [1/1]
+ [X] Implement a function for NFA that checks if a given predicate
is satisfied for each edge label.