From fbaa420ed550e9c3e7cdc09d4a8ec22bfbd782a6 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Mon, 27 Feb 2023 12:36:41 +0800 Subject: before a major refactor I decide to adopt a new approach of recording and updating item derivation forests. Since this affects a lot of things, I decide to commit before the refactor, so that I can create a branch for that refactor. --- chain/src/atom/default.rs | 226 ++++++++++- chain/src/atom/mod.rs | 28 ++ chain/src/default.rs | 405 +++++++++++++++----- chain/src/item/default/mod.rs | 469 +++++++++++++++++++++-- chain/src/item/default/splone.rs | 187 ++++++++-- chain/src/item/forest-format.org | 10 + chain/src/item/genins.rs | 494 ++++++++++++++++++------- chain/src/item/mod.rs | 85 +++-- chain/src/lib.rs | 38 +- chain/src/plan.org | 16 +- grammar/src/label.rs | 6 + grammar/src/lib.rs | 20 + grammar/src/tests/test_grammar_left_closure.rs | 55 --- graph/src/labelled/binary.rs | 4 + nfa/Cargo.toml | 3 +- nfa/src/default/regex.rs | 3 +- nfa/src/lib.rs | 24 -- semiring/Cargo.toml | 5 + semiring/src/counting.rs | 43 +++ semiring/src/lib.rs | 16 +- 20 files changed, 1718 insertions(+), 419 deletions(-) create mode 100644 chain/src/item/forest-format.org create mode 100644 semiring/src/counting.rs diff --git a/chain/src/atom/default.rs b/chain/src/atom/default.rs index ec53596..a55087a 100644 --- a/chain/src/atom/default.rs +++ b/chain/src/atom/default.rs @@ -2,18 +2,24 @@ //! [`Atom`][super::Atom] trait. use super::*; -use grammar::Grammar; +use grammar::{Grammar, GrammarLabel, GrammarLabelType}; use graph::{error::Error as GraphError, Graph, LabelExtGraph, LabelGraph}; use nfa::{ default::{nfa::DefaultNFA, regex::DefaultRegex}, + error::Error as NFAError, LabelType, NfaLabel, }; use core::fmt::Display; -use std::collections::BTreeMap as Map; +use std::{ + collections::{hash_set::Iter, BTreeMap as Map, HashMap, HashSet}, + iter::Copied, +}; + +use crate::item::{default::DefaultForest, ForestLabel}; /// A virtual node represents the derivative of a non-terminal symbol -/// `S` with respect to a terminal symbol `t`. +/// `s` with respect to a terminal symbol `t`. #[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Ord, PartialOrd)] struct VirtualNode { s: usize, @@ -34,6 +40,33 @@ impl VirtualNode { type VirtualMap = Map; +/// A virtual trace stores the rule positions that are responsible for +/// an edge from the virtual node \[nt\]^s to `target`. +#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Ord, PartialOrd)] +struct VirtualTrace { + nt: usize, + t: usize, + target: usize, +} + +impl Display for VirtualTrace { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "VT[{}]^({}) -> {}", self.nt, self.t, self.target) + } +} + +impl VirtualTrace { + fn new(nt: usize, t: usize, target: usize) -> Self { + Self { nt, t, target } + } +} + +type VirtualTraceMap = Map>; + +type VirtualFrag = DefaultForest>; + +type VirtualFragMap = Map>; + /// The type of atomic languages. #[derive(Debug, Clone, Default)] pub struct DefaultAtom { @@ -43,6 +76,8 @@ pub struct DefaultAtom { // NOTE: This is mostly for printing and debugging regexp: Vec>, virtual_nodes: VirtualMap, + virtual_traces: VirtualTraceMap, + virtual_frags: VirtualFragMap, } impl DefaultAtom { @@ -260,9 +295,9 @@ impl Nfa> for DefaultAtom { #[inline] fn to_nfa( _regexps: &[impl nfa::Regex>>], - _sub_pred: impl Fn(LabelType) -> Result>, nfa::error::Error>, + _sub_pred: impl Fn(LabelType) -> Result>, NFAError>, _default: Option>, - ) -> Result>>, nfa::error::Error> { + ) -> Result>>, NFAError> { // NOTE: We cannot construct an atom from a set of regular // languages alone. So it is appropriate to panic here, if // one tries to do so, for some reason. @@ -270,7 +305,7 @@ impl Nfa> for DefaultAtom { } #[inline] - fn remove_dead(&mut self, reserve: impl FnMut(usize) -> bool) -> Result<(), nfa::error::Error> { + fn remove_dead(&mut self, reserve: impl FnMut(usize) -> bool) -> Result<(), NFAError> { self.nfa.remove_dead(reserve) } @@ -281,7 +316,7 @@ impl Nfa> for DefaultAtom { remove_after_p: bool, transform: impl FnMut(nfa::TwoEdges>) -> LabelType, remove_predicate: impl FnMut(LabelType) -> bool, - ) -> Result<(), nfa::error::Error> { + ) -> Result<(), NFAError> { self.nfa .closure(predicate, remove_after_p, transform, remove_predicate) } @@ -296,8 +331,6 @@ impl DefaultAtom { let mut nfa = grammar.left_closure_to_nfa(®exp)?; - use std::collections::{HashMap, HashSet}; - let accumulators: Vec = { let mut result = Vec::with_capacity(regexp.len() + 1); result.push(0); @@ -388,6 +421,12 @@ impl DefaultAtom { // Now add the virtual nodes. let mut virtual_nodes: VirtualMap = Default::default(); + // Record virtual traces. + let mut virtual_traces: VirtualTraceMap = Default::default(); + + // Record virtual fragments. + let mut virtual_frags: VirtualFragMap = Default::default(); + let nt_num = grammar.non_num(); assert!(nt_num <= accumulators.len()); @@ -403,19 +442,35 @@ impl DefaultAtom { GraphError::IndexOutOfBounds(index, bound) => { GrammarError::IndexOutOfBounds(index, bound) } - _ => unreachable!(), + // This is supposed to be unreachable, but we still + // give it a valid value. + _ => GrammarError::NFAFail(NFAError::Graph(ge)), } } for nt in 0..nt_num { + // This is safe because of the above assertion. + let nt_start = *accumulators.get(nt).unwrap(); + let children: std::collections::HashMap<_, _> = nfa - // This is safe because of the above assertion. - .labels_of(*accumulators.get(nt).unwrap()) + .labels_of(nt_start) .map_err(index_out_of_bounds_conversion)? .map(|(label, target_iter)| (*label, target_iter)) .collect(); - type TerminalsValue = (HashSet<(LabelType, usize, Option>)>, bool); + /// The tuples have the following meanings in order: + /// + /// - `LabelType` => the label for the edge + /// + /// - `usize` => the target of the edge + /// + /// - `Option>` => reduction information + /// + /// - `usize` => the rule position that caused this edge + type TerminalsValue = ( + HashSet<(LabelType, usize, Option>, usize)>, + bool, + ); let mut terminals_map: HashMap = HashMap::new(); @@ -431,9 +486,72 @@ impl DefaultAtom { result }; + let virtual_trace = label.get_moved(); + let mut accepting = false; for child in children_iter { + // add a virtual fragment + + let line: Vec = grammar + .query_expansion(nt_start, child)? + .iter() + .copied() + .flatten() + .flat_map(|(nt, rule)| [(*rule).into(), TNT::Non(*nt).into()]) + .rev() + .chain(std::iter::once(TNT::Ter(t).into())) + .collect(); + + assert!(line.len() > 1); + + // by our construction this must be a rule + let rule = line.get(line.len() - 2).unwrap().rule().unwrap(); + + use crate::default::Error as DError; + + let frag = crate::item::genins::generate_fragment(line, 0).map_err( + |fe: DError| -> GrammarError { + match fe { + DError::IndexOutOfBounds(index, bound) => { + GrammarError::IndexOutOfBounds(index, bound) + } + DError::DuplicateNode(n) => GrammarError::NFAFail( + NFAError::Graph(GraphError::DuplicatedNode(n)), + ), + DError::DuplicateEdge(source, target) => GrammarError::NFAFail( + NFAError::Graph(GraphError::DuplicatedEdge(source, target)), + ), + DError::NodeNoLabel(n) => { + panic!("node {n} has no label!") + } + DError::CannotReserve(_) => unreachable!( + "generate_fragment should not signal this error" + ), + DError::CannotClone(_) => { + unreachable!("we are not cloning") + } + DError::CannotPlant => { + unreachable!("why can we not plant?") + } + DError::SplitPack(_) => { + unreachable!("we not not splitting") + } + DError::InvalidClone(_) => { + unreachable!("we are not cloning") + } + DError::Invalid => { + panic!("a label is wrongly planted?") + } + } + }, + )?; + + virtual_frags + .entry(VirtualNode::new(nt, t)) + .or_insert_with(Default::default) + .insert(rule, frag); + accepting = accepting || *accepting_vec.get(child).ok_or( @@ -462,6 +580,7 @@ impl DefaultAtom { .query_reduction(child, target) .unwrap() .map(|slice| slice.to_vec()), + virtual_trace, ) })); } @@ -470,8 +589,21 @@ impl DefaultAtom { } for (t, (set, accepting)) in terminals_map.into_iter() { + // update virtual traces + + for (_, target, _, pos) in set.iter() { + let trace = VirtualTrace::new(nt, t, *target); + + virtual_traces + .entry(trace) + .or_insert_with(Default::default) + .insert(*pos); + } + + // add a virtual node + let new_index = nfa - .extend(set.iter().map(|(label, target, _)| (*label, *target))) + .extend(set.iter().map(|(label, target, _, _)| (*label, *target))) .map_err(index_out_of_bounds_conversion)?; if accepting_vec.get(new_index).is_none() { @@ -486,7 +618,7 @@ impl DefaultAtom { virtual_nodes.insert(virtual_node, new_index); // update the reduction information - for (_, target, info) in set.into_iter() { + for (_, target, info, _) in set { if let Some(info) = info { if !matches!( grammar.query_reduction(new_index, target)?, @@ -507,8 +639,60 @@ impl DefaultAtom { regexp, virtual_nodes, accepting_vec, + virtual_traces, + virtual_frags, }) } + + /// Generate a vector of virtual fragments for a non-terminal and + /// a terminal. + /// + /// # RULE + /// + /// If one passes `Some(rule)` as the paramter, then this returns + /// only those fragments that begin with the specified rule. + /// + /// On the other hand, if one passes `None`, then this returns + /// only those fragments that can end the non-terminal expansion. + /// + /// # Guarantee + /// + /// It is guaranteed that the 1-th node of each fragment is a rule + /// number. + pub(crate) fn generate_virtual_frags( + &self, + nt: usize, + t: usize, + rule: Option, + ) -> Option> { + let vn = VirtualNode::new(nt, t); + + if let Some(rule) = rule { + self.virtual_frags + .get(&vn) + .and_then(|map| map.get(&rule)) + .map(|f| vec![f]) + } else { + let result: Vec<&VirtualFrag> = self + .virtual_frags + .get(&vn) + .iter() + .copied() + .flatten() + .filter_map(|(rule, frag)| { + self.is_accepting(rule * 2 + 1) + .unwrap_or(false) + .then_some(frag) + }) + .collect(); + + if result.is_empty() { + None + } else { + Some(result) + } + } + } } /// A convenient getter for the map of virtual nodes. @@ -550,4 +734,16 @@ impl Atom for DefaultAtom { self.accepting_vec.len(), )) } + + type Iter<'a> = Copied> + where + Self: 'a; + + fn trace(&self, nt: usize, t: usize, target: usize) -> Option<::Iter<'_>> { + let trace = VirtualTrace::new(nt, t, target); + + self.virtual_traces + .get(&trace) + .map(|set| set.iter().copied()) + } } diff --git a/chain/src/atom/mod.rs b/chain/src/atom/mod.rs index 398edd2..c9dadb2 100644 --- a/chain/src/atom/mod.rs +++ b/chain/src/atom/mod.rs @@ -17,6 +17,16 @@ pub trait Atom: Nfa> + Deref { /// left-linear null closure of `nt` with respect to `t`. fn atom(&self, nt: usize, t: usize) -> Result, GrammarError>; + /// A type that iterates through the rule positions. + type Iter<'a>: Iterator + 'a + where + Self: 'a; + + /// Return an iterator of rule positions responsible for an edge + /// from the virtual node corresponding to the non-terminal `nt` + /// and terminal `t` to `target`. + fn trace(&self, nt: usize, t: usize, target: usize) -> Option<::Iter<'_>>; + /// Return the index of the empty state. fn empty(&self) -> usize; @@ -33,6 +43,9 @@ mod tests { use super::*; use grammar::test_grammar_helper::*; + #[cfg(feature = "test-print-viz")] + use graph::Graph; + #[test] fn atom() -> Result<(), Box> { let grammar = new_notes_grammar()?; @@ -41,6 +54,21 @@ mod tests { println!("atom = {atom}"); + #[cfg(feature = "test-print-viz")] + { + println!("virtual frag for 1, 3: "); + + for (index, frag) in atom + .generate_virtual_frags(1, 3, None) + .iter() + .flatten() + .enumerate() + { + crate::item::default::print_labels(&atom, *frag)?; + frag.print_viz(&format!("frag {index}.gv"))?; + } + } + Ok(()) } } diff --git a/chain/src/default.rs b/chain/src/default.rs index c873de7..08e29ce 100644 --- a/chain/src/default.rs +++ b/chain/src/default.rs @@ -8,14 +8,16 @@ use super::*; use crate::atom::{Atom, DefaultAtom}; use crate::item::{ - default::DefaultForest, generate_fragment, genins::virtual_generate_fragment, Forest, + default::DefaultForest, generate_fragment, genins::index_out_of_bounds_conversion, Forest, ForestLabel, ForestLabelError, }; use core::fmt::Display; -use grammar::{Error as GrammarError, GrammarLabel, GrammarLabelType, TNT}; -use graph::{labelled::DLGBuilder, Builder, DLGraph, Graph, LabelExtGraph, LabelGraph}; +use grammar::{GrammarLabel, GrammarLabelType, START_NONTERMINAL, TNT}; +use graph::{ + labelled::DLGBuilder, Builder, DLGraph, Graph, LabelExtGraph, LabelGraph, ParentsGraph, +}; -use std::collections::{HashMap as Map, TryReserveError}; +use std::collections::{HashMap as Map, HashSet, TryReserveError}; /// The errors related to taking derivatives by chain rule. #[non_exhaustive] @@ -36,10 +38,10 @@ pub enum Error { CannotClone(ForestLabelError), /// Cannot find a suitable node to plant the new forest fragment. CannotPlant, - /// Trying to insert an empty item. - EmptyItem, /// Cannot split a packed node. SplitPack(usize), + /// A cloned node should have exactly one parent. + InvalidClone(usize), /// An invalid situation happens. Invalid, } @@ -57,8 +59,10 @@ impl Display for Error { Self::CannotReserve(e) => write!(f, "cannot reserve memory: {e}"), Self::CannotClone(fe) => write!(f, "cannot clone due to {fe}"), Self::CannotPlant => write!(f, "cannot plant a new forest fragment"), - Self::EmptyItem => write!(f, "trying to insert an empty item"), Self::SplitPack(n) => write!(f, "cannot split the packed node {n}"), + Self::InvalidClone(n) => { + write!(f, "the cloned node {n} should have exactly one parent") + } Self::Invalid => write!(f, "invalid"), } } @@ -86,6 +90,7 @@ impl From for Error { ForestError::NodeNoLabel(n) => Error::NodeNoLabel(n), ForestError::LabelConversion(ce) => Error::CannotClone(ce), ForestError::SplitPack(n) => Error::SplitPack(n), + ForestError::InvalidClone(n) => Error::SplitPack(n), } } } @@ -110,7 +115,7 @@ impl Default for DerIterIndex { } /// A complex type used for storing values of edges with two layers. -type SecondTypeValue = (PaSe, bool, Vec<(Roi, usize)>); +type SecondTypeValue = (PaVi, bool, Vec<(Roi, usize)>); /// An iterator of TwoLayers. #[derive(Debug, Default)] @@ -137,7 +142,7 @@ impl DerIter { fn add_second_layer( &mut self, label: usize, - forest_source: PaSe, + forest_source: PaVi, accepting: bool, edges: Vec<(Roi, usize)>, ) { @@ -166,7 +171,7 @@ impl Iterator for DerIter { Some(TwoLayers::Two(*key, forest_source, accepting, edges)) } else { // this should not happen - println!("a key does not exist in the hashmap: something is wrong when taking derivatives"); + dbg!("a key does not exist in the hashmap: something is wrong when taking derivatives"); None } } else { @@ -176,22 +181,15 @@ impl Iterator for DerIter { // safely set the index to one. self.index = DerIterIndex::Single(1); - if let Some((edge, to)) = self.singles.first() { - Some(TwoLayers::One(*edge, *to)) - } else { - None - } - } - } - DerIterIndex::Single(index) => { - if let Some((edge, to)) = self.singles.get(index) { - self.index = DerIterIndex::Single(index + 1); - - Some(TwoLayers::One(*edge, *to)) - } else { - None + self.singles + .first() + .map(|(edge, to)| TwoLayers::One(*edge, *to)) } } + DerIterIndex::Single(index) => self.singles.get(index).map(|(edge, to)| { + self.index = DerIterIndex::Single(index + 1); + TwoLayers::One(*edge, *to) + }), } } } @@ -205,6 +203,7 @@ pub struct DefaultChain { history: Vec, forest: DefaultForest>, accepting_vec: Vec, + accepting_sources: Vec, } impl DefaultChain { @@ -217,7 +216,7 @@ impl DefaultChain { /// Return the complete slice of histories. #[inline] pub fn history(&self) -> &[usize] { - self.history.as_ref() + self.history.as_slice() } /// Return a reference to the associated forest. @@ -228,7 +227,7 @@ impl DefaultChain { /// Print the rule positions of the labels. pub fn print_rule_positions(&self) -> Result<(), Box> { - let mut labels = std::collections::HashSet::::default(); + let mut labels = HashSet::::default(); for node in 0..self.graph.nodes_len() { labels.extend(self.graph.labels_of(node)?.map(|(label, _)| label.label)); @@ -395,10 +394,8 @@ impl Chain for DefaultChain { let empty_state = atom.empty(); - // The zero-th non-terminal is assumed to be the starting - // non-terminal, by convention. let initial_nullable = atom - .is_nullable(0) + .is_nullable(START_NONTERMINAL) .map_err(|_| Error::IndexOutOfBounds(0, atom.non_num()))?; builder.add_edge( @@ -421,6 +418,8 @@ impl Chain for DefaultChain { let accepting_vec = vec![true, initial_nullable]; + let accepting_sources = Vec::new(); + Ok(Self { graph, atom, @@ -428,6 +427,7 @@ impl Chain for DefaultChain { history, forest, accepting_vec, + accepting_sources, }) } @@ -455,18 +455,15 @@ impl Chain for DefaultChain { edges: impl Iterator, ) -> Result<(), Self::Error> { for (roi, _) in edges { - match roi.imaginary_part() { - Some(target) => { - if matches!(self.accepting_vec.get(target).copied(), Some(true)) { - let accepting_vec_len = self.accepting_vec.len(); - - *self - .accepting_vec - .get_mut(node_id) - .ok_or(Error::IndexOutOfBounds(node_id, accepting_vec_len))? = true; - } + if let Some(target) = roi.imaginary_part() { + if matches!(self.accepting_vec.get(target).copied(), Some(true)) { + let accepting_vec_len = self.accepting_vec.len(); + + *self + .accepting_vec + .get_mut(node_id) + .ok_or(Error::IndexOutOfBounds(node_id, accepting_vec_len))? = true; } - None => {} } } @@ -475,24 +472,22 @@ impl Chain for DefaultChain { type DerIter = DerIter; + // EXPERIMENT: Try the approach of keeping an additional vector of + // vectors of unsigned integers. Each element of the vector + // corresponds to an edge of the current node. Each element is a + // vector. This vector represents the list of reductions that are + // implied due to skipping edges without children. + // + // Then when we insert an item, we can use this list to perform + // additional reductions. This can avoid the ugly and inefficient + // position_search method currently adopted. + // + // Of course we can use an optional vector to prevent allocating + // too much memory for edges whose corresponding vector is empty. + fn derive(&mut self, t: usize, pos: usize) -> Result { use TNT::*; - /// Convert an error telling us that an index is out of bounds. - /// - /// # Panics - /// - /// The function panics if the error is not of the expected - /// kind. - fn index_out_of_bounds_conversion(ge: GrammarError) -> Error { - match ge { - GrammarError::IndexOutOfBounds(index, bound) => { - Error::IndexOutOfBounds(index, bound) - } - _ => Error::Invalid, - } - } - /// A helper function to generate edges to join. /// /// It first checks if the base edge is accepting. If yes, @@ -505,11 +500,14 @@ impl Chain for DefaultChain { /// to save some allocations. fn generate_edges( chain: &DefaultChain, - child_iter: impl Iterator + ExactSizeIterator + Clone, + mut child_iter: impl Iterator + ExactSizeIterator + Clone, atom_child_iter: impl Iterator + Clone, - pase: PaSe, + pavi: PaVi, + true_pavi: PaVi, mut output: impl AsMut>, - ) -> Result<(), Error> { + ) -> Result { + let mut accepting = false; + // First check the values from iterators are all valid. let graph_len = chain.graph.nodes_len(); let atom_len = chain.atom.nodes_len(); @@ -540,11 +538,11 @@ impl Chain for DefaultChain { for atom_child in atom_child_iter.clone() { let atom_child_accepting = chain.atom.is_accepting(atom_child).unwrap(); - // let atom_child_empty_node = chain.atom.is_empty_node(atom_child).unwrap(); num += child_iter.len(); if atom_child_accepting { + accepting = true; num += child_iter_total_degree; } } @@ -561,7 +559,7 @@ impl Chain for DefaultChain { let atom_child_accepting = chain.atom.is_accepting(atom_child).unwrap(); let atom_child_empty_node = chain.atom.is_empty_node(atom_child).unwrap(); - let roi = Edge::new(atom_child, pase, atom_child_accepting).into(); + let roi = Edge::new(atom_child, pavi, atom_child_accepting).into(); if atom_child_empty_node { output.extend(child_iter.clone().map(|child| (child.into(), child))); @@ -572,18 +570,30 @@ impl Chain for DefaultChain { if atom_child_accepting { for child in child_iter.clone() { for (child_label, child_child) in chain.graph.labels_of(child).unwrap() { - output - .extend(child_child.map(|target| ((*child_label).into(), target))); + // use the new `pavi` as `true_source` + let mut new_label = *child_label; + new_label.set_true_source(true_pavi); + + output.extend( + std::iter::repeat(new_label.into()) + .take(child_child.len()) + .zip(child_child), + ); } } } } - Ok(()) + accepting = + accepting && child_iter.any(|child| *chain.accepting_vec.get(child).unwrap()); + + Ok(accepting) } let mut der_iter = DerIter::default(); + let mut accepting_pavi: HashSet = HashSet::new(); + for (label, child_iter) in self.graph.labels_of(self.current)? { for (atom_label, atom_child_iter) in self.atom.labels_of(label.label())? { if atom_label.is_left_p() { @@ -594,6 +604,10 @@ impl Chain for DefaultChain { let atom_moved = atom_label.get_moved(); + if pos == 4 { + dbg!(atom_label); + } + match *atom_label.get_value() { Some(Ter(ter)) if ter == t => { // prepare forest fragment @@ -601,20 +615,44 @@ impl Chain for DefaultChain { let fragment = generate_fragment([atom_moved.into(), Ter(ter).into()], pos)?; - let new_pase = self.forest.insert_item( + if pos == 4 { + dbg!(atom_moved, label); + self.forest + .print_viz(&format!( + "pos4tb - {atom_moved}-{:?}.gv", + label.true_source() + )) + .unwrap(); + } + + let new_pavi = self.forest.insert_item( *label, fragment, atom_child_iter.clone(), &self.atom, )?; - generate_edges( + if pos == 4 { + self.forest + .print_viz(&format!( + "pos4ta - {atom_moved}-{:?}.gv", + label.true_source() + )) + .unwrap(); + } + + let accepting = generate_edges( self, child_iter.clone(), atom_child_iter.clone(), - new_pase, + new_pavi, + new_pavi, &mut der_iter.singles, )?; + + if accepting { + accepting_pavi.insert(new_pavi); + } } Some(Non(non)) => { if !self @@ -634,50 +672,75 @@ impl Chain for DefaultChain { let first_fragment = generate_fragment([atom_moved.into(), Non(non).into()], pos)?; - let first_segment_pase = self.forest.insert_item( + if pos == 4 { + dbg!(atom_moved, label); + self.forest + .print_viz(&format!("pos4nb - {atom_moved}-{:?}.gv", label)) + .unwrap(); + } + + let first_segment_pavi = self.forest.insert_item( *label, first_fragment, atom_child_iter.clone(), &self.atom, )?; + if pos == 4 { + self.forest + .print_viz(&format!("pos4na - {atom_moved}-{:?}.gv", label)) + .unwrap(); + } + let accepting = self .atom .is_accepting(virtual_node) .map_err(index_out_of_bounds_conversion)?; let virtual_fragment = - virtual_generate_fragment(&self.atom, non, t, pos)?; + DefaultForest::new_leaf(GrammarLabel::new(Ter(t), pos)); - // NOTE: We only need the PaSe from the + // NOTE: We only need the PaVi from the // first segment, so we pass an empty // iterator, in which case the passed - // label is only used for the PaSe. - let virtual_pase = self.forest.insert_item( - Edge::new(0, first_segment_pase, accepting), + // label is only used for the PaVi. + let virtual_pavi = self.forest.insert_item( + Edge::new(0, first_segment_pavi, accepting), virtual_fragment, std::iter::empty(), &self.atom, )?; + if pos == 4 { + self.forest + .print_viz(&format!("pos4va - {atom_moved}-{:?}.gv", label)) + .unwrap(); + } + let mut new_edges = Vec::new(); - generate_edges( + let virtual_accepting = generate_edges( self, child_iter.clone(), atom_child_iter.clone(), - first_segment_pase, + first_segment_pavi, + virtual_pavi, &mut new_edges, )?; + if virtual_accepting { + accepting_pavi.insert(first_segment_pavi); + } + if accepting { + accepting_pavi.insert(virtual_pavi); der_iter.singles.extend(new_edges.clone()); } if !self.atom.is_empty_node(virtual_node).unwrap() { der_iter.add_second_layer( virtual_node, - virtual_pase, + virtual_pavi, accepting, new_edges, ); @@ -691,7 +754,7 @@ impl Chain for DefaultChain { if self.atom.is_empty_node(atom_child).unwrap() { der_iter.singles.extend(child_iter.clone().map(|child| { ( - Edge::new(virtual_node, virtual_pase, accepting) + Edge::new(virtual_node, virtual_pavi, accepting) .into(), child, ) @@ -728,8 +791,137 @@ impl Chain for DefaultChain { } } + self.accepting_sources.extend(accepting_pavi); + Ok(der_iter) } + + // FIXME: Refactor this. + fn end_of_input(&mut self) -> Result<(), Self::Error> { + self.forest.remove_node(1)?; + + let mut stack = Vec::new(); + + dbg!(&self.accepting_sources); + + self.forest.print_viz("dbg forest before.gv").unwrap(); + + for pavi in self.accepting_sources.iter() { + match pavi { + PaVi::Parent(node, edge) => { + // REVIEW: This is a garbage node that has to be + // added when the machine starts. Is there a way + // to avoid this garbage? + if *node == 1 { + continue; + } + + let nth_child = self.forest.nth_child(*node, *edge)?; + + stack.push(nth_child); + } + PaVi::Virtual(nt, t, node) => { + let node_label = self + .forest + .vertex_label(*node)? + .ok_or(Error::NodeNoLabel(*node))?; + + if node_label.label().end().is_none() { + let reduction_fragment = self.atom.generate_virtual_frags(*nt, *t, None); + + for frag in reduction_fragment.into_iter().flatten() { + let mut frag = frag.clone(); + frag.set_pos(node_label.label().start())?; + + stack.push(self.forest.plant_if_needed(*node, frag)?); + } + } + } + PaVi::Empty => {} + } + } + + let mut seen_nodes: HashSet = HashSet::with_capacity(stack.len()); + + dbg!(&stack); + + self.forest.print_viz("dbg forest.gv").unwrap(); + + while let Some(mut top) = stack.pop() { + if seen_nodes.contains(&top) { + continue; + } + + seen_nodes.insert(top); + + let mut top_label = self + .forest + .vertex_label(top)? + .ok_or(Error::NodeNoLabel(top))?; + + if !top_label.is_packed() + && matches!(top_label.label().label().tnt(), Some(TNT::Non(_))) + && top_label.label().end().is_none() + { + let degree = self.forest.degree(top)?; + let last_index = if degree > 0 { degree - 1 } else { 0 }; + + let pos = if degree > 0 { + let last_child = self.forest.nth_child(top, last_index)?; + let last_child_label = self + .forest + .vertex_label(last_child)? + .ok_or(Error::NodeNoLabel(last_child))? + .label(); + let last_child_end = last_child_label.end(); + + if !matches!(last_child_label.label().rule(), + Some(rule) if + self + .atom + .is_accepting(2*rule+1) + .map_err(index_out_of_bounds_conversion)?) + { + continue; + } + + if let Some(pos) = last_child_end { + pos + } else { + stack.extend(self.forest.children_of(last_child)?); + continue; + } + } else { + top_label.label().start() + 1 + }; + + top = self.forest.splone(top, Some(pos), last_index, true)?; + top_label = self + .forest + .vertex_label(top)? + .ok_or(Error::NodeNoLabel(top))?; + } else if top_label.is_packed() + || top_label.label().label().rule().is_some() && top_label.label().end().is_none() + { + stack.extend(self.forest.children_of(top)?); + } + + if top_label.clone_index().is_some() { + let mut parents = self.forest.parents_of(top)?; + if parents.len() != 1 { + dbg!(top, top_label, parents.len()); + self.forest.print_viz("dbg forest.gv").unwrap(); + panic!("0 parent?"); + } + + top = parents.next().unwrap().node(); + } + + stack.extend(self.forest.parents_of(top)?.map(|parent| parent.node())); + } + + Ok(()) + } } #[cfg(test)] @@ -847,23 +1039,11 @@ mod test_chain { chain.chain(0, 11)?; chain.chain(0, 12)?; - let forest = &mut chain.forest; - - let node = forest - .query_label(ForestLabel::from(GrammarLabel::new(TNT::Non(6), 6))) - .unwrap(); - - forest.splone(node, Some(13), forest.degree(node)?)?; - - let node = forest - .query_label(ForestLabel::from(GrammarLabel::new(TNT::Non(1), 0))) - .unwrap(); - - forest.splone(node, Some(13), forest.degree(node)?)?; - - forest.splone(0, Some(13), forest.degree(0)?)?; + chain.end_of_input()?; - forest.print_viz("forest.gv")?; + for label in chain.labels_of(chain.current())?.map(|(label, _)| label) { + dbg!(label); + } assert!(matches!(chain.epsilon(), Ok(true))); @@ -871,7 +1051,9 @@ mod test_chain { { chain.graph.print_viz("chain.gv")?; chain.atom.print_nfa("nfa.gv")?; + chain.forest.print_viz("forest.gv")?; + item::default::print_labels(&chain.atom, &chain.forest)?; } @@ -886,21 +1068,38 @@ mod test_chain { let mut chain = DefaultChain::unit(atom)?; chain.chain(0, 0)?; + chain.forest.print_viz("forest0.gv")?; chain.chain(2, 1)?; + chain.forest.print_viz("forest1.gv")?; chain.chain(2, 2)?; + chain.forest.print_viz("forest2.gv")?; chain.chain(2, 3)?; + chain.forest.print_viz("forest3.gv")?; chain.chain(1, 4)?; - + chain.forest.print_viz("forest4.gv")?; + chain.end_of_input()?; chain.forest.print_viz("forest.gv")?; + chain.forest.print_closed_viz("closed.gv")?; - dbg!(chain.current(), chain.history()); + chain.graph.print_viz("chain.gv")?; + chain.atom.print_nfa("nfa.gv")?; item::default::print_labels(&chain.atom, &chain.forest)?; + for label in chain.labels_of(chain.current())?.map(|(label, _)| label) { + dbg!(label); + } + + dbg!(chain.current(), chain.history()); + #[cfg(feature = "test-print-viz")] { chain.graph.print_viz("chain.gv")?; chain.atom.print_nfa("nfa.gv")?; + chain.forest.print_viz("forest.gv")?; + + chain.forest.print_closed_viz("closed.gv")?; + item::default::print_labels(&chain.atom, &chain.forest)?; } @@ -950,21 +1149,22 @@ mod test_chain { let elapsed = start.elapsed(); - // assert!(matches!(chain.epsilon(), Ok(true))); + assert!(matches!(chain.epsilon(), Ok(true))); dbg!(elapsed); dbg!(chain.current()); assert_eq!(input.len(), chain.history().len()); - if std::fs::metadata("output/history").is_ok() { - std::fs::remove_file("output/history")?; + let history_file_name = "output/history"; + if std::fs::metadata(history_file_name).is_ok() { + std::fs::remove_file(history_file_name)?; } let mut history_file = std::fs::OpenOptions::new() .create(true) .write(true) - .open("output/history")?; + .open(history_file_name)?; use std::fmt::Write; use std::io::Write as IOWrite; @@ -985,10 +1185,19 @@ mod test_chain { history_file.write_all(log_string.as_bytes())?; + for label in chain.labels_of(chain.current())?.map(|(label, _)| label) { + dbg!(label); + } + #[cfg(feature = "test-print-viz")] { chain.graph.print_viz("chain.gv")?; chain.atom.print_nfa("nfa.gv")?; + item::default::print_labels(&chain.atom, &chain.forest)?; + + chain.forest.print_viz("forest.gv")?; + + chain.forest.print_closed_viz("closed.gv")?; } Ok(()) diff --git a/chain/src/item/default/mod.rs b/chain/src/item/default/mod.rs index 1a00368..22069d2 100644 --- a/chain/src/item/default/mod.rs +++ b/chain/src/item/default/mod.rs @@ -3,11 +3,13 @@ use super::*; use crate::atom::default::DefaultAtom; -use grammar::{GrammarLabel, GrammarLabelType}; +use grammar::{GrammarLabel, GrammarLabelType, TNT}; use graph::{ builder::BuilderMut, labelled::binary::PLGBuilderMut, Graph, LabelGraph, PLGraph, RedirectGraph, }; +use std::collections::HashSet; + use core::fmt::Display; /// The type of errors for forest operations. @@ -30,6 +32,8 @@ pub enum Error { LabelConversion(ForestLabelError), /// Trying to split a packed node. SplitPack(usize), + /// A cloned node should have exactly one parent. + InvalidClone(usize), } impl Display for Error { @@ -43,6 +47,9 @@ impl Display for Error { Error::NodeNoLabel(n) => write!(f, "node {n} has no labels, but it should have one"), Error::LabelConversion(le) => write!(f, "fail to convert labels: {le}"), Error::SplitPack(n) => write!(f, "cannot split the packed node {n}"), + Error::InvalidClone(n) => { + write!(f, "the cloned node {n} should have exactly one parent") + } } } } @@ -68,7 +75,7 @@ impl From for Error { /// A default implementation of forest. #[derive(Debug, Clone)] pub struct DefaultForest { - graph: PLGraph, + pub(crate) graph: PLGraph, root: Option, } @@ -262,15 +269,14 @@ impl Forest for DefaultForest> { // the consideration). let fragment = fragment.borrow(); + let fragment_nodes_len = fragment.nodes_len(); - let mut frag_stack = Vec::with_capacity(fragment.nodes_len()); + let mut frag_stack = Vec::with_capacity(fragment_nodes_len); - let mut self_stack = Vec::with_capacity(fragment.nodes_len()); + let mut self_stack = Vec::with_capacity(fragment_nodes_len); - use std::collections::HashSet; - - let mut frag_seen: HashSet = HashSet::with_capacity(fragment.nodes_len()); - let mut self_seen: HashSet = HashSet::with_capacity(fragment.nodes_len()); + let mut frag_seen: HashSet = HashSet::with_capacity(fragment_nodes_len); + let mut self_seen: HashSet = HashSet::with_capacity(fragment_nodes_len); let frag_root = if let Some(root) = fragment.root() { root @@ -294,25 +300,25 @@ impl Forest for DefaultForest> { if fragment.vertex_label(frag_top)? != self.vertex_label(self_top)? { // not a prefix - dbg!( - frag_top, - self_top, - fragment.vertex_label(frag_top)?, - self.vertex_label(self_top)? - ); + // dbg!( + // frag_top, + // self_top, + // fragment.vertex_label(frag_top)?, + // self.vertex_label(self_top)? + // ); return Ok(false); } let self_children = self.children_of(self_top)?; let frag_children = fragment.children_of(frag_top)?; - if frag_children.len() != self_children.len() { - dbg!( - frag_top, - self_top, - fragment.vertex_label(frag_top)?, - self.vertex_label(self_top)? - ); + if frag_children.len() > self_children.len() { + // dbg!( + // frag_top, + // self_top, + // fragment.vertex_label(frag_top)?, + // self.vertex_label(self_top)? + // ); return Ok(false); } @@ -320,7 +326,7 @@ impl Forest for DefaultForest> { if frag_seen.contains(&frag_child) && self_seen.contains(&self_child) { continue; } else if frag_seen.contains(&frag_child) || self_seen.contains(&self_child) { - dbg!(); + // dbg!(); return Ok(false); } @@ -352,7 +358,7 @@ impl Forest for DefaultForest> { let root = if let Some(root) = fragment.root() { root } else { - // Nothing to do to plant an empty forest. + // Nothing to plant. return Ok(()); }; @@ -393,9 +399,27 @@ impl Forest for DefaultForest> { return Ok(()); } - // First adjoin those nodes and join the edges later. + // First adjoin the relevant nodes and join the edges later. + + let mut used_nodes: Vec = std::iter::repeat(false).take(nodes_len).collect(); + + let mut stack = vec![root]; + + while let Some(top) = stack.pop() { + if used_nodes.get(top).copied() == Some(true) { + continue; + } + + *used_nodes + .get_mut(top) + .ok_or(Error::IndexOutOfBounds(top, nodes_len))? = true; + + stack.extend(fragment.children_of(top)?); + } + + let used_nodes = used_nodes; - for node in 0..nodes_len { + for node in (0..nodes_len).filter(|node| used_nodes.get(*node).copied() == Some(true)) { let label = fragment .vertex_label(node)? .ok_or(Error::NodeNoLabel(node))?; @@ -582,6 +606,93 @@ impl PartialEq for DefaultForest> { } } +impl Eq for DefaultForest> {} + +impl DefaultForest> { + /// A convenient helper function to plant a fragment under a + /// node, if it has not been planted yet. + /// + /// To be precise, this function first checks if the node is + /// packed; if so, then it checks every of the cloned children, to + /// see if the fragment has already been planted. If none of the + /// clones have the fragment as a prefix, we make a new clone and + /// plant the fragment there. + /// + /// On the other hand, if the node is a plain node, then it checks + /// if the fragment is a prefix of the node. If so, do nothing, + /// else clone the node and plant the fragment there, unless the + /// node has no children yet, in which case just plant the node + /// there. + /// + /// A special case occurs when the node is the root node, in which + /// case we clone it only when its degree is larger than one. + /// + /// Return either the original node or the cloned node at the end. + pub(crate) fn plant_if_needed( + &mut self, + node: usize, + mut fragment: Self, + ) -> Result { + if fragment.is_empty() { + return Ok(node); + } + + let node_label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?; + + if node_label.is_packed() || node_label.clone_index().is_some() { + let mut planted = false; + + let mut node = node; + + if node_label.clone_index().is_some() { + let mut parents = self.parents_of(node)?; + + assert_eq!(parents.len(), 1); + + node = parents.next().unwrap().node(); + } + + for clone in self.children_of(node)? { + node = clone; + + if self.is_prefix(clone, &fragment)? { + planted = true; + + break; + } + } + + if !planted { + let clone = self.clone_node(node, 0, false)?; + + fragment.set_root(1)?; + + self.plant(clone, fragment, false)?; + + return Ok(clone); + } + } else { + let clone_threshold = if self.root == Some(node) { 1 } else { 0 }; + + let planted = self.is_prefix(node, &fragment)?; + + fragment.set_root(1)?; + + if !planted && self.degree(node)? > clone_threshold { + let clone = self.clone_node(node, 0, false)?; + + self.plant(clone, fragment, false)?; + + return Ok(clone); + } else if !planted { + self.plant(node, fragment, false)?; + } + } + + Ok(node) + } +} + pub mod splone; impl DefaultForest { @@ -596,9 +707,18 @@ impl DefaultForest { Self { graph, root } } -} -impl Eq for DefaultForest> {} + /// Set the root to the given node. + pub fn set_root(&mut self, root: usize) -> Result<(), Error> { + if root >= self.nodes_len() { + return Err(Error::IndexOutOfBounds(root, self.nodes_len())); + } + + self.root = Some(root); + + Ok(()) + } +} impl DefaultForest> { /// Prints the forest without nodes that do not have ending @@ -613,6 +733,8 @@ impl DefaultForest> { Ok(label) => { if let Some(label) = label { label.label().end().is_none() + || (matches!(self.degree(*node), Ok(0)) + && matches!(self.parents_of(*node).map(|iter| iter.len()), Ok(0))) } else { true } @@ -674,6 +796,299 @@ impl DefaultForest> { file.write_all(result.as_bytes()) } + + /// Remove a node by removing all edges connecting with it. + pub fn remove_node(&mut self, node_id: usize) -> Result<(), Error> { + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let mut to_remove = + Vec::with_capacity(builder.degree(node_id)? + builder.parents_of(node_id)?.len()); + + to_remove.extend( + builder + .children_of(node_id)? + .map(|target| (node_id, target)), + ); + + to_remove.extend( + builder + .parents_of(node_id)? + .map(|parent| (parent.node(), node_id)), + ); + + for (source, target) in to_remove { + builder.remove_edge(source, target, |_| true)?; + } + + Ok(()) + } + + /// Find the last child of the given node whose start and end + /// positions contain the given position. If no such child is + /// found, return `Ok(None)`. + /// + /// The returned tuple is of the form (child, index), where + /// `child` is the index of the child node in question, and + /// `index` means that the child is the `index`-th child of the + /// node. + pub(crate) fn position_search( + &self, + node: usize, + pos: usize, + ) -> Result, Error> { + fn range_contains(label: GrammarLabel, pos: usize) -> bool { + let start = label.start(); + + if let Some(end) = label.end() { + (start..end).contains(&pos) + } else { + (start..).contains(&pos) + } + } + + let node_label = self + .vertex_label(node)? + .ok_or(Error::NodeNoLabel(node))? + .label(); + + if !range_contains(node_label, pos) { + return Ok(None); + } + + for (index, child) in self.children_of(node)?.enumerate().rev() { + let child_label = self + .vertex_label(child)? + .ok_or(Error::NodeNoLabel(child))? + .label(); + + if range_contains(child_label, pos) { + return Ok(Some((child, index))); + } + } + + Ok(None) + } + + // /// Check if every child already has an end. + // fn every_child_is_completed(&self, node_id: usize, atom: &DefaultAtom) -> Result { + // let children = self.children_of(node_id)?; + + // if children.len() == 0 { + // return Ok(true); + // } + + // let mut pos = self + // .vertex_label(node_id)? + // .ok_or(Error::NodeNoLabel(node_id))? + // .label() + // .start(); + + // let mut last_child_label = None; + + // for child in children { + // let child_label = self + // .vertex_label(child)? + // .ok_or(Error::NodeNoLabel(child))? + // .label(); + + // last_child_label = Some(child_label); + + // if child_label.start() != pos || child_label.end().is_none() { + // return Ok(false); + // } + + // pos = child_label.end().unwrap(); + // } + + // if let Some(label) = last_child_label { + // if let Some(rule) = label.label().rule() { + // if !atom + // .is_accepting(2 * rule + 1) + // .map_err(|_| Error::IndexOutOfBounds(2 * rule + 1, atom.nodes_len()))? + // { + // return Ok(false); + // } + // } + // } + + // Ok(true) + // } + + // /// Complete the forest by trying to set the ending position of + // /// every node that does not have an end, by the ending position + // /// of its last child. + // pub fn complete(&mut self, atom: &DefaultAtom) -> Result<(), Error> { + // let mut stack: Vec<_> = self + // .nodes() + // .filter(|node| { + // let label = self.vertex_label(*node).unwrap().unwrap().label(); + + // label.label().rule().is_some() && label.end().is_some() + // }) + // .collect(); + + // let mut second_stack: Vec = Vec::new(); + + // let mut pending_candidates: Vec = Vec::new(); + + // let nodes_len = self.nodes_len(); + + // let mut seen_nodes: HashSet = HashSet::with_capacity(nodes_len); + + // while !stack.is_empty() { + // while let Some(mut top) = stack.pop() { + // if seen_nodes.contains(&top) { + // continue; + // } + + // seen_nodes.insert(top); + + // let top_label = self.vertex_label(top)?.unwrap(); + + // if top_label.clone_index().is_some() { + // let mut top_parents = self.parents_of(top)?; + + // if top_parents.len() != 1 { + // return Err(Error::InvalidClone(top)); + // } + + // top = top_parents.next().unwrap().node(); + // } + + // let rule_parents = self.parents_of(top)?; + + // let candidates = { + // let mut result = Vec::with_capacity(rule_parents.len()); + + // for parent in rule_parents { + // // for parent in self.parents_of(parent.node())? { + // // if self.degree(parent.node())? <= parent.edge() + 1 { + // result.push(parent); + // // } + // // } + // } + + // result + // }; + + // for candidate in candidates { + // if matches!(self.vertex_label(candidate.node())?, Some(label) if label.label().end().is_none()) + // { + // if self.every_child_is_completed(candidate.node(), atom)? { + // let last_child = self + // .nth_child(candidate.node(), self.degree(candidate.node())? - 1)?; + // let end = self + // .vertex_label(last_child)? + // .ok_or(Error::NodeNoLabel(last_child))? + // .label() + // .end(); + + // let sploned_node = self.splone( + // candidate.node(), + // end, + // self.degree(candidate.node())? - 1, + // true, + // )?; + + // second_stack.push(sploned_node); + // } else { + // pending_candidates.push(candidate.node()); + // } + // } else { + // second_stack.push(candidate.node()); + // } + // } + + // let mut new_pending = Vec::with_capacity(pending_candidates.len()); + + // while let Some(node) = pending_candidates.pop() { + // if self.every_child_is_completed(node, atom)? { + // let last_edge = self.degree(node)? - 1; + // let last_child = self.nth_child(node, last_edge)?; + // let end = self + // .vertex_label(last_child)? + // .ok_or(Error::NodeNoLabel(last_child))? + // .label() + // .end(); + + // let sploned_node = self.splone(node, end, last_edge, true)?; + + // second_stack.push(sploned_node); + // } else { + // new_pending.push(node); + // } + // } + + // std::mem::swap(&mut pending_candidates, &mut new_pending); + // } + + // std::mem::swap(&mut stack, &mut second_stack); + // } + + // Ok(()) + // } + + // /// Unconditionally set the label of the node to be the new label. + // /// + // /// # Warning + // /// + // /// Use with caution: it does not do anything special, so it is + // /// the responsibility of the caller to ensure this operation is + // /// legal. + // #[allow(dead_code)] + // pub(crate) fn set_label(&mut self, node: usize, label: GrammarLabel) -> Result<(), Error> { + // let status = self + // .vertex_label(node)? + // .ok_or(Error::NodeNoLabel(node))? + // .status; + + // let label = ForestLabel::new(label, status); + + // let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + // builder.set_label(node, label)?; + + // Ok(()) + // } + + /// Set the position of every node. + pub(crate) fn set_pos(&mut self, pos: usize) -> Result<(), Error> { + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + for node in 0..builder.nodes_len() { + let label = builder + .vertex_label(node)? + .ok_or(Error::NodeNoLabel(node))?; + + let mut label_label = label.label(); + + label_label.set_start(pos); + + if matches!(label_label.label().tnt(), Some(TNT::Ter(_))) { + label_label.set_end(pos + 1); + } else if builder.degree(node)? == 1 && label_label.label().rule().is_some() { + let child = builder.children_of(node)?.next().unwrap(); + + if matches!( + builder + .vertex_label(child)? + .ok_or(Error::NodeNoLabel(child))? + .label() + .label() + .tnt(), + Some(TNT::Ter(_)) + ) { + label_label.set_end(pos + 1); + } + } + + let new_label = ForestLabel::new(label_label, label.status); + + builder.set_label(node, new_label)?; + } + + Ok(()) + } } /// Print the labels found in the forest, so that we can easily diff --git a/chain/src/item/default/splone.rs b/chain/src/item/default/splone.rs index 851968c..237b92a 100644 --- a/chain/src/item/default/splone.rs +++ b/chain/src/item/default/splone.rs @@ -1,4 +1,3 @@ -#![allow(dead_code)] //! This module implements a function for "closing" and "splitting" a //! node in a forest, and hence the name. @@ -20,6 +19,7 @@ fn get_rule_label(label: GrammarLabel) -> Option { } /// Existing or non-existing label. +#[derive(Debug, Copy, Clone)] enum Eon { Ex(usize), Nex(ForestLabel), @@ -62,6 +62,12 @@ impl DefaultForest> { /// [`split_node`][DefaultForest::>::split_node] /// for details. /// + /// # `completingp` + /// + /// When we are completing the forest at the end, we do not wish + /// to keep the nodes at the end, so we pass a flag to inform the + /// function of this intention. + /// /// # Return /// /// The function returns the new, splitted or cloned node, or the @@ -90,6 +96,7 @@ impl DefaultForest> { node: usize, end: Option, edge_index: usize, + completingp: bool, ) -> Result { let node_label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?; @@ -130,7 +137,7 @@ impl DefaultForest> { || node_label.clone_index().is_some() || new_label.clone_index().is_some() { - return self.split_node(new_label, node, edge_index); + return self.split_node(new_label, node, edge_index, completingp); } // replace the label directly @@ -151,6 +158,11 @@ impl DefaultForest> { .ok_or(Error::NodeNoLabel(parent))? .label(); + if get_rule_label(parent_label).is_none() { + self.print_viz("dbg forest.gv").unwrap(); + panic!("assumption fails"); + } + assert!(get_rule_label(parent_label).is_some()); assert_eq!(builder.degree(parent)?, 1); @@ -207,12 +219,13 @@ impl DefaultForest> { new_label: ForestLabel, mut node: usize, edge_index: usize, + completingp: bool, ) -> Result { let end = new_label.label().end(); let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); - let mut new_node = builder.add_vertex(new_label); + let new_node = builder.add_vertex(new_label); let new_packed = if new_label.clone_index().is_some() { let packed = builder @@ -261,7 +274,13 @@ impl DefaultForest> { .label(); assert!(get_rule_label(parent_label).is_some()); - assert_eq!(self.degree(parent.node())?, 1); + + if self.degree(parent.node())? != 1 { + dbg!(parent); + self.print_viz("dbg forest.gv").unwrap(); + + panic!("assumption fails"); + } if let Some(pos) = end { parent_label.set_end(pos); @@ -276,11 +295,11 @@ impl DefaultForest> { let new_parent = builder.add_vertex(parent_label); if let Some(packed) = new_packed { - new_node = packed; + builder.add_edge(new_parent, packed, new_label)?; + } else { + builder.add_edge(new_parent, new_node, new_label)?; } - builder.add_edge(new_parent, new_node, new_label)?; - result.extend( self.parents_of(parent.node())? .map(|parent_parent| (parent_parent, new_parent)), @@ -291,21 +310,48 @@ impl DefaultForest> { }; for (parent, new_child) in parents { - if self.has_same_children_until( - parent.node(), - parent.node(), - parent.edge(), - new_child, - )? { - continue; - } + if !completingp { + if self.has_same_children_until( + parent.node(), + parent.node(), + parent.edge(), + new_child, + )? { + continue; + } - // we don't add a child to parent.edge() here. - let cloned = self.clone_node(parent.node(), parent.edge(), false)?; + // we don't add a child to parent.edge() here. + let cloned = self.clone_node(parent.node(), parent.edge(), false)?; - let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); - builder.add_edge(cloned, new_child, new_label)?; + builder.add_edge(cloned, new_child, new_label)?; + } else { + if self.has_same_children_except( + parent.node(), + parent.node(), + parent.edge(), + new_child, + )? { + continue; + } + + // we don't add a child to parent.edge() here. + let cloned = self.clone_node(parent.node(), parent.edge(), false)?; + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + builder.add_edge(cloned, new_child, new_label)?; + + let children_to_add: Vec<_> = builder + .children_of(parent.node())? + .skip(parent.edge() + 1) + .collect(); + + for child in children_to_add { + builder.add_edge(cloned, child, new_label)?; + } + } } Ok(new_node) @@ -486,6 +532,85 @@ impl DefaultForest> { unreachable!("should not examine children of a packed node") } } + + /// Detect if a node has a branch that has the same children as + /// another node, except for a given index, where it points to another + /// given node. + /// + /// # Clones + /// + /// If `nodea` is a clone, it checks every clone to see if the + /// condition is satisfied for some clone. + fn has_same_children_except( + &self, + nodea: usize, + nodeb: usize, + edge_index: usize, + alternative: usize, + ) -> Result { + let labela = self.vertex_label(nodea)?.ok_or(Error::NodeNoLabel(nodea))?; + let children_b = self.children_of(nodeb)?; + + if children_b.len() < edge_index + 1 { + return Err(Error::IndexOutOfBounds(edge_index, children_b.len() - 1)); + } + + if labela.is_plain() { + let children_a = self.children_of(nodea)?; + + if children_a.len() != children_b.len() { + return Ok(false); + } + + for (index, (childa, childb)) in + children_a.zip(children_b).take(edge_index + 1).enumerate() + { + if index != edge_index { + if childa != childb { + return Ok(false); + } + } else if childa != alternative { + return Ok(false); + } + } + + Ok(true) + } else if labela.clone_index().is_some() { + let mut parentsa = self.parents_of(nodea)?; + + assert_eq!(parentsa.len(), 1); + + let parenta = parentsa.next().unwrap().node(); + + 'branch_loop: for branch in self.children_of(parenta)? { + let children_a = self.children_of(branch)?; + let children_b = children_b.clone(); + + if children_a.len() < children_b.len() { + continue 'branch_loop; + } + + for (index, (childa, childb)) in + children_a.zip(children_b).take(edge_index + 1).enumerate() + { + if index != edge_index { + if childa != childb { + continue 'branch_loop; + } + } else if childa != alternative { + continue 'branch_loop; + } + } + + return Ok(true); + } + + Ok(false) + } else { + // a packed node; this should not happen + unreachable!("should not examine children of a packed node") + } + } } #[cfg(test)] @@ -688,7 +813,7 @@ mod test_splone { fn test() -> Result<(), Box> { let mut test_forest = create_test_forest()?; - test_forest.splone(6, Some(3), 1)?; + test_forest.splone(6, Some(3), 1, false)?; let answer = splone_6_3_1()?; @@ -698,7 +823,7 @@ mod test_splone { panic!("splone(6, Some(3), 1) produced wrong forest"); } - test_forest.splone(6, Some(3), 1)?; + test_forest.splone(6, Some(3), 1, false)?; if test_forest != answer { test_forest.print_viz("sploned forest.gv")?; @@ -706,7 +831,7 @@ mod test_splone { panic!("repeated splone(6, Some(3), 1) produced wrong forest"); } - test_forest.splone(6, Some(2), 0)?; + test_forest.splone(6, Some(2), 0, false)?; let answer = splone_6_2_0()?; @@ -716,7 +841,7 @@ mod test_splone { panic!("splone(6, Some(2), 0) produced wrong forest"); } - test_forest.splone(6, Some(2), 0)?; + test_forest.splone(6, Some(2), 0, false)?; if test_forest != answer { test_forest.print_viz("sploned forest.gv")?; @@ -724,7 +849,7 @@ mod test_splone { panic!("repeated splone(6, Some(2), 0) produced wrong forest"); } - test_forest.splone(6, None, 0)?; + test_forest.splone(6, None, 0, false)?; let answer = splone_6_n_0()?; @@ -734,7 +859,7 @@ mod test_splone { panic!("splone(6, None, 0) produced wrong forest"); } - test_forest.splone(14, None, 0)?; + test_forest.splone(14, None, 0, false)?; if test_forest != answer { test_forest.print_viz("sploned forest.gv")?; @@ -742,7 +867,7 @@ mod test_splone { panic!("repeated splone(6, None, 0) produced wrong forest"); } - test_forest.splone(4, Some(3), 0)?; + test_forest.splone(4, Some(3), 0, false)?; let answer = splone_4_3_0()?; @@ -752,7 +877,7 @@ mod test_splone { panic!("splone(4, Some(3), 0) produced wrong forest"); } - test_forest.splone(4, Some(3), 0)?; + test_forest.splone(4, Some(3), 0, false)?; if test_forest != answer { test_forest.print_viz("sploned forest.gv")?; @@ -760,8 +885,8 @@ mod test_splone { panic!("repeated splone(4, Some(3), 0) produced wrong forest"); } - test_forest.splone(17, Some(3), 0)?; - test_forest.splone(15, Some(3), 0)?; + test_forest.splone(17, Some(3), 0, false)?; + test_forest.splone(15, Some(3), 0, false)?; let answer = splone_17_3_0_15_3_0()?; @@ -771,8 +896,8 @@ mod test_splone { panic!("splone(17, Some(3), 0) - splone(15, Some(3), 0) produced wrong forest"); } - test_forest.splone(17, Some(3), 0)?; - test_forest.splone(15, Some(3), 0)?; + test_forest.splone(17, Some(3), 0, false)?; + test_forest.splone(15, Some(3), 0, false)?; if test_forest != answer { test_forest.print_viz("sploned forest.gv")?; diff --git a/chain/src/item/forest-format.org b/chain/src/item/forest-format.org new file mode 100644 index 0000000..eb0f150 --- /dev/null +++ b/chain/src/item/forest-format.org @@ -0,0 +1,10 @@ +#+TITLE: Format of forests +#+DATE: [2023-02-13 Lun 22:05] +#+AUTHOR: Durand + +In this document I try to explain the format of the forests returned +by the parser generator. + +* Basic terms + +# TODO: Explain diff --git a/chain/src/item/genins.rs b/chain/src/item/genins.rs index 4c51d9a..feb45c6 100644 --- a/chain/src/item/genins.rs +++ b/chain/src/item/genins.rs @@ -7,19 +7,24 @@ //! semirings later on. use super::*; -use crate::{atom::DefaultAtom, default::Error, item::default::DefaultForest, Edge}; +use crate::{ + atom::{Atom, DefaultAtom}, + default::Error, + item::default::DefaultForest, + Edge, +}; use grammar::{Error as GrammarError, GrammarLabel, GrammarLabelType, TNT}; -use graph::Graph; +#[allow(unused_imports)] +use graph::{builder::BuilderMut, labelled::binary::PLGBuilderMut, Graph, RedirectGraph}; use core::borrow::Borrow; -use std::collections::HashSet as Set; /// Convert an error telling us that an index is out of bounds. /// /// # Panics /// /// The function panics if the error is not of the expected kind. -fn index_out_of_bounds_conversion(ge: GrammarError) -> Error { +pub(crate) fn index_out_of_bounds_conversion(ge: GrammarError) -> Error { match ge { GrammarError::IndexOutOfBounds(index, bound) => Error::IndexOutOfBounds(index, bound), _ => Error::Invalid, @@ -105,11 +110,6 @@ pub fn virtual_generate_fragment( Some(TNT::Ter(ter)) if ter == t) { for child in child_iter { - // #[allow(unused_must_use)] - // { - // dbg!(non_start, child, atom.query_expansion(non_start, child)); - // } - let line: Vec = atom .query_expansion(non_start, child) .map_err(index_out_of_bounds_conversion)? @@ -124,7 +124,15 @@ pub fn virtual_generate_fragment( if result.is_empty() { result = generate_fragment(line, pos)?; } else { - result.plant(0, generate_fragment(line, pos)?, false)?; + let mut new_fragment = generate_fragment(line, pos)?; + + new_fragment.remove_node(0)?; + + new_fragment.set_root(1)?; + + let cloned = result.clone_node(0, 0, false)?; + + result.plant(cloned, new_fragment, false)?; } } } @@ -133,90 +141,313 @@ pub fn virtual_generate_fragment( Ok(result) } +// TODO: Examine `insert_item` again. + impl DefaultForest> { /// Insert an item derivation forest into a recording forest. /// /// We need the help of other things just for finding the correct /// places to insert these item fragments. - pub fn insert_item( + pub(crate) fn insert_item( &mut self, label: Edge, fragment: impl Borrow>>, - atom_child_iter: impl Iterator + ExactSizeIterator, + atom_child_iter: impl Iterator + ExactSizeIterator + Clone, atom: &DefaultAtom, - ) -> Result { - let fragment = fragment.borrow(); + ) -> Result { + let root = if let Some(root) = self.root() { + root + } else { + panic!("the forest must be non-empty when we insert items"); + }; + + let pavi = label.forest_source(); - let pase = label.forest_source(); + let true_source = label.true_source(); + + let fragment = fragment.borrow(); let fragment_root = if let Some(root) = fragment.root() { root } else { - return Err(Error::EmptyItem); + unreachable!("empty item"); }; - let pos = fragment + let fragment_root_label = fragment .vertex_label(fragment_root)? - .ok_or(Error::NodeNoLabel(fragment_root))? - .label() - .start(); + .ok_or(Error::NodeNoLabel(fragment_root))?; + + let pos = fragment_root_label.label().start(); + + if pavi != true_source { + dbg!(label, pos); + + // Completing from true_source up to the pavi + let true_source_node = match true_source { + PaVi::Parent(node, edge) => { + let nth_child = self.nth_child(node, edge)?; + let nth_child_label = self + .vertex_label(nth_child)? + .ok_or(Error::NodeNoLabel(nth_child))?; + + let nth_child_degree = self.degree(nth_child)?; + let nth_child_last = if nth_child_degree > 0 { + nth_child_degree - 1 + } else { + 0 + }; + + if matches!(nth_child_label.label().label().tnt(), Some(TNT::Non(_))) + && !nth_child_label.is_packed() + { + let sploned = self.splone(nth_child, Some(pos), nth_child_last, false)?; + + sploned + } else { + nth_child + } + } + PaVi::Virtual(nt, t, mut node) => { + let node_label_start = self + .vertex_label(node)? + .ok_or(Error::NodeNoLabel(node))? + .label() + .start(); + + let reduction_fragment = atom.generate_virtual_frags(nt, t, None); + + // FIXME: We shall "close" this fragment as well. + + for frag in reduction_fragment.into_iter().flatten() { + let mut frag = frag.clone(); + frag.set_pos(node_label_start)?; + + // NOTE: The function `plant_if_needed` + // assumes that we want to plant the fragment + // as the first child of the node. This + // assumption holds in this case, but not in + // general. + + node = self.plant_if_needed(node, frag)?; + } + + node + } + PaVi::Empty => { + dbg!(); + root + } + }; + + let true_source_pos = self + .vertex_label(true_source_node)? + .ok_or(Error::NodeNoLabel(true_source_node))? + .label() + .start(); + + let top_node = match pavi { + PaVi::Parent(node, edge) => self.nth_child(node, edge)?, + PaVi::Virtual(_nt, _t, _node) => { + dbg!(label); + + self.print_viz("dbg forest.gv").unwrap(); + fragment.print_viz("dbg fragment.gv").unwrap(); + + unreachable!("I do not think this is possible") + } + PaVi::Empty => { + unreachable!("The empty segment should not need to be reduced") + } + }; + + let mut stack = vec![vec![(top_node, 0)]]; + let mut result_stack = Vec::new(); + + while let Some(mut top_stack) = stack.pop() { + let (node, _) = top_stack.pop().unwrap(); + + let node_label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?; + + if node_label.is_packed() { + for node in self.children_of(node)? { + let search_child = self.position_search(node, true_source_pos)?; + + if let Some((child, index)) = search_child { + let mut top_stack = top_stack.clone(); + // Fix the previous element + top_stack.push((node, index)); + + if child == true_source_node { + result_stack.push(top_stack); + } else { + top_stack.push((child, 0)); + + stack.push(top_stack); + } + } + } + } else { + let search_child = self.position_search(node, true_source_pos)?; + + if let Some((child, index)) = search_child { + top_stack.push((node, index)); + + if child == true_source_node { + result_stack.push(top_stack); + } else { + top_stack.push((child, 0)); + + stack.push(top_stack); + } + } + } + } + + // FIXME: We have to change the pavi as well, otherwise we + // are going to use the wrong branch for planting later. + + for stack in result_stack { + // dbg!(&stack); + // self.print_viz("dbg forest.gv").unwrap(); + // panic!("test"); + + let mut first_time = true; + + for (node, index) in stack.into_iter().rev() { + if matches!( + self.vertex_label(node)? + .ok_or(Error::NodeNoLabel(node))? + .label() + .label() + .tnt(), + Some(TNT::Non(_)) + ) { + let sploned = self.splone(node, Some(pos), index, false)?; + + if !first_time { + let last_index = self.degree(sploned)? - 1; + + let last_child = self.nth_child(sploned, last_index)?; + + let last_child_label = self + .vertex_label(last_child)? + .ok_or(Error::NodeNoLabel(last_child))? + .label(); + + if last_child_label.end() != Some(pos) { + let closed_label = GrammarLabel::new_closed( + last_child_label.label(), + last_child_label.start(), + pos, + ); + + let closed_label_id = self + .query_label(closed_label.into()) + .expect("last child closed label not found"); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); - // Ensure the last node in the PaSe is a terminal or a + builder.redirect(sploned, last_index, closed_label_id)?; + } + } + + first_time = false; + } + } + } + } + + // Ensure the last node in the PaVi is a terminal or a // non-terminal node, as an extra safety guard during // development. #[cfg(debug_assertions)] { - if let Some(parent) = pase.parent() { - assert!(matches!( - self.vertex_label(self.nth_child(parent.node(), parent.edge())?), - Ok(Some(label)) - if label.label().label().tnt().is_some())); - } else if let Some((_, leaf)) = pase.segment() { - assert!(matches!( - self.vertex_label(leaf), - Ok(Some(label)) - if label.label().label().tnt().is_some())); - } else { - unreachable!("a pase is neither a parent nor a segment"); + match pavi { + PaVi::Parent(node, edge) => { + assert!(matches!( + self.vertex_label(self.nth_child(node, edge)?), + Ok(Some(label)) + if label.label().label().tnt().is_some())); + } + PaVi::Virtual(nt, t, node) => { + if !matches!( + self.vertex_label(node), + Ok(Some(label)) + if matches!( + label.label().label().tnt(), + Some(TNT::Non(_)))) + { + dbg!(node, self.vertex_label(node)?, pavi); + + self.print_viz("dbg forest.gv").unwrap(); + + panic!("assumption fails"); + } + + if nt >= atom.non_num() { + dbg!(); + return Err(Error::IndexOutOfBounds(nt, atom.non_num())); + } + + if t >= atom.ter_num() { + dbg!(); + return Err(Error::IndexOutOfBounds(t, atom.ter_num())); + } + } + PaVi::Empty => {} } } - let mut is_empty_segment = false; + let is_empty_segment = pavi.is_empty(); let mut parents: Vec = { let mut result = Vec::new(); - if let Some(parent) = pase.parent() { - result.push(parent); - } else { - let (root, leaf) = pase.segment().unwrap(); - let mut seen_nodes = Set::new(); + match pavi { + PaVi::Parent(node, edge) => { + result.push(Parent::new(node, edge)); + } + PaVi::Virtual(nt, t, node) => { + let node_label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?; - let mut stack = if root == leaf { - // an empty segment means the root - is_empty_segment = true; + for atom_child in atom_child_iter.clone() { + for rule in atom.trace(nt, t, atom_child).into_iter().flatten() { + let virtual_frag = atom.generate_virtual_frags(nt, t, Some(rule)); - result.push(Parent::new(root, 0)); - vec![] - } else { - vec![root] - }; + if let Some(frag) = virtual_frag { + let mut frag = (*frag.get(0).unwrap()).clone(); - while let Some(top) = stack.pop() { - if seen_nodes.contains(&top) { - continue; - } + frag.set_pos(node_label.label().start())?; - seen_nodes.insert(top); + let frag_nodes_len = frag.nodes_len(); - for (index, child) in self.children_of(top)?.enumerate() { - if child == leaf { - result.push(Parent::new(top, index)); - } else { - stack.push(child); + assert!(frag_nodes_len > 1); + + let last_but_one_label = frag + .vertex_label(frag_nodes_len - 2)? + .ok_or(Error::NodeNoLabel(frag_nodes_len - 2))?; + + // NOTE: The function + // `plant_if_needed` assumes that we + // want to plant the fragment as the + // first child of the node. This + // assumption holds in this case, but + // not in general. + + self.plant_if_needed(node, frag)?; + + let rule_label_pos = self + .query_label(last_but_one_label) + .expect("the forest was wrongly planted"); + + result.push(Parent::new(rule_label_pos, 0)); + } } } } + PaVi::Empty => { + result.push(Parent::new(root, 0)); + } } result @@ -251,6 +482,7 @@ impl DefaultForest> { for atom_child in atom_child_iter { // dbg!(label.label(), atom_child); + // Find reduction information. let reduction_info = atom .query_reduction(label.label(), atom_child) @@ -272,7 +504,8 @@ impl DefaultForest> { .label(), GrammarLabelType::TNT(TNT::Non(nt)) if nt == *reduction_nt ) { - let sploned_node = self.splone(node.node(), Some(pos), node.edge())?; + let sploned_node = + self.splone(node.node(), Some(pos), node.edge(), false)?; node_label = self .vertex_label(sploned_node)? @@ -282,16 +515,15 @@ impl DefaultForest> { let mut parent_iter = self.parents_of(sploned_node)?; #[cfg(debug_assertions)] - { - assert_eq!(parent_iter.len(), 1); - - assert!(self - .vertex_label(sploned_node)? - .ok_or(Error::NodeNoLabel(sploned_node))? - .is_packed()); - } + assert_eq!(parent_iter.len(), 1); node = parent_iter.next().unwrap(); + + #[cfg(debug_assertions)] + assert!(self + .vertex_label(node.node())? + .ok_or(Error::NodeNoLabel(node.node()))? + .is_packed()); } else { node = Parent::new(sploned_node, node.edge()); } @@ -337,19 +569,29 @@ impl DefaultForest> { atom_child, parents, reduction_info, - atom.query_reduction(label.label(), atom_child).unwrap() + atom.query_reduction(label.label(), atom_child).unwrap(), + is_empty_segment, + atom.trace(0, 3, atom_child) + .into_iter() + .flatten() + .collect::>(), ); self.print_viz("dbg forest.gv").unwrap(); #[cfg(test)] - genins_test::print_labels(atom, self.borrow()).unwrap(); + crate::item::default::print_labels(atom, self.borrow()).unwrap(); return Err(Error::CannotPlant); } - for parent in stack.into_iter() { - let sploned_node = self.splone(parent.node(), None, parent.edge())?; + if pos == 4 && matches!(true_source, PaVi::Virtual(_, _, _)) { + dbg!(&stack, reduction_info, true_source, pavi); + self.print_viz(&format!("pos4ib.gv")).unwrap(); + } + + for parent in stack { + let sploned_node = self.splone(parent.node(), None, parent.edge(), false)?; self.plant(sploned_node, fragment, non_empty)?; @@ -357,24 +599,22 @@ impl DefaultForest> { } } - // If the iterator is empty, just plant at the specified - // child. + // If the iterator is empty, assert the fragment has length + // one, and do not plant anything. if !non_empty { - for parent in parents.into_iter() { - let nth_child = self.nth_child(parent.node(), parent.edge())?; - - self.plant(nth_child, fragment, non_empty)?; - - non_empty = true; - } + assert_eq!(fragment.nodes_len(), 1); } let result = if fragment.nodes_len() == 2 { - let root_label = fragment.vertex_label(0)?.unwrap(); - let leaf_label = fragment.vertex_label(1)?.unwrap(); + let root_label = fragment_root_label; + let leaf_label = fragment + .vertex_label(1 - fragment_root)? + .ok_or(Error::NodeNoLabel(1 - fragment_root))?; // it has been planted, so should be safe. - let node = self.query_label(root_label).unwrap(); + let node = self + .query_label(root_label) + .expect("root label was not found"); let edge = { let mut result = None; @@ -394,16 +634,52 @@ impl DefaultForest> { } }; // dbg!(node, edge, root_label, leaf_label); - PaSe::Parent(node, edge) + PaVi::Parent(node, edge) } else { - let root_label = fragment.vertex_label(0)?.unwrap(); - let leaf_label = fragment.vertex_label(fragment.nodes_len() - 1)?.unwrap(); - // dbg!(root_label, leaf_label); - - PaSe::Segment( - self.query_label(root_label).unwrap(), - self.query_label(leaf_label).unwrap(), - ) + assert_eq!( + fragment.nodes_len(), + 1, + "a virtual fragment should consist of a single terminal node." + ); + + let root_label = fragment_root_label; + + let pavi_parent = pavi.parent().expect( + "When we insert a virtual fragment, the forest_source of + the label must be a parent.", + ); + + let nth_child = self.nth_child(pavi_parent.node(), pavi_parent.edge())?; + + let nth_child_label = self + .vertex_label(nth_child)? + .ok_or(Error::NodeNoLabel(nth_child))? + .label() + .label(); + + let error_str = "When we insert a virtual fragment, the \ + forest source of the label must point to \ + a non-terminal node"; + + let nt = match nth_child_label.tnt().expect(error_str) { + TNT::Non(nt) => nt, + _ => { + dbg!(nth_child, nth_child_label); + + panic!("{error_str}"); + } + }; + + let t = match root_label.label().label().tnt().unwrap() { + TNT::Ter(t) => t, + _ => { + dbg!(root_label); + + panic!("a virtual fragment should consist of a single terminal node") + } + }; + + PaVi::Virtual(nt, t, nth_child) }; Ok(result) @@ -413,40 +689,10 @@ impl DefaultForest> { #[cfg(test)] mod genins_test { use super::*; - #[allow(unused_imports)] - use crate::{default::DefaultChain, item::default::leaf, Chain}; + use crate::item::default::leaf; use grammar::test_grammar_helper::*; - pub fn print_labels( - atom: impl Borrow, - forest: impl Borrow>>, - ) -> Result<(), Box> { - let forest = forest.borrow(); - let atom = atom.borrow(); - - for node in forest.nodes() { - let label = forest.vertex_label(node)?; - - if let Some(label) = label { - let label = label.label.label(); - - match label { - GrammarLabelType::TNT(tnt) => { - println!("{tnt} = {}", atom.name_of_tnt(tnt)?); - } - GrammarLabelType::Rule(pos) => { - println!("pos {pos} = {}", atom.rule_pos_string(pos)?); - } - } - } else { - return Err(Error::NodeNoLabel(node).into()); - } - } - - Ok(()) - } - #[test] fn test_generate_fragment() -> Result<(), Box> { let grammar = new_notes_grammar()?; @@ -497,7 +743,7 @@ mod genins_test { 0, )?; - print_labels(&atom, &virtual_fragment)?; + crate::item::default::print_labels(&atom, &virtual_fragment)?; assert_eq!(virtual_fragment, test_fragment); @@ -511,7 +757,7 @@ mod genins_test { let test_fragment = generate_fragment([TNT::Non(3).into(), 38.into(), TNT::Ter(2).into()], 1)?; - print_labels(&atom, &virtual_fragment)?; + crate::item::default::print_labels(&atom, &virtual_fragment)?; assert_eq!(virtual_fragment, test_fragment); diff --git a/chain/src/item/mod.rs b/chain/src/item/mod.rs index 284d640..39d04c7 100644 --- a/chain/src/item/mod.rs +++ b/chain/src/item/mod.rs @@ -9,43 +9,64 @@ use graph::{error::Error as GError, GraphLabel, LabelGraph, Parent, ParentsGraph use core::borrow::Borrow; -/// A parent or a segment. +/// A parent or a virtual segment. +/// +/// # Parent /// /// A parent is a node with an edge index, which represents a certain /// edge. /// -/// A segment represents every edge from the root node to the single -/// terminating node. -#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] -pub enum PaSe { +/// # Virtual Segment +/// +/// A virtual segment represents an expansion from a non-terminal by a +/// terminal. We do not directly add this segment when we encounter +/// this expansion at the start because this expansion might contain +/// multiple derivations some of which will not be used. +/// +/// If we add the expansion immediately when we encounter it, we have +/// to later discern and delete those unwanted derivations. This is +/// asking for trouble, in my experiences. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Default)] +pub enum PaVi { /// An edge from a node, as the n-th child. Parent(usize, usize), - /// A segment from a root to the single terminating node. - Segment(usize, usize), + /// A virtual segment from a non-terminal by a terminal, rooted at + /// a node. + /// + /// # Tuple elements + /// + /// The contained tuple is of the form (nt, t, node), which means + /// a virtually added node at the `node` representing the + /// expansion from the non-terminal `nt` by the terminal `t`. + Virtual(usize, usize, usize), + /// This is an empty segment that represents the root node. This + /// is a special case for the unit state of the chain-rule + /// machine. + #[default] + Empty, } -impl From for PaSe { +impl From for PaVi { fn from(value: Parent) -> Self { Self::Parent(value.node(), value.edge()) } } -impl Default for PaSe { - fn default() -> Self { - Self::Segment(0, 0) - } -} - -impl core::fmt::Display for PaSe { +impl core::fmt::Display for PaVi { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::Parent(node, edge) => write!(f, "the {edge}-th edge from {node}"), - Self::Segment(root, leaf) => write!(f, "a segment from {root} to {leaf}"), + Self::Virtual(nt, t, node) => write!( + f, + "a virtual node for non-terminal {nt} and terminal {t} at node {node}" + ), + Self::Empty => write!(f, "empty segment at root"), } } } -impl PaSe { +impl PaVi { + /// Get the Parent variant. fn parent(self) -> Option { if let Self::Parent(node, edge) = self { Some(Parent::new(node, edge)) @@ -54,12 +75,29 @@ impl PaSe { } } - fn segment(self) -> Option<(usize, usize)> { - if let Self::Segment(root, leaf) = self { - Some((root, leaf)) - } else { - None - } + // /// Get the "Virtual" variant. + // /// + // /// # Name + // /// + // /// We cannot use the name "virtual" since it is a reserved + // /// keyword in Rust, so we use its French name. + // /// + // /// # Tuple elements + // /// + // /// The returned tuple is of the form (nt, t, node), which means a + // /// virtually added node at the `node` representing the expansion + // /// from the non-terminal `nt` by the terminal `t`. + // fn virtuel(self) -> Option<(usize, usize, usize)> { + // if let Self::Virtual(nt, t, node) = self { + // Some((nt, t, node)) + // } else { + // None + // } + // } + + /// Is this an empty segment? + fn is_empty(self) -> bool { + self == Self::Empty } } @@ -101,7 +139,6 @@ impl core::fmt::Display for ForestLabel { } /// The type of erros for converting forest labels. -#[non_exhaustive] #[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] pub enum ForestLabelError { /// Try to pack a cloned node. diff --git a/chain/src/lib.rs b/chain/src/lib.rs index a7740c2..d7fc519 100644 --- a/chain/src/lib.rs +++ b/chain/src/lib.rs @@ -16,7 +16,7 @@ use graph::{error::Error as GError, LabelExtGraph}; use item::default::Error as ForestError; -use item::PaSe; +use item::PaVi; /// An edge in the Chain-Rule machine. #[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] @@ -24,18 +24,25 @@ pub struct Edge { /// The position in the atomic languages. label: usize, /// The source of the associated forest edge. - forest_source: PaSe, + forest_source: PaVi, + /// The bottom source on which we shall perform reduction. + /// + /// If this equals `forest_source`, no extra reduction needs to be + /// done. + true_source: PaVi, /// Whether or not this edge is "accepting". accepting: bool, } impl Edge { /// Construct a new edge. - pub fn new(label: usize, forest_source: PaSe, accepting: bool) -> Self { + pub fn new(label: usize, forest_source: PaVi, accepting: bool) -> Self { + let true_source = forest_source; Self { label, forest_source, accepting, + true_source, } } @@ -50,9 +57,21 @@ impl Edge { } /// Return the associated forest edge of the edge. - pub fn forest_source(&self) -> PaSe { + pub fn forest_source(&self) -> PaVi { self.forest_source } + + /// Return the associated bottom edge of the edge from which + /// onwards we shall perform the reduction. + pub fn set_true_source(&mut self, true_source: PaVi) { + self.true_source = true_source; + } + + /// Return the associated bottom edge of the edge from which + /// onwards we shall perform the reduction. + pub fn true_source(&self) -> PaVi { + self.true_source + } } impl core::fmt::Display for Edge { @@ -137,7 +156,7 @@ pub enum TwoLayers { /// the source of the associated forest edge of the second layer, /// and the third is a list of edges, which are the common first /// layers. - Two(usize, PaSe, bool, Vec<(Roi, usize)>), + Two(usize, PaVi, bool, Vec<(Roi, usize)>), } /// The type of a collapsed iterator. @@ -239,12 +258,15 @@ pub trait Chain: LabelExtGraph { /// An iterator that iterates all layers that need to be merged. type DerIter: Iterator; + // FIXME: Add a parameter to control whether to manipulate the + // forests or not. + /// Take the derivative by a terminal `t` at position `pos`. fn derive(&mut self, t: usize, pos: usize) -> Result; /// Take the union of all derivatives. fn union(&mut self, der_iter: Self::DerIter) -> Result, Self::Error> { - // REVIEW: Find a way to avoid allocations. + // REVIEW: Think about the possibilities to avoid allocations. Collapsed::<_, Self>::collapse(der_iter, self) .collect::, Self::Error>>() .map(|mut v| { @@ -276,6 +298,10 @@ pub trait Chain: LabelExtGraph { Ok(()) } + + /// Signal to the parser that the end of the input is reached, so + /// that the parser knows to generate suitable forests. + fn end_of_input(&mut self) -> Result<(), Self::Error>; } pub mod default; diff --git a/chain/src/plan.org b/chain/src/plan.org index 02e14c9..520ba8f 100644 --- a/chain/src/plan.org +++ b/chain/src/plan.org @@ -2,18 +2,17 @@ #+AUTHOR: Durand #+DATE: <2022-11-18 Ven 19:57> -* Things to do [6/9] +* Things to do [9/9] - [X] Implement builders for graphs - [X] Find sets of the first terminals for each non-terminal, in the grammar module. -- [-] Collect various grammars for testing. [5/6] +- [X] Collect various grammars for testing. [5/5] + [X] One simple grammar + [X] Notes + [X] Parentheses + [X] Pathelogical left-recursive + [X] Pathelogical right-recursive - + [ ] Pathelogically ambiguous # More should be added - [X] NFA [4/4] + [X] Add regular expression into NFA. @@ -79,14 +78,13 @@ lack of this step might be the root cause of the failure of the previous version of this project. + [X] Test atoms -- [-] Implement semiring. [2/5] +- [X] Implement semiring. [4/4] + [X] Define the trait. + [X] Define items and rules for the chain-rule parser, as an item-based description. - + [ ] Implement the boolean semiring. - + [ ] Implement the natural number semiring. - + [ ] Implement the free semiring. -- [-] Implement languages. [5/6] + + [X] Implement the natural number semiring. + + [X] Implement the free semiring. +- [X] Implement languages. [6/6] + [X] First define a trait with the expected behaviours. + [X] Then implement them as doubly labelled graphs. + [X] Each edge in the chain-rule machine needs to be labelled also @@ -94,7 +92,7 @@ of those "plugs"! + [X] Then implement finding derivatives by use of the chain rule. + [X] Handle Semirings - + [-] Tests + + [X] Tests - [X] Miscellaneous [1/1] + [X] Implement a function for NFA that checks if a given predicate is satisfied for each edge label. diff --git a/grammar/src/label.rs b/grammar/src/label.rs index 058baaf..e3f3422 100644 --- a/grammar/src/label.rs +++ b/grammar/src/label.rs @@ -119,6 +119,12 @@ impl GrammarLabel { self.end } + /// Set the start. + #[inline] + pub fn set_start(&mut self, pos: usize) { + self.start = pos; + } + /// Return the start in the input. #[inline] pub fn start(&self) -> usize { diff --git a/grammar/src/lib.rs b/grammar/src/lib.rs index ab0f693..21ce2b4 100644 --- a/grammar/src/lib.rs +++ b/grammar/src/lib.rs @@ -25,6 +25,12 @@ use std::{ fmt::Display, }; +/// The index of the starting non-terminal. +/// +/// By convention this is the zero-th non-terminal. I define this +/// constant just for the sake of clarity. +pub const START_NONTERMINAL: usize = 0; + /// The type of a terminal. /// /// For the time being this is a wrapper around a string, but in the @@ -478,8 +484,17 @@ impl Grammar { /// Return true if and only if the terminal can appear as the /// first terminal in a string expanded from the non-terminal. + /// + /// # Errors + /// + /// If `non_terminal` or `terminal` is out of bounds, the function + /// returns an error indicating this fact. #[inline] pub fn is_first_of(&self, non_terminal: usize, terminal: usize) -> Result { + if terminal >= self.ter_num() { + return Err(Error::IndexOutOfBounds(terminal, self.ter_num())); + } + Ok(self .firsts .get(non_terminal) @@ -488,6 +503,11 @@ impl Grammar { } /// Return true if and only if the non-terminal is nullable. + /// + /// # Error + /// + /// If `non_terminal` is out of bounds, return the corresponding + /// error. #[inline] pub fn is_nullable(&self, non_terminal: usize) -> Result { Ok(self diff --git a/grammar/src/tests/test_grammar_left_closure.rs b/grammar/src/tests/test_grammar_left_closure.rs index ffc7c0f..be1df9d 100644 --- a/grammar/src/tests/test_grammar_left_closure.rs +++ b/grammar/src/tests/test_grammar_left_closure.rs @@ -81,59 +81,6 @@ fn test_nfa() -> Result<(), Box> { Ok(()) } -#[test] -fn test_remove_epsilon() -> Result<(), Box> { - let mut lock = stdout().lock(); - - let mut grammar = new_paren_grammar()?; - - writeln!(lock, "grammar:")?; - writeln!(lock, "{grammar}")?; - - let closure = new_closure_regex(&mut grammar)?; - - let mut accumulator_value: usize = 0; - - for regex in closure.iter() { - writeln!( - lock, - "regex: {}", - regex.to_string_with(|tnt| { - match tnt { - TNT::Ter(t) => { - format!( - "({})", - grammar.name_of_tnt(grammar.unpack_tnt(t).unwrap()).unwrap() - ) - } - TNT::Non(_) => { - // hyper non-terminal - format!("({})", grammar.name_of_tnt(tnt).unwrap()) - } - } - })? - )?; - writeln!(lock, "regex len = {}", regex.nodes_len())?; - writeln!(lock, "offset = {accumulator_value}")?; - - accumulator_value += regex.nodes_len(); - } - - writeln!(lock, "total = {accumulator_value}")?; - - let mut nfa = grammar.left_closure_to_nfa(&closure)?; - - #[cfg(features = "test-print-viz")] - nfa.print_viz("nfa_orig.gv")?; - - nfa.remove_epsilon(|label| label.get_value().is_none())?; - - #[cfg(features = "test-print-viz")] - nfa.print_viz("nfa_no_epsilon.gv")?; - - Ok(()) -} - #[test] fn test_remove_dead() -> Result<(), Box> { let mut grammar = new_paren_grammar()?; @@ -177,8 +124,6 @@ fn test_remove_dead() -> Result<(), Box> { #[cfg(features = "test-print-viz")] nfa.print_viz("nfa_orig.gv")?; - // nfa.remove_epsilon(|label| label.get_value().is_none())?; - let accumulators: HashSet = accumulators.into_iter().collect(); println!("accumulators = {accumulators:?}"); diff --git a/graph/src/labelled/binary.rs b/graph/src/labelled/binary.rs index ce3a867..3b96b92 100644 --- a/graph/src/labelled/binary.rs +++ b/graph/src/labelled/binary.rs @@ -214,6 +214,10 @@ impl Graph for PLGraph { let mut post = String::new(); + // FIXME: Find a way to print only used nodes. Maybe remove + // unwanted edges from unwanted nodes, so that we can print + // out only those used nodes. + for node in self.nodes() { post.push_str(&format!( " {node} [label = \"{}\"]\n", diff --git a/nfa/Cargo.toml b/nfa/Cargo.toml index 7f48760..9eac932 100644 --- a/nfa/Cargo.toml +++ b/nfa/Cargo.toml @@ -7,8 +7,9 @@ edition = "2021" [dependencies] graph = { path = "../graph", optional = true } -receme = { path = "../receme" } +receme = { path = "../receme", optional = true } [features] default = ["default-graph"] default-graph = ["dep:graph"] +recursion = ["dep:receme"] diff --git a/nfa/src/default/regex.rs b/nfa/src/default/regex.rs index 5e0d53b..1c22687 100644 --- a/nfa/src/default/regex.rs +++ b/nfa/src/default/regex.rs @@ -4,6 +4,7 @@ use graph::{error::Error as GError, ALGraph, ExtGraph, Graph, GraphLabel}; use crate::{desrec::DesRec, error::Error, Regex}; +#[cfg(feature = "recursion")] use receme::{algebra::Algebra, catana::Cata}; use std::{ @@ -193,7 +194,7 @@ impl DefaultRegex { } } -// REVIEW: This may not be needed. +#[cfg(feature = "recursion")] impl Cata, A> for &DefaultRegex where A: Algebra>, diff --git a/nfa/src/lib.rs b/nfa/src/lib.rs index 07bbd5a..de71f25 100644 --- a/nfa/src/lib.rs +++ b/nfa/src/lib.rs @@ -210,17 +210,6 @@ pub trait Nfa: LabelExtGraph { /// different type for the result type. type FromRegex; - // FIXME: This should not be needed. - /// Remove all empty transitions from the nondeterministic finite - /// automaton. - #[inline] - fn remove_epsilon(&mut self, _f: F) -> Result<(), Error> - where - F: FnMut(T) -> bool, - { - unimplemented!("deprecated") - } - /// Return a state-minimal NFA equivalent with the original one. /// /// This is not required. It is just to allow me to experiment @@ -299,19 +288,6 @@ pub trait Nfa: LabelExtGraph { /// edges. We can call this "a poor man's removal of nodes". fn remove_dead(&mut self, reserve: impl FnMut(usize) -> bool) -> Result<(), Error>; - // FIXME: This should not be needed. - /// For each edge from A to B whose edge is considered nullable by - /// a function `f`, and for every edge from B to C, add an edge - /// from A to C. - /// - /// This is needed specifically by the algorithm to produce a set - /// of atomic languages that represent "null-closed" derived - /// languages. - #[inline] - fn nulling(&mut self, _f: impl FnMut(T) -> bool) -> Result<(), Error> { - unimplemented!("deprecated") - } - /// Return the *closure* of the nondeterministic finite automaton. /// /// # Definition diff --git a/semiring/Cargo.toml b/semiring/Cargo.toml index 0a82e32..caf6cb0 100644 --- a/semiring/Cargo.toml +++ b/semiring/Cargo.toml @@ -6,3 +6,8 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +grammar = { path = "../grammar" } + +[dev-dependencies] +grammar = { path = "../grammar", features = ["test-helper"] } + diff --git a/semiring/src/counting.rs b/semiring/src/counting.rs new file mode 100644 index 0000000..9095888 --- /dev/null +++ b/semiring/src/counting.rs @@ -0,0 +1,43 @@ +//! This module implements the "counting semiring", which counts the +//! number of ways a sentence can be derived by a grammar. + +use super::*; + +/// The counting semiring is a thin wrapper around the natural numbers +/// with the usual addition and multiplication. +#[derive(Copy, Clone, Debug, Ord, PartialOrd, Hash, Eq, PartialEq)] +pub struct Count(usize); + +impl Count { + /// Wrap the count in the wrapper. + pub fn new(count: usize) -> Self { + Self(count) + } + + /// Extract the count out. + pub fn value(&self) -> usize { + self.0 + } +} + +impl Semiring for Count { + fn zero() -> Self { + Self::new(0) + } + + fn one() -> Self { + Self::new(1) + } + + fn add(&mut self, other: &Self) { + self.0 = self.value() + other.value(); + } + + fn mul(&mut self, other: &Self) { + self.0 = self.value() * other.value(); + } + + fn valuation(_pos: usize, _grammar: &Grammar) -> Self { + Self::new(1) + } +} diff --git a/semiring/src/lib.rs b/semiring/src/lib.rs index 9d096db..6c0e0d5 100644 --- a/semiring/src/lib.rs +++ b/semiring/src/lib.rs @@ -15,6 +15,8 @@ //! description. Specifically, it will produce items as it parses the //! input string. Then we can execute these items by any interpreter. +use grammar::Grammar; + /// A semiring is equipped with two operations: the addition and the /// multiplication. It also has additive and multiplicative /// identities. Also the two operations are supposed to be @@ -29,14 +31,20 @@ pub trait Semiring { fn one() -> Self; /// The addition. - fn add(&mut self, other: impl AsRef); + fn add(&mut self, other: &Self); /// The multiplication. - fn mul(&mut self, other: impl AsRef); + fn mul(&mut self, other: &Self); + + /// A valuation associates a semiring value from a rule position. + /// + /// Since a position is not complete without a grammar, we also + /// give it a grammar. + fn valuation(pos: usize, grammar: &Grammar) -> Self; } -// TODO: Implement boolean semiring -// TODO: Implement counting semiring +pub mod counting; + // TODO: Implement derivation forest semiring #[cfg(test)] -- cgit v1.2.3-18-g5258