diff options
author | JSDurand <mmemmew@gmail.com> | 2023-02-27 12:36:41 +0800 |
---|---|---|
committer | JSDurand <mmemmew@gmail.com> | 2023-02-27 12:36:41 +0800 |
commit | fbaa420ed550e9c3e7cdc09d4a8ec22bfbd782a6 (patch) | |
tree | fad9722825bb3fa796dd52c3fd4a8bf46b958cf9 /chain/src/atom | |
parent | afad02bdff111ecccb0077b9c989e869723c231c (diff) |
before a major refactor
I have decided to adopt a new approach to recording and updating item
derivation forests. Since this affects a lot of things, I am
committing before the refactor, so that I can create a branch for that
refactor.
Diffstat (limited to 'chain/src/atom')
-rw-r--r-- | chain/src/atom/default.rs | 226 | ||||
-rw-r--r-- | chain/src/atom/mod.rs | 28 |
2 files changed, 239 insertions, 15 deletions
diff --git a/chain/src/atom/default.rs b/chain/src/atom/default.rs index ec53596..a55087a 100644 --- a/chain/src/atom/default.rs +++ b/chain/src/atom/default.rs @@ -2,18 +2,24 @@ //! [`Atom`][super::Atom] trait. use super::*; -use grammar::Grammar; +use grammar::{Grammar, GrammarLabel, GrammarLabelType}; use graph::{error::Error as GraphError, Graph, LabelExtGraph, LabelGraph}; use nfa::{ default::{nfa::DefaultNFA, regex::DefaultRegex}, + error::Error as NFAError, LabelType, NfaLabel, }; use core::fmt::Display; -use std::collections::BTreeMap as Map; +use std::{ + collections::{hash_set::Iter, BTreeMap as Map, HashMap, HashSet}, + iter::Copied, +}; + +use crate::item::{default::DefaultForest, ForestLabel}; /// A virtual node represents the derivative of a non-terminal symbol -/// `S` with respect to a terminal symbol `t`. +/// `s` with respect to a terminal symbol `t`. #[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Ord, PartialOrd)] struct VirtualNode { s: usize, @@ -34,6 +40,33 @@ impl VirtualNode { type VirtualMap = Map<VirtualNode, usize>; +/// A virtual trace stores the rule positions that are responsible for +/// an edge from the virtual node \[nt\]^s to `target`. +#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Ord, PartialOrd)] +struct VirtualTrace { + nt: usize, + t: usize, + target: usize, +} + +impl Display for VirtualTrace { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "VT[{}]^({}) -> {}", self.nt, self.t, self.target) + } +} + +impl VirtualTrace { + fn new(nt: usize, t: usize, target: usize) -> Self { + Self { nt, t, target } + } +} + +type VirtualTraceMap = Map<VirtualTrace, HashSet<usize>>; + +type VirtualFrag = DefaultForest<ForestLabel<GrammarLabel>>; + +type VirtualFragMap = Map<VirtualNode, Map<usize, VirtualFrag>>; + /// The type of atomic languages. 
#[derive(Debug, Clone, Default)] pub struct DefaultAtom { @@ -43,6 +76,8 @@ pub struct DefaultAtom { // NOTE: This is mostly for printing and debugging regexp: Vec<DefaultRegex<TNT>>, virtual_nodes: VirtualMap, + virtual_traces: VirtualTraceMap, + virtual_frags: VirtualFragMap, } impl DefaultAtom { @@ -260,9 +295,9 @@ impl Nfa<LabelType<TNT>> for DefaultAtom { #[inline] fn to_nfa( _regexps: &[impl nfa::Regex<nfa::default::regex::RegexType<LabelType<TNT>>>], - _sub_pred: impl Fn(LabelType<TNT>) -> Result<nfa::SoC<LabelType<TNT>>, nfa::error::Error>, + _sub_pred: impl Fn(LabelType<TNT>) -> Result<nfa::SoC<LabelType<TNT>>, NFAError>, _default: Option<LabelType<TNT>>, - ) -> Result<Self::FromRegex<DOption<DOption<TNT>>>, nfa::error::Error> { + ) -> Result<Self::FromRegex<DOption<DOption<TNT>>>, NFAError> { // NOTE: We cannot construct an atom from a set of regular // languages alone. So it is appropriate to panic here, if // one tries to do so, for some reason. @@ -270,7 +305,7 @@ impl Nfa<LabelType<TNT>> for DefaultAtom { } #[inline] - fn remove_dead(&mut self, reserve: impl FnMut(usize) -> bool) -> Result<(), nfa::error::Error> { + fn remove_dead(&mut self, reserve: impl FnMut(usize) -> bool) -> Result<(), NFAError> { self.nfa.remove_dead(reserve) } @@ -281,7 +316,7 @@ impl Nfa<LabelType<TNT>> for DefaultAtom { remove_after_p: bool, transform: impl FnMut(nfa::TwoEdges<LabelType<TNT>>) -> LabelType<TNT>, remove_predicate: impl FnMut(LabelType<TNT>) -> bool, - ) -> Result<(), nfa::error::Error> { + ) -> Result<(), NFAError> { self.nfa .closure(predicate, remove_after_p, transform, remove_predicate) } @@ -296,8 +331,6 @@ impl DefaultAtom { let mut nfa = grammar.left_closure_to_nfa(&regexp)?; - use std::collections::{HashMap, HashSet}; - let accumulators: Vec<usize> = { let mut result = Vec::with_capacity(regexp.len() + 1); result.push(0); @@ -388,6 +421,12 @@ impl DefaultAtom { // Now add the virtual nodes. 
let mut virtual_nodes: VirtualMap = Default::default(); + // Record virtual traces. + let mut virtual_traces: VirtualTraceMap = Default::default(); + + // Record virtual fragments. + let mut virtual_frags: VirtualFragMap = Default::default(); + let nt_num = grammar.non_num(); assert!(nt_num <= accumulators.len()); @@ -403,19 +442,35 @@ impl DefaultAtom { GraphError::IndexOutOfBounds(index, bound) => { GrammarError::IndexOutOfBounds(index, bound) } - _ => unreachable!(), + // This is supposed to be unreachable, but we still + // give it a valid value. + _ => GrammarError::NFAFail(NFAError::Graph(ge)), } } for nt in 0..nt_num { + // This is safe because of the above assertion. + let nt_start = *accumulators.get(nt).unwrap(); + let children: std::collections::HashMap<_, _> = nfa - // This is safe because of the above assertion. - .labels_of(*accumulators.get(nt).unwrap()) + .labels_of(nt_start) .map_err(index_out_of_bounds_conversion)? .map(|(label, target_iter)| (*label, target_iter)) .collect(); - type TerminalsValue = (HashSet<(LabelType<TNT>, usize, Option<Vec<usize>>)>, bool); + /// The tuples have the following meanings in order: + /// + /// - `LabelType` => the label for the edge + /// + /// - `usize` => the target of the edge + /// + /// - `Option<Vec<usize>>` => reduction information + /// + /// - `usize` => the rule position that caused this edge + type TerminalsValue = ( + HashSet<(LabelType<TNT>, usize, Option<Vec<usize>>, usize)>, + bool, + ); let mut terminals_map: HashMap<usize, TerminalsValue> = HashMap::new(); @@ -431,9 +486,72 @@ impl DefaultAtom { result }; + let virtual_trace = label.get_moved(); + let mut accepting = false; for child in children_iter { + // add a virtual fragment + + let line: Vec<GrammarLabelType> = grammar + .query_expansion(nt_start, child)? 
+ .iter() + .copied() + .flatten() + .flat_map(|(nt, rule)| [(*rule).into(), TNT::Non(*nt).into()]) + .rev() + .chain(std::iter::once(TNT::Ter(t).into())) + .collect(); + + assert!(line.len() > 1); + + // by our construction this must be a rule + let rule = line.get(line.len() - 2).unwrap().rule().unwrap(); + + use crate::default::Error as DError; + + let frag = crate::item::genins::generate_fragment(line, 0).map_err( + |fe: DError| -> GrammarError { + match fe { + DError::IndexOutOfBounds(index, bound) => { + GrammarError::IndexOutOfBounds(index, bound) + } + DError::DuplicateNode(n) => GrammarError::NFAFail( + NFAError::Graph(GraphError::DuplicatedNode(n)), + ), + DError::DuplicateEdge(source, target) => GrammarError::NFAFail( + NFAError::Graph(GraphError::DuplicatedEdge(source, target)), + ), + DError::NodeNoLabel(n) => { + panic!("node {n} has no label!") + } + DError::CannotReserve(_) => unreachable!( + "generate_fragment should not signal this error" + ), + DError::CannotClone(_) => { + unreachable!("we are not cloning") + } + DError::CannotPlant => { + unreachable!("why can we not plant?") + } + DError::SplitPack(_) => { + unreachable!("we not not splitting") + } + DError::InvalidClone(_) => { + unreachable!("we are not cloning") + } + DError::Invalid => { + panic!("a label is wrongly planted?") + } + } + }, + )?; + + virtual_frags + .entry(VirtualNode::new(nt, t)) + .or_insert_with(Default::default) + .insert(rule, frag); + accepting = accepting || *accepting_vec.get(child).ok_or( @@ -462,6 +580,7 @@ impl DefaultAtom { .query_reduction(child, target) .unwrap() .map(|slice| slice.to_vec()), + virtual_trace, ) })); } @@ -470,8 +589,21 @@ impl DefaultAtom { } for (t, (set, accepting)) in terminals_map.into_iter() { + // update virtual traces + + for (_, target, _, pos) in set.iter() { + let trace = VirtualTrace::new(nt, t, *target); + + virtual_traces + .entry(trace) + .or_insert_with(Default::default) + .insert(*pos); + } + + // add a virtual node + let 
new_index = nfa - .extend(set.iter().map(|(label, target, _)| (*label, *target))) + .extend(set.iter().map(|(label, target, _, _)| (*label, *target))) .map_err(index_out_of_bounds_conversion)?; if accepting_vec.get(new_index).is_none() { @@ -486,7 +618,7 @@ impl DefaultAtom { virtual_nodes.insert(virtual_node, new_index); // update the reduction information - for (_, target, info) in set.into_iter() { + for (_, target, info, _) in set { if let Some(info) = info { if !matches!( grammar.query_reduction(new_index, target)?, @@ -507,8 +639,60 @@ impl DefaultAtom { regexp, virtual_nodes, accepting_vec, + virtual_traces, + virtual_frags, }) } + + /// Generate a vector of virtual fragments for a non-terminal and + /// a terminal. + /// + /// # RULE + /// + /// If one passes `Some(rule)` as the paramter, then this returns + /// only those fragments that begin with the specified rule. + /// + /// On the other hand, if one passes `None`, then this returns + /// only those fragments that can end the non-terminal expansion. + /// + /// # Guarantee + /// + /// It is guaranteed that the 1-th node of each fragment is a rule + /// number. + pub(crate) fn generate_virtual_frags( + &self, + nt: usize, + t: usize, + rule: Option<usize>, + ) -> Option<Vec<&VirtualFrag>> { + let vn = VirtualNode::new(nt, t); + + if let Some(rule) = rule { + self.virtual_frags + .get(&vn) + .and_then(|map| map.get(&rule)) + .map(|f| vec![f]) + } else { + let result: Vec<&VirtualFrag> = self + .virtual_frags + .get(&vn) + .iter() + .copied() + .flatten() + .filter_map(|(rule, frag)| { + self.is_accepting(rule * 2 + 1) + .unwrap_or(false) + .then_some(frag) + }) + .collect(); + + if result.is_empty() { + None + } else { + Some(result) + } + } + } } /// A convenient getter for the map of virtual nodes. 
@@ -550,4 +734,16 @@ impl Atom for DefaultAtom { self.accepting_vec.len(), )) } + + type Iter<'a> = Copied<Iter<'a, usize>> + where + Self: 'a; + + fn trace(&self, nt: usize, t: usize, target: usize) -> Option<<Self as Atom>::Iter<'_>> { + let trace = VirtualTrace::new(nt, t, target); + + self.virtual_traces + .get(&trace) + .map(|set| set.iter().copied()) + } } diff --git a/chain/src/atom/mod.rs b/chain/src/atom/mod.rs index 398edd2..c9dadb2 100644 --- a/chain/src/atom/mod.rs +++ b/chain/src/atom/mod.rs @@ -17,6 +17,16 @@ pub trait Atom: Nfa<LabelType<TNT>> + Deref<Target = Grammar> { /// left-linear null closure of `nt` with respect to `t`. fn atom(&self, nt: usize, t: usize) -> Result<Option<usize>, GrammarError>; + /// A type that iterates through the rule positions. + type Iter<'a>: Iterator<Item = usize> + 'a + where + Self: 'a; + + /// Return an iterator of rule positions responsible for an edge + /// from the virtual node corresponding to the non-terminal `nt` + /// and terminal `t` to `target`. + fn trace(&self, nt: usize, t: usize, target: usize) -> Option<<Self as Atom>::Iter<'_>>; + /// Return the index of the empty state. fn empty(&self) -> usize; @@ -33,6 +43,9 @@ mod tests { use super::*; use grammar::test_grammar_helper::*; + #[cfg(feature = "test-print-viz")] + use graph::Graph; + #[test] fn atom() -> Result<(), Box<dyn std::error::Error>> { let grammar = new_notes_grammar()?; @@ -41,6 +54,21 @@ mod tests { println!("atom = {atom}"); + #[cfg(feature = "test-print-viz")] + { + println!("virtual frag for 1, 3: "); + + for (index, frag) in atom + .generate_virtual_frags(1, 3, None) + .iter() + .flatten() + .enumerate() + { + crate::item::default::print_labels(&atom, *frag)?; + frag.print_viz(&format!("frag {index}.gv"))?; + } + } + Ok(()) } } |