summary | refs | log | tree | commit | diff
path: root/chain/src/atom
diff options
context:
space:
mode:
author: JSDurand <mmemmew@gmail.com> 2023-02-27 12:36:41 +0800
committer: JSDurand <mmemmew@gmail.com> 2023-02-27 12:36:41 +0800
commit: fbaa420ed550e9c3e7cdc09d4a8ec22bfbd782a6 (patch)
tree: fad9722825bb3fa796dd52c3fd4a8bf46b958cf9 /chain/src/atom
parent: afad02bdff111ecccb0077b9c989e869723c231c (diff)
before a major refactor
I decided to adopt a new approach to recording and updating item derivation forests. Since this affects a lot of things, I am committing before the refactor, so that I can create a branch for it.
Diffstat (limited to 'chain/src/atom')
-rw-r--r-- chain/src/atom/default.rs 226
-rw-r--r-- chain/src/atom/mod.rs 28
2 files changed, 239 insertions, 15 deletions
diff --git a/chain/src/atom/default.rs b/chain/src/atom/default.rs
index ec53596..a55087a 100644
--- a/chain/src/atom/default.rs
+++ b/chain/src/atom/default.rs
@@ -2,18 +2,24 @@
//! [`Atom`][super::Atom] trait.
use super::*;
-use grammar::Grammar;
+use grammar::{Grammar, GrammarLabel, GrammarLabelType};
use graph::{error::Error as GraphError, Graph, LabelExtGraph, LabelGraph};
use nfa::{
default::{nfa::DefaultNFA, regex::DefaultRegex},
+ error::Error as NFAError,
LabelType, NfaLabel,
};
use core::fmt::Display;
-use std::collections::BTreeMap as Map;
+use std::{
+ collections::{hash_set::Iter, BTreeMap as Map, HashMap, HashSet},
+ iter::Copied,
+};
+
+use crate::item::{default::DefaultForest, ForestLabel};
/// A virtual node represents the derivative of a non-terminal symbol
-/// `S` with respect to a terminal symbol `t`.
+/// `s` with respect to a terminal symbol `t`.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Ord, PartialOrd)]
struct VirtualNode {
s: usize,
@@ -34,6 +40,33 @@ impl VirtualNode {
type VirtualMap = Map<VirtualNode, usize>;
+/// A virtual trace stores the rule positions that are responsible for
+/// an edge from the virtual node \[nt\]^t to `target`.
+#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Ord, PartialOrd)]
+struct VirtualTrace {
+ nt: usize,
+ t: usize,
+ target: usize,
+}
+
+impl Display for VirtualTrace {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "VT[{}]^({}) -> {}", self.nt, self.t, self.target)
+ }
+}
+
+impl VirtualTrace {
+ fn new(nt: usize, t: usize, target: usize) -> Self {
+ Self { nt, t, target }
+ }
+}
+
+type VirtualTraceMap = Map<VirtualTrace, HashSet<usize>>;
+
+type VirtualFrag = DefaultForest<ForestLabel<GrammarLabel>>;
+
+type VirtualFragMap = Map<VirtualNode, Map<usize, VirtualFrag>>;
+
/// The type of atomic languages.
#[derive(Debug, Clone, Default)]
pub struct DefaultAtom {
@@ -43,6 +76,8 @@ pub struct DefaultAtom {
// NOTE: This is mostly for printing and debugging
regexp: Vec<DefaultRegex<TNT>>,
virtual_nodes: VirtualMap,
+ virtual_traces: VirtualTraceMap,
+ virtual_frags: VirtualFragMap,
}
impl DefaultAtom {
@@ -260,9 +295,9 @@ impl Nfa<LabelType<TNT>> for DefaultAtom {
#[inline]
fn to_nfa(
_regexps: &[impl nfa::Regex<nfa::default::regex::RegexType<LabelType<TNT>>>],
- _sub_pred: impl Fn(LabelType<TNT>) -> Result<nfa::SoC<LabelType<TNT>>, nfa::error::Error>,
+ _sub_pred: impl Fn(LabelType<TNT>) -> Result<nfa::SoC<LabelType<TNT>>, NFAError>,
_default: Option<LabelType<TNT>>,
- ) -> Result<Self::FromRegex<DOption<DOption<TNT>>>, nfa::error::Error> {
+ ) -> Result<Self::FromRegex<DOption<DOption<TNT>>>, NFAError> {
// NOTE: We cannot construct an atom from a set of regular
// languages alone. So it is appropriate to panic here, if
// one tries to do so, for some reason.
@@ -270,7 +305,7 @@ impl Nfa<LabelType<TNT>> for DefaultAtom {
}
#[inline]
- fn remove_dead(&mut self, reserve: impl FnMut(usize) -> bool) -> Result<(), nfa::error::Error> {
+ fn remove_dead(&mut self, reserve: impl FnMut(usize) -> bool) -> Result<(), NFAError> {
self.nfa.remove_dead(reserve)
}
@@ -281,7 +316,7 @@ impl Nfa<LabelType<TNT>> for DefaultAtom {
remove_after_p: bool,
transform: impl FnMut(nfa::TwoEdges<LabelType<TNT>>) -> LabelType<TNT>,
remove_predicate: impl FnMut(LabelType<TNT>) -> bool,
- ) -> Result<(), nfa::error::Error> {
+ ) -> Result<(), NFAError> {
self.nfa
.closure(predicate, remove_after_p, transform, remove_predicate)
}
@@ -296,8 +331,6 @@ impl DefaultAtom {
let mut nfa = grammar.left_closure_to_nfa(&regexp)?;
- use std::collections::{HashMap, HashSet};
-
let accumulators: Vec<usize> = {
let mut result = Vec::with_capacity(regexp.len() + 1);
result.push(0);
@@ -388,6 +421,12 @@ impl DefaultAtom {
// Now add the virtual nodes.
let mut virtual_nodes: VirtualMap = Default::default();
+ // Record virtual traces.
+ let mut virtual_traces: VirtualTraceMap = Default::default();
+
+ // Record virtual fragments.
+ let mut virtual_frags: VirtualFragMap = Default::default();
+
let nt_num = grammar.non_num();
assert!(nt_num <= accumulators.len());
@@ -403,19 +442,35 @@ impl DefaultAtom {
GraphError::IndexOutOfBounds(index, bound) => {
GrammarError::IndexOutOfBounds(index, bound)
}
- _ => unreachable!(),
+ // This is supposed to be unreachable, but we still
+ // give it a valid value.
+ _ => GrammarError::NFAFail(NFAError::Graph(ge)),
}
}
for nt in 0..nt_num {
+ // This is safe because of the above assertion.
+ let nt_start = *accumulators.get(nt).unwrap();
+
let children: std::collections::HashMap<_, _> = nfa
- // This is safe because of the above assertion.
- .labels_of(*accumulators.get(nt).unwrap())
+ .labels_of(nt_start)
.map_err(index_out_of_bounds_conversion)?
.map(|(label, target_iter)| (*label, target_iter))
.collect();
- type TerminalsValue = (HashSet<(LabelType<TNT>, usize, Option<Vec<usize>>)>, bool);
+ /// The components of each tuple in the set have the following meanings, in order:
+ ///
+ /// - `LabelType` => the label for the edge
+ ///
+ /// - `usize` => the target of the edge
+ ///
+ /// - `Option<Vec<usize>>` => reduction information
+ ///
+ /// - `usize` => the rule position that caused this edge
+ type TerminalsValue = (
+ HashSet<(LabelType<TNT>, usize, Option<Vec<usize>>, usize)>,
+ bool,
+ );
let mut terminals_map: HashMap<usize, TerminalsValue> = HashMap::new();
@@ -431,9 +486,72 @@ impl DefaultAtom {
result
};
+ let virtual_trace = label.get_moved();
+
let mut accepting = false;
for child in children_iter {
+ // add a virtual fragment
+
+ let line: Vec<GrammarLabelType> = grammar
+ .query_expansion(nt_start, child)?
+ .iter()
+ .copied()
+ .flatten()
+ .flat_map(|(nt, rule)| [(*rule).into(), TNT::Non(*nt).into()])
+ .rev()
+ .chain(std::iter::once(TNT::Ter(t).into()))
+ .collect();
+
+ assert!(line.len() > 1);
+
+ // by our construction this must be a rule
+ let rule = line.get(line.len() - 2).unwrap().rule().unwrap();
+
+ use crate::default::Error as DError;
+
+ let frag = crate::item::genins::generate_fragment(line, 0).map_err(
+ |fe: DError| -> GrammarError {
+ match fe {
+ DError::IndexOutOfBounds(index, bound) => {
+ GrammarError::IndexOutOfBounds(index, bound)
+ }
+ DError::DuplicateNode(n) => GrammarError::NFAFail(
+ NFAError::Graph(GraphError::DuplicatedNode(n)),
+ ),
+ DError::DuplicateEdge(source, target) => GrammarError::NFAFail(
+ NFAError::Graph(GraphError::DuplicatedEdge(source, target)),
+ ),
+ DError::NodeNoLabel(n) => {
+ panic!("node {n} has no label!")
+ }
+ DError::CannotReserve(_) => unreachable!(
+ "generate_fragment should not signal this error"
+ ),
+ DError::CannotClone(_) => {
+ unreachable!("we are not cloning")
+ }
+ DError::CannotPlant => {
+ unreachable!("why can we not plant?")
+ }
+ DError::SplitPack(_) => {
+ unreachable!("we not not splitting")
+ }
+ DError::InvalidClone(_) => {
+ unreachable!("we are not cloning")
+ }
+ DError::Invalid => {
+ panic!("a label is wrongly planted?")
+ }
+ }
+ },
+ )?;
+
+ virtual_frags
+ .entry(VirtualNode::new(nt, t))
+ .or_insert_with(Default::default)
+ .insert(rule, frag);
+
accepting =
accepting
|| *accepting_vec.get(child).ok_or(
@@ -462,6 +580,7 @@ impl DefaultAtom {
.query_reduction(child, target)
.unwrap()
.map(|slice| slice.to_vec()),
+ virtual_trace,
)
}));
}
@@ -470,8 +589,21 @@ impl DefaultAtom {
}
for (t, (set, accepting)) in terminals_map.into_iter() {
+ // update virtual traces
+
+ for (_, target, _, pos) in set.iter() {
+ let trace = VirtualTrace::new(nt, t, *target);
+
+ virtual_traces
+ .entry(trace)
+ .or_insert_with(Default::default)
+ .insert(*pos);
+ }
+
+ // add a virtual node
+
let new_index = nfa
- .extend(set.iter().map(|(label, target, _)| (*label, *target)))
+ .extend(set.iter().map(|(label, target, _, _)| (*label, *target)))
.map_err(index_out_of_bounds_conversion)?;
if accepting_vec.get(new_index).is_none() {
@@ -486,7 +618,7 @@ impl DefaultAtom {
virtual_nodes.insert(virtual_node, new_index);
// update the reduction information
- for (_, target, info) in set.into_iter() {
+ for (_, target, info, _) in set {
if let Some(info) = info {
if !matches!(
grammar.query_reduction(new_index, target)?,
@@ -507,8 +639,60 @@ impl DefaultAtom {
regexp,
virtual_nodes,
accepting_vec,
+ virtual_traces,
+ virtual_frags,
})
}
+
+ /// Generate a vector of virtual fragments for a non-terminal and
+ /// a terminal.
+ ///
+ /// # RULE
+ ///
+ /// If one passes `Some(rule)` as the parameter, then this returns
+ /// only those fragments that begin with the specified rule.
+ ///
+ /// On the other hand, if one passes `None`, then this returns
+ /// only those fragments that can end the non-terminal expansion.
+ ///
+ /// # Guarantee
+ ///
+ /// It is guaranteed that the 1-th node of each fragment is a rule
+ /// number.
+ pub(crate) fn generate_virtual_frags(
+ &self,
+ nt: usize,
+ t: usize,
+ rule: Option<usize>,
+ ) -> Option<Vec<&VirtualFrag>> {
+ let vn = VirtualNode::new(nt, t);
+
+ if let Some(rule) = rule {
+ self.virtual_frags
+ .get(&vn)
+ .and_then(|map| map.get(&rule))
+ .map(|f| vec![f])
+ } else {
+ let result: Vec<&VirtualFrag> = self
+ .virtual_frags
+ .get(&vn)
+ .iter()
+ .copied()
+ .flatten()
+ .filter_map(|(rule, frag)| {
+ self.is_accepting(rule * 2 + 1)
+ .unwrap_or(false)
+ .then_some(frag)
+ })
+ .collect();
+
+ if result.is_empty() {
+ None
+ } else {
+ Some(result)
+ }
+ }
+ }
}
/// A convenient getter for the map of virtual nodes.
@@ -550,4 +734,16 @@ impl Atom for DefaultAtom {
self.accepting_vec.len(),
))
}
+
+ type Iter<'a> = Copied<Iter<'a, usize>>
+ where
+ Self: 'a;
+
+ fn trace(&self, nt: usize, t: usize, target: usize) -> Option<<Self as Atom>::Iter<'_>> {
+ let trace = VirtualTrace::new(nt, t, target);
+
+ self.virtual_traces
+ .get(&trace)
+ .map(|set| set.iter().copied())
+ }
}
diff --git a/chain/src/atom/mod.rs b/chain/src/atom/mod.rs
index 398edd2..c9dadb2 100644
--- a/chain/src/atom/mod.rs
+++ b/chain/src/atom/mod.rs
@@ -17,6 +17,16 @@ pub trait Atom: Nfa<LabelType<TNT>> + Deref<Target = Grammar> {
/// left-linear null closure of `nt` with respect to `t`.
fn atom(&self, nt: usize, t: usize) -> Result<Option<usize>, GrammarError>;
+ /// A type that iterates through the rule positions.
+ type Iter<'a>: Iterator<Item = usize> + 'a
+ where
+ Self: 'a;
+
+ /// Return an iterator of rule positions responsible for an edge
+ /// from the virtual node corresponding to the non-terminal `nt`
+ /// and terminal `t` to `target`.
+ fn trace(&self, nt: usize, t: usize, target: usize) -> Option<<Self as Atom>::Iter<'_>>;
+
/// Return the index of the empty state.
fn empty(&self) -> usize;
@@ -33,6 +43,9 @@ mod tests {
use super::*;
use grammar::test_grammar_helper::*;
+ #[cfg(feature = "test-print-viz")]
+ use graph::Graph;
+
#[test]
fn atom() -> Result<(), Box<dyn std::error::Error>> {
let grammar = new_notes_grammar()?;
@@ -41,6 +54,21 @@ mod tests {
println!("atom = {atom}");
+ #[cfg(feature = "test-print-viz")]
+ {
+ println!("virtual frag for 1, 3: ");
+
+ for (index, frag) in atom
+ .generate_virtual_frags(1, 3, None)
+ .iter()
+ .flatten()
+ .enumerate()
+ {
+ crate::item::default::print_labels(&atom, *frag)?;
+ frag.print_viz(&format!("frag {index}.gv"))?;
+ }
+ }
+
Ok(())
}
}