//! This file provides a default implementation of the //! [`Atom`][super::Atom] trait. use super::*; use grammar::{Grammar, GrammarLabel, GrammarLabelType}; use graph::{error::Error as GraphError, Graph, LabelExtGraph, LabelGraph}; use nfa::{ default::{nfa::DefaultNFA, regex::DefaultRegex}, error::Error as NFAError, LabelType, NfaLabel, }; use graph_macro::Graph; use core::fmt::Display; use std::{ collections::{hash_set::Iter, BTreeMap as Map, HashMap, HashSet}, iter::Copied, }; use crate::item::{default::DefaultForest, ForestLabel}; /// A virtual node represents the derivative of a non-terminal symbol /// `s` with respect to a terminal symbol `t`. #[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Ord, PartialOrd)] struct VirtualNode { s: usize, t: usize, } impl Display for VirtualNode { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "VN[{}]^({})", self.s, self.t) } } impl VirtualNode { fn new(s: usize, t: usize) -> Self { Self { s, t } } } type VirtualMap = Map; /// A virtual trace stores the rule positions that are responsible for /// an edge from the virtual node \[nt\]^s to `target`. #[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Ord, PartialOrd)] struct VirtualTrace { nt: usize, t: usize, target: usize, } impl Display for VirtualTrace { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "VT[{}]^({}) -> {}", self.nt, self.t, self.target) } } impl VirtualTrace { fn new(nt: usize, t: usize, target: usize) -> Self { Self { nt, t, target } } } type VirtualTraceMap = Map>; type VirtualFrag = DefaultForest>; type VirtualFragMap = Map>; /// The type of atomic languages. #[derive(Debug, Clone, Default, Graph)] pub struct DefaultAtom { grammar: Grammar, #[graph] nfa: DefaultNFA>, accepting_vec: Vec, // NOTE: This is mostly for printing and debugging regexp: Vec>, virtual_nodes: VirtualMap, virtual_traces: VirtualTraceMap, virtual_frags: VirtualFragMap, } impl DefaultAtom { /// Return the string description of a rule position. pub fn rule_pos_string(&self, pos: usize) -> Result> { if pos == self.grammar.total() { return Ok("End of rules".to_owned()); } let rule_num = self.grammar.get_rule_num(pos)?; assert!(rule_num < self.grammar.non_num()); let display_tnt = |tnt| { format!( " {} ", self.name_of_tnt(match tnt { TNT::Non(_) => tnt, TNT::Ter(t) => self.unpack_tnt(t).unwrap(), }) .unwrap_or_else(|e| format!("{e}")) ) }; Ok(self.regexp.get(rule_num).unwrap().to_string_with_dot( display_tnt, if rule_num == 0 { pos } else { pos - self.grammar.nth_accumulator(rule_num)? }, )?) } /// Print nullable nodes. pub fn print_nullables(&self) { print!("printing nullables for the atom: "); for nullable in self .accepting_vec .iter() .enumerate() .filter_map(|(index, pred)| (*pred).then(|| index)) { print!("{nullable}, "); } println!(); } /// Print virtual nodes. pub fn print_virtual(&self) { println!("printing virtual nodes of the atom:"); for (vn, node) in self.virtual_nodes.iter() { println!("[{}]^{{({})}}: {}", vn.s, vn.t, node); } println!(); } /// Print the underlying NFA. pub fn print_nfa>(&self, filename: S) -> Result<(), std::io::Error> { self.nfa.print_viz(filename.as_ref())?; self.print_nullables(); self.print_virtual(); Ok(()) } } impl Display for DefaultAtom { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let grammar = &self.grammar; let error_to_string = |err| format!("{err}"); let tnt_to_string = |tnt, deco| { grammar .name_of_tnt(tnt) .map(|name| format!("{deco}({name})")) .unwrap_or_else(error_to_string) }; let display_tnt = |tnt| match tnt { TNT::Ter(t) => format!( "({})", grammar .unpack_tnt(t) .map(|tnt| tnt_to_string(tnt, "")) .unwrap_or_else(error_to_string) ), TNT::Non(_) => tnt_to_string(tnt, "H"), }; writeln!(f, "regular expressions:")?; let mut accumulators = Vec::with_capacity(self.regexp.len() + 1); accumulators.push(0usize); for regex in self.regexp.iter() { writeln!(f, "accumulator: {}", accumulators.last().unwrap())?; accumulators.push(regex.nodes_len() * 2 + accumulators.last().unwrap()); let string = regex.to_string_with(display_tnt)?; writeln!(f, "{string}")?; } writeln!(f, "total = {}", accumulators.last().unwrap())?; writeln!(f, "virtual nodes:")?; for (virtual_node, index) in self.virtual_nodes.iter() { writeln!(f, "{virtual_node}: {index}")?; } Ok(()) } } // Some boiler-plate delegation implementations for Graph and // LabelGraph, in order to implement Nfa. impl LabelGraph> for DefaultAtom { type Iter<'b> = > as LabelGraph>>::Iter<'b> where Self: 'b; type LabelIter<'b> = > as LabelGraph>>::LabelIter<'b> where Self: 'b, DOption: 'b; type EdgeLabelIter<'a> = > as LabelGraph>>::EdgeLabelIter<'a> where Self: 'a, DOption: 'a; #[inline] fn vertex_label(&self, node_id: usize) -> Result>, GraphError> { self.nfa.vertex_label(node_id) } #[inline] fn edge_label( &self, source: usize, target: usize, ) -> Result, GraphError> { self.nfa.edge_label(source, target) } #[inline] fn find_children_with_label( &self, node_id: usize, label: &LabelType, ) -> Result<>>::Iter<'_>, GraphError> { self.nfa.find_children_with_label(node_id, label) } #[inline] fn labels_of(&self, node_id: usize) -> Result, GraphError> { self.nfa.labels_of(node_id) } #[inline] fn has_edge_label( &self, node_id: usize, label: &LabelType, target: usize, ) -> Result { self.nfa.has_edge_label(node_id, label, target) } } impl LabelExtGraph> for DefaultAtom { #[inline] fn extend( &mut self, edges: impl IntoIterator, usize)>, ) -> Result { self.nfa.extend(edges) } } impl Nfa> for DefaultAtom { type FromRegex = (); #[inline] fn to_nfa( _regexps: &[impl nfa::Regex>>], _sub_pred: impl Fn(LabelType) -> Result>, NFAError>, _default: Option>, ) -> Result>>, NFAError> { // NOTE: We cannot construct an atom from a set of regular // languages alone. So it is appropriate to panic here, if // one tries to do so, for some reason. unimplemented!() } #[inline] fn remove_dead(&mut self, reserve: impl FnMut(usize) -> bool) -> Result<(), NFAError> { self.nfa.remove_dead(reserve) } #[inline] fn closure( &mut self, predicate: impl FnMut(LabelType) -> bool, remove_after_p: bool, transform: impl FnMut(nfa::TwoEdges>) -> LabelType, remove_predicate: impl FnMut(LabelType) -> bool, ) -> Result<(), NFAError> { self.nfa .closure(predicate, remove_after_p, transform, remove_predicate) } } impl DefaultAtom { /// Construct an atomic language from a grammar. pub fn from_grammar(mut grammar: Grammar) -> Result { grammar.compute_firsts()?; let regexp = grammar.left_closure()?; let mut nfa = grammar.left_closure_to_nfa(®exp)?; let accumulators: Vec = { let mut result = Vec::with_capacity(regexp.len() + 1); result.push(0); for regex in regexp.iter() { // Calling `unwrap` here is safe as `result` is always // non-empty. result.push(regex.nodes_len() * 2 + result.last().unwrap()); } result }; let accumulators_set: HashSet = accumulators.iter().copied().collect(); let nullables: HashSet = (0..grammar.non_num()) .filter(|n| matches!(grammar.is_nullable(*n), Ok(true))) .collect(); // Now record accepting information. let nfa_len = nfa.nodes_len(); let label_is_nullable = |label: NfaLabel>| { if let Some(label) = *label.get_value() { matches!(label, TNT::Non(n) if nullables.contains(&n)) } else { true } }; let mut accepting_vec: Vec = std::iter::repeat(false).take(nfa_len).collect(); for nfa_start in accumulators.iter().copied().take(regexp.len()) { *accepting_vec.get_mut(nfa_start + 1).unwrap() = true; } // The last is always accepting. *accepting_vec.get_mut(nfa_len - 1).unwrap() = true; let mut updated = true; while updated { updated = false; for node in nfa.nodes() { // skip those that do not need to be updated if *accepting_vec .get(node) .ok_or(GrammarError::IndexOutOfBounds(node, nfa_len))? { continue; } 'label_loop: for (label, target_iter) in nfa .labels_of(node) .map_err(|_| GrammarError::IndexOutOfBounds(node, nfa_len))? { if label_is_nullable(*label) { for target in target_iter { if *accepting_vec .get(target) .ok_or(GrammarError::IndexOutOfBounds(target, nfa_len))? { // accepting_vec[node] must have been // false, as we checked above *accepting_vec.get_mut(node).unwrap() = true; updated = true; break 'label_loop; } } } } } } // Perform nulling and remove_epsilon at the same time. nfa.closure( label_is_nullable, true, |two_edges| grammar.transform_label_null_epsilon(two_edges), |label| label.get_value().is_none(), )?; nfa.remove_dead(|node| accumulators_set.contains(&node))?; // Now add the virtual nodes. let mut virtual_nodes: VirtualMap = Default::default(); // Record virtual traces. let mut virtual_traces: VirtualTraceMap = Default::default(); // Record virtual fragments. let mut virtual_frags: VirtualFragMap = Default::default(); let nt_num = grammar.non_num(); assert!(nt_num <= accumulators.len()); /// Convert an error telling us that an index is out of bounds. /// /// # Panics /// /// The function panics if the error is not of the expected /// kind. fn index_out_of_bounds_conversion(ge: GraphError) -> GrammarError { match ge { GraphError::IndexOutOfBounds(index, bound) => { GrammarError::IndexOutOfBounds(index, bound) } // This is supposed to be unreachable, but we still // give it a valid value. _ => GrammarError::NFAFail(NFAError::Graph(ge)), } } for nt in 0..nt_num { // This is safe because of the above assertion. let nt_start = *accumulators.get(nt).unwrap(); let children: std::collections::HashMap<_, _> = nfa .labels_of(nt_start) .map_err(index_out_of_bounds_conversion)? .map(|(label, target_iter)| (*label, target_iter)) .collect(); /// The tuples have the following meanings in order: /// /// - `LabelType` => the label for the edge /// /// - `usize` => the target of the edge /// /// - `Option>` => reduction information /// /// - `usize` => the rule position that caused this edge type TerminalsValue = ( HashSet<(LabelType, usize, Option>, usize)>, bool, ); let mut terminals_map: HashMap = HashMap::new(); for (label, children_iter) in children.into_iter() { if let Some(TNT::Ter(t)) = *label.get_value() { let estimated_len = { let mut result = 0; for child in children_iter.clone() { result += nfa.degree(child).map_err(index_out_of_bounds_conversion)?; } result }; let virtual_trace = label.get_moved(); let mut accepting = false; for child in children_iter { // add a virtual fragment let line: Vec = grammar .query_expansion(nt_start, child)? .iter() .copied() .flatten() .flat_map(|(nt, rule)| [(*rule).into(), TNT::Non(*nt).into()]) .rev() .chain(std::iter::once(TNT::Ter(t).into())) .collect(); assert!(line.len() > 1); // by our construction this must be a rule let rule = line.get(line.len() - 2).unwrap().rule().unwrap(); use crate::default::Error as DError; let frag = crate::item::genins::generate_fragment(line, 0).map_err( |fe: DError| -> GrammarError { match fe { DError::IndexOutOfBounds(index, bound) => { GrammarError::IndexOutOfBounds(index, bound) } DError::DuplicateNode(n) => GrammarError::NFAFail( NFAError::Graph(GraphError::DuplicatedNode(n)), ), DError::DuplicateEdge(source, target) => GrammarError::NFAFail( NFAError::Graph(GraphError::DuplicatedEdge(source, target)), ), DError::NodeNoLabel(n) => { panic!("node {n} has no label!") } DError::CannotReserve(_) => unreachable!( "generate_fragment should not signal this error" ), DError::CannotClone(_) => { unreachable!("we are not cloning") } DError::CannotPlant => { unreachable!("why can we not plant?") } DError::SplitPack(_) => { unreachable!("we not not splitting") } DError::InvalidClone(_) => { unreachable!("we are not cloning") } DError::CannotClose(_, _, _, _) => { unreachable!("we are not closing virtual nodes") } DError::Invalid => { panic!("a label is wrongly planted?") } } }, )?; virtual_frags .entry(VirtualNode::new(nt, t)) .or_insert_with(Default::default) .insert(rule, frag); accepting = accepting || *accepting_vec.get(child).ok_or( GrammarError::IndexOutOfBounds(child, accepting_vec.len()), )?; if let Some((_, old_accepting)) = terminals_map.get_mut(&t) { *old_accepting = *old_accepting || accepting; } else { terminals_map .insert(t, (HashSet::with_capacity(estimated_len), accepting)); } for (child_label, child_children_iter) in nfa .labels_of(child) .map_err(index_out_of_bounds_conversion)? { // We checked this is safe above. let (set, _) = terminals_map.get_mut(&t).unwrap(); set.extend(child_children_iter.map(|target| { ( *child_label, target, grammar .query_reduction(child, target) .unwrap() .map(|slice| slice.to_vec()), virtual_trace, ) })); } } } } for (t, (set, accepting)) in terminals_map.into_iter() { // update virtual traces for (_, target, _, pos) in set.iter() { let trace = VirtualTrace::new(nt, t, *target); virtual_traces .entry(trace) .or_insert_with(Default::default) .insert(*pos); } // add a virtual node let new_index = nfa .extend(set.iter().map(|(label, target, _, _)| (*label, *target))) .map_err(index_out_of_bounds_conversion)?; if accepting_vec.get(new_index).is_none() { #[cfg(debug_assertions)] assert_eq!(new_index, accepting_vec.len()); accepting_vec.push(accepting); } let virtual_node = VirtualNode::new(nt, t); virtual_nodes.insert(virtual_node, new_index); // update the reduction information for (_, target, info, _) in set { if let Some(info) = info { if !matches!( grammar.query_reduction(new_index, target)?, Some(original_reduction) if original_reduction.len() >= info.len()) { grammar.set_reduction(new_index, target, info); } } } } } Ok(Self { grammar, nfa, regexp, virtual_nodes, accepting_vec, virtual_traces, virtual_frags, }) } /// Generate a vector of virtual fragments for a non-terminal and /// a terminal. /// /// # RULE /// /// If one passes `Some(rule)` as the paramter, then this returns /// only those fragments that begin with the specified rule. /// /// On the other hand, if one passes `None`, then this returns /// only those fragments that can end the non-terminal expansion. /// /// # Guarantee /// /// It is guaranteed that the 1-th node of each fragment is a rule /// number. pub(crate) fn generate_virtual_frags( &self, nt: usize, t: usize, rule: Option, ) -> Option> { let vn = VirtualNode::new(nt, t); if let Some(rule) = rule { self.virtual_frags .get(&vn) .and_then(|map| map.get(&rule)) .map(|f| vec![f]) } else { let result: Vec<&VirtualFrag> = self .virtual_frags .get(&vn) .iter() .copied() .flatten() .filter_map(|(rule, frag)| { self.is_accepting(rule * 2 + 1) .unwrap_or(false) .then_some(frag) }) .collect(); if result.is_empty() { None } else { Some(result) } } } } /// A convenient getter for the map of virtual nodes. fn query(map: &VirtualMap, nt: usize, t: usize) -> Option { map.get(&VirtualNode::new(nt, t)).copied() } impl std::ops::Deref for DefaultAtom { type Target = Grammar; fn deref(&self) -> &Self::Target { &self.grammar } } impl Atom for DefaultAtom { fn atom(&self, nt: usize, t: usize) -> Result, GrammarError> { if nt >= self.grammar.non_num() { return Err(GrammarError::IndexOutOfBounds(nt, self.grammar.non_num())); } if t >= self.grammar.ter_num() { return Err(GrammarError::IndexOutOfBounds(t, self.grammar.ter_num())); } Ok(query(&self.virtual_nodes, nt, t)) } fn empty(&self) -> usize { self.grammar.total() << 1 } fn is_accepting(&self, node_id: usize) -> Result { self.accepting_vec .get(node_id) .copied() .ok_or(GrammarError::IndexOutOfBounds( node_id, self.accepting_vec.len(), )) } type Iter<'a> = Copied> where Self: 'a; fn trace(&self, nt: usize, t: usize, target: usize) -> Option<::Iter<'_>> { let trace = VirtualTrace::new(nt, t, target); self.virtual_traces .get(&trace) .map(|set| set.iter().copied()) } fn accepting_len(&self) -> usize { self.accepting_vec.len() } }