//! This file provides a default implementation of the //! [`Atom`][super::Atom] trait. use super::*; use grammar::Grammar; use graph::{error::Error as GraphError, Graph, LabelExtGraph, LabelGraph}; use nfa::{ default::{nfa::DefaultNFA, regex::DefaultRegex}, LabelType, }; use core::fmt::Display; use std::collections::BTreeMap as Map; /// A virtual node represents the derivative of a non-terminal symbol /// `S` with respect to a terminal symbol `t`. #[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Ord, PartialOrd)] struct VirtualNode { s: usize, t: usize, } impl Display for VirtualNode { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "VN[{}]^({})", self.s, self.t) } } impl VirtualNode { fn new(s: usize, t: usize) -> Self { Self { s, t } } } type VirtualMap = Map; /// The type of atomic languages. #[derive(Debug, Clone, Default)] pub struct DefaultAtom { grammar: Grammar, nfa: DefaultNFA>, // NOTE: This is mostly for printing and debugging regexp: Vec>, virtual_nodes: VirtualMap, } impl Display for DefaultAtom { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let grammar = &self.grammar; let error_to_string = |err| format!("{err}"); let tnt_to_string = |tnt, deco| { grammar .name_of_tnt(tnt) .map(|name| format!("{deco}({name})")) .unwrap_or_else(error_to_string) }; let display_tnt = |tnt| match tnt { TNT::Ter(t) => format!( "({})", grammar .unpack_tnt(t) .map(|tnt| tnt_to_string(tnt, "")) .unwrap_or_else(error_to_string) ), TNT::Non(_) => tnt_to_string(tnt, "H"), }; writeln!(f, "regular expressions:")?; let mut accumulators = Vec::with_capacity(self.regexp.len() + 1); accumulators.push(0usize); for regex in self.regexp.iter() { writeln!(f, "accumulator: {}", accumulators.last().unwrap())?; accumulators.push(regex.nodes_len() * 2 + accumulators.last().unwrap()); let string = regex.to_string_with(display_tnt)?; writeln!(f, "{string}")?; } writeln!(f, "total = {}", accumulators.last().unwrap())?; writeln!(f, "virtual nodes:")?; for (virtual_node, index) in self.virtual_nodes.iter() { writeln!(f, "{virtual_node}: {index}")?; } Ok(()) } } // Some boiler-plate delegation implementations for Graph and // LabelGraph, in order to implement Nfa. impl Graph for DefaultAtom { type Iter<'b> = > as Graph>::Iter<'b> where Self: 'b; fn is_empty(&self) -> bool { self.nfa.is_empty() } fn nodes_len(&self) -> usize { self.nfa.nodes_len() } fn children_of(&self, node_id: usize) -> Result, GraphError> { self.nfa.children_of(node_id) } fn degree(&self, node_id: usize) -> Result { self.nfa.degree(node_id) } fn is_empty_node(&self, node_id: usize) -> Result { self.nfa.is_empty_node(node_id) } fn has_edge(&self, source: usize, target: usize) -> Result { self.nfa.has_edge(source, target) } fn replace_by_builder(&mut self, _builder: impl graph::Builder) { // NOTE: We cannot replace by a builder whose result is an // atom, not the underlying graph type. unimplemented!() } } impl LabelGraph> for DefaultAtom { type Iter<'b> = > as LabelGraph>>::Iter<'b> where Self: 'b; type LabelIter<'b> = > as LabelGraph>>::LabelIter<'b> where Self: 'b, DOption: 'b; type EdgeLabelIter<'a> = > as LabelGraph>>::EdgeLabelIter<'a> where Self: 'a, DOption: 'a; #[inline] fn vertex_label(&self, node_id: usize) -> Result>, GraphError> { self.nfa.vertex_label(node_id) } #[inline] fn edge_label( &self, source: usize, target: usize, ) -> Result, GraphError> { self.nfa.edge_label(source, target) } #[inline] fn find_children_with_label( &self, node_id: usize, label: &LabelType, ) -> Result<>>::Iter<'_>, GraphError> { self.nfa.find_children_with_label(node_id, label) } #[inline] fn labels_of(&self, node_id: usize) -> Result, GraphError> { self.nfa.labels_of(node_id) } #[inline] fn has_edge_label( &self, node_id: usize, label: &LabelType, target: usize, ) -> Result { self.nfa.has_edge_label(node_id, label, target) } } impl LabelExtGraph> for DefaultAtom { #[inline] fn extend( &mut self, edges: impl IntoIterator, usize)>, ) -> Result { self.nfa.extend(edges) } } impl Nfa> for DefaultAtom { type FromRegex = (); #[inline] fn to_nfa( _regexps: &[impl nfa::Regex>>], _sub_pred: impl Fn(LabelType) -> Result>, nfa::error::Error>, _default: Option>, ) -> Result>>, nfa::error::Error> { // NOTE: We cannot construct an atom from a set of regular // languages alone. So it is appropriate to panic here, if // one tries to do so, for some reason. unimplemented!() } #[inline] fn remove_dead(&mut self, reserve: impl FnMut(usize) -> bool) -> Result<(), nfa::error::Error> { self.nfa.remove_dead(reserve) } #[inline] fn closure( &mut self, predicate: impl FnMut(LabelType) -> bool, remove_after_p: bool, transform: impl FnMut(nfa::TwoEdges>) -> LabelType, remove_predicate: impl FnMut(LabelType) -> bool, ) -> Result<(), nfa::error::Error> { self.nfa .closure(predicate, remove_after_p, transform, remove_predicate) } } impl DefaultAtom { /// Construct an atomic language from a grammar. pub fn from_grammar(mut grammar: Grammar) -> Result { grammar.compute_firsts()?; let regexp = grammar.left_closure()?; let mut nfa = grammar.left_closure_to_nfa(®exp)?; use std::collections::{HashMap, HashSet}; let accumulators: Vec = { let mut result = Vec::with_capacity(regexp.len() + 1); result.push(0); for regex in regexp.iter() { // Calling `unwrap` here is safe as `result` is always // non-empty. result.push(regex.nodes_len() * 2 + result.last().unwrap()); } result }; let accumulators_set: HashSet = accumulators.iter().copied().collect(); let nullables: HashSet = (0..grammar.non_num()) .filter(|n| matches!(grammar.is_nullable(*n), Ok(true))) .collect(); // Perform nulling and remove_epsilon at the same time. nfa.closure( |label| { if let Some(label) = *label.get_value() { matches!(label, TNT::Non(n) if nullables.contains(&n)) } else { true } }, true, |two_edges| grammar.transform_label_null_epsilon(two_edges), |label| label.get_value().is_none(), )?; nfa.remove_dead(|node| accumulators_set.contains(&node))?; // Now add the virtual nodes. let mut virtual_nodes: VirtualMap = Default::default(); let nt_num = grammar.non_num(); assert!(nt_num <= accumulators.len()); /// Convert an error telling us that an index is out of bounds. /// /// # Panics /// /// The function panics if the error is not of the expected /// kind. fn index_out_of_bounds_conversion(ge: GraphError) -> GrammarError { match ge { GraphError::IndexOutOfBounds(index, bound) => { GrammarError::IndexOutOfBounds(index, bound) } _ => unreachable!(), } } for nt in 0..nt_num { let children: std::collections::HashMap<_, _> = nfa // This is safe because of the above assertion. .labels_of(*accumulators.get(nt).unwrap()) .map_err(index_out_of_bounds_conversion)? .map(|(label, target_iter)| (*label, target_iter)) .collect(); let mut terminals_map: HashMap, usize)>> = HashMap::new(); for (label, children_iter) in children.into_iter() { if let Some(TNT::Ter(t)) = *label.get_value() { terminals_map .entry(t) .or_insert_with(|| HashSet::with_capacity(children_iter.len())) .extend(children_iter.map(|target| (label, target))); } } for (t, set) in terminals_map.into_iter() { let new_index = nfa .extend(set.into_iter()) .map_err(index_out_of_bounds_conversion)?; let virtual_node = VirtualNode::new(nt, t); virtual_nodes.insert(virtual_node, new_index); } } Ok(Self { grammar, nfa, regexp, virtual_nodes, }) } } /// A convenient getter for the map of virtual nodes. fn query(map: &VirtualMap, nt: usize, t: usize) -> Option { map.get(&VirtualNode::new(nt, t)).copied() } impl Atom for DefaultAtom { fn atom(&self, nt: usize, t: usize) -> Result, GrammarError> { if nt >= self.grammar.non_num() { return Err(GrammarError::IndexOutOfBounds(nt, self.grammar.non_num())); } if t >= self.grammar.ter_num() { return Err(GrammarError::IndexOutOfBounds(t, self.grammar.ter_num())); } Ok(query(&self.virtual_nodes, nt, t)) } fn empty(&self) -> usize { self.grammar.total() << 1 } }