//! This file provides a default implementation of the //! [`Atom`][super::Atom] trait. use super::*; use grammar::Grammar; use graph::{error::Error as GraphError, Graph, LabelExtGraph, LabelGraph}; use nfa::default::{nfa::DefaultNFA, regex::DefaultRegex}; use core::fmt::Display; use std::collections::BTreeMap as Map; /// A virtual node represents the derivative of a non-terminal symbol /// `S` with respect to a terminal symbol `t`. #[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Ord, PartialOrd)] struct VirtualNode { s: usize, t: usize, } impl Display for VirtualNode { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "VN[{}]^({})", self.s, self.t) } } impl VirtualNode { fn new(s: usize, t: usize) -> Self { Self { s, t } } } type VirtualMap = Map; /// The type of atomic languages. #[derive(Debug, Clone, Default)] pub struct DefaultAtom { grammar: Grammar, nfa: DefaultNFA>, // NOTE: This is mostly for printing and debugging regexp: Vec>, virtual_nodes: VirtualMap, } impl Display for DefaultAtom { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let grammar = &self.grammar; let error_to_string = |err| format!("{err}"); let tnt_to_string = |tnt, deco| { grammar .name_of_tnt(tnt) .map(|name| format!("{deco}({name})")) .unwrap_or_else(error_to_string) }; let display_tnt = |tnt| match tnt { TNT::Ter(t) => format!( "({})", grammar .unpack_tnt(t) .map(|tnt| tnt_to_string(tnt, "")) .unwrap_or_else(error_to_string) ), TNT::Non(_) => tnt_to_string(tnt, "H"), }; writeln!(f, "regular expressions:")?; let mut accumulators = Vec::with_capacity(self.regexp.len() + 1); accumulators.push(0usize); for regex in self.regexp.iter() { writeln!(f, "accumulator: {}", accumulators.last().unwrap())?; accumulators.push(regex.nodes_len() * 2 + accumulators.last().unwrap()); let string = regex.to_string_with(display_tnt)?; writeln!(f, "{string}")?; } writeln!(f, "total = {}", accumulators.last().unwrap())?; writeln!(f, "virtual nodes:")?; for (virtual_node, index) in self.virtual_nodes.iter() { writeln!(f, "{virtual_node}: {index}")?; } Ok(()) } } // Some boiler-plate delegation implementations for Graph and // LabelGraph, in order to implement Nfa. impl Graph for DefaultAtom { type Iter<'b> = > as Graph>::Iter<'b> where Self: 'b; fn is_empty(&self) -> bool { self.nfa.is_empty() } fn nodes_len(&self) -> usize { self.nfa.nodes_len() } fn children_of(&self, node_id: usize) -> Result, GraphError> { self.nfa.children_of(node_id) } fn degree(&self, node_id: usize) -> Result { self.nfa.degree(node_id) } fn is_empty_node(&self, node_id: usize) -> Result { self.nfa.is_empty_node(node_id) } fn has_edge(&self, source: usize, target: usize) -> Result { self.nfa.has_edge(source, target) } fn replace_by_builder(&mut self, _builder: impl graph::Builder) { // NOTE: We cannot replace by a builder whose result is an // atom, not the underlying graph type. unimplemented!() } } impl LabelGraph> for DefaultAtom { type Iter<'b> = > as LabelGraph>>::Iter<'b> where Self: 'b; type LabelIter<'b> = > as LabelGraph>>::LabelIter<'b> where Self: 'b, DOption: 'b; type EdgeLabelIter<'a> = > as LabelGraph>>::EdgeLabelIter<'a> where Self: 'a, DOption: 'a; #[inline] fn vertex_label(&self, node_id: usize) -> Result>, GraphError> { self.nfa.vertex_label(node_id) } #[inline] fn edge_label( &self, source: usize, target: usize, ) -> Result, GraphError> { self.nfa.edge_label(source, target) } #[inline] fn find_children_with_label( &self, node_id: usize, label: &DOption, ) -> Result<>>::Iter<'_>, GraphError> { self.nfa.find_children_with_label(node_id, label) } #[inline] fn labels_of(&self, node_id: usize) -> Result, GraphError> { self.nfa.labels_of(node_id) } #[inline] fn has_edge_label( &self, node_id: usize, label: &DOption, target: usize, ) -> Result { self.nfa.has_edge_label(node_id, label, target) } } impl LabelExtGraph> for DefaultAtom { #[inline] fn extend( &mut self, edges: impl IntoIterator, usize)>, ) -> Result { self.nfa.extend(edges) } } impl Nfa> for DefaultAtom { #[inline] fn remove_epsilon(&mut self, f: F) -> Result<(), nfa::error::Error> where F: Fn(DOption) -> bool, { self.nfa.remove_epsilon(f) } type FromRegex = (); #[inline] fn to_nfa( _regexps: &[impl nfa::Regex>>], _sub_pred: impl Fn(DOption) -> Result>, nfa::error::Error>, _default: Option>, ) -> Result>>, nfa::error::Error> { // NOTE: We cannot construct an atom from a set of regular // languages alone. So it is appropriate to panic here, if // one tries to do so, for some reason. unimplemented!() } #[inline] fn remove_dead(&mut self, reserve: impl Fn(usize) -> bool) -> Result<(), nfa::error::Error> { self.nfa.remove_dead(reserve) } #[inline] fn nulling(&mut self, f: impl Fn(DOption) -> bool) -> Result<(), nfa::error::Error> { self.nfa.nulling(f) } } impl DefaultAtom { /// Construct an atomic language from a grammar. pub fn from_grammar(mut grammar: Grammar) -> Result { grammar.compute_firsts()?; let regexp = grammar.left_closure()?; let mut nfa = grammar.left_closure_to_nfa(®exp)?; use std::collections::HashSet; let accumulators: Vec = { let mut result = Vec::with_capacity(regexp.len() + 1); result.push(0); for regex in regexp.iter() { result.push(regex.nodes_len() * 2 + result.last().unwrap()); } result.into_iter().collect() }; let accumulators_set: HashSet = accumulators.iter().copied().collect(); nfa.nulling(|label| { if let Some(label) = *label { match label { TNT::Ter(_) => false, // Panics if a non-terminal references an invalid node // here. TNT::Non(n) => grammar.is_nullable(n).unwrap(), } } else { true } })?; nfa.remove_epsilon(|label| label.is_none())?; nfa.remove_dead(|node| accumulators_set.contains(&node))?; // now add the virtual nodes let mut virtual_nodes: VirtualMap = Default::default(); let nt_num = grammar.non_num(); assert!(nt_num <= accumulators.len()); // Convert an error telling us that an index is out of bounds. // // Panics if the error is not of the expected kind. fn index_out_of_bounds_conversion(ge: GraphError) -> GrammarError { match ge { GraphError::IndexOutOfBounds(index, bound) => { GrammarError::IndexOutOfBounds(index, bound) } _ => unreachable!(), } } for nt in 0..nt_num { let children: std::collections::HashMap, Vec<_>> = nfa // this is safe because of the above assertion. .labels_of(*accumulators.get(nt).unwrap()) .map_err(index_out_of_bounds_conversion)? .map(|(label, target_iter)| (*label, target_iter.collect())) .collect(); for (label, children_vec) in children.into_iter() { if let Some(TNT::Ter(t)) = *label { let new_index = nfa .extend(children_vec.into_iter().map(|target| (label, target))) .map_err(index_out_of_bounds_conversion)?; let virtual_node = VirtualNode::new(nt, t); virtual_nodes.insert(virtual_node, new_index); } } } Ok(Self { grammar, nfa, regexp, virtual_nodes, }) } } /// A convenient getter for the map of virtual nodes. fn query(map: &VirtualMap, nt: usize, t: usize) -> Option { map.get(&VirtualNode::new(nt, t)).copied() } impl Atom for DefaultAtom { fn atom(&self, nt: usize, t: usize) -> Result, GrammarError> { if nt >= self.grammar.non_num() { return Err(GrammarError::IndexOutOfBounds(nt, self.grammar.non_num())); } if t >= self.grammar.ter_num() { return Err(GrammarError::IndexOutOfBounds(t, self.grammar.ter_num())); } Ok(query(&self.virtual_nodes, nt, t)) } fn empty(&self) -> usize { assert_eq!(self.nfa.nodes_len() - 2, self.grammar.non_num() * 2); self.nfa.nodes_len() - 2 } }