diff options
Diffstat (limited to 'chain/src/atom')
-rw-r--r-- | chain/src/atom/default.rs | 342 | ||||
-rw-r--r-- | chain/src/atom/mod.rs | 41 |
2 files changed, 383 insertions, 0 deletions
diff --git a/chain/src/atom/default.rs b/chain/src/atom/default.rs new file mode 100644 index 0000000..72989b3 --- /dev/null +++ b/chain/src/atom/default.rs @@ -0,0 +1,342 @@ +//! This file provides a default implementation of the +//! [`Atom`][super::Atom] trait. + +use super::*; +use grammar::Grammar; +use graph::{error::Error as GraphError, Graph, LabelExtGraph, LabelGraph}; +use nfa::default::{nfa::DefaultNFA, regex::DefaultRegex}; + +use core::fmt::Display; +use std::collections::BTreeMap as Map; + +/// A virtual node represents the derivative of a non-terminal symbol +/// `S` with respect to a terminal symbol `t`. +#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Ord, PartialOrd)] +struct VirtualNode { + s: usize, + t: usize, +} + +impl Display for VirtualNode { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "VN[{}]^({})", self.s, self.t) + } +} + +impl VirtualNode { + fn new(s: usize, t: usize) -> Self { + Self { s, t } + } +} + +type VirtualMap = Map<VirtualNode, usize>; + +/// The type of atomic languages. +#[derive(Debug, Clone, Default)] +pub struct DefaultAtom { + grammar: Grammar, + nfa: DefaultNFA<DOption<TNT>>, + // NOTE: This is mostly for printing and debugging + regexp: Vec<DefaultRegex<TNT>>, + virtual_nodes: VirtualMap, +} + +impl Display for DefaultAtom { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let grammar = &self.grammar; + + let error_to_string = |err| format!("{err}"); + let tnt_to_string = |tnt, deco| { + grammar + .name_of_tnt(tnt) + .map(|name| format!("{deco}({name})")) + .unwrap_or_else(error_to_string) + }; + + let display_tnt = |tnt| match tnt { + TNT::Ter(t) => format!( + "({})", + grammar + .unpack_tnt(t) + .map(|tnt| tnt_to_string(tnt, "")) + .unwrap_or_else(error_to_string) + ), + TNT::Non(_) => tnt_to_string(tnt, "H"), + }; + + writeln!(f, "regular expressions:")?; + + let mut accumulators = Vec::with_capacity(self.regexp.len() + 1); + accumulators.push(0usize); + + for regex in self.regexp.iter() { + writeln!(f, "accumulator: {}", accumulators.last().unwrap())?; + + accumulators.push(regex.nodes_len() * 2 + accumulators.last().unwrap()); + + let string = regex.to_string_with(display_tnt)?; + + writeln!(f, "{string}")?; + } + + writeln!(f, "total = {}", accumulators.last().unwrap())?; + + writeln!(f, "virtual nodes:")?; + + for (virtual_node, index) in self.virtual_nodes.iter() { + writeln!(f, "{virtual_node}: {index}")?; + } + + Ok(()) + } +} + +// Some boiler-plate delegation implementations for Graph and +// LabelGraph, in order to implement Nfa. + +impl Graph for DefaultAtom { + type Iter<'b> = <DefaultNFA<DOption<TNT>> as Graph>::Iter<'b> + where + Self: 'b; + + fn is_empty(&self) -> bool { + self.nfa.is_empty() + } + + fn nodes_len(&self) -> usize { + self.nfa.nodes_len() + } + + fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, GraphError> { + self.nfa.children_of(node_id) + } + + fn degree(&self, node_id: usize) -> Result<usize, GraphError> { + self.nfa.degree(node_id) + } + + fn is_empty_node(&self, node_id: usize) -> Result<bool, GraphError> { + self.nfa.is_empty_node(node_id) + } + + fn has_edge(&self, source: usize, target: usize) -> Result<bool, GraphError> { + self.nfa.has_edge(source, target) + } + + fn replace_by_builder(&mut self, _builder: impl graph::Builder<Result = Self>) { + // NOTE: We cannot replace by a builder whose result is an + // atom, not the underlying graph type. + unimplemented!() + } +} + +impl LabelGraph<DOption<TNT>> for DefaultAtom { + type Iter<'b> = <DefaultNFA<DOption<TNT>> as LabelGraph<DOption<TNT>>>::Iter<'b> + where + Self: 'b; + + type LabelIter<'b> = <DefaultNFA<DOption<TNT>> as LabelGraph<DOption<TNT>>>::LabelIter<'b> + where + Self: 'b, + DOption<TNT>: 'b; + + type EdgeLabelIter<'a> = <DefaultNFA<DOption<TNT>> as LabelGraph<DOption<TNT>>>::EdgeLabelIter<'a> + where + Self: 'a, + DOption<TNT>: 'a; + + #[inline] + fn vertex_label(&self, node_id: usize) -> Result<Option<DOption<TNT>>, GraphError> { + self.nfa.vertex_label(node_id) + } + + #[inline] + fn edge_label( + &self, + source: usize, + target: usize, + ) -> Result<Self::EdgeLabelIter<'_>, GraphError> { + self.nfa.edge_label(source, target) + } + + #[inline] + fn find_children_with_label( + &self, + node_id: usize, + label: &DOption<TNT>, + ) -> Result<<Self as LabelGraph<DOption<TNT>>>::Iter<'_>, GraphError> { + self.nfa.find_children_with_label(node_id, label) + } + + #[inline] + fn labels_of(&self, node_id: usize) -> Result<Self::LabelIter<'_>, GraphError> { + self.nfa.labels_of(node_id) + } + + #[inline] + fn has_edge_label( + &self, + node_id: usize, + label: &DOption<TNT>, + target: usize, + ) -> Result<bool, GraphError> { + self.nfa.has_edge_label(node_id, label, target) + } +} + +impl LabelExtGraph<DOption<TNT>> for DefaultAtom { + #[inline] + fn extend( + &mut self, + edges: impl IntoIterator<Item = (DOption<TNT>, usize)>, + ) -> Result<usize, GraphError> { + self.nfa.extend(edges) + } +} + +impl Nfa<DOption<TNT>> for DefaultAtom { + #[inline] + fn remove_epsilon<F>(&mut self, f: F) -> Result<(), nfa::error::Error> + where + F: Fn(DOption<TNT>) -> bool, + { + self.nfa.remove_epsilon(f) + } + + type FromRegex<S: graph::GraphLabel + std::fmt::Display + Default> = (); + + #[inline] + fn to_nfa( + _regexps: &[impl nfa::Regex<nfa::default::regex::RegexType<DOption<TNT>>>], + _sub_pred: impl Fn(DOption<TNT>) -> Result<nfa::SoC<DOption<TNT>>, nfa::error::Error>, + _default: Option<DOption<TNT>>, + ) -> Result<Self::FromRegex<DOption<DOption<TNT>>>, nfa::error::Error> { + // NOTE: We cannot construct an atom from a set of regular + // languages alone. So it is appropriate to panic here, if + // one tries to do so, for some reason. + unimplemented!() + } + + #[inline] + fn remove_dead(&mut self, reserve: impl Fn(usize) -> bool) -> Result<(), nfa::error::Error> { + self.nfa.remove_dead(reserve) + } + + #[inline] + fn nulling(&mut self, f: impl Fn(DOption<TNT>) -> bool) -> Result<(), nfa::error::Error> { + self.nfa.nulling(f) + } +} + +impl DefaultAtom { + /// Construct an atomic language from a grammar. + pub fn from_grammar(mut grammar: Grammar) -> Result<Self, GrammarError> { + grammar.compute_firsts()?; + + let regexp = grammar.left_closure()?; + + let mut nfa = grammar.left_closure_to_nfa(®exp)?; + + use std::collections::HashSet; + + let accumulators: Vec<usize> = { + let mut result = Vec::with_capacity(regexp.len() + 1); + result.push(0); + + for regex in regexp.iter() { + result.push(regex.nodes_len() * 2 + result.last().unwrap()); + } + + result.into_iter().collect() + }; + + let accumulators_set: HashSet<usize> = accumulators.iter().copied().collect(); + + nfa.nulling(|label| { + if let Some(label) = *label { + match label { + TNT::Ter(_) => false, + // Panics if a non-terminal references an invalid node + // here. + TNT::Non(n) => grammar.is_nullable(n).unwrap(), + } + } else { + true + } + })?; + nfa.remove_epsilon(|label| label.is_none())?; + nfa.remove_dead(|node| accumulators_set.contains(&node))?; + + // now add the virtual nodes + let mut virtual_nodes: VirtualMap = Default::default(); + + let nt_num = grammar.non_num(); + + assert!(nt_num <= accumulators.len()); + + // Convert an error telling us that an index is out of bounds. + // + // Panics if the error is not of the expected kind. + fn index_out_of_bounds_conversion(ge: GraphError) -> GrammarError { + match ge { + GraphError::IndexOutOfBounds(index, bound) => { + GrammarError::IndexOutOfBounds(index, bound) + } + _ => unreachable!(), + } + } + + for nt in 0..nt_num { + let children: std::collections::HashMap<DOption<TNT>, Vec<_>> = nfa + // this is safe because of the above assertion. + .labels_of(*accumulators.get(nt).unwrap()) + .map_err(index_out_of_bounds_conversion)? + .map(|(label, target_iter)| (*label, target_iter.collect())) + .collect(); + + for (label, children_vec) in children.into_iter() { + if let Some(TNT::Ter(t)) = *label { + let new_index = nfa + .extend(children_vec.into_iter().map(|target| (label, target))) + .map_err(index_out_of_bounds_conversion)?; + + let virtual_node = VirtualNode::new(nt, t); + + virtual_nodes.insert(virtual_node, new_index); + } + } + } + + Ok(Self { + grammar, + nfa, + regexp, + virtual_nodes, + }) + } +} + +/// A convenient getter for the map of virtual nodes. +fn query(map: &VirtualMap, nt: usize, t: usize) -> Option<usize> { + map.get(&VirtualNode::new(nt, t)).copied() +} + +impl Atom for DefaultAtom { + fn atom(&self, nt: usize, t: usize) -> Result<Option<usize>, GrammarError> { + if nt >= self.grammar.non_num() { + return Err(GrammarError::IndexOutOfBounds(nt, self.grammar.non_num())); + } + + if t >= self.grammar.ter_num() { + return Err(GrammarError::IndexOutOfBounds(t, self.grammar.ter_num())); + } + + Ok(query(&self.virtual_nodes, nt, t)) + } + + fn empty(&self) -> usize { + assert_eq!(self.nfa.nodes_len() - 2, self.grammar.non_num() * 2); + + self.nfa.nodes_len() - 2 + } +} diff --git a/chain/src/atom/mod.rs b/chain/src/atom/mod.rs new file mode 100644 index 0000000..084acca --- /dev/null +++ b/chain/src/atom/mod.rs @@ -0,0 +1,41 @@ +//! This file defines the behaviour of the Atomic languages, and +//! provides a default implementation. +//! +//! Here I do not to substitute external packages' implementations in +//! the future, so why define a trait for the atomic languages? +//! Because this way I can easily substitute other implementations if +//! I have better ideas in the future. + +use grammar::{Error as GrammarError, TNT}; +use nfa::{DOption, Nfa}; + +/// The expected behaviours of an atomic language. +pub trait Atom: Nfa<DOption<TNT>> { + /// Return the index of a node representing the derivative of the + /// left-linear null closure of `nt` with respect to `t`. + fn atom(&self, nt: usize, t: usize) -> Result<Option<usize>, GrammarError>; + + /// Return the index of the empty state. + fn empty(&self) -> usize; +} + +pub mod default; + +pub use default::DefaultAtom; + +#[cfg(test)] +mod tests { + use super::*; + use grammar::test_grammar_helper::*; + + #[test] + fn atom() -> Result<(), Box<dyn std::error::Error>> { + let grammar = new_notes_grammar()?; + + let atom = DefaultAtom::from_grammar(grammar)?; + + println!("atom = {atom}"); + + Ok(()) + } +} |