summaryrefslogtreecommitdiff
path: root/chain/src/atom/default.rs
diff options
context:
space:
mode:
Diffstat (limited to 'chain/src/atom/default.rs')
-rw-r--r--chain/src/atom/default.rs342
1 files changed, 342 insertions, 0 deletions
diff --git a/chain/src/atom/default.rs b/chain/src/atom/default.rs
new file mode 100644
index 0000000..72989b3
--- /dev/null
+++ b/chain/src/atom/default.rs
@@ -0,0 +1,342 @@
+//! This file provides a default implementation of the
+//! [`Atom`][super::Atom] trait.
+
+use super::*;
+use grammar::Grammar;
+use graph::{error::Error as GraphError, Graph, LabelExtGraph, LabelGraph};
+use nfa::default::{nfa::DefaultNFA, regex::DefaultRegex};
+
+use core::fmt::Display;
+use std::collections::BTreeMap as Map;
+
+/// A virtual node represents the derivative of a non-terminal symbol
+/// `S` with respect to a terminal symbol `t`.
+#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Ord, PartialOrd)]
+struct VirtualNode {
+ s: usize,
+ t: usize,
+}
+
+impl Display for VirtualNode {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "VN[{}]^({})", self.s, self.t)
+ }
+}
+
+impl VirtualNode {
+ fn new(s: usize, t: usize) -> Self {
+ Self { s, t }
+ }
+}
+
+type VirtualMap = Map<VirtualNode, usize>;
+
+/// The type of atomic languages.
+#[derive(Debug, Clone, Default)]
+pub struct DefaultAtom {
+ grammar: Grammar,
+ nfa: DefaultNFA<DOption<TNT>>,
+ // NOTE: This is mostly for printing and debugging
+ regexp: Vec<DefaultRegex<TNT>>,
+ virtual_nodes: VirtualMap,
+}
+
+impl Display for DefaultAtom {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ let grammar = &self.grammar;
+
+ let error_to_string = |err| format!("{err}");
+ let tnt_to_string = |tnt, deco| {
+ grammar
+ .name_of_tnt(tnt)
+ .map(|name| format!("{deco}({name})"))
+ .unwrap_or_else(error_to_string)
+ };
+
+ let display_tnt = |tnt| match tnt {
+ TNT::Ter(t) => format!(
+ "({})",
+ grammar
+ .unpack_tnt(t)
+ .map(|tnt| tnt_to_string(tnt, ""))
+ .unwrap_or_else(error_to_string)
+ ),
+ TNT::Non(_) => tnt_to_string(tnt, "H"),
+ };
+
+ writeln!(f, "regular expressions:")?;
+
+ let mut accumulators = Vec::with_capacity(self.regexp.len() + 1);
+ accumulators.push(0usize);
+
+ for regex in self.regexp.iter() {
+ writeln!(f, "accumulator: {}", accumulators.last().unwrap())?;
+
+ accumulators.push(regex.nodes_len() * 2 + accumulators.last().unwrap());
+
+ let string = regex.to_string_with(display_tnt)?;
+
+ writeln!(f, "{string}")?;
+ }
+
+ writeln!(f, "total = {}", accumulators.last().unwrap())?;
+
+ writeln!(f, "virtual nodes:")?;
+
+ for (virtual_node, index) in self.virtual_nodes.iter() {
+ writeln!(f, "{virtual_node}: {index}")?;
+ }
+
+ Ok(())
+ }
+}
+
+// Some boiler-plate delegation implementations for Graph and
+// LabelGraph, in order to implement Nfa.
+
+impl Graph for DefaultAtom {
+ type Iter<'b> = <DefaultNFA<DOption<TNT>> as Graph>::Iter<'b>
+ where
+ Self: 'b;
+
+ fn is_empty(&self) -> bool {
+ self.nfa.is_empty()
+ }
+
+ fn nodes_len(&self) -> usize {
+ self.nfa.nodes_len()
+ }
+
+ fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, GraphError> {
+ self.nfa.children_of(node_id)
+ }
+
+ fn degree(&self, node_id: usize) -> Result<usize, GraphError> {
+ self.nfa.degree(node_id)
+ }
+
+ fn is_empty_node(&self, node_id: usize) -> Result<bool, GraphError> {
+ self.nfa.is_empty_node(node_id)
+ }
+
+ fn has_edge(&self, source: usize, target: usize) -> Result<bool, GraphError> {
+ self.nfa.has_edge(source, target)
+ }
+
+ fn replace_by_builder(&mut self, _builder: impl graph::Builder<Result = Self>) {
+ // NOTE: We cannot replace by a builder whose result is an
+ // atom, not the underlying graph type.
+ unimplemented!()
+ }
+}
+
+impl LabelGraph<DOption<TNT>> for DefaultAtom {
+ type Iter<'b> = <DefaultNFA<DOption<TNT>> as LabelGraph<DOption<TNT>>>::Iter<'b>
+ where
+ Self: 'b;
+
+ type LabelIter<'b> = <DefaultNFA<DOption<TNT>> as LabelGraph<DOption<TNT>>>::LabelIter<'b>
+ where
+ Self: 'b,
+ DOption<TNT>: 'b;
+
+ type EdgeLabelIter<'a> = <DefaultNFA<DOption<TNT>> as LabelGraph<DOption<TNT>>>::EdgeLabelIter<'a>
+ where
+ Self: 'a,
+ DOption<TNT>: 'a;
+
+ #[inline]
+ fn vertex_label(&self, node_id: usize) -> Result<Option<DOption<TNT>>, GraphError> {
+ self.nfa.vertex_label(node_id)
+ }
+
+ #[inline]
+ fn edge_label(
+ &self,
+ source: usize,
+ target: usize,
+ ) -> Result<Self::EdgeLabelIter<'_>, GraphError> {
+ self.nfa.edge_label(source, target)
+ }
+
+ #[inline]
+ fn find_children_with_label(
+ &self,
+ node_id: usize,
+ label: &DOption<TNT>,
+ ) -> Result<<Self as LabelGraph<DOption<TNT>>>::Iter<'_>, GraphError> {
+ self.nfa.find_children_with_label(node_id, label)
+ }
+
+ #[inline]
+ fn labels_of(&self, node_id: usize) -> Result<Self::LabelIter<'_>, GraphError> {
+ self.nfa.labels_of(node_id)
+ }
+
+ #[inline]
+ fn has_edge_label(
+ &self,
+ node_id: usize,
+ label: &DOption<TNT>,
+ target: usize,
+ ) -> Result<bool, GraphError> {
+ self.nfa.has_edge_label(node_id, label, target)
+ }
+}
+
+impl LabelExtGraph<DOption<TNT>> for DefaultAtom {
+ #[inline]
+ fn extend(
+ &mut self,
+ edges: impl IntoIterator<Item = (DOption<TNT>, usize)>,
+ ) -> Result<usize, GraphError> {
+ self.nfa.extend(edges)
+ }
+}
+
+impl Nfa<DOption<TNT>> for DefaultAtom {
+ #[inline]
+ fn remove_epsilon<F>(&mut self, f: F) -> Result<(), nfa::error::Error>
+ where
+ F: Fn(DOption<TNT>) -> bool,
+ {
+ self.nfa.remove_epsilon(f)
+ }
+
+ type FromRegex<S: graph::GraphLabel + std::fmt::Display + Default> = ();
+
+ #[inline]
+ fn to_nfa(
+ _regexps: &[impl nfa::Regex<nfa::default::regex::RegexType<DOption<TNT>>>],
+ _sub_pred: impl Fn(DOption<TNT>) -> Result<nfa::SoC<DOption<TNT>>, nfa::error::Error>,
+ _default: Option<DOption<TNT>>,
+ ) -> Result<Self::FromRegex<DOption<DOption<TNT>>>, nfa::error::Error> {
+ // NOTE: We cannot construct an atom from a set of regular
+ // languages alone. So it is appropriate to panic here, if
+ // one tries to do so, for some reason.
+ unimplemented!()
+ }
+
+ #[inline]
+ fn remove_dead(&mut self, reserve: impl Fn(usize) -> bool) -> Result<(), nfa::error::Error> {
+ self.nfa.remove_dead(reserve)
+ }
+
+ #[inline]
+ fn nulling(&mut self, f: impl Fn(DOption<TNT>) -> bool) -> Result<(), nfa::error::Error> {
+ self.nfa.nulling(f)
+ }
+}
+
+impl DefaultAtom {
+ /// Construct an atomic language from a grammar.
+ pub fn from_grammar(mut grammar: Grammar) -> Result<Self, GrammarError> {
+ grammar.compute_firsts()?;
+
+ let regexp = grammar.left_closure()?;
+
+ let mut nfa = grammar.left_closure_to_nfa(&regexp)?;
+
+ use std::collections::HashSet;
+
+ let accumulators: Vec<usize> = {
+ let mut result = Vec::with_capacity(regexp.len() + 1);
+ result.push(0);
+
+ for regex in regexp.iter() {
+ result.push(regex.nodes_len() * 2 + result.last().unwrap());
+ }
+
+ result.into_iter().collect()
+ };
+
+ let accumulators_set: HashSet<usize> = accumulators.iter().copied().collect();
+
+ nfa.nulling(|label| {
+ if let Some(label) = *label {
+ match label {
+ TNT::Ter(_) => false,
+ // Panics if a non-terminal references an invalid node
+ // here.
+ TNT::Non(n) => grammar.is_nullable(n).unwrap(),
+ }
+ } else {
+ true
+ }
+ })?;
+ nfa.remove_epsilon(|label| label.is_none())?;
+ nfa.remove_dead(|node| accumulators_set.contains(&node))?;
+
+ // now add the virtual nodes
+ let mut virtual_nodes: VirtualMap = Default::default();
+
+ let nt_num = grammar.non_num();
+
+ assert!(nt_num <= accumulators.len());
+
+ // Convert an error telling us that an index is out of bounds.
+ //
+ // Panics if the error is not of the expected kind.
+ fn index_out_of_bounds_conversion(ge: GraphError) -> GrammarError {
+ match ge {
+ GraphError::IndexOutOfBounds(index, bound) => {
+ GrammarError::IndexOutOfBounds(index, bound)
+ }
+ _ => unreachable!(),
+ }
+ }
+
+ for nt in 0..nt_num {
+ let children: std::collections::HashMap<DOption<TNT>, Vec<_>> = nfa
+ // this is safe because of the above assertion.
+ .labels_of(*accumulators.get(nt).unwrap())
+ .map_err(index_out_of_bounds_conversion)?
+ .map(|(label, target_iter)| (*label, target_iter.collect()))
+ .collect();
+
+ for (label, children_vec) in children.into_iter() {
+ if let Some(TNT::Ter(t)) = *label {
+ let new_index = nfa
+ .extend(children_vec.into_iter().map(|target| (label, target)))
+ .map_err(index_out_of_bounds_conversion)?;
+
+ let virtual_node = VirtualNode::new(nt, t);
+
+ virtual_nodes.insert(virtual_node, new_index);
+ }
+ }
+ }
+
+ Ok(Self {
+ grammar,
+ nfa,
+ regexp,
+ virtual_nodes,
+ })
+ }
+}
+
+/// A convenient getter for the map of virtual nodes.
+fn query(map: &VirtualMap, nt: usize, t: usize) -> Option<usize> {
+ map.get(&VirtualNode::new(nt, t)).copied()
+}
+
+impl Atom for DefaultAtom {
+ fn atom(&self, nt: usize, t: usize) -> Result<Option<usize>, GrammarError> {
+ if nt >= self.grammar.non_num() {
+ return Err(GrammarError::IndexOutOfBounds(nt, self.grammar.non_num()));
+ }
+
+ if t >= self.grammar.ter_num() {
+ return Err(GrammarError::IndexOutOfBounds(t, self.grammar.ter_num()));
+ }
+
+ Ok(query(&self.virtual_nodes, nt, t))
+ }
+
+ fn empty(&self) -> usize {
+ assert_eq!(self.nfa.nodes_len() - 2, self.grammar.non_num() * 2);
+
+ self.nfa.nodes_len() - 2
+ }
+}