summary | refs | log | tree | commit | diff
path: root/chain/src/atom
diff options
context:
space:
mode:
Diffstat (limited to 'chain/src/atom')
-rw-r--r-- chain/src/atom/default.rs 214
-rw-r--r-- chain/src/atom/mod.rs 9
2 files changed, 207 insertions, 16 deletions
diff --git a/chain/src/atom/default.rs b/chain/src/atom/default.rs
index 90133f4..0dc24c3 100644
--- a/chain/src/atom/default.rs
+++ b/chain/src/atom/default.rs
@@ -6,7 +6,7 @@ use grammar::Grammar;
use graph::{error::Error as GraphError, Graph, LabelExtGraph, LabelGraph};
use nfa::{
default::{nfa::DefaultNFA, regex::DefaultRegex},
- LabelType,
+ LabelType, NfaLabel,
};
use core::fmt::Display;
@@ -39,11 +39,55 @@ type VirtualMap = Map<VirtualNode, usize>;
pub struct DefaultAtom {
grammar: Grammar,
nfa: DefaultNFA<LabelType<TNT>>,
+ accepting_vec: Vec<bool>,
// NOTE: This is mostly for printing and debugging
regexp: Vec<DefaultRegex<TNT>>,
virtual_nodes: VirtualMap,
}
+impl DefaultAtom {
+ /// Return the string description of a rule position.
+ pub fn rule_pos_string(&self, pos: usize) -> Result<String, Box<dyn std::error::Error>> {
+ let rule_num = self.grammar.get_rule_num(pos)?;
+
+ assert!(rule_num < self.grammar.non_num());
+
+ let display_tnt = |tnt| self.name_of_tnt(tnt).unwrap_or_else(|e| format!("{e}"));
+
+ Ok(self.regexp.get(rule_num).unwrap().to_string_with_dot(
+ display_tnt,
+ if rule_num == 0 {
+ pos
+ } else {
+ pos - self.grammar.nth_accumulator(rule_num - 1)?
+ },
+ )?)
+ }
+
+ /// Print the underlying NFA, then report the accepting node indices (if any) and the virtual nodes on standard output.
+ pub fn print_nfa<S: AsRef<str>>(&self, filename: S) -> Result<(), std::io::Error> {
+ self.nfa.print_viz(filename.as_ref())?;
+
+ let nullables: Vec<_> = self
+ .accepting_vec
+ .iter()
+ .enumerate()
+ .filter_map(|(index, pred)| if *pred { Some(index) } else { None })
+ .collect();
+
+ if !nullables.is_empty() {
+ println!("nullables: {nullables:?}");
+ }
+
+ println!("printing virtual nodes:");
+ for (vn, node) in self.virtual_nodes.iter() {
+ println!("[{}]^{{({})}}: {}", vn.s, vn.t, node);
+ }
+
+ Ok(())
+ }
+}
+
impl Display for DefaultAtom {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let grammar = &self.grammar;
@@ -260,15 +304,67 @@ impl DefaultAtom {
.filter(|n| matches!(grammar.is_nullable(*n), Ok(true)))
.collect();
+ // Now record accepting information.
+
+ let nfa_len = nfa.nodes_len();
+
+ let label_is_nullable = |label: NfaLabel<DOption<TNT>>| {
+ if let Some(label) = *label.get_value() {
+ matches!(label, TNT::Non(n) if nullables.contains(&n))
+ } else {
+ true
+ }
+ };
+
+ let mut accepting_vec: Vec<bool> = std::iter::repeat(false).take(nfa_len).collect();
+
+ for nfa_start in accumulators.iter().copied().take(regexp.len()) {
+ *accepting_vec.get_mut(nfa_start + 1).unwrap() = true;
+ }
+
+ // The last node is always accepting.
+ *accepting_vec.get_mut(nfa_len - 1).unwrap() = true;
+
+ let mut updated = true;
+
+ while updated {
+ updated = false;
+
+ for node in nfa.nodes() {
+ // Skip nodes that are already marked as accepting; they need no update.
+ if *accepting_vec
+ .get(node)
+ .ok_or(GrammarError::IndexOutOfBounds(node, nfa_len))?
+ {
+ continue;
+ }
+
+ 'label_loop: for (label, target_iter) in nfa
+ .labels_of(node)
+ .map_err(|_| GrammarError::IndexOutOfBounds(node, nfa_len))?
+ {
+ if label_is_nullable(*label) {
+ for target in target_iter {
+ if *accepting_vec
+ .get(target)
+ .ok_or(GrammarError::IndexOutOfBounds(target, nfa_len))?
+ {
+ // accepting_vec[node] must have been
+ // false, as we checked above
+ *accepting_vec.get_mut(node).unwrap() = true;
+ updated = true;
+
+ break 'label_loop;
+ }
+ }
+ }
+ }
+ }
+ }
+
// Perform nulling and remove_epsilon at the same time.
nfa.closure(
- |label| {
- if let Some(label) = *label.get_value() {
- matches!(label, TNT::Non(n) if nullables.contains(&n))
- } else {
- true
- }
- },
+ label_is_nullable,
true,
|two_edges| grammar.transform_label_null_epsilon(two_edges),
|label| label.get_value().is_none(),
@@ -298,6 +394,8 @@ impl DefaultAtom {
}
}
+ // dbg!(&accumulators);
+
for nt in 0..nt_num {
let children: std::collections::HashMap<_, _> = nfa
// This is safe because of the above assertion.
@@ -306,23 +404,92 @@ impl DefaultAtom {
.map(|(label, target_iter)| (*label, target_iter))
.collect();
- let mut terminals_map: HashMap<usize, HashSet<(LabelType<TNT>, usize)>> =
+ let mut terminals_map: HashMap<usize, (HashSet<(LabelType<TNT>, usize)>, bool)> =
HashMap::new();
for (label, children_iter) in children.into_iter() {
if let Some(TNT::Ter(t)) = *label.get_value() {
- terminals_map
- .entry(t)
- .or_insert_with(|| HashSet::with_capacity(children_iter.len()))
- .extend(children_iter.map(|target| (label, target)));
+ let estimated_len = {
+ let mut result = 0;
+
+ for child in children_iter.clone() {
+ result += nfa.degree(child).map_err(index_out_of_bounds_conversion)?;
+ }
+
+ result
+ };
+
+ let mut accepting = false;
+
+ for child in children_iter {
+ accepting = accepting
+ || *accepting_vec.get(child).ok_or_else(|| {
+ GrammarError::IndexOutOfBounds(child, accepting_vec.len())
+ })?;
+
+ if nt == 3
+ && nfa.degree(child).map_err(index_out_of_bounds_conversion)? == 0
+ {
+ println!("accepting = {accepting}");
+ }
+
+ if let Some((_, old_accepting)) = terminals_map.get_mut(&t) {
+ *old_accepting = *old_accepting || accepting;
+ } else {
+ terminals_map
+ .insert(t, (HashSet::with_capacity(estimated_len), accepting));
+ }
+
+ for (child_label, child_children_iter) in nfa
+ .labels_of(child)
+ .map_err(index_out_of_bounds_conversion)?
+ {
+ // We checked this is safe above.
+ let (set, _) = terminals_map.get_mut(&t).unwrap();
+
+ set.extend(child_children_iter.map(|target| (*child_label, target)));
+ }
+ }
}
}
- for (t, set) in terminals_map.into_iter() {
+ if nt == 3 {
+ println!("map = {terminals_map:?}");
+ }
+
+ for (t, (set, accepting)) in terminals_map.into_iter() {
let new_index = nfa
.extend(set.into_iter())
.map_err(index_out_of_bounds_conversion)?;
+ if accepting_vec.get(new_index).is_none() {
+ #[cfg(debug_assertions)]
+ assert_eq!(new_index, accepting_vec.len());
+
+ // let mut updated = false;
+ // let nfa_len = nfa.nodes_len();
+
+ // 'label_loop: for (label, target_iter) in nfa
+ // .labels_of(new_index)
+ // .map_err(|_| GrammarError::IndexOutOfBounds(new_index, nfa_len))?
+ // {
+ // if label_is_nullable(*label) {
+ // for target in target_iter {
+ // if *accepting_vec
+ // .get(target)
+ // .ok_or(GrammarError::IndexOutOfBounds(target, nfa_len))?
+ // {
+ // updated = true;
+
+ // break 'label_loop;
+ // }
+ // }
+ // }
+ // }
+
+ accepting_vec.push(accepting);
+ }
+
let virtual_node = VirtualNode::new(nt, t);
virtual_nodes.insert(virtual_node, new_index);
@@ -334,6 +501,7 @@ impl DefaultAtom {
nfa,
regexp,
virtual_nodes,
+ accepting_vec,
})
}
}
@@ -343,6 +511,14 @@ fn query(map: &VirtualMap, nt: usize, t: usize) -> Option<usize> {
map.get(&VirtualNode::new(nt, t)).copied()
}
+impl std::ops::Deref for DefaultAtom {
+ type Target = Grammar;
+
+ fn deref(&self) -> &Self::Target {
+ &self.grammar
+ }
+}
+
impl Atom for DefaultAtom {
fn atom(&self, nt: usize, t: usize) -> Result<Option<usize>, GrammarError> {
if nt >= self.grammar.non_num() {
@@ -359,4 +535,14 @@ impl Atom for DefaultAtom {
fn empty(&self) -> usize {
self.grammar.total() << 1
}
+
+ fn is_accepting(&self, node_id: usize) -> Result<bool, GrammarError> {
+ self.accepting_vec
+ .get(node_id)
+ .copied()
+ .ok_or(GrammarError::IndexOutOfBounds(
+ node_id,
+ self.accepting_vec.len(),
+ ))
+ }
}
diff --git a/chain/src/atom/mod.rs b/chain/src/atom/mod.rs
index 065640b..398edd2 100644
--- a/chain/src/atom/mod.rs
+++ b/chain/src/atom/mod.rs
@@ -6,17 +6,22 @@
//! Because this way I can easily substitute other implementations if
//! I have better ideas in the future.
-use grammar::{Error as GrammarError, TNT};
+use grammar::{Error as GrammarError, Grammar, TNT};
use nfa::{DOption, LabelType, Nfa};
+use std::ops::Deref;
+
/// The expected behaviours of an atomic language.
-pub trait Atom: Nfa<LabelType<TNT>> {
+pub trait Atom: Nfa<LabelType<TNT>> + Deref<Target = Grammar> {
/// Return the index of a node representing the derivative of the
/// left-linear null closure of `nt` with respect to `t`.
fn atom(&self, nt: usize, t: usize) -> Result<Option<usize>, GrammarError>;
/// Return the index of the empty state.
fn empty(&self) -> usize;
+
+ /// Tell whether a node is accepting.
+ fn is_accepting(&self, node_id: usize) -> Result<bool, GrammarError>;
}
pub mod default;