summaryrefslogtreecommitdiff
path: root/chain/src/atom
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2023-01-11 23:47:26 +0800
committerJSDurand <mmemmew@gmail.com>2023-01-11 23:47:26 +0800
commit1a3d346f413325ed37848a6b2526e8e729269833 (patch)
treeab8812f8094d096c68aee53cf516e986cc9a273a /chain/src/atom
parentf27d604d93ce583d4404e1874664e08382ea2f00 (diff)
Record left-linear expansion and forest format
Now the grammar will record the left-linear expansions when generating the nondeterministic finite automaton frmo its rules, and will record whether an edge in the nondeterministic finite automaton comes from a left-linear expansion. The latter is needed because while performing a chain-rule derivation, we do not need the left-linear expanded derivations in the "first layer". This might well have been the root cause of the bad performance of the previous version of this package. Also I have figured out how to properly generate and handle parse forests while manipulating the "chain-rule machine".
Diffstat (limited to 'chain/src/atom')
-rw-r--r--chain/src/atom/default.rs140
-rw-r--r--chain/src/atom/mod.rs4
2 files changed, 82 insertions, 62 deletions
diff --git a/chain/src/atom/default.rs b/chain/src/atom/default.rs
index 72989b3..90133f4 100644
--- a/chain/src/atom/default.rs
+++ b/chain/src/atom/default.rs
@@ -4,7 +4,10 @@
use super::*;
use grammar::Grammar;
use graph::{error::Error as GraphError, Graph, LabelExtGraph, LabelGraph};
-use nfa::default::{nfa::DefaultNFA, regex::DefaultRegex};
+use nfa::{
+ default::{nfa::DefaultNFA, regex::DefaultRegex},
+ LabelType,
+};
use core::fmt::Display;
use std::collections::BTreeMap as Map;
@@ -35,7 +38,7 @@ type VirtualMap = Map<VirtualNode, usize>;
#[derive(Debug, Clone, Default)]
pub struct DefaultAtom {
grammar: Grammar,
- nfa: DefaultNFA<DOption<TNT>>,
+ nfa: DefaultNFA<LabelType<TNT>>,
// NOTE: This is mostly for printing and debugging
regexp: Vec<DefaultRegex<TNT>>,
virtual_nodes: VirtualMap,
@@ -95,7 +98,7 @@ impl Display for DefaultAtom {
// LabelGraph, in order to implement Nfa.
impl Graph for DefaultAtom {
- type Iter<'b> = <DefaultNFA<DOption<TNT>> as Graph>::Iter<'b>
+ type Iter<'b> = <DefaultNFA<LabelType<TNT>> as Graph>::Iter<'b>
where
Self: 'b;
@@ -130,23 +133,23 @@ impl Graph for DefaultAtom {
}
}
-impl LabelGraph<DOption<TNT>> for DefaultAtom {
- type Iter<'b> = <DefaultNFA<DOption<TNT>> as LabelGraph<DOption<TNT>>>::Iter<'b>
+impl LabelGraph<LabelType<TNT>> for DefaultAtom {
+ type Iter<'b> = <DefaultNFA<LabelType<TNT>> as LabelGraph<LabelType<TNT>>>::Iter<'b>
where
Self: 'b;
- type LabelIter<'b> = <DefaultNFA<DOption<TNT>> as LabelGraph<DOption<TNT>>>::LabelIter<'b>
+ type LabelIter<'b> = <DefaultNFA<LabelType<TNT>> as LabelGraph<LabelType<TNT>>>::LabelIter<'b>
where
Self: 'b,
DOption<TNT>: 'b;
- type EdgeLabelIter<'a> = <DefaultNFA<DOption<TNT>> as LabelGraph<DOption<TNT>>>::EdgeLabelIter<'a>
+ type EdgeLabelIter<'a> = <DefaultNFA<LabelType<TNT>> as LabelGraph<LabelType<TNT>>>::EdgeLabelIter<'a>
where
Self: 'a,
DOption<TNT>: 'a;
#[inline]
- fn vertex_label(&self, node_id: usize) -> Result<Option<DOption<TNT>>, GraphError> {
+ fn vertex_label(&self, node_id: usize) -> Result<Option<LabelType<TNT>>, GraphError> {
self.nfa.vertex_label(node_id)
}
@@ -163,8 +166,8 @@ impl LabelGraph<DOption<TNT>> for DefaultAtom {
fn find_children_with_label(
&self,
node_id: usize,
- label: &DOption<TNT>,
- ) -> Result<<Self as LabelGraph<DOption<TNT>>>::Iter<'_>, GraphError> {
+ label: &LabelType<TNT>,
+ ) -> Result<<Self as LabelGraph<LabelType<TNT>>>::Iter<'_>, GraphError> {
self.nfa.find_children_with_label(node_id, label)
}
@@ -177,39 +180,31 @@ impl LabelGraph<DOption<TNT>> for DefaultAtom {
fn has_edge_label(
&self,
node_id: usize,
- label: &DOption<TNT>,
+ label: &LabelType<TNT>,
target: usize,
) -> Result<bool, GraphError> {
self.nfa.has_edge_label(node_id, label, target)
}
}
-impl LabelExtGraph<DOption<TNT>> for DefaultAtom {
+impl LabelExtGraph<LabelType<TNT>> for DefaultAtom {
#[inline]
fn extend(
&mut self,
- edges: impl IntoIterator<Item = (DOption<TNT>, usize)>,
+ edges: impl IntoIterator<Item = (LabelType<TNT>, usize)>,
) -> Result<usize, GraphError> {
self.nfa.extend(edges)
}
}
-impl Nfa<DOption<TNT>> for DefaultAtom {
- #[inline]
- fn remove_epsilon<F>(&mut self, f: F) -> Result<(), nfa::error::Error>
- where
- F: Fn(DOption<TNT>) -> bool,
- {
- self.nfa.remove_epsilon(f)
- }
-
+impl Nfa<LabelType<TNT>> for DefaultAtom {
type FromRegex<S: graph::GraphLabel + std::fmt::Display + Default> = ();
#[inline]
fn to_nfa(
- _regexps: &[impl nfa::Regex<nfa::default::regex::RegexType<DOption<TNT>>>],
- _sub_pred: impl Fn(DOption<TNT>) -> Result<nfa::SoC<DOption<TNT>>, nfa::error::Error>,
- _default: Option<DOption<TNT>>,
+ _regexps: &[impl nfa::Regex<nfa::default::regex::RegexType<LabelType<TNT>>>],
+ _sub_pred: impl Fn(LabelType<TNT>) -> Result<nfa::SoC<LabelType<TNT>>, nfa::error::Error>,
+ _default: Option<LabelType<TNT>>,
) -> Result<Self::FromRegex<DOption<DOption<TNT>>>, nfa::error::Error> {
// NOTE: We cannot construct an atom from a set of regular
// languages alone. So it is appropriate to panic here, if
@@ -218,13 +213,20 @@ impl Nfa<DOption<TNT>> for DefaultAtom {
}
#[inline]
- fn remove_dead(&mut self, reserve: impl Fn(usize) -> bool) -> Result<(), nfa::error::Error> {
+ fn remove_dead(&mut self, reserve: impl FnMut(usize) -> bool) -> Result<(), nfa::error::Error> {
self.nfa.remove_dead(reserve)
}
#[inline]
- fn nulling(&mut self, f: impl Fn(DOption<TNT>) -> bool) -> Result<(), nfa::error::Error> {
- self.nfa.nulling(f)
+ fn closure(
+ &mut self,
+ predicate: impl FnMut(LabelType<TNT>) -> bool,
+ remove_after_p: bool,
+ transform: impl FnMut(nfa::TwoEdges<LabelType<TNT>>) -> LabelType<TNT>,
+ remove_predicate: impl FnMut(LabelType<TNT>) -> bool,
+ ) -> Result<(), nfa::error::Error> {
+ self.nfa
+ .closure(predicate, remove_after_p, transform, remove_predicate)
}
}
@@ -237,46 +239,56 @@ impl DefaultAtom {
let mut nfa = grammar.left_closure_to_nfa(&regexp)?;
- use std::collections::HashSet;
+ use std::collections::{HashMap, HashSet};
let accumulators: Vec<usize> = {
let mut result = Vec::with_capacity(regexp.len() + 1);
result.push(0);
for regex in regexp.iter() {
+ // Calling `unwrap` here is safe as `result` is always
+ // non-empty.
result.push(regex.nodes_len() * 2 + result.last().unwrap());
}
- result.into_iter().collect()
+ result
};
let accumulators_set: HashSet<usize> = accumulators.iter().copied().collect();
- nfa.nulling(|label| {
- if let Some(label) = *label {
- match label {
- TNT::Ter(_) => false,
- // Panics if a non-terminal references an invalid node
- // here.
- TNT::Non(n) => grammar.is_nullable(n).unwrap(),
+ let nullables: HashSet<usize> = (0..grammar.non_num())
+ .filter(|n| matches!(grammar.is_nullable(*n), Ok(true)))
+ .collect();
+
+ // Perform nulling and remove_epsilon at the same time.
+ nfa.closure(
+ |label| {
+ if let Some(label) = *label.get_value() {
+ matches!(label, TNT::Non(n) if nullables.contains(&n))
+ } else {
+ true
}
- } else {
- true
- }
- })?;
- nfa.remove_epsilon(|label| label.is_none())?;
+ },
+ true,
+ |two_edges| grammar.transform_label_null_epsilon(two_edges),
+ |label| label.get_value().is_none(),
+ )?;
+
nfa.remove_dead(|node| accumulators_set.contains(&node))?;
- // now add the virtual nodes
+ // Now add the virtual nodes.
let mut virtual_nodes: VirtualMap = Default::default();
let nt_num = grammar.non_num();
assert!(nt_num <= accumulators.len());
- // Convert an error telling us that an index is out of bounds.
- //
- // Panics if the error is not of the expected kind.
+ /// Convert an error telling us that an index is out of bounds.
+ ///
+ /// # Panics
+ ///
+ /// The function panics if the error is not of the expected
+ /// kind.
fn index_out_of_bounds_conversion(ge: GraphError) -> GrammarError {
match ge {
GraphError::IndexOutOfBounds(index, bound) => {
@@ -287,24 +299,34 @@ impl DefaultAtom {
}
for nt in 0..nt_num {
- let children: std::collections::HashMap<DOption<TNT>, Vec<_>> = nfa
- // this is safe because of the above assertion.
+ let children: std::collections::HashMap<_, _> = nfa
+ // This is safe because of the above assertion.
.labels_of(*accumulators.get(nt).unwrap())
.map_err(index_out_of_bounds_conversion)?
- .map(|(label, target_iter)| (*label, target_iter.collect()))
+ .map(|(label, target_iter)| (*label, target_iter))
.collect();
- for (label, children_vec) in children.into_iter() {
- if let Some(TNT::Ter(t)) = *label {
- let new_index = nfa
- .extend(children_vec.into_iter().map(|target| (label, target)))
- .map_err(index_out_of_bounds_conversion)?;
+ let mut terminals_map: HashMap<usize, HashSet<(LabelType<TNT>, usize)>> =
+ HashMap::new();
- let virtual_node = VirtualNode::new(nt, t);
-
- virtual_nodes.insert(virtual_node, new_index);
+ for (label, children_iter) in children.into_iter() {
+ if let Some(TNT::Ter(t)) = *label.get_value() {
+ terminals_map
+ .entry(t)
+ .or_insert_with(|| HashSet::with_capacity(children_iter.len()))
+ .extend(children_iter.map(|target| (label, target)));
}
}
+
+ for (t, set) in terminals_map.into_iter() {
+ let new_index = nfa
+ .extend(set.into_iter())
+ .map_err(index_out_of_bounds_conversion)?;
+
+ let virtual_node = VirtualNode::new(nt, t);
+
+ virtual_nodes.insert(virtual_node, new_index);
+ }
}
Ok(Self {
@@ -335,8 +357,6 @@ impl Atom for DefaultAtom {
}
fn empty(&self) -> usize {
- assert_eq!(self.nfa.nodes_len() - 2, self.grammar.non_num() * 2);
-
- self.nfa.nodes_len() - 2
+ self.grammar.total() << 1
}
}
diff --git a/chain/src/atom/mod.rs b/chain/src/atom/mod.rs
index 084acca..065640b 100644
--- a/chain/src/atom/mod.rs
+++ b/chain/src/atom/mod.rs
@@ -7,10 +7,10 @@
//! I have better ideas in the future.
use grammar::{Error as GrammarError, TNT};
-use nfa::{DOption, Nfa};
+use nfa::{DOption, LabelType, Nfa};
/// The expected behaviours of an atomic language.
-pub trait Atom: Nfa<DOption<TNT>> {
+pub trait Atom: Nfa<LabelType<TNT>> {
/// Return the index of a node representing the derivative of the
/// left-linear null closure of `nt` with respect to `t`.
fn atom(&self, nt: usize, t: usize) -> Result<Option<usize>, GrammarError>;