summaryrefslogtreecommitdiff
path: root/chain/src
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2023-01-20 13:48:26 +0800
committerJSDurand <mmemmew@gmail.com>2023-01-20 13:48:26 +0800
commit18d7955b7d84c00467ede38baae53f4ce1fb6908 (patch)
tree97d0746b82816a21d980636e50f8cdbeb804b518 /chain/src
parent8f8d3d1a3c276be4be2e5d2e767ada564c47279a (diff)
chain: a prototype is added.
I have an ostensibly working prototype now. Further tests are needed to make sure that the algorithm meets the time complexity requirement, though.
Diffstat (limited to 'chain/src')
-rw-r--r--chain/src/atom/default.rs214
-rw-r--r--chain/src/atom/mod.rs9
-rw-r--r--chain/src/default.rs784
-rw-r--r--chain/src/lib.rs194
-rw-r--r--chain/src/plan.org10
5 files changed, 1160 insertions, 51 deletions
diff --git a/chain/src/atom/default.rs b/chain/src/atom/default.rs
index 90133f4..0dc24c3 100644
--- a/chain/src/atom/default.rs
+++ b/chain/src/atom/default.rs
@@ -6,7 +6,7 @@ use grammar::Grammar;
use graph::{error::Error as GraphError, Graph, LabelExtGraph, LabelGraph};
use nfa::{
default::{nfa::DefaultNFA, regex::DefaultRegex},
- LabelType,
+ LabelType, NfaLabel,
};
use core::fmt::Display;
@@ -39,11 +39,55 @@ type VirtualMap = Map<VirtualNode, usize>;
pub struct DefaultAtom {
grammar: Grammar,
nfa: DefaultNFA<LabelType<TNT>>,
+ accepting_vec: Vec<bool>,
// NOTE: This is mostly for printing and debugging
regexp: Vec<DefaultRegex<TNT>>,
virtual_nodes: VirtualMap,
}
+impl DefaultAtom {
+ /// Return the string description of a rule position.
+ pub fn rule_pos_string(&self, pos: usize) -> Result<String, Box<dyn std::error::Error>> {
+ let rule_num = self.grammar.get_rule_num(pos)?;
+
+ assert!(rule_num < self.grammar.non_num());
+
+ let display_tnt = |tnt| self.name_of_tnt(tnt).unwrap_or_else(|e| format!("{e}"));
+
+ Ok(self.regexp.get(rule_num).unwrap().to_string_with_dot(
+ display_tnt,
+ if rule_num == 0 {
+ pos
+ } else {
+ pos - self.grammar.nth_accumulator(rule_num - 1)?
+ },
+ )?)
+ }
+
+ /// Print the underlying NFA.
+ pub fn print_nfa<S: AsRef<str>>(&self, filename: S) -> Result<(), std::io::Error> {
+ self.nfa.print_viz(filename.as_ref())?;
+
+ let nullables: Vec<_> = self
+ .accepting_vec
+ .iter()
+ .enumerate()
+ .filter_map(|(index, pred)| if *pred { Some(index) } else { None })
+ .collect();
+
+ if !nullables.is_empty() {
+ println!("nullables: {nullables:?}");
+ }
+
+ println!("printing virtual nodes:");
+ for (vn, node) in self.virtual_nodes.iter() {
+ println!("[{}]^{{({})}}: {}", vn.s, vn.t, node);
+ }
+
+ Ok(())
+ }
+}
+
impl Display for DefaultAtom {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let grammar = &self.grammar;
@@ -260,15 +304,67 @@ impl DefaultAtom {
.filter(|n| matches!(grammar.is_nullable(*n), Ok(true)))
.collect();
+ // Now record accepting information.
+
+ let nfa_len = nfa.nodes_len();
+
+ let label_is_nullable = |label: NfaLabel<DOption<TNT>>| {
+ if let Some(label) = *label.get_value() {
+ matches!(label, TNT::Non(n) if nullables.contains(&n))
+ } else {
+ true
+ }
+ };
+
+ let mut accepting_vec: Vec<bool> = std::iter::repeat(false).take(nfa_len).collect();
+
+ for nfa_start in accumulators.iter().copied().take(regexp.len()) {
+ *accepting_vec.get_mut(nfa_start + 1).unwrap() = true;
+ }
+
+ // The last is always accepting.
+ *accepting_vec.get_mut(nfa_len - 1).unwrap() = true;
+
+ let mut updated = true;
+
+ while updated {
+ updated = false;
+
+ for node in nfa.nodes() {
+ // skip those that do not need to be updated
+ if *accepting_vec
+ .get(node)
+ .ok_or(GrammarError::IndexOutOfBounds(node, nfa_len))?
+ {
+ continue;
+ }
+
+ 'label_loop: for (label, target_iter) in nfa
+ .labels_of(node)
+ .map_err(|_| GrammarError::IndexOutOfBounds(node, nfa_len))?
+ {
+ if label_is_nullable(*label) {
+ for target in target_iter {
+ if *accepting_vec
+ .get(target)
+ .ok_or(GrammarError::IndexOutOfBounds(target, nfa_len))?
+ {
+ // accepting_vec[node] must have been
+ // false, as we checked above
+ *accepting_vec.get_mut(node).unwrap() = true;
+ updated = true;
+
+ break 'label_loop;
+ }
+ }
+ }
+ }
+ }
+ }
+
// Perform nulling and remove_epsilon at the same time.
nfa.closure(
- |label| {
- if let Some(label) = *label.get_value() {
- matches!(label, TNT::Non(n) if nullables.contains(&n))
- } else {
- true
- }
- },
+ label_is_nullable,
true,
|two_edges| grammar.transform_label_null_epsilon(two_edges),
|label| label.get_value().is_none(),
@@ -298,6 +394,8 @@ impl DefaultAtom {
}
}
+ // dbg!(&accumulators);
+
for nt in 0..nt_num {
let children: std::collections::HashMap<_, _> = nfa
// This is safe because of the above assertion.
@@ -306,23 +404,92 @@ impl DefaultAtom {
.map(|(label, target_iter)| (*label, target_iter))
.collect();
- let mut terminals_map: HashMap<usize, HashSet<(LabelType<TNT>, usize)>> =
+ let mut terminals_map: HashMap<usize, (HashSet<(LabelType<TNT>, usize)>, bool)> =
HashMap::new();
for (label, children_iter) in children.into_iter() {
if let Some(TNT::Ter(t)) = *label.get_value() {
- terminals_map
- .entry(t)
- .or_insert_with(|| HashSet::with_capacity(children_iter.len()))
- .extend(children_iter.map(|target| (label, target)));
+ let estimated_len = {
+ let mut result = 0;
+
+ for child in children_iter.clone() {
+ result += nfa.degree(child).map_err(index_out_of_bounds_conversion)?;
+ }
+
+ result
+ };
+
+ let mut accepting = false;
+
+ for child in children_iter {
+ accepting = accepting
+ || *accepting_vec.get(child).ok_or_else(|| {
+ GrammarError::IndexOutOfBounds(child, accepting_vec.len())
+ })?;
+
+ if nt == 3
+ && nfa.degree(child).map_err(index_out_of_bounds_conversion)? == 0
+ {
+ println!("accepting = {accepting}");
+ }
+
+ if let Some((_, old_accepting)) = terminals_map.get_mut(&t) {
+ *old_accepting = *old_accepting || accepting;
+ } else {
+ terminals_map
+ .insert(t, (HashSet::with_capacity(estimated_len), accepting));
+ }
+
+ for (child_label, child_children_iter) in nfa
+ .labels_of(child)
+ .map_err(index_out_of_bounds_conversion)?
+ {
+ // We checked this is safe above.
+ let (set, _) = terminals_map.get_mut(&t).unwrap();
+
+ set.extend(child_children_iter.map(|target| (*child_label, target)));
+ }
+ }
}
}
- for (t, set) in terminals_map.into_iter() {
+ if nt == 3 {
+ println!("map = {terminals_map:?}");
+ }
+
+ for (t, (set, accepting)) in terminals_map.into_iter() {
let new_index = nfa
.extend(set.into_iter())
.map_err(index_out_of_bounds_conversion)?;
+ if accepting_vec.get(new_index).is_none() {
+ #[cfg(debug_assertions)]
+ assert_eq!(new_index, accepting_vec.len());
+
+ // let mut updated = false;
+ // let nfa_len = nfa.nodes_len();
+
+ // 'label_loop: for (label, target_iter) in nfa
+ // .labels_of(new_index)
+ // .map_err(|_| GrammarError::IndexOutOfBounds(new_index, nfa_len))?
+ // {
+ // if label_is_nullable(*label) {
+ // for target in target_iter {
+ // if *accepting_vec
+ // .get(target)
+ // .ok_or(GrammarError::IndexOutOfBounds(target, nfa_len))?
+ // {
+ // updated = true;
+
+ // break 'label_loop;
+ // }
+ // }
+ // }
+ // }
+
+ accepting_vec.push(accepting);
+ }
+
let virtual_node = VirtualNode::new(nt, t);
virtual_nodes.insert(virtual_node, new_index);
@@ -334,6 +501,7 @@ impl DefaultAtom {
nfa,
regexp,
virtual_nodes,
+ accepting_vec,
})
}
}
@@ -343,6 +511,14 @@ fn query(map: &VirtualMap, nt: usize, t: usize) -> Option<usize> {
map.get(&VirtualNode::new(nt, t)).copied()
}
+impl std::ops::Deref for DefaultAtom {
+ type Target = Grammar;
+
+ fn deref(&self) -> &Self::Target {
+ &self.grammar
+ }
+}
+
impl Atom for DefaultAtom {
fn atom(&self, nt: usize, t: usize) -> Result<Option<usize>, GrammarError> {
if nt >= self.grammar.non_num() {
@@ -359,4 +535,14 @@ impl Atom for DefaultAtom {
fn empty(&self) -> usize {
self.grammar.total() << 1
}
+
+ fn is_accepting(&self, node_id: usize) -> Result<bool, GrammarError> {
+ self.accepting_vec
+ .get(node_id)
+ .copied()
+ .ok_or(GrammarError::IndexOutOfBounds(
+ node_id,
+ self.accepting_vec.len(),
+ ))
+ }
}
diff --git a/chain/src/atom/mod.rs b/chain/src/atom/mod.rs
index 065640b..398edd2 100644
--- a/chain/src/atom/mod.rs
+++ b/chain/src/atom/mod.rs
@@ -6,17 +6,22 @@
//! Because this way I can easily substitute other implementations if
//! I have better ideas in the future.
-use grammar::{Error as GrammarError, TNT};
+use grammar::{Error as GrammarError, Grammar, TNT};
use nfa::{DOption, LabelType, Nfa};
+use std::ops::Deref;
+
/// The expected behaviours of an atomic language.
-pub trait Atom: Nfa<LabelType<TNT>> {
+pub trait Atom: Nfa<LabelType<TNT>> + Deref<Target = Grammar> {
/// Return the index of a node representing the derivative of the
/// left-linear null closure of `nt` with respect to `t`.
fn atom(&self, nt: usize, t: usize) -> Result<Option<usize>, GrammarError>;
/// Return the index of the empty state.
fn empty(&self) -> usize;
+
+ /// Tell whether a node is accepting.
+ fn is_accepting(&self, node_id: usize) -> Result<bool, GrammarError>;
}
pub mod default;
diff --git a/chain/src/default.rs b/chain/src/default.rs
index e04be9f..697b997 100644
--- a/chain/src/default.rs
+++ b/chain/src/default.rs
@@ -6,18 +6,47 @@
//! modular design makes that easy.
use super::*;
+use crate::atom::{Atom, DefaultAtom};
use core::fmt::Display;
+use forest::{default::DefaultForest, Forest};
+use grammar::{Error as GrammarError, GrammarLabel, GrammarLabelType, TNT};
+#[allow(unused_imports)]
+use graph::{
+ labelled::DLGBuilder, Builder, DLGraph, Graph, LabelExtGraph, LabelGraph, ParentsGraph,
+};
+
+use std::collections::{HashMap as Map, TryReserveError};
/// The errors related to taking derivatives by chain rule.
+#[non_exhaustive]
#[derive(Debug)]
pub enum Error {
+ /// General error for indices out of bounds.
+ IndexOutOfBounds(usize, usize),
+ /// The forest encounters a duplicate node, for some reason.
+ DuplicateNode(usize),
+ /// The chain rule machine encounters a duplicate edge, for some
+ /// reason.
+ DuplicateEdge(usize, usize),
+ /// A node has no labels while it is required to have one.
+ NodeNoLabel(usize),
+ /// Reserving memory fails.
+ CannotReserve(TryReserveError),
/// An invalid situation happens.
Invalid,
}
impl Display for Error {
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match self {
+ Self::IndexOutOfBounds(index, bound) => write!(f, "index {index} out of bound {bound}"),
+ Self::DuplicateNode(n) => write!(f, "the forest has a node {n} with a duplicate label"),
+ Self::DuplicateEdge(source, target) => write!(
+ f,
+ "the forest has a duplicate edge from {source} to {target}"
+ ),
+ Self::NodeNoLabel(n) => write!(f, "node {n} has no labels while it should have one"),
+ Self::CannotReserve(e) => write!(f, "cannot reserve memory: {e}"),
Self::Invalid => write!(f, "invalid"),
}
}
@@ -25,22 +54,761 @@ impl Display for Error {
impl std::error::Error for Error {}
+impl From<GError> for Error {
+ fn from(value: GError) -> Self {
+ match value {
+ GError::IndexOutOfBounds(index, bound) => Self::IndexOutOfBounds(index, bound),
+ GError::DuplicatedNode(n) => Self::DuplicateNode(n),
+ GError::DuplicatedEdge(source, target) => Self::DuplicateEdge(source, target),
+ _ => Self::Invalid,
+ }
+ }
+}
+
+impl From<ForestError> for Error {
+ fn from(e: ForestError) -> Self {
+ match e {
+ ForestError::IndexOutOfBounds(index, bound) => Error::IndexOutOfBounds(index, bound),
+ ForestError::DuplicatedNode(n) => Error::DuplicateNode(n),
+ ForestError::InvalidGraphError(ge) => ge.into(),
+ ForestError::NodeNoLabel(n) => Error::NodeNoLabel(n),
+ }
+ }
+}
+
+impl From<TryReserveError> for Error {
+ fn from(value: TryReserveError) -> Self {
+ Self::CannotReserve(value)
+ }
+}
+
+/// The type of an index into an element in [`DerIter`].
+#[derive(Debug, Copy, Clone)]
+enum DerIterIndex {
+ Single(usize),
+ Map(usize),
+}
+
+impl Default for DerIterIndex {
+ fn default() -> Self {
+ Self::Map(0)
+ }
+}
+
+/// A complex type used for storing values of edges with two layers.
+type SecondTypeValue = (Parent, bool, Vec<(Edge, usize)>);
+
+/// An iterator of TwoLayers.
+#[derive(Debug, Default)]
+pub struct DerIter {
+ /// Stores edges of only one layer.
+ singles: Vec<(Edge, usize)>,
+ /// Stores edges with two layers. They are grouped by their
+ /// labels of the second layer.
+ ///
+ /// The values are tuples (forest_source, accepting, edges), where
+ /// the edges are the grouped edges of the first layer and the
+ /// destination.
+ seconds: Map<usize, SecondTypeValue>,
+ /// We want to iterate the elements of the map, for which purpose
+ /// we need an array. Since hashmaps provide no arrays, we keep
+ /// an array of keys for iteration purposes.
+ second_array: Vec<usize>,
+ /// The index of the current element, either in `second_array` or
+ /// in `singles` .
+ index: DerIterIndex,
+}
+
+impl DerIter {
+ fn add_second_layer(
+ &mut self,
+ label: usize,
+ forest_source: Parent,
+ accepting: bool,
+ edges: Vec<(Edge, usize)>,
+ ) {
+ if let Some((_, _, vec)) = self.seconds.get_mut(&label) {
+ vec.extend(edges);
+ } else {
+ self.seconds
+ .insert(label, (forest_source, accepting, edges));
+
+ self.second_array.push(label);
+ }
+ }
+}
+
+impl Iterator for DerIter {
+ type Item = TwoLayers;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ // We iterate through two layered edges first.
+ match self.index {
+ DerIterIndex::Map(index) => {
+ if let Some(key) = self.second_array.get(index) {
+ if let Some((forest_source, accepting, edges)) = self.seconds.remove(key) {
+ self.index = DerIterIndex::Map(index + 1);
+
+ Some(TwoLayers::Two(*key, forest_source, accepting, edges))
+ } else {
+ // this should not happen
+ println!("a key does not exist in the hashmap: something is wrong when taking derivatives");
+ None
+ }
+ } else {
+ self.index = DerIterIndex::Single(0);
+
+ if let Some((edge, to)) = self.singles.first() {
+ self.index = DerIterIndex::Single(1);
+
+ Some(TwoLayers::One(*edge, *to))
+ } else {
+ None
+ }
+ }
+ }
+ DerIterIndex::Single(index) => {
+ if let Some((edge, to)) = self.singles.get(index) {
+ self.index = DerIterIndex::Single(index + 1);
+
+ Some(TwoLayers::One(*edge, *to))
+ } else {
+ None
+ }
+ }
+ }
+ }
+}
+
/// A default implementation for the [`Chain`] trait.
#[derive(Debug, Clone, Default)]
-pub struct DefaultChain {}
+pub struct DefaultChain {
+ graph: DLGraph<Edge>,
+ atom: DefaultAtom,
+ current: usize,
+ history: Vec<usize>,
+ forest: DefaultForest<GrammarLabel>,
+ accepting_vec: Vec<bool>,
+}
+
+impl DefaultChain {
+ /// Return the current node.
+ #[inline]
+ pub fn current(&self) -> usize {
+ self.current
+ }
+
+ /// Return the complete slice of histories.
+ #[inline]
+ pub fn history(&self) -> &[usize] {
+ self.history.as_ref()
+ }
+
+ /// Return a reference to the associated forest.
+ #[inline]
+ pub fn forest(&self) -> &DefaultForest<GrammarLabel> {
+ &self.forest
+ }
+
+ /// Print the rule positions of the labels.
+ pub fn print_rule_positions(&self) -> Result<(), Box<dyn std::error::Error>> {
+ let mut labels = std::collections::HashSet::<usize>::default();
+
+ for node in 0..self.graph.nodes_len() {
+ labels.extend(self.graph.labels_of(node)?.map(|(label, _)| label.label));
+ }
+
+ for label in labels.into_iter() {
+ println!("{}", self.atom.rule_pos_string(label)?);
+ }
+
+ Ok(())
+ }
+}
+
+impl Graph for DefaultChain {
+ type Iter<'a> = <DLGraph<Edge> as Graph>::Iter<'a>
+ where
+ Self: 'a;
+
+ #[inline]
+ fn is_empty(&self) -> bool {
+ self.graph.is_empty()
+ }
+
+ #[inline]
+ fn nodes_len(&self) -> usize {
+ self.graph.nodes_len()
+ }
+
+ #[inline]
+ fn edges_len(&self) -> Option<usize> {
+ self.graph.edges_len()
+ }
+
+ #[inline]
+ fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, GError> {
+ self.graph.children_of(node_id)
+ }
+
+ #[inline]
+ fn degree(&self, node_id: usize) -> Result<usize, GError> {
+ self.graph.degree(node_id)
+ }
+
+ #[inline]
+ fn is_empty_node(&self, node_id: usize) -> Result<bool, GError> {
+ self.graph.is_empty_node(node_id)
+ }
+
+ #[inline]
+ fn has_edge(&self, source: usize, target: usize) -> Result<bool, GError> {
+ self.graph.has_edge(source, target)
+ }
+
+ fn replace_by_builder(&mut self, _builder: impl graph::Builder<Result = Self>) {
+ unimplemented!("I shall refactor this")
+ }
+}
+
+impl LabelGraph<Edge> for DefaultChain {
+ type Iter<'a> = <DLGraph<Edge> as LabelGraph<Edge>>::Iter<'a>
+ where
+ Self: 'a;
+
+ type LabelIter<'a> = <DLGraph<Edge> as LabelGraph<Edge>>::LabelIter<'a>
+ where
+ Self: 'a,
+ Edge: 'a;
+
+ type EdgeLabelIter<'a> = <DLGraph<Edge> as LabelGraph<Edge>>::EdgeLabelIter<'a>
+ where
+ Self: 'a,
+ Edge: 'a;
+
+ #[inline]
+ fn edge_label(&self, source: usize, target: usize) -> Result<Self::EdgeLabelIter<'_>, GError> {
+ self.graph.edge_label(source, target)
+ }
+
+ #[inline]
+ fn find_children_with_label(
+ &self,
+ node_id: usize,
+ label: &Edge,
+ ) -> Result<<Self as LabelGraph<Edge>>::Iter<'_>, GError> {
+ self.graph.find_children_with_label(node_id, label)
+ }
+
+ #[inline]
+ fn labels_of(&self, node_id: usize) -> Result<Self::LabelIter<'_>, GError> {
+ self.graph.labels_of(node_id)
+ }
+
+ #[inline]
+ fn has_edge_label(&self, node_id: usize, label: &Edge, target: usize) -> Result<bool, GError> {
+ self.graph.has_edge_label(node_id, label, target)
+ }
+}
+
+impl LabelExtGraph<Edge> for DefaultChain {
+ #[inline]
+ fn extend(&mut self, edges: impl IntoIterator<Item = (Edge, usize)>) -> Result<usize, GError> {
+ let new = self.graph.extend(edges)?;
+ let accepting_len = self.accepting_vec.len();
+
+ if self.accepting_vec.get(new).is_none() {
+ // assert it can only grow by one node at a time.
+ #[cfg(debug_assertions)]
+ assert_eq!(new, accepting_len);
+
+ let mut updated = false;
+
+ for (label, child_iter) in self.graph.labels_of(new)? {
+ let old_accepting = {
+ let mut result = false;
+ for child in child_iter {
+ if *self
+ .accepting_vec
+ .get(child)
+ .ok_or(GError::IndexOutOfBounds(child, accepting_len))?
+ {
+ result = true;
+ break;
+ }
+ }
+
+ result
+ };
+
+ if !old_accepting {
+ self.accepting_vec.push(false);
+ updated = true;
+
+ break;
+ }
+
+ if label.is_accepting() {
+ self.accepting_vec.push(true);
+ updated = true;
+
+ break;
+ }
+ }
+
+ if !updated {
+ self.accepting_vec.push(false);
+ }
+ }
+
+ Ok(new)
+ }
+}
impl Chain for DefaultChain {
type Error = Error;
- fn unit() -> Self {
- todo!()
+ type Atom = DefaultAtom;
+
+ fn unit(atom: Self::Atom) -> Result<Self, Self::Error> {
+ let mut builder: DLGBuilder<Edge> = Default::default();
+
+ let root = builder.add_vertex();
+ let first = builder.add_vertex();
+
+ let empty_state = atom.empty();
+
+ let initial_nullable = atom
+ .is_nullable(0)
+ .map_err(|_| Error::IndexOutOfBounds(0, atom.non_num()))?;
+
+ builder.add_edge(
+ first,
+ root,
+ Edge::new(empty_state, Parent::new(0, 0), initial_nullable),
+ )?;
+
+ let graph = builder.build();
+
+ let forest =
+ DefaultForest::new_leaf(GrammarLabel::new(GrammarLabelType::TNT(TNT::Non(0)), 0));
+
+ #[cfg(debug_assertions)]
+ assert_eq!(forest.root(), Some(0));
+
+ let current = 1;
+
+ let history = Vec::new();
+
+ let accepting_vec = vec![true, initial_nullable];
+
+ Ok(Self {
+ graph,
+ atom,
+ current,
+ history,
+ forest,
+ accepting_vec,
+ })
+ }
+
+ fn epsilon(&self) -> Result<bool, Self::Error> {
+ self.accepting_vec
+ .get(self.current)
+ .copied()
+ .ok_or(Error::IndexOutOfBounds(
+ self.current,
+ self.accepting_vec.len(),
+ ))
+ }
+
+ fn update_history(&mut self, new: usize) {
+ debug_assert!(new < self.graph.nodes_len());
+
+ self.history.push(self.current);
+
+ self.current = new;
+ }
+
+ type DerIter = DerIter;
+
+ fn derive(&mut self, t: usize, pos: usize) -> Result<Self::DerIter, Self::Error> {
+ use TNT::*;
+
+ /// Convert an error telling us that an index is out of bounds.
+ ///
+ /// # Panics
+ ///
+ /// The function panics if the error is not of the expected
+ /// kind.
+ fn index_out_of_bounds_conversion(ge: GrammarError) -> Error {
+ match ge {
+ GrammarError::IndexOutOfBounds(index, bound) => {
+ Error::IndexOutOfBounds(index, bound)
+ }
+ _ => panic!("wrong error kind"),
+ }
+ }
+
+ /// A helper function to generate edges to join.
+ ///
+ /// It first checks if the base edge is accepting. If yes,
+ /// then pull in the children of the target.
+ ///
+ /// Then check if the label of the base edge has children. If
+ /// no, then do not add this base edge itself.
+ ///
+ /// The generated edges will be pushed to `output` directly,
+ /// to save some allocations.
+ // TODO: Handle forests as well.
+ fn generate_edges(
+ chain: &DefaultChain,
+ child_iter: impl Iterator<Item = usize> + ExactSizeIterator + Clone,
+ atom_child_iter: impl Iterator<Item = usize> + Clone,
+ mut output: impl AsMut<Vec<(Edge, usize)>>,
+ ) -> Result<(), Error> {
+ // First check the values from iterators are all valid.
+ let graph_len = chain.graph.nodes_len();
+ let atom_len = chain.atom.nodes_len();
+
+ for child in child_iter.clone() {
+ if !chain.graph.has_node(child) {
+ return Err(Error::IndexOutOfBounds(child, graph_len));
+ }
+ }
+
+ for atom_child in atom_child_iter.clone() {
+ if !chain.atom.has_node(atom_child) {
+ return Err(Error::IndexOutOfBounds(atom_child, atom_len));
+ }
+ }
+
+ // From now on the nodes are all valid, so we can just
+ // call `unwrap`.
+
+ // Then calculate the number of edges to append, to avoid
+ // repeated allocations
+ let mut num = 0usize;
+
+ let child_iter_total_degree = child_iter
+ .clone()
+ .map(|child| chain.graph.degree(child).unwrap())
+ .sum::<usize>();
+
+ for atom_child in atom_child_iter.clone() {
+ let atom_child_accepting = chain.atom.is_accepting(atom_child).unwrap();
+ let atom_child_empty_node = chain.atom.is_empty_node(atom_child).unwrap();
+
+ if !atom_child_empty_node {
+ num += child_iter.len();
+ }
+
+ if atom_child_accepting {
+ num += child_iter_total_degree;
+ }
+ }
+
+ let num = num;
+
+ let output = output.as_mut();
+
+ output.try_reserve(num)?;
+
+ // now push into output
+
+ let parent = Parent::new(0, 0);
+
+ for atom_child in atom_child_iter.clone() {
+ let atom_child_accepting = chain.atom.is_accepting(atom_child).unwrap();
+ let atom_child_empty_node = chain.atom.is_empty_node(atom_child).unwrap();
+
+ let edge = Edge::new(atom_child, parent, atom_child_accepting);
+
+ if !atom_child_empty_node {
+ output.extend(child_iter.clone().map(|child| (edge, child)));
+ }
+
+ if atom_child_accepting {
+ for child in child_iter.clone() {
+ for (child_label, child_child) in chain.graph.labels_of(child).unwrap() {
+ output.extend(child_child.map(|target| (*child_label, target)));
+ }
+ }
+ }
+ }
+
+ Ok(())
+ }
+
+ let mut der_iter = DerIter::default();
+
+ for (label, child_iter) in self.graph.labels_of(self.current)? {
+ for (atom_label, atom_child_iter) in self.atom.labels_of(label.label())? {
+ if atom_label.is_left_p() {
+ // We do not consider left-linearly expanded
+ // children in the first layer.
+ continue;
+ }
+
+ match *atom_label.get_value() {
+ Some(Ter(ter)) if ter == t => {
+ generate_edges(
+ self,
+ child_iter.clone(),
+ atom_child_iter.clone(),
+ &mut der_iter.singles,
+ )?;
+ }
+ Some(Non(non)) => {
+ let virtual_node = self
+ .atom
+ .atom(non, t)
+ .map_err(index_out_of_bounds_conversion)?;
+
+ if let Some(virtual_node) = virtual_node {
+ let accepting = self
+ .atom
+ .is_accepting(virtual_node)
+ .map_err(index_out_of_bounds_conversion)?;
+
+ let mut new_edges = Vec::new();
+
+ generate_edges(
+ self,
+ child_iter.clone(),
+ atom_child_iter.clone(),
+ &mut new_edges,
+ )?;
+
+ if accepting {
+ der_iter.singles.extend(new_edges.clone());
+ }
+
+ let parent = Parent::new(0, 0);
+
+ if !self.atom.is_empty_node(virtual_node).unwrap() {
+ der_iter.add_second_layer(
+ virtual_node,
+ parent,
+ accepting,
+ new_edges,
+ );
+
+ // account for atom_children without
+ // children.
+
+ for atom_child in atom_child_iter {
+ // this has been checked in
+ // `generate_edges`
+ if self.atom.is_empty_node(atom_child).unwrap() {
+ der_iter.singles.extend(child_iter.clone().map(|child| {
+ (Edge::new(virtual_node, parent, accepting), child)
+ }));
+ }
+ }
+ } else {
+ for atom_child in atom_child_iter {
+ // this has been checked in
+ // `generate_edges`
+ if self.atom.is_empty_node(atom_child).unwrap() {
+ // flat flat map, hmm...
+ der_iter.singles.extend(child_iter.clone().flat_map(
+ |child| {
+ self.graph.labels_of(child).unwrap().flat_map(
+ |(child_label, child_child_iter)| {
+ child_child_iter.map(|child_child| {
+ (*child_label, child_child)
+ })
+ },
+ )
+ },
+ ));
+ }
+ }
+ }
+ }
+ }
+ _ => {
+ continue;
+ }
+ }
+ }
+ }
+
+ Ok(der_iter)
+ }
+}
+
+#[cfg(test)]
+mod test_der_iter {
+ use super::*;
+
+ #[test]
+ fn test() -> Result<(), Box<dyn std::error::Error>> {
+ let mut der_iter = DerIter::default();
+
+ let parent = Parent::new(0, 0);
+
+ der_iter.singles.push((Edge::new(0, parent, true), 0));
+
+ der_iter.singles.push((Edge::new(1, parent, true), 0));
+
+ der_iter.singles.push((Edge::new(2, parent, true), 0));
+
+ der_iter.add_second_layer(3, parent, true, vec![(Edge::new(4, parent, true), 1)]);
+
+ der_iter.add_second_layer(6, parent, true, vec![(Edge::new(5, parent, true), 1)]);
+
+ // add an entry with a repeated label
+ der_iter.add_second_layer(3, parent, true, vec![(Edge::new(7, parent, true), 2)]);
+
+ assert_eq!(
+ der_iter.next(),
+ Some(TwoLayers::Two(
+ 3,
+ parent,
+ true,
+ vec![
+ (Edge::new(4, parent, true), 1),
+ (Edge::new(7, parent, true), 2)
+ ]
+ ))
+ );
+
+ assert_eq!(
+ der_iter.next(),
+ Some(TwoLayers::Two(
+ 6,
+ parent,
+ true,
+ vec![(Edge::new(5, parent, true), 1)]
+ ))
+ );
+
+ assert_eq!(
+ der_iter.next(),
+ Some(TwoLayers::One(Edge::new(0, parent, true), 0))
+ );
+
+ assert_eq!(
+ der_iter.next(),
+ Some(TwoLayers::One(Edge::new(1, parent, true), 0))
+ );
+
+ assert_eq!(
+ der_iter.next(),
+ Some(TwoLayers::One(Edge::new(2, parent, true), 0))
+ );
+
+ assert_eq!(der_iter.next(), None);
+ assert_eq!(der_iter.next(), None);
+
+ Ok(())
}
+}
+
+#[cfg(test)]
+mod test_chain {
+ use super::*;
+ use grammar::test_grammar_helper::*;
+
+ #[test]
+ fn base_test() -> Result<(), Box<dyn std::error::Error>> {
+ let grammar = new_notes_grammar()?;
+
+ let atom = DefaultAtom::from_grammar(grammar)?;
+
+ let mut chain = DefaultChain::unit(atom)?;
+
+ chain.chain(3, 00)?;
+ chain.chain(1, 01)?;
+ chain.chain(2, 02)?;
+ chain.chain(2, 03)?;
+ chain.chain(2, 04)?;
+ chain.chain(0, 05)?;
+ chain.chain(5, 06)?;
+ chain.chain(1, 07)?;
+ chain.chain(6, 08)?;
+ chain.chain(6, 09)?;
+ chain.chain(6, 10)?;
+ chain.chain(0, 11)?;
+ chain.chain(0, 12)?;
+
+ assert!(matches!(chain.epsilon(), Ok(true)));
+
+ #[cfg(feature = "test-print-viz")]
+ {
+ chain.graph.print_viz("chain.gv")?;
+ chain.atom.print_nfa("nfa.gv")?;
+ }
- fn chain(&mut self, _t: usize) {
- todo!()
+ Ok(())
}
- fn epsilon(&self) -> bool {
- todo!()
+ #[test]
+ fn test_speed() -> Result<(), Box<dyn std::error::Error>> {
+ let grammar = new_notes_grammar_no_regexp()?;
+
+ println!("grammar: {grammar}");
+
+ let atom = DefaultAtom::from_grammar(grammar)?;
+
+ let mut chain = DefaultChain::unit(atom)?;
+
+ let input_template = vec![3, 1, 2, 2, 2, 0, 5, 1, 6, 6, 6, 0, 0];
+
+ let repeat_times = {
+ let mut result = 1;
+
+ for arg in std::env::args() {
+ let parse_as_digit: Result<usize, _> = arg.parse();
+
+ if let Ok(parse_result) = parse_as_digit {
+ result = parse_result;
+
+ break;
+ }
+ }
+
+ result
+ };
+
+ println!("repeating {repeat_times} times");
+
+ let input = {
+ let mut result = Vec::with_capacity(input_template.len() * repeat_times);
+
+ for _ in 0..repeat_times {
+ result.extend(input_template.iter().copied());
+ }
+
+ result
+ };
+
+ let start = std::time::Instant::now();
+
+ for (index, t) in input.iter().copied().enumerate() {
+ chain.chain(t, index)?;
+ }
+
+ let elapsed = start.elapsed();
+
+ // assert!(matches!(chain.epsilon(), Ok(true)));
+
+ dbg!(elapsed);
+ dbg!(chain.current());
+
+ println!("index: terminal, history");
+ for (index, t) in input.iter().copied().enumerate().take(input.len() - 1) {
+ println!("{index}: {t}, {}", chain.history().get(index).unwrap());
+ }
+
+ #[cfg(feature = "test-print-viz")]
+ {
+ chain.graph.print_viz("chain.gv")?;
+ chain.atom.print_nfa("nfa.gv")?;
+ }
+
+ Ok(())
}
}
diff --git a/chain/src/lib.rs b/chain/src/lib.rs
index 4e21e1d..91c37f7 100644
--- a/chain/src/lib.rs
+++ b/chain/src/lib.rs
@@ -10,41 +10,189 @@
pub mod atom;
-// TODO: Define errors.
+use graph::{error::Error as GError, LabelExtGraph, Parent};
+
+use forest::Error as ForestError;
+
+/// An edge in the Chain-Rule machine.
+#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
+pub struct Edge {
+ /// The position in the atomic languages.
+ label: usize,
+ /// The source of the associated forest edge.
+ forest_source: Parent,
+ /// Whether or not this edge is "accepting".
+ accepting: bool,
+}
+
+impl Edge {
+ /// Construct a new edge.
+ pub fn new(label: usize, forest_source: Parent, accepting: bool) -> Self {
+ Self {
+ label,
+ forest_source,
+ accepting,
+ }
+ }
+
+ /// Return the label of the edge.
+ pub fn label(&self) -> usize {
+ self.label
+ }
+
+ /// Tell whether or not the edge is accepting.
+ pub fn is_accepting(&self) -> bool {
+ self.accepting
+ }
+
+ /// Return the associated forest edge of the edge.
+ pub fn forest_source(&self) -> Parent {
+ self.forest_source
+ }
+}
+
+impl core::fmt::Display for Edge {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ let label = self.label();
+ let forest_source = self.forest_source();
+
+ write!(
+ f,
+ "edge label {label} with forest source {} and edge index {}",
+ forest_source.node(),
+ forest_source.edge()
+ )
+ }
+}
+
+/// Each derivation is a concatenation of two items, so there are two
+/// layers. But some items have no children and are accepting, in
+/// which case we just skip that item completely, for performance
+/// reasons, and hence there could be only one layer as well.
+///
+/// It might even happen that both layers have no children, in which
+/// case we shall just put all previous edges here.
+#[derive(Debug, Clone, Eq, PartialEq)]
+pub enum TwoLayers {
+ /// One layer has no children.
+ One(Edge, usize),
+ // REVIEW: Maybe we do not need to store an edge in the forest: we
+ // only need the source of the edge?
+ /// Both layers have children.
+ ///
+ /// The first element is the label of the second layer, the second
+ /// the source of the associated forest edge of the second layer,
+ /// and the third is a list of edges, which are the common first
+ /// layers.
+ Two(usize, Parent, bool, Vec<(Edge, usize)>),
+}
+
+/// The type of a collapsed iterator.
+pub struct Collapsed<'a, I, C>
+where
+ I: Iterator<Item = TwoLayers>,
+ C: Chain<DerIter = I>,
+{
+ iter: I,
+ chain: &'a mut C,
+ stop: bool,
+}
+
+impl<'a, I, C> Collapsed<'a, I, C>
+where
+ I: Iterator<Item = TwoLayers>,
+ C: Chain<DerIter = I>,
+{
+ /// Collapse an iterator.
+ pub fn collapse(iter: I, chain: &'a mut C) -> Self {
+ let stop = false;
+
+ Self { iter, chain, stop }
+ }
+}
+
+impl<'a, I, C> Iterator for Collapsed<'a, I, C>
+where
+ I: Iterator<Item = TwoLayers>,
+ C: Chain<DerIter = I>,
+{
+ type Item = Result<(Edge, usize), <C as Chain>::Error>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ if self.stop {
+ return None;
+ }
+
+ if let Some(two_layer) = self.iter.next() {
+ match two_layer {
+ TwoLayers::One(edge, to) => Some(Ok((edge, to))),
+ TwoLayers::Two(label, forest_source, accepting, edges) => {
+ let new_index = match self.chain.extend(edges.into_iter()) {
+ Ok(new) => new,
+ Err(error) => {
+ // Prevent further iterations.
+
+ return Some(Err(error.into()));
+ }
+ };
+
+ Some(Ok((Edge::new(label, forest_source, accepting), new_index)))
+ }
+ }
+ } else {
+ None
+ }
+ }
+}
/// The expected behaviours of a language which can take derivatives
/// by chain rule.
-pub trait Chain: Default {
+pub trait Chain: LabelExtGraph<Edge> {
/// The implementations should choose a type to represent errors.
- type Error: std::error::Error;
+ type Error: std::error::Error + From<GError> + From<ForestError>;
+
+ /// A type of atomic languages that is chosen for this chain rule
+ /// machine.
+ type Atom: atom::Atom;
/// Represents the language that is present after we parse the
/// empty string, that is the initial configuration of the
/// language. This may or may not be different from what
/// `Default::default` gives.
- fn unit() -> Self;
-
- /// Take the derivative by a terminal symbol.
- ///
- /// This takes care of the union and the prepending operations.
- ///
- /// # A little remark about the design
- ///
- /// I have thought to separate different operations (like the
- /// union, the prepending, and single derivatives) and then define
- /// a function to put everything together. But I think that
- /// design is not convenient to use. Also, I really do not need
- /// those operations other than to provide this derivative
- /// operation, so why define them? And putting all things
- /// together may reduce the number of bugs caused by wrong uses of
- /// those component functions, and can reduce the amount of
- /// documentation strings a library user needs to read, in order
- /// to make use of this trait. So I ended up with this design.
- fn chain(&mut self, t: usize);
+ fn unit(atom: Self::Atom) -> Result<Self, Self::Error>;
/// Return true if and only if the language contains the empty
/// string.
- fn epsilon(&self) -> bool;
+ fn epsilon(&self) -> Result<bool, Self::Error>;
+
+ /// Update the history
+ fn update_history(&mut self, new: usize);
+
+ /// An iterator that iterates all layers that need to be merged.
+ type DerIter: Iterator<Item = TwoLayers>;
+
+ /// Take the derivative by a terminal symbol at position `POS`.
+ fn derive(&mut self, t: usize, pos: usize) -> Result<Self::DerIter, Self::Error>;
+
+ /// Take the union of all derivatives.
+ fn union(&mut self, der_iter: Self::DerIter) -> Result<Vec<(Edge, usize)>, Self::Error> {
+ // REVIEW: Find a way to avoid allocations.
+ Collapsed::<_, Self>::collapse(der_iter, self).collect()
+ }
+
+ /// Use chain rule to compute the derivative with respect to a
+ /// terminal.
+ fn chain(&mut self, t: usize, pos: usize) -> Result<(), Self::Error> {
+ let der_iter = self.derive(t, pos)?;
+
+ let edges = self.union(der_iter)?;
+
+ let new_index = self.extend(edges.into_iter())?;
+
+ self.update_history(new_index);
+
+ Ok(())
+ }
}
pub mod default;
diff --git a/chain/src/plan.org b/chain/src/plan.org
index 84192f0..c2a9037 100644
--- a/chain/src/plan.org
+++ b/chain/src/plan.org
@@ -88,13 +88,15 @@
+ [X] Define a trait with the expected behaviours.
+ [X] Implement them as parents-knowing graphs.
+ [X] Test
-- [-] Implement languages. [1/4]
+- [-] Implement languages. [4/6]
+ [X] First define a trait with the expected behaviours.
- + [ ] Then implement them as doubly labelled graphs.
- + [ ] Then implement finding derivatives by use of the chain rule.
- + [ ] Each edge in the chain-rule machine needs to be labelled also
+ + [X] Then implement them as doubly labelled graphs.
+ + [X] Each edge in the chain-rule machine needs to be labelled also
with a position in the forest. This perfectly solves the problem
of those "plugs"!
+ + [X] Then implement finding derivatives by use of the chain rule.
+ + [ ] Handle Forests
+ + [-] Tests
- [ ] Implement semiring. [0/5]
+ [ ] Define the trait.
+ [ ] Implement the boolean semiring.