From 18d7955b7d84c00467ede38baae53f4ce1fb6908 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Fri, 20 Jan 2023 13:48:26 +0800 Subject: chain: a prototype is added. I have an ostensibly working prototype now. Further tests are needed to make sure that the algorithm meets the time complexity requirement, though. --- chain/Cargo.toml | 4 + chain/src/atom/default.rs | 214 ++++++++++++- chain/src/atom/mod.rs | 9 +- chain/src/default.rs | 784 +++++++++++++++++++++++++++++++++++++++++++++- chain/src/lib.rs | 194 ++++++++++-- chain/src/plan.org | 10 +- 6 files changed, 1164 insertions(+), 51 deletions(-) (limited to 'chain') diff --git a/chain/Cargo.toml b/chain/Cargo.toml index 265954c..c55586d 100644 --- a/chain/Cargo.toml +++ b/chain/Cargo.toml @@ -5,6 +5,10 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[features] +default = [] +test-print-viz = [] + [dependencies] nfa = { path = "../nfa" } graph = { path = "../graph" } diff --git a/chain/src/atom/default.rs b/chain/src/atom/default.rs index 90133f4..0dc24c3 100644 --- a/chain/src/atom/default.rs +++ b/chain/src/atom/default.rs @@ -6,7 +6,7 @@ use grammar::Grammar; use graph::{error::Error as GraphError, Graph, LabelExtGraph, LabelGraph}; use nfa::{ default::{nfa::DefaultNFA, regex::DefaultRegex}, - LabelType, + LabelType, NfaLabel, }; use core::fmt::Display; @@ -39,11 +39,55 @@ type VirtualMap = Map; pub struct DefaultAtom { grammar: Grammar, nfa: DefaultNFA>, + accepting_vec: Vec, // NOTE: This is mostly for printing and debugging regexp: Vec>, virtual_nodes: VirtualMap, } +impl DefaultAtom { + /// Return the string description of a rule position. + pub fn rule_pos_string(&self, pos: usize) -> Result> { + let rule_num = self.grammar.get_rule_num(pos)?; + + assert!(rule_num < self.grammar.non_num()); + + let display_tnt = |tnt| self.name_of_tnt(tnt).unwrap_or_else(|e| format!("{e}")); + + Ok(self.regexp.get(rule_num).unwrap().to_string_with_dot( + display_tnt, + if rule_num == 0 { + pos + } else { + pos - self.grammar.nth_accumulator(rule_num - 1)? + }, + )?) + } + + /// Print the underlying NFA. + pub fn print_nfa>(&self, filename: S) -> Result<(), std::io::Error> { + self.nfa.print_viz(filename.as_ref())?; + + let nullables: Vec<_> = self + .accepting_vec + .iter() + .enumerate() + .filter_map(|(index, pred)| if *pred { Some(index) } else { None }) + .collect(); + + if !nullables.is_empty() { + println!("nullables: {nullables:?}"); + } + + println!("printing virtual nodes:"); + for (vn, node) in self.virtual_nodes.iter() { + println!("[{}]^{{({})}}: {}", vn.s, vn.t, node); + } + + Ok(()) + } +} + impl Display for DefaultAtom { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let grammar = &self.grammar; @@ -260,15 +304,67 @@ impl DefaultAtom { .filter(|n| matches!(grammar.is_nullable(*n), Ok(true))) .collect(); + // Now record accepting information. + + let nfa_len = nfa.nodes_len(); + + let label_is_nullable = |label: NfaLabel>| { + if let Some(label) = *label.get_value() { + matches!(label, TNT::Non(n) if nullables.contains(&n)) + } else { + true + } + }; + + let mut accepting_vec: Vec = std::iter::repeat(false).take(nfa_len).collect(); + + for nfa_start in accumulators.iter().copied().take(regexp.len()) { + *accepting_vec.get_mut(nfa_start + 1).unwrap() = true; + } + + // The last is always accepting. + *accepting_vec.get_mut(nfa_len - 1).unwrap() = true; + + let mut updated = true; + + while updated { + updated = false; + + for node in nfa.nodes() { + // skip those that do not need to be updated + if *accepting_vec + .get(node) + .ok_or(GrammarError::IndexOutOfBounds(node, nfa_len))? + { + continue; + } + + 'label_loop: for (label, target_iter) in nfa + .labels_of(node) + .map_err(|_| GrammarError::IndexOutOfBounds(node, nfa_len))? + { + if label_is_nullable(*label) { + for target in target_iter { + if *accepting_vec + .get(target) + .ok_or(GrammarError::IndexOutOfBounds(target, nfa_len))? + { + // accepting_vec[node] must have been + // false, as we checked above + *accepting_vec.get_mut(node).unwrap() = true; + updated = true; + + break 'label_loop; + } + } + } + } + } + } + // Perform nulling and remove_epsilon at the same time. nfa.closure( - |label| { - if let Some(label) = *label.get_value() { - matches!(label, TNT::Non(n) if nullables.contains(&n)) - } else { - true - } - }, + label_is_nullable, true, |two_edges| grammar.transform_label_null_epsilon(two_edges), |label| label.get_value().is_none(), @@ -298,6 +394,8 @@ impl DefaultAtom { } } + // dbg!(&accumulators); + for nt in 0..nt_num { let children: std::collections::HashMap<_, _> = nfa // This is safe because of the above assertion. @@ -306,23 +404,92 @@ impl DefaultAtom { .map(|(label, target_iter)| (*label, target_iter)) .collect(); - let mut terminals_map: HashMap, usize)>> = + let mut terminals_map: HashMap, usize)>, bool)> = HashMap::new(); for (label, children_iter) in children.into_iter() { if let Some(TNT::Ter(t)) = *label.get_value() { - terminals_map - .entry(t) - .or_insert_with(|| HashSet::with_capacity(children_iter.len())) - .extend(children_iter.map(|target| (label, target))); + let estimated_len = { + let mut result = 0; + + for child in children_iter.clone() { + result += nfa.degree(child).map_err(index_out_of_bounds_conversion)?; + } + + result + }; + + let mut accepting = false; + + for child in children_iter { + accepting = accepting + || *accepting_vec.get(child).ok_or_else(|| { + GrammarError::IndexOutOfBounds(child, accepting_vec.len()) + })?; + + if nt == 3 + && nfa.degree(child).map_err(index_out_of_bounds_conversion)? == 0 + { + println!("accepting = {accepting}"); + } + + if let Some((_, old_accepting)) = terminals_map.get_mut(&t) { + *old_accepting = *old_accepting || accepting; + } else { + terminals_map + .insert(t, (HashSet::with_capacity(estimated_len), accepting)); + } + + for (child_label, child_children_iter) in nfa + .labels_of(child) + .map_err(index_out_of_bounds_conversion)? + { + // We checked this is safe above. + let (set, _) = terminals_map.get_mut(&t).unwrap(); + + set.extend(child_children_iter.map(|target| (*child_label, target))); + } + } } } - for (t, set) in terminals_map.into_iter() { + if nt == 3 { + println!("map = {terminals_map:?}"); + } + + for (t, (set, accepting)) in terminals_map.into_iter() { let new_index = nfa .extend(set.into_iter()) .map_err(index_out_of_bounds_conversion)?; + if accepting_vec.get(new_index).is_none() { + #[cfg(debug_assertions)] + assert_eq!(new_index, accepting_vec.len()); + + // let mut updated = false; + // let nfa_len = nfa.nodes_len(); + + // 'label_loop: for (label, target_iter) in nfa + // .labels_of(new_index) + // .map_err(|_| GrammarError::IndexOutOfBounds(new_index, nfa_len))? + // { + // if label_is_nullable(*label) { + // for target in target_iter { + // if *accepting_vec + // .get(target) + // .ok_or(GrammarError::IndexOutOfBounds(target, nfa_len))? + // { + // updated = true; + + // break 'label_loop; + // } + // } + // } + // } + + accepting_vec.push(accepting); + } + let virtual_node = VirtualNode::new(nt, t); virtual_nodes.insert(virtual_node, new_index); @@ -334,6 +501,7 @@ impl DefaultAtom { nfa, regexp, virtual_nodes, + accepting_vec, }) } } @@ -343,6 +511,14 @@ fn query(map: &VirtualMap, nt: usize, t: usize) -> Option { map.get(&VirtualNode::new(nt, t)).copied() } +impl std::ops::Deref for DefaultAtom { + type Target = Grammar; + + fn deref(&self) -> &Self::Target { + &self.grammar + } +} + impl Atom for DefaultAtom { fn atom(&self, nt: usize, t: usize) -> Result, GrammarError> { if nt >= self.grammar.non_num() { @@ -359,4 +535,14 @@ impl Atom for DefaultAtom { fn empty(&self) -> usize { self.grammar.total() << 1 } + + fn is_accepting(&self, node_id: usize) -> Result { + self.accepting_vec + .get(node_id) + .copied() + .ok_or(GrammarError::IndexOutOfBounds( + node_id, + self.accepting_vec.len(), + )) + } } diff --git a/chain/src/atom/mod.rs b/chain/src/atom/mod.rs index 065640b..398edd2 100644 --- a/chain/src/atom/mod.rs +++ b/chain/src/atom/mod.rs @@ -6,17 +6,22 @@ //! Because this way I can easily substitute other implementations if //! I have better ideas in the future. -use grammar::{Error as GrammarError, TNT}; +use grammar::{Error as GrammarError, Grammar, TNT}; use nfa::{DOption, LabelType, Nfa}; +use std::ops::Deref; + /// The expected behaviours of an atomic language. -pub trait Atom: Nfa> { +pub trait Atom: Nfa> + Deref { /// Return the index of a node representing the derivative of the /// left-linear null closure of `nt` with respect to `t`. fn atom(&self, nt: usize, t: usize) -> Result, GrammarError>; /// Return the index of the empty state. fn empty(&self) -> usize; + + /// Tell whether a node is accepting. + fn is_accepting(&self, node_id: usize) -> Result; } pub mod default; diff --git a/chain/src/default.rs b/chain/src/default.rs index e04be9f..697b997 100644 --- a/chain/src/default.rs +++ b/chain/src/default.rs @@ -6,18 +6,47 @@ //! modular design makes that easy. use super::*; +use crate::atom::{Atom, DefaultAtom}; use core::fmt::Display; +use forest::{default::DefaultForest, Forest}; +use grammar::{Error as GrammarError, GrammarLabel, GrammarLabelType, TNT}; +#[allow(unused_imports)] +use graph::{ + labelled::DLGBuilder, Builder, DLGraph, Graph, LabelExtGraph, LabelGraph, ParentsGraph, +}; + +use std::collections::{HashMap as Map, TryReserveError}; /// The errors related to taking derivatives by chain rule. +#[non_exhaustive] #[derive(Debug)] pub enum Error { + /// General error for indices out of bounds. + IndexOutOfBounds(usize, usize), + /// The forest encounters a duplicate node, for some reason. + DuplicateNode(usize), + /// The chain rule machine encounters a duplicate edge, for some + /// reason. + DuplicateEdge(usize, usize), + /// A node has no labels while it is required to have one. + NodeNoLabel(usize), + /// Reserving memory fails. + CannotReserve(TryReserveError), /// An invalid situation happens. Invalid, } impl Display for Error { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { match self { + Self::IndexOutOfBounds(index, bound) => write!(f, "index {index} out of bound {bound}"), + Self::DuplicateNode(n) => write!(f, "the forest has a node {n} with a duplicate label"), + Self::DuplicateEdge(source, target) => write!( + f, + "the forest has a duplicate edge from {source} to {target}" + ), + Self::NodeNoLabel(n) => write!(f, "node {n} has no labels while it should have one"), + Self::CannotReserve(e) => write!(f, "cannot reserve memory: {e}"), Self::Invalid => write!(f, "invalid"), } } @@ -25,22 +54,761 @@ impl Display for Error { impl std::error::Error for Error {} +impl From for Error { + fn from(value: GError) -> Self { + match value { + GError::IndexOutOfBounds(index, bound) => Self::IndexOutOfBounds(index, bound), + GError::DuplicatedNode(n) => Self::DuplicateNode(n), + GError::DuplicatedEdge(source, target) => Self::DuplicateEdge(source, target), + _ => Self::Invalid, + } + } +} + +impl From for Error { + fn from(e: ForestError) -> Self { + match e { + ForestError::IndexOutOfBounds(index, bound) => Error::IndexOutOfBounds(index, bound), + ForestError::DuplicatedNode(n) => Error::DuplicateNode(n), + ForestError::InvalidGraphError(ge) => ge.into(), + ForestError::NodeNoLabel(n) => Error::NodeNoLabel(n), + } + } +} + +impl From for Error { + fn from(value: TryReserveError) -> Self { + Self::CannotReserve(value) + } +} + +/// The type of an index into an element in [`DerIter`]. +#[derive(Debug, Copy, Clone)] +enum DerIterIndex { + Single(usize), + Map(usize), +} + +impl Default for DerIterIndex { + fn default() -> Self { + Self::Map(0) + } +} + +/// A complex type used for storing values of edges with two layers. +type SecondTypeValue = (Parent, bool, Vec<(Edge, usize)>); + +/// An iterator of TwoLayers. +#[derive(Debug, Default)] +pub struct DerIter { + /// Stores edges of only one layer. + singles: Vec<(Edge, usize)>, + /// Stores edges with two layers. They are grouped by their + /// labels of the second layer. + /// + /// The values are tuples (forest_source, accepting, edges), where + /// the edges are the grouped edges of the first layer and the + /// destination. + seconds: Map, + /// We want to iterate the elements of the map, for which purpose + /// we need an array. Since hashmaps provide no arrays, we keep + /// an array of keys for iteration purposes. + second_array: Vec, + /// The index of the current element, either in `second_array` or + /// in `singles` . + index: DerIterIndex, +} + +impl DerIter { + fn add_second_layer( + &mut self, + label: usize, + forest_source: Parent, + accepting: bool, + edges: Vec<(Edge, usize)>, + ) { + if let Some((_, _, vec)) = self.seconds.get_mut(&label) { + vec.extend(edges); + } else { + self.seconds + .insert(label, (forest_source, accepting, edges)); + + self.second_array.push(label); + } + } +} + +impl Iterator for DerIter { + type Item = TwoLayers; + + fn next(&mut self) -> Option { + // We iterate through two layered edges first. + match self.index { + DerIterIndex::Map(index) => { + if let Some(key) = self.second_array.get(index) { + if let Some((forest_source, accepting, edges)) = self.seconds.remove(key) { + self.index = DerIterIndex::Map(index + 1); + + Some(TwoLayers::Two(*key, forest_source, accepting, edges)) + } else { + // this should not happen + println!("a key does not exist in the hashmap: something is wrong when taking derivatives"); + None + } + } else { + self.index = DerIterIndex::Single(0); + + if let Some((edge, to)) = self.singles.first() { + self.index = DerIterIndex::Single(1); + + Some(TwoLayers::One(*edge, *to)) + } else { + None + } + } + } + DerIterIndex::Single(index) => { + if let Some((edge, to)) = self.singles.get(index) { + self.index = DerIterIndex::Single(index + 1); + + Some(TwoLayers::One(*edge, *to)) + } else { + None + } + } + } + } +} + /// A default implementation for the [`Chain`] trait. #[derive(Debug, Clone, Default)] -pub struct DefaultChain {} +pub struct DefaultChain { + graph: DLGraph, + atom: DefaultAtom, + current: usize, + history: Vec, + forest: DefaultForest, + accepting_vec: Vec, +} + +impl DefaultChain { + /// Return the current node. + #[inline] + pub fn current(&self) -> usize { + self.current + } + + /// Return the complete slice of histories. + #[inline] + pub fn history(&self) -> &[usize] { + self.history.as_ref() + } + + /// Return a reference to the associated forest. + #[inline] + pub fn forest(&self) -> &DefaultForest { + &self.forest + } + + /// Print the rule positions of the labels. + pub fn print_rule_positions(&self) -> Result<(), Box> { + let mut labels = std::collections::HashSet::::default(); + + for node in 0..self.graph.nodes_len() { + labels.extend(self.graph.labels_of(node)?.map(|(label, _)| label.label)); + } + + for label in labels.into_iter() { + println!("{}", self.atom.rule_pos_string(label)?); + } + + Ok(()) + } +} + +impl Graph for DefaultChain { + type Iter<'a> = as Graph>::Iter<'a> + where + Self: 'a; + + #[inline] + fn is_empty(&self) -> bool { + self.graph.is_empty() + } + + #[inline] + fn nodes_len(&self) -> usize { + self.graph.nodes_len() + } + + #[inline] + fn edges_len(&self) -> Option { + self.graph.edges_len() + } + + #[inline] + fn children_of(&self, node_id: usize) -> Result, GError> { + self.graph.children_of(node_id) + } + + #[inline] + fn degree(&self, node_id: usize) -> Result { + self.graph.degree(node_id) + } + + #[inline] + fn is_empty_node(&self, node_id: usize) -> Result { + self.graph.is_empty_node(node_id) + } + + #[inline] + fn has_edge(&self, source: usize, target: usize) -> Result { + self.graph.has_edge(source, target) + } + + fn replace_by_builder(&mut self, _builder: impl graph::Builder) { + unimplemented!("I shall refactor this") + } +} + +impl LabelGraph for DefaultChain { + type Iter<'a> = as LabelGraph>::Iter<'a> + where + Self: 'a; + + type LabelIter<'a> = as LabelGraph>::LabelIter<'a> + where + Self: 'a, + Edge: 'a; + + type EdgeLabelIter<'a> = as LabelGraph>::EdgeLabelIter<'a> + where + Self: 'a, + Edge: 'a; + + #[inline] + fn edge_label(&self, source: usize, target: usize) -> Result, GError> { + self.graph.edge_label(source, target) + } + + #[inline] + fn find_children_with_label( + &self, + node_id: usize, + label: &Edge, + ) -> Result<>::Iter<'_>, GError> { + self.graph.find_children_with_label(node_id, label) + } + + #[inline] + fn labels_of(&self, node_id: usize) -> Result, GError> { + self.graph.labels_of(node_id) + } + + #[inline] + fn has_edge_label(&self, node_id: usize, label: &Edge, target: usize) -> Result { + self.graph.has_edge_label(node_id, label, target) + } +} + +impl LabelExtGraph for DefaultChain { + #[inline] + fn extend(&mut self, edges: impl IntoIterator) -> Result { + let new = self.graph.extend(edges)?; + let accepting_len = self.accepting_vec.len(); + + if self.accepting_vec.get(new).is_none() { + // assert it can only grow by one node at a time. + #[cfg(debug_assertions)] + assert_eq!(new, accepting_len); + + let mut updated = false; + + for (label, child_iter) in self.graph.labels_of(new)? { + let old_accepting = { + let mut result = false; + for child in child_iter { + if *self + .accepting_vec + .get(child) + .ok_or(GError::IndexOutOfBounds(child, accepting_len))? + { + result = true; + break; + } + } + + result + }; + + if !old_accepting { + self.accepting_vec.push(false); + updated = true; + + break; + } + + if label.is_accepting() { + self.accepting_vec.push(true); + updated = true; + + break; + } + } + + if !updated { + self.accepting_vec.push(false); + } + } + + Ok(new) + } +} impl Chain for DefaultChain { type Error = Error; - fn unit() -> Self { - todo!() + type Atom = DefaultAtom; + + fn unit(atom: Self::Atom) -> Result { + let mut builder: DLGBuilder = Default::default(); + + let root = builder.add_vertex(); + let first = builder.add_vertex(); + + let empty_state = atom.empty(); + + let initial_nullable = atom + .is_nullable(0) + .map_err(|_| Error::IndexOutOfBounds(0, atom.non_num()))?; + + builder.add_edge( + first, + root, + Edge::new(empty_state, Parent::new(0, 0), initial_nullable), + )?; + + let graph = builder.build(); + + let forest = + DefaultForest::new_leaf(GrammarLabel::new(GrammarLabelType::TNT(TNT::Non(0)), 0)); + + #[cfg(debug_assertions)] + assert_eq!(forest.root(), Some(0)); + + let current = 1; + + let history = Vec::new(); + + let accepting_vec = vec![true, initial_nullable]; + + Ok(Self { + graph, + atom, + current, + history, + forest, + accepting_vec, + }) + } + + fn epsilon(&self) -> Result { + self.accepting_vec + .get(self.current) + .copied() + .ok_or(Error::IndexOutOfBounds( + self.current, + self.accepting_vec.len(), + )) + } + + fn update_history(&mut self, new: usize) { + debug_assert!(new < self.graph.nodes_len()); + + self.history.push(self.current); + + self.current = new; + } + + type DerIter = DerIter; + + fn derive(&mut self, t: usize, pos: usize) -> Result { + use TNT::*; + + /// Convert an error telling us that an index is out of bounds. + /// + /// # Panics + /// + /// The function panics if the error is not of the expected + /// kind. + fn index_out_of_bounds_conversion(ge: GrammarError) -> Error { + match ge { + GrammarError::IndexOutOfBounds(index, bound) => { + Error::IndexOutOfBounds(index, bound) + } + _ => panic!("wrong error kind"), + } + } + + /// A helper function to generate edges to join. + /// + /// It first checks if the base edge is accepting. If yes, + /// then pull in the children of the target. + /// + /// Then check if the label of the base edge has children. If + /// no, then do not add this base edge itself. + /// + /// The generated edges will be pushed to `output` directly, + /// to save some allocations. + // TODO: Handle forests as well. + fn generate_edges( + chain: &DefaultChain, + child_iter: impl Iterator + ExactSizeIterator + Clone, + atom_child_iter: impl Iterator + Clone, + mut output: impl AsMut>, + ) -> Result<(), Error> { + // First check the values from iterators are all valid. + let graph_len = chain.graph.nodes_len(); + let atom_len = chain.atom.nodes_len(); + + for child in child_iter.clone() { + if !chain.graph.has_node(child) { + return Err(Error::IndexOutOfBounds(child, graph_len)); + } + } + + for atom_child in atom_child_iter.clone() { + if !chain.atom.has_node(atom_child) { + return Err(Error::IndexOutOfBounds(atom_child, atom_len)); + } + } + + // From now on the nodes are all valid, so we can just + // call `unwrap`. + + // Then calculate the number of edges to append, to avoid + // repeated allocations + let mut num = 0usize; + + let child_iter_total_degree = child_iter + .clone() + .map(|child| chain.graph.degree(child).unwrap()) + .sum::(); + + for atom_child in atom_child_iter.clone() { + let atom_child_accepting = chain.atom.is_accepting(atom_child).unwrap(); + let atom_child_empty_node = chain.atom.is_empty_node(atom_child).unwrap(); + + if !atom_child_empty_node { + num += child_iter.len(); + } + + if atom_child_accepting { + num += child_iter_total_degree; + } + } + + let num = num; + + let output = output.as_mut(); + + output.try_reserve(num)?; + + // now push into output + + let parent = Parent::new(0, 0); + + for atom_child in atom_child_iter.clone() { + let atom_child_accepting = chain.atom.is_accepting(atom_child).unwrap(); + let atom_child_empty_node = chain.atom.is_empty_node(atom_child).unwrap(); + + let edge = Edge::new(atom_child, parent, atom_child_accepting); + + if !atom_child_empty_node { + output.extend(child_iter.clone().map(|child| (edge, child))); + } + + if atom_child_accepting { + for child in child_iter.clone() { + for (child_label, child_child) in chain.graph.labels_of(child).unwrap() { + output.extend(child_child.map(|target| (*child_label, target))); + } + } + } + } + + Ok(()) + } + + let mut der_iter = DerIter::default(); + + for (label, child_iter) in self.graph.labels_of(self.current)? { + for (atom_label, atom_child_iter) in self.atom.labels_of(label.label())? { + if atom_label.is_left_p() { + // We do not consider left-linearly expanded + // children in the first layer. + continue; + } + + match *atom_label.get_value() { + Some(Ter(ter)) if ter == t => { + generate_edges( + self, + child_iter.clone(), + atom_child_iter.clone(), + &mut der_iter.singles, + )?; + } + Some(Non(non)) => { + let virtual_node = self + .atom + .atom(non, t) + .map_err(index_out_of_bounds_conversion)?; + + if let Some(virtual_node) = virtual_node { + let accepting = self + .atom + .is_accepting(virtual_node) + .map_err(index_out_of_bounds_conversion)?; + + let mut new_edges = Vec::new(); + + generate_edges( + self, + child_iter.clone(), + atom_child_iter.clone(), + &mut new_edges, + )?; + + if accepting { + der_iter.singles.extend(new_edges.clone()); + } + + let parent = Parent::new(0, 0); + + if !self.atom.is_empty_node(virtual_node).unwrap() { + der_iter.add_second_layer( + virtual_node, + parent, + accepting, + new_edges, + ); + + // account for atom_children without + // children. + + for atom_child in atom_child_iter { + // this has been checked in + // `generate_edges` + if self.atom.is_empty_node(atom_child).unwrap() { + der_iter.singles.extend(child_iter.clone().map(|child| { + (Edge::new(virtual_node, parent, accepting), child) + })); + } + } + } else { + for atom_child in atom_child_iter { + // this has been checked in + // `generate_edges` + if self.atom.is_empty_node(atom_child).unwrap() { + // flat flat map, hmm... + der_iter.singles.extend(child_iter.clone().flat_map( + |child| { + self.graph.labels_of(child).unwrap().flat_map( + |(child_label, child_child_iter)| { + child_child_iter.map(|child_child| { + (*child_label, child_child) + }) + }, + ) + }, + )); + } + } + } + } + } + _ => { + continue; + } + } + } + } + + Ok(der_iter) + } +} + +#[cfg(test)] +mod test_der_iter { + use super::*; + + #[test] + fn test() -> Result<(), Box> { + let mut der_iter = DerIter::default(); + + let parent = Parent::new(0, 0); + + der_iter.singles.push((Edge::new(0, parent, true), 0)); + + der_iter.singles.push((Edge::new(1, parent, true), 0)); + + der_iter.singles.push((Edge::new(2, parent, true), 0)); + + der_iter.add_second_layer(3, parent, true, vec![(Edge::new(4, parent, true), 1)]); + + der_iter.add_second_layer(6, parent, true, vec![(Edge::new(5, parent, true), 1)]); + + // add an entry with a repeated label + der_iter.add_second_layer(3, parent, true, vec![(Edge::new(7, parent, true), 2)]); + + assert_eq!( + der_iter.next(), + Some(TwoLayers::Two( + 3, + parent, + true, + vec![ + (Edge::new(4, parent, true), 1), + (Edge::new(7, parent, true), 2) + ] + )) + ); + + assert_eq!( + der_iter.next(), + Some(TwoLayers::Two( + 6, + parent, + true, + vec![(Edge::new(5, parent, true), 1)] + )) + ); + + assert_eq!( + der_iter.next(), + Some(TwoLayers::One(Edge::new(0, parent, true), 0)) + ); + + assert_eq!( + der_iter.next(), + Some(TwoLayers::One(Edge::new(1, parent, true), 0)) + ); + + assert_eq!( + der_iter.next(), + Some(TwoLayers::One(Edge::new(2, parent, true), 0)) + ); + + assert_eq!(der_iter.next(), None); + assert_eq!(der_iter.next(), None); + + Ok(()) } +} + +#[cfg(test)] +mod test_chain { + use super::*; + use grammar::test_grammar_helper::*; + + #[test] + fn base_test() -> Result<(), Box> { + let grammar = new_notes_grammar()?; + + let atom = DefaultAtom::from_grammar(grammar)?; + + let mut chain = DefaultChain::unit(atom)?; + + chain.chain(3, 00)?; + chain.chain(1, 01)?; + chain.chain(2, 02)?; + chain.chain(2, 03)?; + chain.chain(2, 04)?; + chain.chain(0, 05)?; + chain.chain(5, 06)?; + chain.chain(1, 07)?; + chain.chain(6, 08)?; + chain.chain(6, 09)?; + chain.chain(6, 10)?; + chain.chain(0, 11)?; + chain.chain(0, 12)?; + + assert!(matches!(chain.epsilon(), Ok(true))); + + #[cfg(feature = "test-print-viz")] + { + chain.graph.print_viz("chain.gv")?; + chain.atom.print_nfa("nfa.gv")?; + } - fn chain(&mut self, _t: usize) { - todo!() + Ok(()) } - fn epsilon(&self) -> bool { - todo!() + #[test] + fn test_speed() -> Result<(), Box> { + let grammar = new_notes_grammar_no_regexp()?; + + println!("grammar: {grammar}"); + + let atom = DefaultAtom::from_grammar(grammar)?; + + let mut chain = DefaultChain::unit(atom)?; + + let input_template = vec![3, 1, 2, 2, 2, 0, 5, 1, 6, 6, 6, 0, 0]; + + let repeat_times = { + let mut result = 1; + + for arg in std::env::args() { + let parse_as_digit: Result = arg.parse(); + + if let Ok(parse_result) = parse_as_digit { + result = parse_result; + + break; + } + } + + result + }; + + println!("repeating {repeat_times} times"); + + let input = { + let mut result = Vec::with_capacity(input_template.len() * repeat_times); + + for _ in 0..repeat_times { + result.extend(input_template.iter().copied()); + } + + result + }; + + let start = std::time::Instant::now(); + + for (index, t) in input.iter().copied().enumerate() { + chain.chain(t, index)?; + } + + let elapsed = start.elapsed(); + + // assert!(matches!(chain.epsilon(), Ok(true))); + + dbg!(elapsed); + dbg!(chain.current()); + + println!("index: terminal, history"); + for (index, t) in input.iter().copied().enumerate().take(input.len() - 1) { + println!("{index}: {t}, {}", chain.history().get(index).unwrap()); + } + + #[cfg(feature = "test-print-viz")] + { + chain.graph.print_viz("chain.gv")?; + chain.atom.print_nfa("nfa.gv")?; + } + + Ok(()) } } diff --git a/chain/src/lib.rs b/chain/src/lib.rs index 4e21e1d..91c37f7 100644 --- a/chain/src/lib.rs +++ b/chain/src/lib.rs @@ -10,41 +10,189 @@ pub mod atom; -// TODO: Define errors. +use graph::{error::Error as GError, LabelExtGraph, Parent}; + +use forest::Error as ForestError; + +/// An edge in the Chain-Rule machine. +#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] +pub struct Edge { + /// The position in the atomic languages. + label: usize, + /// The source of the associated forest edge. + forest_source: Parent, + /// Whether or not this edge is "accepting". + accepting: bool, +} + +impl Edge { + /// Construct a new edge. + pub fn new(label: usize, forest_source: Parent, accepting: bool) -> Self { + Self { + label, + forest_source, + accepting, + } + } + + /// Return the label of the edge. + pub fn label(&self) -> usize { + self.label + } + + /// Tell whether or not the edge is accepting. + pub fn is_accepting(&self) -> bool { + self.accepting + } + + /// Return the associated forest edge of the edge. + pub fn forest_source(&self) -> Parent { + self.forest_source + } +} + +impl core::fmt::Display for Edge { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let label = self.label(); + let forest_source = self.forest_source(); + + write!( + f, + "edge label {label} with forest source {} and edge index {}", + forest_source.node(), + forest_source.edge() + ) + } +} + +/// Each derivation is a concatenation of two items, so there are two +/// layers. But some items have no children and are accepting, in +/// which case we just skip that item completely, for performance +/// reasons, and hence there could be only one layer as well. +/// +/// It might even happen that both layers have no children, in which +/// case we shall just put all previous edges here. +#[derive(Debug, Clone, Eq, PartialEq)] +pub enum TwoLayers { + /// One layer has no children. + One(Edge, usize), + // REVIEW: Maybe we do not need to store an edge in the forest: we + // only need the source of the edge? + /// Both layers have children. + /// + /// The first element is the label of the second layer, the second + /// the source of the associated forest edge of the second layer, + /// and the third is a list of edges, which are the common first + /// layers. + Two(usize, Parent, bool, Vec<(Edge, usize)>), +} + +/// The type of a collapsed iterator. +pub struct Collapsed<'a, I, C> +where + I: Iterator, + C: Chain, +{ + iter: I, + chain: &'a mut C, + stop: bool, +} + +impl<'a, I, C> Collapsed<'a, I, C> +where + I: Iterator, + C: Chain, +{ + /// Collapse an iterator. + pub fn collapse(iter: I, chain: &'a mut C) -> Self { + let stop = false; + + Self { iter, chain, stop } + } +} + +impl<'a, I, C> Iterator for Collapsed<'a, I, C> +where + I: Iterator, + C: Chain, +{ + type Item = Result<(Edge, usize), ::Error>; + + fn next(&mut self) -> Option { + if self.stop { + return None; + } + + if let Some(two_layer) = self.iter.next() { + match two_layer { + TwoLayers::One(edge, to) => Some(Ok((edge, to))), + TwoLayers::Two(label, forest_source, accepting, edges) => { + let new_index = match self.chain.extend(edges.into_iter()) { + Ok(new) => new, + Err(error) => { + // Prevent further iterations. + + return Some(Err(error.into())); + } + }; + + Some(Ok((Edge::new(label, forest_source, accepting), new_index))) + } + } + } else { + None + } + } +} /// The expected behaviours of a language which can take derivatives /// by chain rule. -pub trait Chain: Default { +pub trait Chain: LabelExtGraph { /// The implementations should choose a type to represent errors. - type Error: std::error::Error; + type Error: std::error::Error + From + From; + + /// A type of atomic languages that is chosen for this chain rule + /// machine. + type Atom: atom::Atom; /// Represents the language that is present after we parse the /// empty string, that is the initial configuration of the /// language. This may or may not be different from what /// `Default::default` gives. - fn unit() -> Self; - - /// Take the derivative by a terminal symbol. - /// - /// This takes care of the union and the prepending operations. - /// - /// # A little remark about the design - /// - /// I have thought to separate different operations (like the - /// union, the prepending, and single derivatives) and then define - /// a function to put everything together. But I think that - /// design is not convenient to use. Also, I really do not need - /// those operations other than to provide this derivative - /// operation, so why define them? And putting all things - /// together may reduce the number of bugs caused by wrong uses of - /// those component functions, and can reduce the amount of - /// documentation strings a library user needs to read, in order - /// to make use of this trait. So I ended up with this design. - fn chain(&mut self, t: usize); + fn unit(atom: Self::Atom) -> Result; /// Return true if and only if the language contains the empty /// string. - fn epsilon(&self) -> bool; + fn epsilon(&self) -> Result; + + /// Update the history + fn update_history(&mut self, new: usize); + + /// An iterator that iterates all layers that need to be merged. + type DerIter: Iterator; + + /// Take the derivative by a terminal symbol at position `POS`. + fn derive(&mut self, t: usize, pos: usize) -> Result; + + /// Take the union of all derivatives. + fn union(&mut self, der_iter: Self::DerIter) -> Result, Self::Error> { + // REVIEW: Find a way to avoid allocations. + Collapsed::<_, Self>::collapse(der_iter, self).collect() + } + + /// Use chain rule to compute the derivative with respect to a + /// terminal. + fn chain(&mut self, t: usize, pos: usize) -> Result<(), Self::Error> { + let der_iter = self.derive(t, pos)?; + + let edges = self.union(der_iter)?; + + let new_index = self.extend(edges.into_iter())?; + + self.update_history(new_index); + + Ok(()) + } } pub mod default; diff --git a/chain/src/plan.org b/chain/src/plan.org index 84192f0..c2a9037 100644 --- a/chain/src/plan.org +++ b/chain/src/plan.org @@ -88,13 +88,15 @@ + [X] Define a trait with the expected behaviours. + [X] Implement them as parents-knowing graphs. + [X] Test -- [-] Implement languages. [1/4] +- [-] Implement languages. [4/6] + [X] First define a trait with the expected behaviours. - + [ ] Then implement them as doubly labelled graphs. - + [ ] Then implement finding derivatives by use of the chain rule. - + [ ] Each edge in the chain-rule machine needs to be labelled also + + [X] Then implement them as doubly labelled graphs. + + [X] Each edge in the chain-rule machine needs to be labelled also with a position in the forest. This perfectly solves the problem of those "plugs"! + + [X] Then implement finding derivatives by use of the chain rule. + + [ ] Handle Forests + + [-] Tests - [ ] Implement semiring. [0/5] + [ ] Define the trait. + [ ] Implement the boolean semiring. -- cgit v1.2.3-18-g5258