summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2023-01-20 13:48:26 +0800
committerJSDurand <mmemmew@gmail.com>2023-01-20 13:48:26 +0800
commit18d7955b7d84c00467ede38baae53f4ce1fb6908 (patch)
tree97d0746b82816a21d980636e50f8cdbeb804b518
parent8f8d3d1a3c276be4be2e5d2e767ada564c47279a (diff)
chain: a prototype is added.
I have an ostensibly working prototype now. Further tests are needed to make sure that the algorithm meets the time complexity requirement, though.
-rw-r--r--ChangeLog10
-rw-r--r--chain/Cargo.toml4
-rw-r--r--chain/src/atom/default.rs214
-rw-r--r--chain/src/atom/mod.rs9
-rw-r--r--chain/src/default.rs784
-rw-r--r--chain/src/lib.rs194
-rw-r--r--chain/src/plan.org10
-rw-r--r--forest/src/default.rs78
-rw-r--r--forest/src/design.org3
-rw-r--r--forest/src/lib.rs3
-rw-r--r--grammar/src/label.rs124
-rw-r--r--grammar/src/left_closure.rs6
-rw-r--r--grammar/src/lib.rs83
-rw-r--r--grammar/src/test_grammar_helper.rs128
-rw-r--r--graph/src/error.rs3
-rw-r--r--graph/src/labelled/binary.rs44
-rw-r--r--graph/src/labelled/double.rs14
-rw-r--r--graph/src/lib.rs10
-rw-r--r--graph_macro/src/lib.rs9
-rw-r--r--nfa/src/default/nfa.rs2
-rw-r--r--nfa/src/default/regex.rs141
-rw-r--r--nfa/src/lib.rs4
22 files changed, 1770 insertions, 107 deletions
diff --git a/ChangeLog b/ChangeLog
index a064c8c..ac1dfdd 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2023-01-20 Jean Sévère Durand <durand@jsdurand.xyz>
+
+ * chain: A prototype is added, and passes some tests. But I am
+ still testing if its performance meets the time complexity
+ requirement.
+
+2023-01-13 Jean Sévère Durand <durand@jsdurand.xyz>
+
+ * forest: A prototype is completed, and passes some tests.
+
2022-11-15 Jean Sévère Durand <durand@jsdurand.xyz>
* nfa: Stop worrying about monadic anamorphisms.
diff --git a/chain/Cargo.toml b/chain/Cargo.toml
index 265954c..c55586d 100644
--- a/chain/Cargo.toml
+++ b/chain/Cargo.toml
@@ -5,6 +5,10 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+[features]
+default = []
+test-print-viz = []
+
[dependencies]
nfa = { path = "../nfa" }
graph = { path = "../graph" }
diff --git a/chain/src/atom/default.rs b/chain/src/atom/default.rs
index 90133f4..0dc24c3 100644
--- a/chain/src/atom/default.rs
+++ b/chain/src/atom/default.rs
@@ -6,7 +6,7 @@ use grammar::Grammar;
use graph::{error::Error as GraphError, Graph, LabelExtGraph, LabelGraph};
use nfa::{
default::{nfa::DefaultNFA, regex::DefaultRegex},
- LabelType,
+ LabelType, NfaLabel,
};
use core::fmt::Display;
@@ -39,11 +39,55 @@ type VirtualMap = Map<VirtualNode, usize>;
pub struct DefaultAtom {
grammar: Grammar,
nfa: DefaultNFA<LabelType<TNT>>,
+ accepting_vec: Vec<bool>,
// NOTE: This is mostly for printing and debugging
regexp: Vec<DefaultRegex<TNT>>,
virtual_nodes: VirtualMap,
}
+impl DefaultAtom {
+ /// Return the string description of a rule position.
+ pub fn rule_pos_string(&self, pos: usize) -> Result<String, Box<dyn std::error::Error>> {
+ let rule_num = self.grammar.get_rule_num(pos)?;
+
+ assert!(rule_num < self.grammar.non_num());
+
+ let display_tnt = |tnt| self.name_of_tnt(tnt).unwrap_or_else(|e| format!("{e}"));
+
+ Ok(self.regexp.get(rule_num).unwrap().to_string_with_dot(
+ display_tnt,
+ if rule_num == 0 {
+ pos
+ } else {
+ pos - self.grammar.nth_accumulator(rule_num - 1)?
+ },
+ )?)
+ }
+
+ /// Print the underlying NFA.
+ pub fn print_nfa<S: AsRef<str>>(&self, filename: S) -> Result<(), std::io::Error> {
+ self.nfa.print_viz(filename.as_ref())?;
+
+ let nullables: Vec<_> = self
+ .accepting_vec
+ .iter()
+ .enumerate()
+ .filter_map(|(index, pred)| if *pred { Some(index) } else { None })
+ .collect();
+
+ if !nullables.is_empty() {
+ println!("nullables: {nullables:?}");
+ }
+
+ println!("printing virtual nodes:");
+ for (vn, node) in self.virtual_nodes.iter() {
+ println!("[{}]^{{({})}}: {}", vn.s, vn.t, node);
+ }
+
+ Ok(())
+ }
+}
+
impl Display for DefaultAtom {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let grammar = &self.grammar;
@@ -260,15 +304,67 @@ impl DefaultAtom {
.filter(|n| matches!(grammar.is_nullable(*n), Ok(true)))
.collect();
+ // Now record accepting information.
+
+ let nfa_len = nfa.nodes_len();
+
+ let label_is_nullable = |label: NfaLabel<DOption<TNT>>| {
+ if let Some(label) = *label.get_value() {
+ matches!(label, TNT::Non(n) if nullables.contains(&n))
+ } else {
+ true
+ }
+ };
+
+ let mut accepting_vec: Vec<bool> = std::iter::repeat(false).take(nfa_len).collect();
+
+ for nfa_start in accumulators.iter().copied().take(regexp.len()) {
+ *accepting_vec.get_mut(nfa_start + 1).unwrap() = true;
+ }
+
+ // The last is always accepting.
+ *accepting_vec.get_mut(nfa_len - 1).unwrap() = true;
+
+ let mut updated = true;
+
+ while updated {
+ updated = false;
+
+ for node in nfa.nodes() {
+ // skip those that do not need to be updated
+ if *accepting_vec
+ .get(node)
+ .ok_or(GrammarError::IndexOutOfBounds(node, nfa_len))?
+ {
+ continue;
+ }
+
+ 'label_loop: for (label, target_iter) in nfa
+ .labels_of(node)
+ .map_err(|_| GrammarError::IndexOutOfBounds(node, nfa_len))?
+ {
+ if label_is_nullable(*label) {
+ for target in target_iter {
+ if *accepting_vec
+ .get(target)
+ .ok_or(GrammarError::IndexOutOfBounds(target, nfa_len))?
+ {
+ // accepting_vec[node] must have been
+ // false, as we checked above
+ *accepting_vec.get_mut(node).unwrap() = true;
+ updated = true;
+
+ break 'label_loop;
+ }
+ }
+ }
+ }
+ }
+ }
+
// Perform nulling and remove_epsilon at the same time.
nfa.closure(
- |label| {
- if let Some(label) = *label.get_value() {
- matches!(label, TNT::Non(n) if nullables.contains(&n))
- } else {
- true
- }
- },
+ label_is_nullable,
true,
|two_edges| grammar.transform_label_null_epsilon(two_edges),
|label| label.get_value().is_none(),
@@ -298,6 +394,8 @@ impl DefaultAtom {
}
}
+ // dbg!(&accumulators);
+
for nt in 0..nt_num {
let children: std::collections::HashMap<_, _> = nfa
// This is safe because of the above assertion.
@@ -306,23 +404,92 @@ impl DefaultAtom {
.map(|(label, target_iter)| (*label, target_iter))
.collect();
- let mut terminals_map: HashMap<usize, HashSet<(LabelType<TNT>, usize)>> =
+ let mut terminals_map: HashMap<usize, (HashSet<(LabelType<TNT>, usize)>, bool)> =
HashMap::new();
for (label, children_iter) in children.into_iter() {
if let Some(TNT::Ter(t)) = *label.get_value() {
- terminals_map
- .entry(t)
- .or_insert_with(|| HashSet::with_capacity(children_iter.len()))
- .extend(children_iter.map(|target| (label, target)));
+ let estimated_len = {
+ let mut result = 0;
+
+ for child in children_iter.clone() {
+ result += nfa.degree(child).map_err(index_out_of_bounds_conversion)?;
+ }
+
+ result
+ };
+
+ let mut accepting = false;
+
+ for child in children_iter {
+ accepting = accepting
+ || *accepting_vec.get(child).ok_or_else(|| {
+ GrammarError::IndexOutOfBounds(child, accepting_vec.len())
+ })?;
+
+ if nt == 3
+ && nfa.degree(child).map_err(index_out_of_bounds_conversion)? == 0
+ {
+ println!("accepting = {accepting}");
+ }
+
+ if let Some((_, old_accepting)) = terminals_map.get_mut(&t) {
+ *old_accepting = *old_accepting || accepting;
+ } else {
+ terminals_map
+ .insert(t, (HashSet::with_capacity(estimated_len), accepting));
+ }
+
+ for (child_label, child_children_iter) in nfa
+ .labels_of(child)
+ .map_err(index_out_of_bounds_conversion)?
+ {
+ // We checked this is safe above.
+ let (set, _) = terminals_map.get_mut(&t).unwrap();
+
+ set.extend(child_children_iter.map(|target| (*child_label, target)));
+ }
+ }
}
}
- for (t, set) in terminals_map.into_iter() {
+ if nt == 3 {
+ println!("map = {terminals_map:?}");
+ }
+
+ for (t, (set, accepting)) in terminals_map.into_iter() {
let new_index = nfa
.extend(set.into_iter())
.map_err(index_out_of_bounds_conversion)?;
+ if accepting_vec.get(new_index).is_none() {
+ #[cfg(debug_assertions)]
+ assert_eq!(new_index, accepting_vec.len());
+
+ // let mut updated = false;
+ // let nfa_len = nfa.nodes_len();
+
+ // 'label_loop: for (label, target_iter) in nfa
+ // .labels_of(new_index)
+ // .map_err(|_| GrammarError::IndexOutOfBounds(new_index, nfa_len))?
+ // {
+ // if label_is_nullable(*label) {
+ // for target in target_iter {
+ // if *accepting_vec
+ // .get(target)
+ // .ok_or(GrammarError::IndexOutOfBounds(target, nfa_len))?
+ // {
+ // updated = true;
+
+ // break 'label_loop;
+ // }
+ // }
+ // }
+ // }
+
+ accepting_vec.push(accepting);
+ }
+
let virtual_node = VirtualNode::new(nt, t);
virtual_nodes.insert(virtual_node, new_index);
@@ -334,6 +501,7 @@ impl DefaultAtom {
nfa,
regexp,
virtual_nodes,
+ accepting_vec,
})
}
}
@@ -343,6 +511,14 @@ fn query(map: &VirtualMap, nt: usize, t: usize) -> Option<usize> {
map.get(&VirtualNode::new(nt, t)).copied()
}
+impl std::ops::Deref for DefaultAtom {
+ type Target = Grammar;
+
+ fn deref(&self) -> &Self::Target {
+ &self.grammar
+ }
+}
+
impl Atom for DefaultAtom {
fn atom(&self, nt: usize, t: usize) -> Result<Option<usize>, GrammarError> {
if nt >= self.grammar.non_num() {
@@ -359,4 +535,14 @@ impl Atom for DefaultAtom {
fn empty(&self) -> usize {
self.grammar.total() << 1
}
+
+ fn is_accepting(&self, node_id: usize) -> Result<bool, GrammarError> {
+ self.accepting_vec
+ .get(node_id)
+ .copied()
+ .ok_or(GrammarError::IndexOutOfBounds(
+ node_id,
+ self.accepting_vec.len(),
+ ))
+ }
}
diff --git a/chain/src/atom/mod.rs b/chain/src/atom/mod.rs
index 065640b..398edd2 100644
--- a/chain/src/atom/mod.rs
+++ b/chain/src/atom/mod.rs
@@ -6,17 +6,22 @@
//! Because this way I can easily substitute other implementations if
//! I have better ideas in the future.
-use grammar::{Error as GrammarError, TNT};
+use grammar::{Error as GrammarError, Grammar, TNT};
use nfa::{DOption, LabelType, Nfa};
+use std::ops::Deref;
+
/// The expected behaviours of an atomic language.
-pub trait Atom: Nfa<LabelType<TNT>> {
+pub trait Atom: Nfa<LabelType<TNT>> + Deref<Target = Grammar> {
/// Return the index of a node representing the derivative of the
/// left-linear null closure of `nt` with respect to `t`.
fn atom(&self, nt: usize, t: usize) -> Result<Option<usize>, GrammarError>;
/// Return the index of the empty state.
fn empty(&self) -> usize;
+
+ /// Tell whether a node is accepting.
+ fn is_accepting(&self, node_id: usize) -> Result<bool, GrammarError>;
}
pub mod default;
diff --git a/chain/src/default.rs b/chain/src/default.rs
index e04be9f..697b997 100644
--- a/chain/src/default.rs
+++ b/chain/src/default.rs
@@ -6,18 +6,47 @@
//! modular design makes that easy.
use super::*;
+use crate::atom::{Atom, DefaultAtom};
use core::fmt::Display;
+use forest::{default::DefaultForest, Forest};
+use grammar::{Error as GrammarError, GrammarLabel, GrammarLabelType, TNT};
+#[allow(unused_imports)]
+use graph::{
+ labelled::DLGBuilder, Builder, DLGraph, Graph, LabelExtGraph, LabelGraph, ParentsGraph,
+};
+
+use std::collections::{HashMap as Map, TryReserveError};
/// The errors related to taking derivatives by chain rule.
+#[non_exhaustive]
#[derive(Debug)]
pub enum Error {
+ /// General error for indices out of bounds.
+ IndexOutOfBounds(usize, usize),
+ /// The forest encounters a duplicate node, for some reason.
+ DuplicateNode(usize),
+ /// The chain rule machine encounters a duplicate edge, for some
+ /// reason.
+ DuplicateEdge(usize, usize),
+ /// A node has no labels while it is required to have one.
+ NodeNoLabel(usize),
+ /// Reserving memory fails.
+ CannotReserve(TryReserveError),
/// An invalid situation happens.
Invalid,
}
impl Display for Error {
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match self {
+ Self::IndexOutOfBounds(index, bound) => write!(f, "index {index} out of bound {bound}"),
+ Self::DuplicateNode(n) => write!(f, "the forest has a node {n} with a duplicate label"),
+ Self::DuplicateEdge(source, target) => write!(
+ f,
+ "the forest has a duplicate edge from {source} to {target}"
+ ),
+ Self::NodeNoLabel(n) => write!(f, "node {n} has no labels while it should have one"),
+ Self::CannotReserve(e) => write!(f, "cannot reserve memory: {e}"),
Self::Invalid => write!(f, "invalid"),
}
}
@@ -25,22 +54,761 @@ impl Display for Error {
impl std::error::Error for Error {}
+impl From<GError> for Error {
+ fn from(value: GError) -> Self {
+ match value {
+ GError::IndexOutOfBounds(index, bound) => Self::IndexOutOfBounds(index, bound),
+ GError::DuplicatedNode(n) => Self::DuplicateNode(n),
+ GError::DuplicatedEdge(source, target) => Self::DuplicateEdge(source, target),
+ _ => Self::Invalid,
+ }
+ }
+}
+
+impl From<ForestError> for Error {
+ fn from(e: ForestError) -> Self {
+ match e {
+ ForestError::IndexOutOfBounds(index, bound) => Error::IndexOutOfBounds(index, bound),
+ ForestError::DuplicatedNode(n) => Error::DuplicateNode(n),
+ ForestError::InvalidGraphError(ge) => ge.into(),
+ ForestError::NodeNoLabel(n) => Error::NodeNoLabel(n),
+ }
+ }
+}
+
+impl From<TryReserveError> for Error {
+ fn from(value: TryReserveError) -> Self {
+ Self::CannotReserve(value)
+ }
+}
+
+/// The type of an index into an element in [`DerIter`].
+#[derive(Debug, Copy, Clone)]
+enum DerIterIndex {
+ Single(usize),
+ Map(usize),
+}
+
+impl Default for DerIterIndex {
+ fn default() -> Self {
+ Self::Map(0)
+ }
+}
+
+/// A complex type used for storing values of edges with two layers.
+type SecondTypeValue = (Parent, bool, Vec<(Edge, usize)>);
+
+/// An iterator of TwoLayers.
+#[derive(Debug, Default)]
+pub struct DerIter {
+ /// Stores edges of only one layer.
+ singles: Vec<(Edge, usize)>,
+ /// Stores edges with two layers. They are grouped by their
+ /// labels of the second layer.
+ ///
+ /// The values are tuples (forest_source, accepting, edges), where
+ /// the edges are the grouped edges of the first layer and the
+ /// destination.
+ seconds: Map<usize, SecondTypeValue>,
+ /// We want to iterate the elements of the map, for which purpose
+ /// we need an array. Since hashmaps provide no arrays, we keep
+ /// an array of keys for iteration purposes.
+ second_array: Vec<usize>,
+ /// The index of the current element, either in `second_array` or
+ /// in `singles` .
+ index: DerIterIndex,
+}
+
+impl DerIter {
+ fn add_second_layer(
+ &mut self,
+ label: usize,
+ forest_source: Parent,
+ accepting: bool,
+ edges: Vec<(Edge, usize)>,
+ ) {
+ if let Some((_, _, vec)) = self.seconds.get_mut(&label) {
+ vec.extend(edges);
+ } else {
+ self.seconds
+ .insert(label, (forest_source, accepting, edges));
+
+ self.second_array.push(label);
+ }
+ }
+}
+
+impl Iterator for DerIter {
+ type Item = TwoLayers;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ // We iterate through two layered edges first.
+ match self.index {
+ DerIterIndex::Map(index) => {
+ if let Some(key) = self.second_array.get(index) {
+ if let Some((forest_source, accepting, edges)) = self.seconds.remove(key) {
+ self.index = DerIterIndex::Map(index + 1);
+
+ Some(TwoLayers::Two(*key, forest_source, accepting, edges))
+ } else {
+ // this should not happen
+ println!("a key does not exist in the hashmap: something is wrong when taking derivatives");
+ None
+ }
+ } else {
+ self.index = DerIterIndex::Single(0);
+
+ if let Some((edge, to)) = self.singles.first() {
+ self.index = DerIterIndex::Single(1);
+
+ Some(TwoLayers::One(*edge, *to))
+ } else {
+ None
+ }
+ }
+ }
+ DerIterIndex::Single(index) => {
+ if let Some((edge, to)) = self.singles.get(index) {
+ self.index = DerIterIndex::Single(index + 1);
+
+ Some(TwoLayers::One(*edge, *to))
+ } else {
+ None
+ }
+ }
+ }
+ }
+}
+
/// A default implementation for the [`Chain`] trait.
#[derive(Debug, Clone, Default)]
-pub struct DefaultChain {}
+pub struct DefaultChain {
+ graph: DLGraph<Edge>,
+ atom: DefaultAtom,
+ current: usize,
+ history: Vec<usize>,
+ forest: DefaultForest<GrammarLabel>,
+ accepting_vec: Vec<bool>,
+}
+
+impl DefaultChain {
+ /// Return the current node.
+ #[inline]
+ pub fn current(&self) -> usize {
+ self.current
+ }
+
+ /// Return the complete slice of histories.
+ #[inline]
+ pub fn history(&self) -> &[usize] {
+ self.history.as_ref()
+ }
+
+ /// Return a reference to the associated forest.
+ #[inline]
+ pub fn forest(&self) -> &DefaultForest<GrammarLabel> {
+ &self.forest
+ }
+
+ /// Print the rule positions of the labels.
+ pub fn print_rule_positions(&self) -> Result<(), Box<dyn std::error::Error>> {
+ let mut labels = std::collections::HashSet::<usize>::default();
+
+ for node in 0..self.graph.nodes_len() {
+ labels.extend(self.graph.labels_of(node)?.map(|(label, _)| label.label));
+ }
+
+ for label in labels.into_iter() {
+ println!("{}", self.atom.rule_pos_string(label)?);
+ }
+
+ Ok(())
+ }
+}
+
+impl Graph for DefaultChain {
+ type Iter<'a> = <DLGraph<Edge> as Graph>::Iter<'a>
+ where
+ Self: 'a;
+
+ #[inline]
+ fn is_empty(&self) -> bool {
+ self.graph.is_empty()
+ }
+
+ #[inline]
+ fn nodes_len(&self) -> usize {
+ self.graph.nodes_len()
+ }
+
+ #[inline]
+ fn edges_len(&self) -> Option<usize> {
+ self.graph.edges_len()
+ }
+
+ #[inline]
+ fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, GError> {
+ self.graph.children_of(node_id)
+ }
+
+ #[inline]
+ fn degree(&self, node_id: usize) -> Result<usize, GError> {
+ self.graph.degree(node_id)
+ }
+
+ #[inline]
+ fn is_empty_node(&self, node_id: usize) -> Result<bool, GError> {
+ self.graph.is_empty_node(node_id)
+ }
+
+ #[inline]
+ fn has_edge(&self, source: usize, target: usize) -> Result<bool, GError> {
+ self.graph.has_edge(source, target)
+ }
+
+ fn replace_by_builder(&mut self, _builder: impl graph::Builder<Result = Self>) {
+ unimplemented!("I shall refactor this")
+ }
+}
+
+impl LabelGraph<Edge> for DefaultChain {
+ type Iter<'a> = <DLGraph<Edge> as LabelGraph<Edge>>::Iter<'a>
+ where
+ Self: 'a;
+
+ type LabelIter<'a> = <DLGraph<Edge> as LabelGraph<Edge>>::LabelIter<'a>
+ where
+ Self: 'a,
+ Edge: 'a;
+
+ type EdgeLabelIter<'a> = <DLGraph<Edge> as LabelGraph<Edge>>::EdgeLabelIter<'a>
+ where
+ Self: 'a,
+ Edge: 'a;
+
+ #[inline]
+ fn edge_label(&self, source: usize, target: usize) -> Result<Self::EdgeLabelIter<'_>, GError> {
+ self.graph.edge_label(source, target)
+ }
+
+ #[inline]
+ fn find_children_with_label(
+ &self,
+ node_id: usize,
+ label: &Edge,
+ ) -> Result<<Self as LabelGraph<Edge>>::Iter<'_>, GError> {
+ self.graph.find_children_with_label(node_id, label)
+ }
+
+ #[inline]
+ fn labels_of(&self, node_id: usize) -> Result<Self::LabelIter<'_>, GError> {
+ self.graph.labels_of(node_id)
+ }
+
+ #[inline]
+ fn has_edge_label(&self, node_id: usize, label: &Edge, target: usize) -> Result<bool, GError> {
+ self.graph.has_edge_label(node_id, label, target)
+ }
+}
+
+impl LabelExtGraph<Edge> for DefaultChain {
+ #[inline]
+ fn extend(&mut self, edges: impl IntoIterator<Item = (Edge, usize)>) -> Result<usize, GError> {
+ let new = self.graph.extend(edges)?;
+ let accepting_len = self.accepting_vec.len();
+
+ if self.accepting_vec.get(new).is_none() {
+ // assert it can only grow by one node at a time.
+ #[cfg(debug_assertions)]
+ assert_eq!(new, accepting_len);
+
+ let mut updated = false;
+
+ for (label, child_iter) in self.graph.labels_of(new)? {
+ let old_accepting = {
+ let mut result = false;
+ for child in child_iter {
+ if *self
+ .accepting_vec
+ .get(child)
+ .ok_or(GError::IndexOutOfBounds(child, accepting_len))?
+ {
+ result = true;
+ break;
+ }
+ }
+
+ result
+ };
+
+ if !old_accepting {
+ self.accepting_vec.push(false);
+ updated = true;
+
+ break;
+ }
+
+ if label.is_accepting() {
+ self.accepting_vec.push(true);
+ updated = true;
+
+ break;
+ }
+ }
+
+ if !updated {
+ self.accepting_vec.push(false);
+ }
+ }
+
+ Ok(new)
+ }
+}
impl Chain for DefaultChain {
type Error = Error;
- fn unit() -> Self {
- todo!()
+ type Atom = DefaultAtom;
+
+ fn unit(atom: Self::Atom) -> Result<Self, Self::Error> {
+ let mut builder: DLGBuilder<Edge> = Default::default();
+
+ let root = builder.add_vertex();
+ let first = builder.add_vertex();
+
+ let empty_state = atom.empty();
+
+ let initial_nullable = atom
+ .is_nullable(0)
+ .map_err(|_| Error::IndexOutOfBounds(0, atom.non_num()))?;
+
+ builder.add_edge(
+ first,
+ root,
+ Edge::new(empty_state, Parent::new(0, 0), initial_nullable),
+ )?;
+
+ let graph = builder.build();
+
+ let forest =
+ DefaultForest::new_leaf(GrammarLabel::new(GrammarLabelType::TNT(TNT::Non(0)), 0));
+
+ #[cfg(debug_assertions)]
+ assert_eq!(forest.root(), Some(0));
+
+ let current = 1;
+
+ let history = Vec::new();
+
+ let accepting_vec = vec![true, initial_nullable];
+
+ Ok(Self {
+ graph,
+ atom,
+ current,
+ history,
+ forest,
+ accepting_vec,
+ })
+ }
+
+ fn epsilon(&self) -> Result<bool, Self::Error> {
+ self.accepting_vec
+ .get(self.current)
+ .copied()
+ .ok_or(Error::IndexOutOfBounds(
+ self.current,
+ self.accepting_vec.len(),
+ ))
+ }
+
+ fn update_history(&mut self, new: usize) {
+ debug_assert!(new < self.graph.nodes_len());
+
+ self.history.push(self.current);
+
+ self.current = new;
+ }
+
+ type DerIter = DerIter;
+
+ fn derive(&mut self, t: usize, pos: usize) -> Result<Self::DerIter, Self::Error> {
+ use TNT::*;
+
+ /// Convert an error telling us that an index is out of bounds.
+ ///
+ /// # Panics
+ ///
+ /// The function panics if the error is not of the expected
+ /// kind.
+ fn index_out_of_bounds_conversion(ge: GrammarError) -> Error {
+ match ge {
+ GrammarError::IndexOutOfBounds(index, bound) => {
+ Error::IndexOutOfBounds(index, bound)
+ }
+ _ => panic!("wrong error kind"),
+ }
+ }
+
+ /// A helper function to generate edges to join.
+ ///
+ /// It first checks if the base edge is accepting. If yes,
+ /// then pull in the children of the target.
+ ///
+ /// Then check if the label of the base edge has children. If
+ /// no, then do not add this base edge itself.
+ ///
+ /// The generated edges will be pushed to `output` directly,
+ /// to save some allocations.
+ // TODO: Handle forests as well.
+ fn generate_edges(
+ chain: &DefaultChain,
+ child_iter: impl Iterator<Item = usize> + ExactSizeIterator + Clone,
+ atom_child_iter: impl Iterator<Item = usize> + Clone,
+ mut output: impl AsMut<Vec<(Edge, usize)>>,
+ ) -> Result<(), Error> {
+ // First check the values from iterators are all valid.
+ let graph_len = chain.graph.nodes_len();
+ let atom_len = chain.atom.nodes_len();
+
+ for child in child_iter.clone() {
+ if !chain.graph.has_node(child) {
+ return Err(Error::IndexOutOfBounds(child, graph_len));
+ }
+ }
+
+ for atom_child in atom_child_iter.clone() {
+ if !chain.atom.has_node(atom_child) {
+ return Err(Error::IndexOutOfBounds(atom_child, atom_len));
+ }
+ }
+
+ // From now on the nodes are all valid, so we can just
+ // call `unwrap`.
+
+ // Then calculate the number of edges to append, to avoid
+ // repeated allocations
+ let mut num = 0usize;
+
+ let child_iter_total_degree = child_iter
+ .clone()
+ .map(|child| chain.graph.degree(child).unwrap())
+ .sum::<usize>();
+
+ for atom_child in atom_child_iter.clone() {
+ let atom_child_accepting = chain.atom.is_accepting(atom_child).unwrap();
+ let atom_child_empty_node = chain.atom.is_empty_node(atom_child).unwrap();
+
+ if !atom_child_empty_node {
+ num += child_iter.len();
+ }
+
+ if atom_child_accepting {
+ num += child_iter_total_degree;
+ }
+ }
+
+ let num = num;
+
+ let output = output.as_mut();
+
+ output.try_reserve(num)?;
+
+ // now push into output
+
+ let parent = Parent::new(0, 0);
+
+ for atom_child in atom_child_iter.clone() {
+ let atom_child_accepting = chain.atom.is_accepting(atom_child).unwrap();
+ let atom_child_empty_node = chain.atom.is_empty_node(atom_child).unwrap();
+
+ let edge = Edge::new(atom_child, parent, atom_child_accepting);
+
+ if !atom_child_empty_node {
+ output.extend(child_iter.clone().map(|child| (edge, child)));
+ }
+
+ if atom_child_accepting {
+ for child in child_iter.clone() {
+ for (child_label, child_child) in chain.graph.labels_of(child).unwrap() {
+ output.extend(child_child.map(|target| (*child_label, target)));
+ }
+ }
+ }
+ }
+
+ Ok(())
+ }
+
+ let mut der_iter = DerIter::default();
+
+ for (label, child_iter) in self.graph.labels_of(self.current)? {
+ for (atom_label, atom_child_iter) in self.atom.labels_of(label.label())? {
+ if atom_label.is_left_p() {
+ // We do not consider left-linearly expanded
+ // children in the first layer.
+ continue;
+ }
+
+ match *atom_label.get_value() {
+ Some(Ter(ter)) if ter == t => {
+ generate_edges(
+ self,
+ child_iter.clone(),
+ atom_child_iter.clone(),
+ &mut der_iter.singles,
+ )?;
+ }
+ Some(Non(non)) => {
+ let virtual_node = self
+ .atom
+ .atom(non, t)
+ .map_err(index_out_of_bounds_conversion)?;
+
+ if let Some(virtual_node) = virtual_node {
+ let accepting = self
+ .atom
+ .is_accepting(virtual_node)
+ .map_err(index_out_of_bounds_conversion)?;
+
+ let mut new_edges = Vec::new();
+
+ generate_edges(
+ self,
+ child_iter.clone(),
+ atom_child_iter.clone(),
+ &mut new_edges,
+ )?;
+
+ if accepting {
+ der_iter.singles.extend(new_edges.clone());
+ }
+
+ let parent = Parent::new(0, 0);
+
+ if !self.atom.is_empty_node(virtual_node).unwrap() {
+ der_iter.add_second_layer(
+ virtual_node,
+ parent,
+ accepting,
+ new_edges,
+ );
+
+ // account for atom_children without
+ // children.
+
+ for atom_child in atom_child_iter {
+ // this has been checked in
+ // `generate_edges`
+ if self.atom.is_empty_node(atom_child).unwrap() {
+ der_iter.singles.extend(child_iter.clone().map(|child| {
+ (Edge::new(virtual_node, parent, accepting), child)
+ }));
+ }
+ }
+ } else {
+ for atom_child in atom_child_iter {
+ // this has been checked in
+ // `generate_edges`
+ if self.atom.is_empty_node(atom_child).unwrap() {
+ // flat flat map, hmm...
+ der_iter.singles.extend(child_iter.clone().flat_map(
+ |child| {
+ self.graph.labels_of(child).unwrap().flat_map(
+ |(child_label, child_child_iter)| {
+ child_child_iter.map(|child_child| {
+ (*child_label, child_child)
+ })
+ },
+ )
+ },
+ ));
+ }
+ }
+ }
+ }
+ }
+ _ => {
+ continue;
+ }
+ }
+ }
+ }
+
+ Ok(der_iter)
+ }
+}
+
+#[cfg(test)]
+mod test_der_iter {
+ use super::*;
+
+ #[test]
+ fn test() -> Result<(), Box<dyn std::error::Error>> {
+ let mut der_iter = DerIter::default();
+
+ let parent = Parent::new(0, 0);
+
+ der_iter.singles.push((Edge::new(0, parent, true), 0));
+
+ der_iter.singles.push((Edge::new(1, parent, true), 0));
+
+ der_iter.singles.push((Edge::new(2, parent, true), 0));
+
+ der_iter.add_second_layer(3, parent, true, vec![(Edge::new(4, parent, true), 1)]);
+
+ der_iter.add_second_layer(6, parent, true, vec![(Edge::new(5, parent, true), 1)]);
+
+ // add an entry with a repeated label
+ der_iter.add_second_layer(3, parent, true, vec![(Edge::new(7, parent, true), 2)]);
+
+ assert_eq!(
+ der_iter.next(),
+ Some(TwoLayers::Two(
+ 3,
+ parent,
+ true,
+ vec![
+ (Edge::new(4, parent, true), 1),
+ (Edge::new(7, parent, true), 2)
+ ]
+ ))
+ );
+
+ assert_eq!(
+ der_iter.next(),
+ Some(TwoLayers::Two(
+ 6,
+ parent,
+ true,
+ vec![(Edge::new(5, parent, true), 1)]
+ ))
+ );
+
+ assert_eq!(
+ der_iter.next(),
+ Some(TwoLayers::One(Edge::new(0, parent, true), 0))
+ );
+
+ assert_eq!(
+ der_iter.next(),
+ Some(TwoLayers::One(Edge::new(1, parent, true), 0))
+ );
+
+ assert_eq!(
+ der_iter.next(),
+ Some(TwoLayers::One(Edge::new(2, parent, true), 0))
+ );
+
+ assert_eq!(der_iter.next(), None);
+ assert_eq!(der_iter.next(), None);
+
+ Ok(())
}
+}
+
+#[cfg(test)]
+mod test_chain {
+ use super::*;
+ use grammar::test_grammar_helper::*;
+
+ #[test]
+ fn base_test() -> Result<(), Box<dyn std::error::Error>> {
+ let grammar = new_notes_grammar()?;
+
+ let atom = DefaultAtom::from_grammar(grammar)?;
+
+ let mut chain = DefaultChain::unit(atom)?;
+
+ chain.chain(3, 00)?;
+ chain.chain(1, 01)?;
+ chain.chain(2, 02)?;
+ chain.chain(2, 03)?;
+ chain.chain(2, 04)?;
+ chain.chain(0, 05)?;
+ chain.chain(5, 06)?;
+ chain.chain(1, 07)?;
+ chain.chain(6, 08)?;
+ chain.chain(6, 09)?;
+ chain.chain(6, 10)?;
+ chain.chain(0, 11)?;
+ chain.chain(0, 12)?;
+
+ assert!(matches!(chain.epsilon(), Ok(true)));
+
+ #[cfg(feature = "test-print-viz")]
+ {
+ chain.graph.print_viz("chain.gv")?;
+ chain.atom.print_nfa("nfa.gv")?;
+ }
- fn chain(&mut self, _t: usize) {
- todo!()
+ Ok(())
}
- fn epsilon(&self) -> bool {
- todo!()
+ #[test]
+ fn test_speed() -> Result<(), Box<dyn std::error::Error>> {
+ let grammar = new_notes_grammar_no_regexp()?;
+
+ println!("grammar: {grammar}");
+
+ let atom = DefaultAtom::from_grammar(grammar)?;
+
+ let mut chain = DefaultChain::unit(atom)?;
+
+ let input_template = vec![3, 1, 2, 2, 2, 0, 5, 1, 6, 6, 6, 0, 0];
+
+ let repeat_times = {
+ let mut result = 1;
+
+ for arg in std::env::args() {
+ let parse_as_digit: Result<usize, _> = arg.parse();
+
+ if let Ok(parse_result) = parse_as_digit {
+ result = parse_result;
+
+ break;
+ }
+ }
+
+ result
+ };
+
+ println!("repeating {repeat_times} times");
+
+ let input = {
+ let mut result = Vec::with_capacity(input_template.len() * repeat_times);
+
+ for _ in 0..repeat_times {
+ result.extend(input_template.iter().copied());
+ }
+
+ result
+ };
+
+ let start = std::time::Instant::now();
+
+ for (index, t) in input.iter().copied().enumerate() {
+ chain.chain(t, index)?;
+ }
+
+ let elapsed = start.elapsed();
+
+ // assert!(matches!(chain.epsilon(), Ok(true)));
+
+ dbg!(elapsed);
+ dbg!(chain.current());
+
+ println!("index: terminal, history");
+ for (index, t) in input.iter().copied().enumerate().take(input.len() - 1) {
+ println!("{index}: {t}, {}", chain.history().get(index).unwrap());
+ }
+
+ #[cfg(feature = "test-print-viz")]
+ {
+ chain.graph.print_viz("chain.gv")?;
+ chain.atom.print_nfa("nfa.gv")?;
+ }
+
+ Ok(())
}
}
diff --git a/chain/src/lib.rs b/chain/src/lib.rs
index 4e21e1d..91c37f7 100644
--- a/chain/src/lib.rs
+++ b/chain/src/lib.rs
@@ -10,41 +10,189 @@
pub mod atom;
-// TODO: Define errors.
+use graph::{error::Error as GError, LabelExtGraph, Parent};
+
+use forest::Error as ForestError;
+
+/// An edge in the Chain-Rule machine.
+#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
+pub struct Edge {
+ /// The position in the atomic languages.
+ label: usize,
+ /// The source of the associated forest edge.
+ forest_source: Parent,
+ /// Whether or not this edge is "accepting".
+ accepting: bool,
+}
+
+impl Edge {
+ /// Construct a new edge.
+ pub fn new(label: usize, forest_source: Parent, accepting: bool) -> Self {
+ Self {
+ label,
+ forest_source,
+ accepting,
+ }
+ }
+
+ /// Return the label of the edge.
+ pub fn label(&self) -> usize {
+ self.label
+ }
+
+ /// Tell whether or not the edge is accepting.
+ pub fn is_accepting(&self) -> bool {
+ self.accepting
+ }
+
+ /// Return the associated forest edge of the edge.
+ pub fn forest_source(&self) -> Parent {
+ self.forest_source
+ }
+}
+
+impl core::fmt::Display for Edge {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ let label = self.label();
+ let forest_source = self.forest_source();
+
+ write!(
+ f,
+ "edge label {label} with forest source {} and edge index {}",
+ forest_source.node(),
+ forest_source.edge()
+ )
+ }
+}
+
+/// Each derivation is a concatenation of two items, so there are two
+/// layers. But some items have no children and are accepting, in
+/// which case we just skip that item completely, for performance
+/// reasons, and hence there could be only one layer as well.
+///
+/// It might even happen that both layers have no children, in which
+/// case we shall just put all previous edges here.
+#[derive(Debug, Clone, Eq, PartialEq)]
+pub enum TwoLayers {
+ /// One layer has no children.
+ One(Edge, usize),
+ // REVIEW: Maybe we do not need to store an edge in the forest: we
+ // only need the source of the edge?
+ /// Both layers have children.
+ ///
+ /// The first element is the label of the second layer, the second
+ /// the source of the associated forest edge of the second layer,
+ /// and the third is a list of edges, which are the common first
+ /// layers.
+ Two(usize, Parent, bool, Vec<(Edge, usize)>),
+}
+
+/// The type of a collapsed iterator.
+pub struct Collapsed<'a, I, C>
+where
+ I: Iterator<Item = TwoLayers>,
+ C: Chain<DerIter = I>,
+{
+ iter: I,
+ chain: &'a mut C,
+ stop: bool,
+}
+
+impl<'a, I, C> Collapsed<'a, I, C>
+where
+ I: Iterator<Item = TwoLayers>,
+ C: Chain<DerIter = I>,
+{
+ /// Collapse an iterator.
+ pub fn collapse(iter: I, chain: &'a mut C) -> Self {
+ let stop = false;
+
+ Self { iter, chain, stop }
+ }
+}
+
+impl<'a, I, C> Iterator for Collapsed<'a, I, C>
+where
+ I: Iterator<Item = TwoLayers>,
+ C: Chain<DerIter = I>,
+{
+ type Item = Result<(Edge, usize), <C as Chain>::Error>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ if self.stop {
+ return None;
+ }
+
+ if let Some(two_layer) = self.iter.next() {
+ match two_layer {
+ TwoLayers::One(edge, to) => Some(Ok((edge, to))),
+ TwoLayers::Two(label, forest_source, accepting, edges) => {
+ let new_index = match self.chain.extend(edges.into_iter()) {
+ Ok(new) => new,
+ Err(error) => {
+ // Prevent further iterations.
+
+ return Some(Err(error.into()));
+ }
+ };
+
+ Some(Ok((Edge::new(label, forest_source, accepting), new_index)))
+ }
+ }
+ } else {
+ None
+ }
+ }
+}
/// The expected behaviours of a language which can take derivatives
/// by chain rule.
-pub trait Chain: Default {
+pub trait Chain: LabelExtGraph<Edge> {
/// The implementations should choose a type to represent errors.
- type Error: std::error::Error;
+ type Error: std::error::Error + From<GError> + From<ForestError>;
+
+ /// A type of atomic languages that is chosen for this chain rule
+ /// machine.
+ type Atom: atom::Atom;
/// Represents the language that is present after we parse the
/// empty string, that is the initial configuration of the
/// language. This may or may not be different from what
/// `Default::default` gives.
- fn unit() -> Self;
-
- /// Take the derivative by a terminal symbol.
- ///
- /// This takes care of the union and the prepending operations.
- ///
- /// # A little remark about the design
- ///
- /// I have thought to separate different operations (like the
- /// union, the prepending, and single derivatives) and then define
- /// a function to put everything together. But I think that
- /// design is not convenient to use. Also, I really do not need
- /// those operations other than to provide this derivative
- /// operation, so why define them? And putting all things
- /// together may reduce the number of bugs caused by wrong uses of
- /// those component functions, and can reduce the amount of
- /// documentation strings a library user needs to read, in order
- /// to make use of this trait. So I ended up with this design.
- fn chain(&mut self, t: usize);
+ fn unit(atom: Self::Atom) -> Result<Self, Self::Error>;
/// Return true if and only if the language contains the empty
/// string.
- fn epsilon(&self) -> bool;
+ fn epsilon(&self) -> Result<bool, Self::Error>;
+
+ /// Update the history
+ fn update_history(&mut self, new: usize);
+
+ /// An iterator that iterates all layers that need to be merged.
+ type DerIter: Iterator<Item = TwoLayers>;
+
+ /// Take the derivative by a terminal symbol at position `POS`.
+ fn derive(&mut self, t: usize, pos: usize) -> Result<Self::DerIter, Self::Error>;
+
+ /// Take the union of all derivatives.
+ fn union(&mut self, der_iter: Self::DerIter) -> Result<Vec<(Edge, usize)>, Self::Error> {
+ // REVIEW: Find a way to avoid allocations.
+ Collapsed::<_, Self>::collapse(der_iter, self).collect()
+ }
+
+ /// Use chain rule to compute the derivative with respect to a
+ /// terminal.
+ fn chain(&mut self, t: usize, pos: usize) -> Result<(), Self::Error> {
+ let der_iter = self.derive(t, pos)?;
+
+ let edges = self.union(der_iter)?;
+
+ let new_index = self.extend(edges.into_iter())?;
+
+ self.update_history(new_index);
+
+ Ok(())
+ }
}
pub mod default;
diff --git a/chain/src/plan.org b/chain/src/plan.org
index 84192f0..c2a9037 100644
--- a/chain/src/plan.org
+++ b/chain/src/plan.org
@@ -88,13 +88,15 @@
+ [X] Define a trait with the expected behaviours.
+ [X] Implement them as parents-knowing graphs.
+ [X] Test
-- [-] Implement languages. [1/4]
+- [-] Implement languages. [4/6]
+ [X] First define a trait with the expected behaviours.
- + [ ] Then implement them as doubly labelled graphs.
- + [ ] Then implement finding derivatives by use of the chain rule.
- + [ ] Each edge in the chain-rule machine needs to be labelled also
+ + [X] Then implement them as doubly labelled graphs.
+ + [X] Each edge in the chain-rule machine needs to be labelled also
with a position in the forest. This perfectly solves the problem
of those "plugs"!
+ + [X] Then implement finding derivatives by use of the chain rule.
+ + [ ] Handle Forests
+ + [-] Tests
- [ ] Implement semiring. [0/5]
+ [ ] Define the trait.
+ [ ] Implement the boolean semiring.
diff --git a/forest/src/default.rs b/forest/src/default.rs
index d79c1c7..9295f1a 100644
--- a/forest/src/default.rs
+++ b/forest/src/default.rs
@@ -69,7 +69,7 @@ impl<T: GraphLabel> Default for DefaultForest<T> {
impl<T: GraphLabel> AsRef<DefaultForest<T>> for DefaultForest<T> {
fn as_ref(&self) -> &DefaultForest<T> {
- &self
+ self
}
}
@@ -123,9 +123,24 @@ impl<T: GraphLabel> ParentsGraph for DefaultForest<T> {
where
Self:'a;
+ #[inline]
fn parents_of(&self, node_id: usize) -> Result<<Self as ParentsGraph>::Iter<'_>, GError> {
self.graph.parents_of(node_id)
}
+
+ #[inline]
+ fn nth_child(&self, node_id: usize, n: usize) -> Result<usize, GError> {
+ self.graph.nth_child(node_id, n)
+ }
+
+ #[inline]
+ fn edge_to_parent(
+ &self,
+ source: usize,
+ target: usize,
+ ) -> Result<Option<graph::Parent>, GError> {
+ self.graph.edge_to_parent(source, target)
+ }
}
impl<T: GraphLabel> LabelGraph<T> for DefaultForest<T> {
@@ -252,7 +267,7 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<T> {
Ok(frag_stack.is_empty() && self_stack.is_empty())
}
- fn plant<F>(&mut self, node_id: usize, fragment: F) -> Result<(), Self::Error>
+ fn plant<F>(&mut self, node_id: usize, fragment: F, planted: bool) -> Result<(), Self::Error>
where
F: AsRef<Self>,
{
@@ -284,16 +299,6 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<T> {
let nodes_len = fragment.nodes_len();
- // First adjoin those nodes and join the edges later.
-
- for node in 0..nodes_len {
- let label = fragment
- .vertex_label(node)?
- .ok_or(Error::NodeNoLabel(node))?;
-
- builder.add_vertex(label);
- }
-
// If the fragment root has a duplicate label, the forest will
// not grow, so we use the label to find the adjoined node
// index.
@@ -313,6 +318,25 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<T> {
}
);
+ // If the fragment has been planted before, we just add an
+ // edge.
+
+ if planted {
+ builder.add_edge(node_id, conversion!(root), root_label)?;
+
+ return Ok(());
+ }
+
+ // First adjoin those nodes and join the edges later.
+
+ for node in 0..nodes_len {
+ let label = fragment
+ .vertex_label(node)?
+ .ok_or(Error::NodeNoLabel(node))?;
+
+ builder.add_vertex(label);
+ }
+
// Don't forget to join the new sub-forest to the original
// forest, at the specified position.
@@ -403,7 +427,7 @@ mod forest_test {
// add some child
- forest.plant(0, leaf!(1))?;
+ forest.plant(0, leaf!(1), false)?;
assert_eq!(forest.nodes_len(), 2);
let mut children = forest.children_of(0)?;
@@ -412,10 +436,10 @@ mod forest_test {
// add more children
- forest.plant(0, leaf!(2))?;
- forest.plant(0, leaf!(3))?;
- forest.plant(0, leaf!(4))?;
- forest.plant(2, leaf!(5))?;
+ forest.plant(0, leaf!(2), false)?;
+ forest.plant(0, leaf!(3), false)?;
+ forest.plant(0, leaf!(4), false)?;
+ forest.plant(2, leaf!(5), false)?;
assert_eq!(forest.nodes_len(), 6);
let mut children = forest.children_of(0)?;
@@ -428,32 +452,32 @@ mod forest_test {
assert_eq!(children.next(), None);
let mut test_forest = leaf!(0);
- test_forest.plant(0, leaf!(1))?;
- test_forest.plant(0, leaf!(2))?;
- test_forest.plant(0, leaf!(3))?;
- test_forest.plant(2, leaf!(5))?;
+ test_forest.plant(0, leaf!(1), false)?;
+ test_forest.plant(0, leaf!(2), false)?;
+ test_forest.plant(0, leaf!(3), false)?;
+ test_forest.plant(2, leaf!(5), false)?;
assert!(forest.is_prefix(0, &test_forest)?);
let mut test_forest = leaf!(0);
- test_forest.plant(0, leaf!(1))?;
- test_forest.plant(0, leaf!(2))?;
+ test_forest.plant(0, leaf!(1), false)?;
+ test_forest.plant(0, leaf!(2), false)?;
// this child of the root should have label 3 in order to be a
// prefix.
- test_forest.plant(0, leaf!(4))?;
- test_forest.plant(2, leaf!(5))?;
+ test_forest.plant(0, leaf!(4), false)?;
+ test_forest.plant(2, leaf!(5), false)?;
assert!(!forest.is_prefix(0, &test_forest)?);
let mut test_forest = leaf!(2);
- test_forest.plant(0, leaf!(5))?;
+ test_forest.plant(0, leaf!(5), false)?;
assert!(forest.is_prefix(2, &test_forest)?);
// now test cloning
// add a duplicate label
- forest.plant(3, leaf!(5))?;
+ forest.plant(3, leaf!(5), false)?;
let len = forest.nodes_len();
diff --git a/forest/src/design.org b/forest/src/design.org
index 09db113..c32b1c9 100644
--- a/forest/src/design.org
+++ b/forest/src/design.org
@@ -112,7 +112,8 @@ The chain-rule operation can be described as follows:
2. Prepare a new forest fragment as follows.
1) For every child \(g\) of \(e\) in the atomic language, if \(g\)
is the terminal that appears as the current input \(t\), let the
- new fragment be defined as the node moved by \(g\), alone.
+ new fragment be defined as the node moved by \(g\), followed by
+ a terminal node, as a single edge.
2) If \(g\) is a non-terminal and its first-set contains \(t\),
then for every left-linearly closed child of \(g\) that is
labelled \(t\), denoted as \(h\), then let the fragment be
diff --git a/forest/src/lib.rs b/forest/src/lib.rs
index f843bc1..8c9b918 100644
--- a/forest/src/lib.rs
+++ b/forest/src/lib.rs
@@ -37,7 +37,7 @@ pub trait Forest<T: GraphLabel>: ParentsGraph + LabelGraph<T> {
F: AsRef<Self>;
/// Extend the forest by adjoining another forest at a given node.
- fn plant<F>(&mut self, node_id: usize, fragment: F) -> Result<(), Self::Error>
+ fn plant<F>(&mut self, node_id: usize, fragment: F, planted: bool) -> Result<(), Self::Error>
where
F: AsRef<Self>;
@@ -54,3 +54,4 @@ pub trait Forest<T: GraphLabel>: ParentsGraph + LabelGraph<T> {
}
pub mod default;
+pub use default::Error;
diff --git a/grammar/src/label.rs b/grammar/src/label.rs
new file mode 100644
index 0000000..58eaddc
--- /dev/null
+++ b/grammar/src/label.rs
@@ -0,0 +1,124 @@
+//! This file implements a type of labels that could be used as the
+//! labels of a parse forest.
+
+use super::*;
+
+/// The actual label of a grammar label.
+#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
+pub enum GrammarLabelType {
+ /// A terminal or a non-terminal.
+ TNT(TNT),
+ /// A rule position.
+ Rule(usize),
+}
+
+impl GrammarLabelType {
+ /// Return the name of this label with the help of the associated
+ /// grammar.
+ pub fn name(&self, grammar: &Grammar) -> Result<String, Error> {
+ match self {
+ Self::TNT(tnt) => grammar.name_of_tnt(*tnt),
+ Self::Rule(pos) => grammar.rule_pos_to_string(*pos),
+ }
+ }
+}
+
+/// The label to be used in a forest.
+#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
+pub struct GrammarLabel {
+ /// The actual label.
+ label: GrammarLabelType,
+ /// The start in the input that this label correponds to.
+ start: usize,
+ /// The end in the input that this label correponds to.
+ end: Option<usize>,
+ /// A node in the forest might be a packed node.
+ packed_p: bool,
+}
+
+impl core::fmt::Display for GrammarLabel {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ // Simply displaying this without the help of a grammar is not
+ // of much help, so we just use the debug method to cheat,
+ // haha.
+
+ write!(f, "{:?}", self)
+ }
+}
+
+impl GrammarLabel {
+ /// Construct a new label.
+ pub fn new(label: GrammarLabelType, start: usize) -> Self {
+ let end = None;
+ let packed_p = false;
+
+ Self {
+ label,
+ start,
+ end,
+ packed_p,
+ }
+ }
+
+ /// Return the end in the input.
+ pub fn end(&self) -> Option<usize> {
+ self.end
+ }
+
+ /// Return the start in the input.
+ pub fn start(&self) -> usize {
+ self.start
+ }
+
+ /// Return the actual label.
+ pub fn label(&self) -> GrammarLabelType {
+ self.label
+ }
+
+ /// Update the end.
+ pub fn set_end(&mut self, end: usize) {
+ self.end = Some(end);
+ }
+
+ /// Check whether the node is a packed node.
+ pub fn is_packed(&self) -> bool {
+ self.packed_p
+ }
+
+ /// Update the packed status.
+ pub fn set_packed_p(&mut self, packed_p: bool) {
+ self.packed_p = packed_p;
+ }
+
+ /// Return a string description with the help of the associated
+ /// grammar.
+ pub fn to_string(&self, grammar: &Grammar) -> Result<String, Error> {
+ // REVIEW: It needs at least 34 bytes, so we just double it.
+ // Of course we can also calculate the length exactly, but
+ // this can be postponed till later.
+ let mut s = String::with_capacity(68);
+ s.push_str("a ");
+
+ if self.is_packed() {
+ s.push_str("packed ");
+ } else {
+ s.push_str("normal ");
+ }
+
+ s.push_str("node labelled ");
+
+ s.push_str(&self.label().name(grammar)?);
+
+ s.push_str(" from ");
+
+ s.push_str(&format!("{} ", self.start()));
+
+ if let Some(end) = self.end() {
+ s.push_str(&format!("to {end}"));
+ } else {
+ s.push_str("onwards");
+ }
+
+ Ok(s)
+ }
+}
diff --git a/grammar/src/left_closure.rs b/grammar/src/left_closure.rs
index 1630881..39461f0 100644
--- a/grammar/src/left_closure.rs
+++ b/grammar/src/left_closure.rs
@@ -114,10 +114,10 @@ impl Grammar {
local_result.push(Or);
builder.add_vertex();
- local_result.push(Lit(Ter(self.pack_tnt(Non(n)).unwrap())));
- let non_lit_index = builder.add_vertex();
+ // local_result.push(Lit(Ter(self.pack_tnt(Non(n)).unwrap())));
+ // let non_lit_index = builder.add_vertex();
- builder.add_edge(0, non_lit_index, ()).unwrap();
+ // builder.add_edge(0, non_lit_index, ()).unwrap();
// If this non-terminal is nullable, add an empty variant.
if self.is_nullable(n)? {
diff --git a/grammar/src/lib.rs b/grammar/src/lib.rs
index 297cb66..50a2b9b 100644
--- a/grammar/src/lib.rs
+++ b/grammar/src/lib.rs
@@ -320,11 +320,39 @@ impl Grammar {
self.accumulators.last().copied().unwrap_or(0)
}
+ /// Return an element of the accumulator.
+ #[inline]
+ pub fn nth_accumulator(&self, n: usize) -> Result<usize, Error> {
+ self.accumulators
+ .get(n)
+ .copied()
+ .ok_or_else(|| Error::IndexOutOfBounds(n, self.total()))
+ }
+
+ /// Return the index of the rules a rule position belongs to.
+ #[inline]
+ pub fn get_rule_num(&self, pos: usize) -> Result<usize, Error> {
+ let mut result = None;
+
+ for (index, accumulator) in self.accumulators.iter().copied().skip(1).enumerate() {
+ if accumulator > pos {
+ result = Some(index);
+ break;
+ }
+ }
+
+ if let Some(n) = result {
+ Ok(n)
+ } else {
+ Err(Error::IndexOutOfBounds(pos, self.total()))
+ }
+ }
+
/// Query if a position is the starting position of a
/// non-terminal. If it is, return the non-terminal, else return
/// `None` .
#[inline]
- pub fn is_nt_start_in_nfa_p(&self, pos: usize) -> Option<usize> {
+ pub fn get_nt_start_in_nfa(&self, pos: usize) -> Option<usize> {
for (index, accumulator) in self.accumulators.iter().copied().enumerate() {
let shifted_accumulator = accumulator << 1;
@@ -456,7 +484,7 @@ impl Grammar {
}
}
- Ok(self.expansion_map.get(&(pos1, pos2)).map(|v| &v[..]))
+ Ok(self.expansion_map.get(&(pos1, pos2)).map(|v| v.as_ref()))
}
// REVIEW: Do we have a better way to record expansion information
@@ -483,7 +511,11 @@ impl Grammar {
&mut self,
two_edges: TwoEdges<LabelType<TNT>>,
) -> LabelType<TNT> {
+ #[cfg(debug_assertions)]
let (first_source, first_target, first_label) = two_edges.first_edge();
+ #[cfg(not(debug_assertions))]
+ let (first_source, _, first_label) = two_edges.first_edge();
+
let (second_source, second_target, second_label) = two_edges.second_edge();
#[cfg(debug_assertions)]
@@ -501,11 +533,11 @@ impl Grammar {
// that is to say, the source of the second edge is the
// starting edge of a non-terminal.
- let mut left_p = first_label.get_left_p() || second_label.get_left_p();
+ let mut left_p = first_label.is_left_p() || second_label.is_left_p();
// Record left-linear expansion information.
- if let Some(second_nt) = self.is_nt_start_in_nfa_p(second_source) {
+ if let Some(second_nt) = self.get_nt_start_in_nfa(second_source) {
left_p = true;
if !self
@@ -554,12 +586,55 @@ impl Grammar {
.ok_or(Error::IndexOutOfBounds(non_terminal, self.non.len()))?
.iter())
}
+
+ /// Return a string describing a rule position.
+ pub fn rule_pos_to_string(&self, pos: usize) -> Result<String, Error> {
+ let rule_num = {
+ let mut result = None;
+
+ for (index, accumulator) in self.accumulators.iter().copied().skip(1).enumerate() {
+ if accumulator > pos {
+ result = Some(index);
+ break;
+ }
+ }
+
+ if let Some(n) = result {
+ n
+ } else {
+ return Err(Error::IndexOutOfBounds(pos, self.total()));
+ }
+ };
+
+ assert!(rule_num < self.rules.len());
+
+ let display_tnt = |tnt| self.name_of_tnt(tnt).unwrap_or_else(|e| format!("{e}"));
+
+ Ok(self
+ .rules
+ .get(rule_num)
+ .unwrap()
+ .regex
+ .to_string_with_dot(
+ display_tnt,
+ if rule_num == 0 {
+ pos
+ } else {
+ pos - self.accumulators.get(rule_num - 1).copied().unwrap()
+ },
+ )
+ .unwrap())
+ }
}
pub mod first_set;
pub mod left_closure;
+pub mod label;
+
+pub use label::{GrammarLabel, GrammarLabelType};
+
impl Display for Grammar {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
assert_eq!(self.non.len(), self.rules.len());
diff --git a/grammar/src/test_grammar_helper.rs b/grammar/src/test_grammar_helper.rs
index 89f9844..984eb50 100644
--- a/grammar/src/test_grammar_helper.rs
+++ b/grammar/src/test_grammar_helper.rs
@@ -45,7 +45,7 @@ fn scan_tnt(
'T' => {
let mut name = String::new();
- while let Some(c) = chars.next() {
+ for c in chars.by_ref() {
if ('a'..='z').contains(&c) || ('A'..='Z').contains(&c) {
len += 1;
write!(name, "{c}").map_err(|_| ParseError::InvalidCharacter(c))?;
@@ -63,7 +63,7 @@ fn scan_tnt(
'N' => {
let mut name = String::new();
- while let Some(c) = chars.next() {
+ for c in chars.by_ref() {
if ('a'..='z').contains(&c) || ('A'..='Z').contains(&c) {
len += 1;
write!(name, "{c}").map_err(|_| ParseError::InvalidCharacter(c))?;
@@ -125,6 +125,130 @@ pub fn new_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
}
/// Return a grammar that might serve as the grammar for my notes,
+/// somehow, without regular expressions.
+#[allow(dead_code)]
+pub fn new_notes_grammar_no_regexp() -> Result<Grammar, Box<dyn std::error::Error>> {
+ let ter = vec![
+ Terminal::new("NL".to_owned()),
+ Terminal::new("SP".to_owned()),
+ Terminal::new("CON".to_owned()),
+ Terminal::new("STAR".to_owned()),
+ Terminal::new("NOTE".to_owned()),
+ Terminal::new("PRICE".to_owned()),
+ Terminal::new("DIGIT".to_owned()),
+ ];
+ let non = vec![
+ Nonterminal("document".to_owned()),
+ Nonterminal("item".to_owned()),
+ Nonterminal("header".to_owned()),
+ Nonterminal("title".to_owned()),
+ Nonterminal("note".to_owned()),
+ Nonterminal("note-content".to_owned()),
+ Nonterminal("price".to_owned()),
+ Nonterminal("ending".to_owned()),
+ Nonterminal("digits".to_owned()),
+ ];
+
+ let mut regex_parser: DefaultRegParser<TNT> = Default::default();
+
+ regex_parser.add_tnt("NL", true);
+ regex_parser.add_tnt("SP", true);
+ regex_parser.add_tnt("CON", true);
+ regex_parser.add_tnt("STAR", true);
+ regex_parser.add_tnt("note", true);
+ regex_parser.add_tnt("price", true);
+ regex_parser.add_tnt("DIGIT", true);
+ regex_parser.add_tnt("document", false);
+ regex_parser.add_tnt("item", false);
+ regex_parser.add_tnt("header", false);
+ regex_parser.add_tnt("title", false);
+ regex_parser.add_tnt("note", false);
+ regex_parser.add_tnt("notecontent", false);
+ regex_parser.add_tnt("price", false);
+ regex_parser.add_tnt("ending", false);
+ regex_parser.add_tnt("digits", false);
+
+ let regex_parser = regex_parser;
+
+ let rule1 = Rule {
+ regex: regex_parser
+ .parse("Nitem | Nitem Ndocument", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule2 = Rule {
+ regex: regex_parser
+ .parse(
+ "Nheader | Nheader Nprice | Nheader Nnote | Nheader Nprice Nnote",
+ Box::new(scan_tnt),
+ true,
+ )?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule3 = Rule {
+ regex: regex_parser
+ .parse(
+ "TSP Ntitle TNL Nending | TSTAR TSP Ntitle TNL Nending",
+ Box::new(scan_tnt),
+ true,
+ )?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule4 = Rule {
+ regex: regex_parser
+ .parse("TCON | TCON Ntitle", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule5 = Rule {
+ regex: regex_parser
+ .parse("Tnote Nnotecontent TNL Nending", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule6 = Rule {
+ regex: regex_parser
+ .parse("TCON | TCON Nnotecontent", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule7 = Rule {
+ regex: regex_parser
+ .parse("Tprice TSP Ndigits TNL Nending", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule8 = Rule {
+ regex: regex_parser
+ .parse("TNL Nending | TSP Nending | ()", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule9 = Rule {
+ regex: regex_parser
+ .parse("TDIGIT | TDIGIT Ndigits", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rules = vec![
+ rule1, rule2, rule3, rule4, rule5, rule6, rule7, rule8, rule9,
+ ];
+
+ Ok(Grammar::new(ter, non, rules))
+}
+
+/// Return a grammar that might serve as the grammar for my notes,
/// somehow.
#[allow(dead_code)]
pub fn new_notes_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
diff --git a/graph/src/error.rs b/graph/src/error.rs
index ce45acc..bf2714b 100644
--- a/graph/src/error.rs
+++ b/graph/src/error.rs
@@ -18,8 +18,6 @@ pub enum Error {
/// The graph does not permit duplicate edges but encounters a
/// repeated edge.
DuplicatedEdge(usize, usize),
- /// The source node has no room to add a new edge.
- FullNode(usize),
}
impl Display for Error {
@@ -37,7 +35,6 @@ impl Display for Error {
"No duplicate edges permitted, but found one from {source} to {target}"
)
}
- Error::FullNode(index) => write!(f, "the node {index} has no room for new edges"),
}
}
}
diff --git a/graph/src/labelled/binary.rs b/graph/src/labelled/binary.rs
index bfd8109..e3e1943 100644
--- a/graph/src/labelled/binary.rs
+++ b/graph/src/labelled/binary.rs
@@ -19,18 +19,22 @@ struct PLChildren {
}
impl PLChildren {
+ #[inline]
fn iter(&self) -> Iter<'_, usize> {
self.children.iter()
}
+ #[inline]
fn len(&self) -> usize {
self.children.len()
}
+ #[inline]
fn is_empty(&self) -> bool {
self.children.is_empty()
}
+ #[inline]
fn contains(&self, key: &usize) -> bool {
self.indices.contains_key(key)
}
@@ -73,6 +77,15 @@ impl PLChildren {
self.children.remove(key_index);
}
+
+ /// Retrieve the `n`-th child.
+ #[inline]
+ fn nth(&self, n: usize) -> Result<usize, Error> {
+ self.children
+ .get(n)
+ .copied()
+ .ok_or(Error::IndexOutOfBounds(n, self.children.len()))
+ }
}
/// A node has some children, some parents, and a label.
@@ -187,7 +200,7 @@ impl<T: GraphLabel> Graph for PLGraph<T> {
}
fn replace_by_builder(&mut self, _builder: impl Builder<Result = Self>) {
- todo!()
+ unimplemented!("use a `PLGBuilderMut` instead")
}
fn print_viz(&self, _filename: &str) -> Result<(), std::io::Error> {
@@ -246,6 +259,31 @@ impl<T: GraphLabel> ParentsGraph for PLGraph<T> {
Err(Error::IndexOutOfBounds(node_id, self.nodes.len()))
}
}
+
+ #[inline]
+ fn nth_child(&self, node_id: usize, n: usize) -> Result<usize, Error> {
+ if let Some(node) = self.nodes.get(node_id) {
+ node.children.nth(n)
+ } else {
+ Err(Error::IndexOutOfBounds(node_id, self.nodes_len()))
+ }
+ }
+
+ #[inline]
+ fn edge_to_parent(&self, source: usize, target: usize) -> Result<Option<Parent>, Error> {
+ if !self.has_edge(source, target)? {
+ return Ok(None);
+ }
+
+ Ok(self
+ .nodes
+ .get(source)
+ .unwrap()
+ .children
+ .indices
+ .get(&target)
+ .map(|index| Parent::new(source, *index)))
+ }
}
impl<T: GraphLabel> RedirectGraph for PLGraph<T> {
@@ -378,13 +416,13 @@ impl<'a, T: GraphLabel> std::ops::Deref for PLGBuilderMut<'a, T> {
type Target = PLGraph<T>;
fn deref(&self) -> &Self::Target {
- &self.graph
+ self.graph
}
}
impl<'a, T: GraphLabel> std::ops::DerefMut for PLGBuilderMut<'a, T> {
fn deref_mut(&mut self) -> &mut Self::Target {
- &mut self.graph
+ self.graph
}
}
diff --git a/graph/src/labelled/double.rs b/graph/src/labelled/double.rs
index 4ab8a38..174c8ef 100644
--- a/graph/src/labelled/double.rs
+++ b/graph/src/labelled/double.rs
@@ -150,6 +150,8 @@ impl<T: GraphLabel> Graph for DLGraph<T> {
}
fn print_viz(&self, filename: &str) -> Result<(), std::io::Error> {
+ let filename = format!("output/{filename}");
+
let preamble = "digraph nfa {
fontname=\"Helvetica,Arial,sans-serif\"
node [fontname=\"Helvetica,Arial,sans-serif\"]
@@ -170,14 +172,14 @@ impl<T: GraphLabel> Graph for DLGraph<T> {
let result = format!("{preamble}{post}");
- if std::fs::metadata(filename).is_ok() {
- std::fs::remove_file(filename)?;
+ if std::fs::metadata(&filename).is_ok() {
+ std::fs::remove_file(&filename)?;
}
let mut file = std::fs::File::options()
.write(true)
.create(true)
- .open(filename)?;
+ .open(&filename)?;
use std::io::Write;
@@ -206,7 +208,7 @@ impl<'a> Iterator for LabelIndexIter<'a> {
#[inline]
fn next(&mut self) -> Option<Self::Item> {
- self.iter.as_mut().and_then(|iterator| iterator.next())
+ self.iter.as_mut().and_then(Iterator::next)
}
#[inline]
@@ -242,7 +244,7 @@ impl<'a> From<&'a Set<usize>> for LabelIndexIter<'a> {
}
}
-#[derive(Debug)]
+#[derive(Debug, Clone)]
/// A delegation of iterators.
///
/// This is used to avoid a boxed pointer to an iterator.
@@ -278,7 +280,7 @@ impl<'a, T> Iterator for LabelIter<'a, T> {
}
/// This is used to avoid a boxed pointer to an iterator.
-#[derive(Debug)]
+#[derive(Debug, Clone)]
pub struct EdgeLabelIter<'a, T> {
iter: Option<Iter<'a, T>>,
}
diff --git a/graph/src/lib.rs b/graph/src/lib.rs
index 6af7889..2a0c50d 100644
--- a/graph/src/lib.rs
+++ b/graph/src/lib.rs
@@ -251,10 +251,14 @@ pub trait ParentsGraph: Graph {
/// Return an iterator of parents for a node.
fn parents_of(&self, node_id: usize) -> Result<<Self as ParentsGraph>::Iter<'_>, Error>;
-}
-// TODO: Design a trait of graphs which can "replace" a certain child
-// by another child. To re-direct children, so to speak.
+ /// We need to be able to retrieve the `n`-the child, for the edge
+ /// index to be of any use.
+ fn nth_child(&self, node_id: usize, n: usize) -> Result<usize, Error>;
+
+ /// Convert an edge to a `Parent` construct.
+ fn edge_to_parent(&self, source: usize, target: usize) -> Result<Option<Parent>, Error>;
+}
/// An /exended/ graph in the sense that it offers the ability to
/// "redirect" children of a node to another node.
diff --git a/graph_macro/src/lib.rs b/graph_macro/src/lib.rs
index 5a0672b..a55efbb 100644
--- a/graph_macro/src/lib.rs
+++ b/graph_macro/src/lib.rs
@@ -1,12 +1,19 @@
+#![allow(unused_imports)]
//! This file provides a macro to delegate the implementation of a
//! type that wraps a graph. More precisely, the macro helps the
//! wrapper type implement the various Graph-related traits.
use proc_macro::TokenStream;
+use core::iter::Peekable;
+
#[proc_macro_derive(Testing)]
pub fn test_derive(input: TokenStream) -> TokenStream {
- println!("input: {input:?}");
+ let input = input.into_iter().peekable();
+
+ for tree in input {
+ println!("tree = {tree:?}");
+ }
TokenStream::new()
}
diff --git a/nfa/src/default/nfa.rs b/nfa/src/default/nfa.rs
index a23c056..8d657d5 100644
--- a/nfa/src/default/nfa.rs
+++ b/nfa/src/default/nfa.rs
@@ -212,7 +212,7 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
let top_label = regex.vertex_label(top_index)?;
let nfa_start = offset + 2 * top_index;
- let nfa_end = offset + 2 * top_index + 1;
+ let nfa_end = nfa_start + 1;
match top_label {
RegexType::Kleene => {
diff --git a/nfa/src/default/regex.rs b/nfa/src/default/regex.rs
index c8ad370..1e3e87b 100644
--- a/nfa/src/default/regex.rs
+++ b/nfa/src/default/regex.rs
@@ -359,6 +359,147 @@ impl<T: GraphLabel> DefaultRegex<T> {
Ok(result)
}
+
+ /// Use a function to format the labels of the regular expressions
+ /// and format the entire regular expression with this aid, with a
+ /// dot at a specified position.
+ pub fn to_string_with_dot<F>(&self, mut f: F, dot_pos: usize) -> Result<String, std::fmt::Error>
+ where
+ F: FnMut(T) -> String,
+ {
+ #[derive(Copy, Clone)]
+ enum StackElement {
+ Seen(usize),
+ Unseen(usize),
+ }
+
+ impl StackElement {
+ fn index(&self) -> usize {
+ match self {
+ Seen(index) => *index,
+ Unseen(index) => *index,
+ }
+ }
+
+ fn is_seen(self) -> bool {
+ matches!(self, Seen(_))
+ }
+ }
+
+ use StackElement::{Seen, Unseen};
+
+ let mut result = String::new();
+
+ let mut stack: Vec<StackElement> = Vec::new();
+ let mut types = self.types.clone();
+ types.push(RegexType::Paren);
+
+ stack.push(Unseen(0));
+
+ while let Some(top) = stack.pop() {
+ let node_type = types.get(top.index()).unwrap();
+
+ match node_type {
+ RegexType::Kleene => {
+ if !top.is_seen() {
+ stack.push(Seen(top.index()));
+
+ if self.degree(top.index()).unwrap() > 1 {
+ write!(result, "(")?;
+ stack.push(Unseen(types.len() - 1));
+ }
+
+ stack.extend(
+ self.graph
+ .children_of(top.index())
+ .unwrap()
+ .map(Unseen)
+ .rev(),
+ );
+ } else {
+ write!(result, "*")?;
+ }
+ }
+ RegexType::Plus => {
+ if !top.is_seen() {
+ stack.push(Seen(top.index()));
+ stack.extend(
+ self.graph
+ .children_of(top.index())
+ .unwrap()
+ .map(Unseen)
+ .rev(),
+ );
+ } else {
+ write!(result, "+")?;
+ }
+ }
+ RegexType::Optional => {
+ if !top.is_seen() {
+ stack.push(Seen(top.index()));
+ stack.extend(
+ self.graph
+ .children_of(top.index())
+ .unwrap()
+ .map(Unseen)
+ .rev(),
+ );
+ } else {
+ write!(result, "?")?;
+ }
+ }
+ RegexType::Or => {
+ if !top.is_seen() {
+ write!(result, "(")?;
+
+ let len = self.len();
+
+ stack.push(Unseen(types.len() - 1));
+
+ for (child_index, child) in self
+ .graph
+ .children_of(top.index())
+ .unwrap()
+ .enumerate()
+ .rev()
+ {
+ if child_index != len - 1 && child_index != 0 {
+ stack.push(Unseen(child));
+ stack.push(Seen(top.index()));
+ } else {
+ stack.push(Unseen(child));
+ }
+ }
+ } else {
+ write!(result, "|")?;
+ }
+ }
+ RegexType::Paren => {
+ write!(result, ")")?;
+ }
+ RegexType::Empty => {
+ stack.extend(
+ self.graph
+ .children_of(top.index())
+ .unwrap()
+ .map(Unseen)
+ .rev(),
+ );
+
+ if self.graph.is_empty_node(top.index()).unwrap() {
+ write!(result, "ε")?;
+ }
+ }
+ RegexType::Lit(label) => write!(result, "{}", f(*label))?,
+ }
+
+ if top.index() == dot_pos {
+ write!(result, " · ")?;
+ }
+ }
+
+ Ok(result)
+ }
}
impl<T: GraphLabel + Display + Debug> Display for DefaultRegex<T> {
diff --git a/nfa/src/lib.rs b/nfa/src/lib.rs
index c1906e1..4440ea6 100644
--- a/nfa/src/lib.rs
+++ b/nfa/src/lib.rs
@@ -150,15 +150,17 @@ impl<T: GraphLabel> NfaLabel<T> {
pub fn get_value(&self) -> T {
self.value
}
+
/// Retrieve the moved position from the label.
#[inline]
pub fn get_moved(&self) -> usize {
self.moved
}
+
/// Retrieve whether or not the label comes from left-linear
/// expansions.
#[inline]
- pub fn get_left_p(&self) -> bool {
+ pub fn is_left_p(&self) -> bool {
self.left_p
}
}