-rw-r--r--  Cargo.toml                                        6
-rw-r--r--  chain/Cargo.toml                                  7
-rw-r--r--  chain/src/atom/default.rs                       342
-rw-r--r--  chain/src/atom/mod.rs                            41
-rw-r--r--  chain/src/default.rs                             46
-rw-r--r--  chain/src/lib.rs                                 49
-rw-r--r--  chain/src/plan.org                              154
-rw-r--r--  forest/Cargo.toml                                 9
-rw-r--r--  forest/src/default.rs                           153
-rw-r--r--  forest/src/lib.rs                               105
-rw-r--r--  grammar/Cargo.toml                               20
-rw-r--r--  grammar/src/lib.rs (renamed from chain/src/grammar.rs)  302
-rw-r--r--  grammar/src/test_grammar_helper.rs              368
-rw-r--r--  grammar/src/tests/mod.rs                         30
-rw-r--r--  grammar/src/tests/test_grammar_left_closure.rs  272
-rw-r--r--  graph/src/adlist.rs                               6
-rw-r--r--  graph/src/adset.rs                                6
-rw-r--r--  graph/src/builder.rs                              3
-rw-r--r--  graph/src/labelled.rs                           135
-rw-r--r--  graph/src/lib.rs                                 14
-rw-r--r--  nfa/src/default/mod.rs                            2
-rw-r--r--  nfa/src/default/nfa.rs                          160
-rw-r--r--  nfa/src/default/regex.rs                         20
-rw-r--r--  nfa/src/error.rs                                 18
-rw-r--r--  nfa/src/lib.rs                                   91
-rw-r--r--  receme/src/lib.rs                                 2
-rw-r--r--  receme/src/tree.rs                                2
-rw-r--r--  semiring/Cargo.toml                               8
-rw-r--r--  semiring/src/lib.rs                              14
29 files changed, 2042 insertions, 343 deletions
diff --git a/Cargo.toml b/Cargo.toml
index 77de27f..e34a8cf 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,10 +10,12 @@ keywords = ["emacs", "parser"]
# generic associated types, which are not stabilized until version
# 1.65.
rust-version = "1.65"
-# testing the new resolver, even though this has no dependencies ;p
[workspace]
-members = ["graph", "receme", "nfa", "chain", "viz"]
+members = [ "graph", "receme", "nfa", "chain", "viz", "grammar",
+ "forest", "semiring"]
+
+# testing the new resolver, even though this has no dependencies ;p
resolver = "2"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
diff --git a/chain/Cargo.toml b/chain/Cargo.toml
index 32eed8b..265954c 100644
--- a/chain/Cargo.toml
+++ b/chain/Cargo.toml
@@ -7,4 +7,9 @@ edition = "2021"
[dependencies]
nfa = { path = "../nfa" }
-graph = { path = "../graph" }
\ No newline at end of file
+graph = { path = "../graph" }
+grammar = { path = "../grammar" }
+forest = { path = "../forest" }
+
+[dev-dependencies]
+grammar = { path = "../grammar", features = ["test-helper"] }
diff --git a/chain/src/atom/default.rs b/chain/src/atom/default.rs
new file mode 100644
index 0000000..72989b3
--- /dev/null
+++ b/chain/src/atom/default.rs
@@ -0,0 +1,342 @@
+//! This file provides a default implementation of the
+//! [`Atom`][super::Atom] trait.
+
+use super::*;
+use grammar::Grammar;
+use graph::{error::Error as GraphError, Graph, LabelExtGraph, LabelGraph};
+use nfa::default::{nfa::DefaultNFA, regex::DefaultRegex};
+
+use core::fmt::Display;
+use std::collections::BTreeMap as Map;
+
+/// A virtual node represents the derivative of a non-terminal symbol
+/// `S` with respect to a terminal symbol `t`.
+#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Ord, PartialOrd)]
+struct VirtualNode {
+ s: usize,
+ t: usize,
+}
+
+impl Display for VirtualNode {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "VN[{}]^({})", self.s, self.t)
+ }
+}
+
+impl VirtualNode {
+ fn new(s: usize, t: usize) -> Self {
+ Self { s, t }
+ }
+}
+
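+/// A map from a virtual node, that is a pair of a non-terminal and
+/// a terminal, to the index of the NFA state that represents it.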
+type VirtualMap = Map<VirtualNode, usize>;
+
+/// The type of atomic languages.
+#[derive(Debug, Clone, Default)]
+pub struct DefaultAtom {
+ grammar: Grammar,
+ nfa: DefaultNFA<DOption<TNT>>,
+ // NOTE: This is mostly for printing and debugging
+ regexp: Vec<DefaultRegex<TNT>>,
+ virtual_nodes: VirtualMap,
+}
+
+impl Display for DefaultAtom {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ let grammar = &self.grammar;
+
+ let error_to_string = |err| format!("{err}");
+ let tnt_to_string = |tnt, deco| {
+ grammar
+ .name_of_tnt(tnt)
+ .map(|name| format!("{deco}({name})"))
+ .unwrap_or_else(error_to_string)
+ };
+
+ let display_tnt = |tnt| match tnt {
+ TNT::Ter(t) => format!(
+ "({})",
+ grammar
+ .unpack_tnt(t)
+ .map(|tnt| tnt_to_string(tnt, ""))
+ .unwrap_or_else(error_to_string)
+ ),
+ TNT::Non(_) => tnt_to_string(tnt, "H"),
+ };
+
+ writeln!(f, "regular expressions:")?;
+
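+        // Each regular expression corresponds to `nodes_len() * 2`
+        // states in the NFA (the same computation appears in
+        // `from_grammar` below), so the running sums kept here give
+        // the starting state index of each expression.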
+ let mut accumulators = Vec::with_capacity(self.regexp.len() + 1);
+ accumulators.push(0usize);
+
+ for regex in self.regexp.iter() {
+ writeln!(f, "accumulator: {}", accumulators.last().unwrap())?;
+
+ accumulators.push(regex.nodes_len() * 2 + accumulators.last().unwrap());
+
+ let string = regex.to_string_with(display_tnt)?;
+
+ writeln!(f, "{string}")?;
+ }
+
+ writeln!(f, "total = {}", accumulators.last().unwrap())?;
+
+ writeln!(f, "virtual nodes:")?;
+
+ for (virtual_node, index) in self.virtual_nodes.iter() {
+ writeln!(f, "{virtual_node}: {index}")?;
+ }
+
+ Ok(())
+ }
+}
+
+// Some boiler-plate delegation implementations for Graph and
+// LabelGraph, in order to implement Nfa.
+
+impl Graph for DefaultAtom {
+ type Iter<'b> = <DefaultNFA<DOption<TNT>> as Graph>::Iter<'b>
+ where
+ Self: 'b;
+
+ fn is_empty(&self) -> bool {
+ self.nfa.is_empty()
+ }
+
+ fn nodes_len(&self) -> usize {
+ self.nfa.nodes_len()
+ }
+
+ fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, GraphError> {
+ self.nfa.children_of(node_id)
+ }
+
+ fn degree(&self, node_id: usize) -> Result<usize, GraphError> {
+ self.nfa.degree(node_id)
+ }
+
+ fn is_empty_node(&self, node_id: usize) -> Result<bool, GraphError> {
+ self.nfa.is_empty_node(node_id)
+ }
+
+ fn has_edge(&self, source: usize, target: usize) -> Result<bool, GraphError> {
+ self.nfa.has_edge(source, target)
+ }
+
+ fn replace_by_builder(&mut self, _builder: impl graph::Builder<Result = Self>) {
+        // NOTE: We cannot replace by a builder here, since the
+        // builder's result would be an atom rather than the
+        // underlying graph type.
+ unimplemented!()
+ }
+}
+
+impl LabelGraph<DOption<TNT>> for DefaultAtom {
+ type Iter<'b> = <DefaultNFA<DOption<TNT>> as LabelGraph<DOption<TNT>>>::Iter<'b>
+ where
+ Self: 'b;
+
+ type LabelIter<'b> = <DefaultNFA<DOption<TNT>> as LabelGraph<DOption<TNT>>>::LabelIter<'b>
+ where
+ Self: 'b,
+ DOption<TNT>: 'b;
+
+ type EdgeLabelIter<'a> = <DefaultNFA<DOption<TNT>> as LabelGraph<DOption<TNT>>>::EdgeLabelIter<'a>
+ where
+ Self: 'a,
+ DOption<TNT>: 'a;
+
+ #[inline]
+ fn vertex_label(&self, node_id: usize) -> Result<Option<DOption<TNT>>, GraphError> {
+ self.nfa.vertex_label(node_id)
+ }
+
+ #[inline]
+ fn edge_label(
+ &self,
+ source: usize,
+ target: usize,
+ ) -> Result<Self::EdgeLabelIter<'_>, GraphError> {
+ self.nfa.edge_label(source, target)
+ }
+
+ #[inline]
+ fn find_children_with_label(
+ &self,
+ node_id: usize,
+ label: &DOption<TNT>,
+ ) -> Result<<Self as LabelGraph<DOption<TNT>>>::Iter<'_>, GraphError> {
+ self.nfa.find_children_with_label(node_id, label)
+ }
+
+ #[inline]
+ fn labels_of(&self, node_id: usize) -> Result<Self::LabelIter<'_>, GraphError> {
+ self.nfa.labels_of(node_id)
+ }
+
+ #[inline]
+ fn has_edge_label(
+ &self,
+ node_id: usize,
+ label: &DOption<TNT>,
+ target: usize,
+ ) -> Result<bool, GraphError> {
+ self.nfa.has_edge_label(node_id, label, target)
+ }
+}
+
+impl LabelExtGraph<DOption<TNT>> for DefaultAtom {
+ #[inline]
+ fn extend(
+ &mut self,
+ edges: impl IntoIterator<Item = (DOption<TNT>, usize)>,
+ ) -> Result<usize, GraphError> {
+ self.nfa.extend(edges)
+ }
+}
+
+impl Nfa<DOption<TNT>> for DefaultAtom {
+ #[inline]
+ fn remove_epsilon<F>(&mut self, f: F) -> Result<(), nfa::error::Error>
+ where
+ F: Fn(DOption<TNT>) -> bool,
+ {
+ self.nfa.remove_epsilon(f)
+ }
+
+ type FromRegex<S: graph::GraphLabel + std::fmt::Display + Default> = ();
+
+ #[inline]
+ fn to_nfa(
+ _regexps: &[impl nfa::Regex<nfa::default::regex::RegexType<DOption<TNT>>>],
+ _sub_pred: impl Fn(DOption<TNT>) -> Result<nfa::SoC<DOption<TNT>>, nfa::error::Error>,
+ _default: Option<DOption<TNT>>,
+ ) -> Result<Self::FromRegex<DOption<DOption<TNT>>>, nfa::error::Error> {
+ // NOTE: We cannot construct an atom from a set of regular
+ // languages alone. So it is appropriate to panic here, if
+ // one tries to do so, for some reason.
+ unimplemented!()
+ }
+
+ #[inline]
+ fn remove_dead(&mut self, reserve: impl Fn(usize) -> bool) -> Result<(), nfa::error::Error> {
+ self.nfa.remove_dead(reserve)
+ }
+
+ #[inline]
+ fn nulling(&mut self, f: impl Fn(DOption<TNT>) -> bool) -> Result<(), nfa::error::Error> {
+ self.nfa.nulling(f)
+ }
+}
+
+impl DefaultAtom {
+ /// Construct an atomic language from a grammar.
+ pub fn from_grammar(mut grammar: Grammar) -> Result<Self, GrammarError> {
+ grammar.compute_firsts()?;
+
+ let regexp = grammar.left_closure()?;
+
+ let mut nfa = grammar.left_closure_to_nfa(&regexp)?;
+
+ use std::collections::HashSet;
+
+ let accumulators: Vec<usize> = {
+ let mut result = Vec::with_capacity(regexp.len() + 1);
+ result.push(0);
+
+ for regex in regexp.iter() {
+ result.push(regex.nodes_len() * 2 + result.last().unwrap());
+ }
+
+            result
+ };
+
+ let accumulators_set: HashSet<usize> = accumulators.iter().copied().collect();
+
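+        // Compute the null closure: an edge counts as null exactly
+        // when its label is empty or is a symbol that can match the
+        // empty string.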
+ nfa.nulling(|label| {
+ if let Some(label) = *label {
+ match label {
+ TNT::Ter(_) => false,
+ // Panics if a non-terminal references an invalid node
+ // here.
+ TNT::Non(n) => grammar.is_nullable(n).unwrap(),
+ }
+ } else {
+ true
+ }
+ })?;
+ nfa.remove_epsilon(|label| label.is_none())?;
+ nfa.remove_dead(|node| accumulators_set.contains(&node))?;
+
+ // now add the virtual nodes
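+        //
+        // For each non-terminal `nt` and each terminal `t`, collect
+        // the targets of all `t`-labelled edges leaving the starting
+        // state of `nt`, and extend the NFA with a new node carrying
+        // exactly those edges.  That new node then represents the
+        // derivative of `nt` by `t`.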
+ let mut virtual_nodes: VirtualMap = Default::default();
+
+ let nt_num = grammar.non_num();
+
+ assert!(nt_num <= accumulators.len());
+
+ // Convert an error telling us that an index is out of bounds.
+ //
+ // Panics if the error is not of the expected kind.
+ fn index_out_of_bounds_conversion(ge: GraphError) -> GrammarError {
+ match ge {
+ GraphError::IndexOutOfBounds(index, bound) => {
+ GrammarError::IndexOutOfBounds(index, bound)
+ }
+ _ => unreachable!(),
+ }
+ }
+
+ for nt in 0..nt_num {
+ let children: std::collections::HashMap<DOption<TNT>, Vec<_>> = nfa
+ // this is safe because of the above assertion.
+ .labels_of(*accumulators.get(nt).unwrap())
+ .map_err(index_out_of_bounds_conversion)?
+ .map(|(label, target_iter)| (*label, target_iter.collect()))
+ .collect();
+
+ for (label, children_vec) in children.into_iter() {
+ if let Some(TNT::Ter(t)) = *label {
+ let new_index = nfa
+ .extend(children_vec.into_iter().map(|target| (label, target)))
+ .map_err(index_out_of_bounds_conversion)?;
+
+ let virtual_node = VirtualNode::new(nt, t);
+
+ virtual_nodes.insert(virtual_node, new_index);
+ }
+ }
+ }
+
+ Ok(Self {
+ grammar,
+ nfa,
+ regexp,
+ virtual_nodes,
+ })
+ }
+}
+
+/// A convenient getter for the map of virtual nodes.
+fn query(map: &VirtualMap, nt: usize, t: usize) -> Option<usize> {
+ map.get(&VirtualNode::new(nt, t)).copied()
+}
+
+impl Atom for DefaultAtom {
+ fn atom(&self, nt: usize, t: usize) -> Result<Option<usize>, GrammarError> {
+ if nt >= self.grammar.non_num() {
+ return Err(GrammarError::IndexOutOfBounds(nt, self.grammar.non_num()));
+ }
+
+ if t >= self.grammar.ter_num() {
+ return Err(GrammarError::IndexOutOfBounds(t, self.grammar.ter_num()));
+ }
+
+ Ok(query(&self.virtual_nodes, nt, t))
+ }
+
+ fn empty(&self) -> usize {
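+        // The assertion below records an expectation about the NFA
+        // built by `from_grammar`: two states per non-terminal plus
+        // two auxiliary states, the first of which (at index
+        // `nodes_len() - 2`) serves as the empty state.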
+ assert_eq!(self.nfa.nodes_len() - 2, self.grammar.non_num() * 2);
+
+ self.nfa.nodes_len() - 2
+ }
+}
diff --git a/chain/src/atom/mod.rs b/chain/src/atom/mod.rs
new file mode 100644
index 0000000..084acca
--- /dev/null
+++ b/chain/src/atom/mod.rs
@@ -0,0 +1,41 @@
+//! This file defines the behaviour of the Atomic languages, and
+//! provides a default implementation.
+//!
+//! Here I do not currently plan to substitute external packages'
+//! implementations, so why define a trait for the atomic languages?
+//! Because this way I can easily substitute other implementations if
+//! I have better ideas in the future.
+
+use grammar::{Error as GrammarError, TNT};
+use nfa::{DOption, Nfa};
+
+/// The expected behaviours of an atomic language.
+pub trait Atom: Nfa<DOption<TNT>> {
+ /// Return the index of a node representing the derivative of the
+ /// left-linear null closure of `nt` with respect to `t`.
+ fn atom(&self, nt: usize, t: usize) -> Result<Option<usize>, GrammarError>;
+
+ /// Return the index of the empty state.
+ fn empty(&self) -> usize;
+}
+
+pub mod default;
+
+pub use default::DefaultAtom;
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use grammar::test_grammar_helper::*;
+
+ #[test]
+ fn atom() -> Result<(), Box<dyn std::error::Error>> {
+ let grammar = new_notes_grammar()?;
+
+ let atom = DefaultAtom::from_grammar(grammar)?;
+
+ println!("atom = {atom}");
+
+ Ok(())
+ }
+}
diff --git a/chain/src/default.rs b/chain/src/default.rs
new file mode 100644
index 0000000..e04be9f
--- /dev/null
+++ b/chain/src/default.rs
@@ -0,0 +1,46 @@
+//! This file provides a default implementation of the
+//! [`Chain`][crate::Chain] trait.
+//!
+//! The reason for using a trait is that I might want to experiment
+//! with different implementation ideas in the future, and this
+//! modular design makes that easy.
+
+use super::*;
+use core::fmt::Display;
+
+/// The errors related to taking derivatives by chain rule.
+#[derive(Debug)]
+pub enum Error {
+ /// An invalid situation happens.
+ Invalid,
+}
+
+impl Display for Error {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ match self {
+ Self::Invalid => write!(f, "invalid"),
+ }
+ }
+}
+
+impl std::error::Error for Error {}
+
+/// A default implementation for the [`Chain`] trait.
+#[derive(Debug, Clone, Default)]
+pub struct DefaultChain {}
+
+impl Chain for DefaultChain {
+ type Error = Error;
+
+ fn unit() -> Self {
+ todo!()
+ }
+
+ fn chain(&mut self, _t: usize) {
+ todo!()
+ }
+
+ fn epsilon(&self) -> bool {
+ todo!()
+ }
+}
diff --git a/chain/src/lib.rs b/chain/src/lib.rs
index 0ec4d4c..4e21e1d 100644
--- a/chain/src/lib.rs
+++ b/chain/src/lib.rs
@@ -1,3 +1,4 @@
+#![warn(missing_docs)]
//! This package implements the core algorithm of the entire
//! workspace: parsing with derivatives by means of the chain rule and
//! regular nulling languages.
@@ -7,19 +8,43 @@
//! think is the essence of this algorithm, the chain-rule for
//! derivatives of languages.
-pub mod grammar;
+pub mod atom;
-pub fn add(left: usize, right: usize) -> usize {
- left + right
-}
+// TODO: Define errors.
+
+/// The expected behaviours of a language which can take derivatives
+/// by chain rule.
+pub trait Chain: Default {
+ /// The implementations should choose a type to represent errors.
+ type Error: std::error::Error;
-#[cfg(test)]
-mod tests {
- use super::*;
+ /// Represents the language that is present after we parse the
+    /// empty string, that is, the initial configuration of the
+ /// language. This may or may not be different from what
+ /// `Default::default` gives.
+ fn unit() -> Self;
- #[test]
- fn it_works() {
- let result = add(2, 2);
- assert_eq!(result, 4);
- }
+ /// Take the derivative by a terminal symbol.
+ ///
+ /// This takes care of the union and the prepending operations.
+ ///
+ /// # A little remark about the design
+ ///
+    /// I considered separating the different operations (like the
+    /// union, the prepending, and single derivatives) and then
+    /// defining a function to put everything together.  But I think
+    /// that design is not convenient to use.  Also, I really do not
+    /// need those operations for anything other than providing this
+    /// derivative operation, so why define them?  Putting everything
+    /// together may also reduce the number of bugs caused by wrong
+    /// uses of those component functions, and can reduce the amount
+    /// of documentation a library user needs to read in order to
+    /// make use of this trait.  So I ended up with this design.
+ fn chain(&mut self, t: usize);
+
+ /// Return true if and only if the language contains the empty
+ /// string.
+ fn epsilon(&self) -> bool;
}
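+
+// An illustrative sketch, not used by the crate: the simplest
+// non-trivial language satisfying this trait is one that accepts
+// exactly one fixed token.  `ToyChain` and `TOY_TOKEN` are made up
+// for this example.
+#[allow(dead_code)]
+#[derive(Debug, Default)]
+struct ToyChain {
+    seen: usize,
+    dead: bool,
+}
+
+#[allow(dead_code)]
+const TOY_TOKEN: usize = 0;
+
+impl Chain for ToyChain {
+    type Error = std::convert::Infallible;
+
+    fn unit() -> Self {
+        Self::default()
+    }
+
+    fn chain(&mut self, t: usize) {
+        // Any token other than TOY_TOKEN, or any second token, makes
+        // the derivative empty.
+        if t != TOY_TOKEN || self.seen > 0 {
+            self.dead = true;
+        }
+        self.seen += 1;
+    }
+
+    fn epsilon(&self) -> bool {
+        // The derivative contains the empty string exactly when one
+        // TOY_TOKEN, and nothing else, has been consumed.
+        !self.dead && self.seen == 1
+    }
+}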
+
+pub mod default;
diff --git a/chain/src/plan.org b/chain/src/plan.org
index 8c63492..61738a9 100644
--- a/chain/src/plan.org
+++ b/chain/src/plan.org
@@ -2,29 +2,73 @@
#+AUTHOR: Durand
#+DATE: <2022-11-18 Ven 19:57>
-* Things to do [2/7]
+* Things to do [5/10]
- [X] Implement builders for graphs
- [X] Find sets of the first terminals for each non-terminal, in the
grammar module.
-- [-] NFA [4/5]
+- [-] Establish a testing framework for various grammars. [5/6]
+  + [X] One simple grammar
+  + [X] Notes
+  + [X] Parentheses
+  + [X] Pathological left-recursive
+  + [X] Pathological right-recursive
+  + [ ] Pathologically ambiguous
+ # More should be added
+- [X] NFA [4/4]
+ [X] Add regular expression into NFA.
+ [X] Convert a set of regular expressions into a nondeterministic
finite automaton, where non-terminals denote the indices of
regular expressions in the set to substitute.
- + [X] Convert a grammar into its grammar of left-linear closures of
- non-temrinals and their derivatives.
- + [X] Convert nondeterministic finite automata to their null
- closures.
- + [ ] Test more grammars to ensure it works correctly.
-- [ ] Define the Atom trait.
-- [ ] Implement virtual nodes for each derivative of each atom: The
- lack of this step might be the root cause of the failure of the
- previous version of this package.
-- [ ] Implement languages. [0/2]
- + [ ] First implement them as doubly labelled (directed acyclic)
- graphs.
- + [ ] Implement finding derivatives.
+ + [X] Convert a grammar to the language of atomic languages. [2/2]
+ * [X] Convert a grammar into its grammar of left-linear closures
+      of non-terminals and their derivatives.
+ * [X] Convert nondeterministic finite automata to their null
+ closures.
+ + [X] Test more grammars to ensure it works correctly. [5/5]
+ * [X] Test the conversion to left-closure as a set of regular
+ expressions.
+ * [X] Test the conversion of a set of regular expressions into a
+ nondeterministic finite automaton.
+ * [X] Test the closure of null edges, where the nullity of an edge
+ is defined by a function. In our specific case, an edge is null
+ if the grammar symbol that edge represents, either a terminal or
+ a non-terminal, can match an empty string.
+ * [X] Test the removal of empty transitions from nondeterministic
+ finite automata.
+ * [X] Test the removal of dead states, where a state is dead if
+ and only if no other states have an edge into that state.
+- [ ] Refactor [0/3]
+ + [ ] When we pull in some regular language because of the
+ left-linear expansion, we need to mark that branch as coming from
+ left-linear expansions. This is necessary because we cannot
+ follow a left-linearly expanded branch when we take the derivative
+ of a language. We only expand left-linearly when we try to access
+    the atomic languages [s]^{(t)}.  We can mark them by returning a
+    set of nodes which are the beginnings of left-linearly expanded
+    branches.
+ + [ ] An edge of the NFA should carry a label that can be more
+ informative than solely a terminal or a non-terminal.
+  + [ ] Add a mechanism for a grammar to attach labels to edges of
+    the NFA which are not necessarily Copy-able.  These should be
+    stored in a separate array, such that the labels on the edges of
+    the NFA point to the elements of the array.
+- [X] Atom [3/3]
+ + [X] Define the Atom trait.
+ + [X] Implement virtual nodes for each derivative of each atom: The
+ lack of this step might be the root cause of the failure of the
+ previous version of this project.
+ + [X] Test atoms
+- [-] Implement languages. [1/3]
+ + [X] First define a trait with the expected behaviours.
+ + [ ] Then implement them as doubly labelled graphs.
+  + [ ] Then implement finding derivatives by means of the chain rule.
+- [-] Implement forests [0/2]
+ + [-] Define a trait with the expected behaviours.
+  + [ ] Implement them using adjacency lists: we only need one label
+    per edge, we do not wish to exclude duplicate edges, and we do
+    not need to index edges by their labels.  All in all, the labels
+    can be implemented by a simple hash map that maps edges to
+    labels, so a special data type is not needed here.
- [ ] Implement semiring. [0/5]
+ [ ] Define the trait.
+ [ ] Implement the boolean semiring.
@@ -32,6 +76,9 @@
+ [ ] Implement the free semiring.
+ [ ] Compute and record a semiring value when computing
derivatives.
+- [X] Miscellaneous [1/1]
+ + [X] Implement a function for NFA that checks if a given predicate
+ is satisfied for each edge label.
* Atomic Languages
@@ -39,10 +86,6 @@ This describes the behaviours of atomic languages. The atomic
language consists of the null closure of any non-terminal symbol in
the grammar, and their derivatives by terminals and non-terminals.
-* Script for creating GIF animation
-
-[[https://gist.github.com/maelvls/5379127][a gist]]
-
* Derivative Languages
This is the main driving device of the algorithm. Basically, the
@@ -82,6 +125,61 @@ should be easy enough, since we already have the mechanism of graphs,
nondeterministic automata, and semirings. All we need to do is to
combine them together.
+* Lexers
+
+I got this idea from [[https://lists.gnu.org/archive/html/emacs-devel/2022-12/msg01127.html][a discussion on emacs-devel]]. The idea can be
+formulated as
+
+#+begin_quote
+Potentially it allows invocation of a parser with different variants
+of lexers - one mode with block tokens for the exploration of
+project's structure, and another mode for indentation and error
+checking purposes.
+#+end_quote
+
+I think this is the "right" use of lexers. That is to say, the parser
+by default should operate on an abstract type of tokens, which are but
+unsigned integers, and rely on the user application to provide a lexer
+for turning the actual input, like a string, into an iterator of
+tokens. The lexer can choose to do any sort of preprocessing that it
+sees fit, or it can do nothing and just turn characters to unsigned
+integers. In the latter case, this parser generator works on the
+character level.  But, when needed, one can instruct the parser
+generator to work on the level of preprocessed tokens easily.
+
+This has the advantage of allowing a certain degree of flexibility in
+the type of tokens accepted, while keeping the implementation simple
+at the same time: the internal core only has to deal with unsigned
+integers, after all.
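+
+As a rough sketch (illustrative only; the names here are not part of
+any actual API), a character-level lexer in this scheme could be as
+simple as:
+
+#+begin_src rust
+/// Turn a string into an iterator of abstract tokens, where each
+/// token is simply the character's code point as an unsigned
+/// integer.
+fn char_lexer(input: &str) -> impl Iterator<Item = usize> + '_ {
+    input.chars().map(|c| c as usize)
+}
+#+end_src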
+
+* Library for drawing graphs
+
+In the past, I thought that drawing graphs is only a development tool
+for my package, so I can rely on such external tools as GraphViz, as
+that would not be needed for my end-users.
+
+But recently I realized that this is needed not only for the
+development of the package, but for its users as well.  To be
+more specific, the target users are those who want to "play" with
+grammars. So if one cannot view the resulting parse forest diagrams
+easily and interactively, it would be hard to "play" with grammars.
+
+Moreover, the internal workings of the parsing machine might also be
+interesting for users to inspect, whether to debug the program or to
+learn what happens under the hood.  For this, users need to be able
+to view diagrams of the internal directed acyclic graphs.
+
+For all these reasons, I now regard the ability to draw graphs and to
+interact with those graphs as essential to the user experience of
+this package.  As a consequence, I would like to develop a small
+default library for this purpose.  I follow the convention adopted by
+this package here as well, which is to provide a default
+implementation of the functionality, while still leaving room for
+users to substitute other external packages, if so desired, for
+whatever reason.  For example, an external package may offer
+unprecedented performance for drawing graphs in Emacs.  If such a
+package appears, I can easily avail myself of it to draw graphs.
+
* Testing ground
I am in a strong need to test things out. The most important one is
@@ -92,3 +190,21 @@ understanding of the main algorithm is plain wrong.
This is the main reason I started this rewrite of the package.
+* To figure out
+
+I need to figure out how to construct the parse forest from a
+left-most null-closed derivation graph.
+
+Reading through the original paper again, I found that the author
+augmented the atomic languages with annotations of partial derivation
+steps on each edge of the nondeterministic finite automaton. In
+brief, this is what I tried to achieve with some obscure constructs
+such as expanding the reduction information carried by the
+nondeterministic finite automata, in one of the previous versions.  To
+sum up, I really need to carry that information in the first place;
+otherwise the parse forests cannot be reliably constructed later on.
+
+But I really do not want to adopt the approach of the original author.
+I still prefer the idea of a recorder. That is to say, instead of
+constructing the parse forest after the recognition, we shall
+construct the parse forest simultaneously while we are recognizing.
diff --git a/forest/Cargo.toml b/forest/Cargo.toml
new file mode 100644
index 0000000..b88451d
--- /dev/null
+++ b/forest/Cargo.toml
@@ -0,0 +1,9 @@
+[package]
+name = "forest"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+graph = { path = "../graph" }
diff --git a/forest/src/default.rs b/forest/src/default.rs
new file mode 100644
index 0000000..36145f4
--- /dev/null
+++ b/forest/src/default.rs
@@ -0,0 +1,153 @@
+//! This file provides a default implementation for the
+//! [`Forest`][crate::Forest] trait.
+
+use super::*;
+use graph::{error::Error as GraphError, ALGraph, Graph};
+
+use std::collections::{hash_set::Iter, HashMap, HashSet};
+
+/// A default implementation of the [`Forest`][crate::Forest] trait,
+/// backed by an adjacency-list graph along with maps for the labels
+/// and sets for the plug-in and plug-out nodes.
+#[derive(Debug, Clone)]
+pub struct DefaultForest<NodeLabel: GraphLabel, EdgeLabel: GraphLabel> {
+ graph: ALGraph,
+ vertex_labels: HashMap<usize, NodeLabel>,
+ edge_labels: HashMap<(usize, usize), EdgeLabel>,
+ plugins: HashSet<usize>,
+ plugouts: HashSet<usize>,
+}
+
+impl<NodeLabel: GraphLabel, EdgeLabel: GraphLabel> Default for DefaultForest<NodeLabel, EdgeLabel> {
+ fn default() -> Self {
+ Self {
+ graph: Default::default(),
+ vertex_labels: Default::default(),
+ edge_labels: Default::default(),
+ plugins: Default::default(),
+ plugouts: Default::default(),
+ }
+ }
+}
+
+impl<NodeLabel: GraphLabel, EdgeLabel: GraphLabel> Graph for DefaultForest<NodeLabel, EdgeLabel> {
+ type Iter<'a> = <ALGraph as Graph>::Iter<'a>
+ where
+ Self: 'a;
+
+ fn is_empty(&self) -> bool {
+ self.graph.is_empty()
+ }
+
+ fn nodes_len(&self) -> usize {
+ self.graph.nodes_len()
+ }
+
+ fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, GraphError> {
+ self.graph.children_of(node_id)
+ }
+
+ fn degree(&self, node_id: usize) -> Result<usize, GraphError> {
+ self.graph.degree(node_id)
+ }
+
+ fn is_empty_node(&self, node_id: usize) -> Result<bool, GraphError> {
+ self.graph.is_empty_node(node_id)
+ }
+
+ fn has_edge(&self, source: usize, target: usize) -> Result<bool, GraphError> {
+ self.graph.has_edge(source, target)
+ }
+
+ fn replace_by_builder(&mut self, _builder: impl graph::Builder<Result = Self>) {
+ unimplemented!()
+ }
+}
+
+impl<NodeLabel: GraphLabel, EdgeLabel: GraphLabel> ExtGraph
+ for DefaultForest<NodeLabel, EdgeLabel>
+{
+ fn extend(&mut self, edges: impl IntoIterator<Item = usize>) -> Result<usize, GraphError> {
+ self.graph.extend(edges)
+ }
+}
+
+/// An iterator over the plug-in or plug-out nodes of a
+/// [`DefaultForest`].
+#[derive(Debug)]
+pub struct LabelIter<'a> {
+ set_iter: Iter<'a, usize>,
+}
+
+impl<'a> Iterator for LabelIter<'a> {
+ type Item = usize;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ self.set_iter.next().copied()
+ }
+
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ self.set_iter.size_hint()
+ }
+}
+
+impl<'a> ExactSizeIterator for LabelIter<'a> {
+ fn len(&self) -> usize {
+ let (lower, upper) = self.size_hint();
+ // Note: This assertion is overly defensive, but it checks the
+ // invariant guaranteed by the trait.
+ debug_assert!(upper == Some(lower));
+ lower
+ }
+}
+
+impl<'a> From<Iter<'a, usize>> for LabelIter<'a> {
+ fn from(set_iter: Iter<'a, usize>) -> Self {
+ Self { set_iter }
+ }
+}
+
+impl<NodeLabel: GraphLabel, EdgeLabel: GraphLabel> Forest<NodeLabel, EdgeLabel>
+ for DefaultForest<NodeLabel, EdgeLabel>
+{
+ type PluginIter<'a> = LabelIter<'a>
+ where
+ Self: 'a;
+
+ type PlugoutIter<'a> = LabelIter<'a>
+ where
+ Self: 'a;
+
+ fn plugins(&self) -> Self::PluginIter<'_> {
+ self.plugins.iter().into()
+ }
+
+ fn plugouts(&self) -> Self::PlugoutIter<'_> {
+ self.plugouts.iter().into()
+ }
+
+ fn plug(&mut self, other: &Self) -> Result<(), Error> {
+ // PLAN: Clone the `other` forest to `self`, adjust the
+ // indices for edges, and then add edges between plugs.
+ //
+ // Since I cannot touch the underlying nodes directly, I have
+ // to add the nodes and edges individually.
+
+ todo!()
+ }
+
+ fn vertex_label(&self, node_id: usize) -> Result<Option<NodeLabel>, Error> {
+ if node_id >= self.nodes_len() {
+ return Err(Error::IndexOutOfBounds(node_id, self.nodes_len()));
+ }
+
+ Ok(self.vertex_labels.get(&node_id).copied())
+ }
+
+ fn edge_label(&self, source: usize, target: usize) -> Result<Option<EdgeLabel>, Error> {
+ if source >= self.nodes_len() {
+ return Err(Error::IndexOutOfBounds(source, self.nodes_len()));
+ }
+
+ if target >= self.nodes_len() {
+ return Err(Error::IndexOutOfBounds(target, self.nodes_len()));
+ }
+
+ Ok(self.edge_labels.get(&(source, target)).copied())
+ }
+}
diff --git a/forest/src/lib.rs b/forest/src/lib.rs
new file mode 100644
index 0000000..527a78c
--- /dev/null
+++ b/forest/src/lib.rs
@@ -0,0 +1,105 @@
+//! This file defines the expected behaviours of forests, and provides
+//! a default implementation for forests.
+//!
+//! The default forest actually implements the so-called shared packed
+//! parse forest, or SPPF. It packs sub-trees with the same parents
+//! under the same branch, and lets nodes from different branches
+//! share the same children, and hence the name.
+//!
+//! What is special here is that the forests are marked with some
+//! outgoing and incoming plugs.  These plugs are used to join
+//! different fragments of forests together.
+
+use graph::{ExtGraph, GraphLabel};
+
+use core::fmt::Display;
+
+/// The type of errors that can occur for forest operations.
+#[derive(Debug)]
+pub enum Error {
+    /// An index is out of bounds: the first component is the index,
+    /// and the second is the bound.
+    IndexOutOfBounds(usize, usize),
+}
+
+impl Display for Error {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ match self {
+ Error::IndexOutOfBounds(index, bound) => {
+ write!(f, "index {index} is out of bound {bound}")
+ }
+ }
+ }
+}
+
+impl std::error::Error for Error {}
+
+/// A builder of a forest.
+pub trait ForestBuilder<NodeLabel: GraphLabel, EdgeLabel: GraphLabel> {
+ /// The type of the resulting forest.
+ type Output: Forest<NodeLabel, EdgeLabel>;
+
+ /// Construct a new builder with only one node with the given
+ /// label.
+ fn new_leaf(label: NodeLabel) -> Self;
+
+    /// Add a child to the builder with the given labels for the new
+    /// node and the added edge.
+    ///
+    /// All plug-out nodes within the builder should receive a new
+    /// child with the specified labels, hence multiple children
+    /// might be added, and the plug-out nodes should be modified
+    /// accordingly.  A usage sketch follows the trait definition
+    /// below.
+    fn add_children(&mut self, vertex_label: NodeLabel, edge_label: EdgeLabel);
+
+ /// Build the forest.
+ fn build(self) -> Self::Output;
+
+ /// Build the forest from a reference.
+ fn build_ref(&self) -> Self::Output;
+}
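+
+// A hypothetical usage sketch of the builder, where `SomeBuilder`
+// stands for any implementation of `ForestBuilder`: grow a single
+// path `root -> child` and then build the forest.
+//
+//     let mut builder = SomeBuilder::new_leaf(root_label);
+//     builder.add_children(child_label, edge_label);
+//     let forest = builder.build();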
+
+/// The expected behaviours of a forest.
+///
+/// Note that it contains a "stripped-down" version of the labelled
+/// graphs.
+pub trait Forest<NodeLabel: GraphLabel, EdgeLabel: GraphLabel>: ExtGraph {
+ /// Type of iterator of plug-in nodes.
+ type PluginIter<'a>: Iterator<Item = usize> + 'a
+ where
+ Self: 'a;
+
+ /// Type of iterator of plug-out nodes.
+ type PlugoutIter<'a>: Iterator<Item = usize> + 'a
+ where
+ Self: 'a;
+
+    /// Return the plug-in nodes.
+ fn plugins(&self) -> Self::PluginIter<'_>;
+
+    /// Return the plug-out nodes.
+ fn plugouts(&self) -> Self::PlugoutIter<'_>;
+
+ /// Plug another forest onto this forest.
+ ///
+ /// The plug-out nodes of this forest will be joined onto the
+ /// plug-in nodes of the joining forest.
+ ///
+ /// # Performance warning
+ ///
+ /// It is recommended to only call this function with a "small"
+    /// `other`, as this function might copy the whole graph of
+    /// `other`, node by node and edge by edge.
+ fn plug(&mut self, other: &Self) -> Result<(), Error>;
+
+ /// Return the vertex label.
+ ///
+ /// A vertex may not have labels.
+ fn vertex_label(&self, node_id: usize) -> Result<Option<NodeLabel>, Error>;
+
+ /// Return the edge label.
+ ///
+ /// An edge may have no labels. If there is no edge from the
+ /// given source to the given target, then `Ok(None)` is returned
+ /// as well.
+ fn edge_label(&self, source: usize, target: usize) -> Result<Option<EdgeLabel>, Error>;
+}
+
+pub mod default;
diff --git a/grammar/Cargo.toml b/grammar/Cargo.toml
new file mode 100644
index 0000000..20c4b48
--- /dev/null
+++ b/grammar/Cargo.toml
@@ -0,0 +1,20 @@
+[package]
+name = "grammar"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[features]
+default = []
+
+# This flag exposes the module "test_grammar_helper" which provides
+# some grammars for testing.
+test-helper = []
+
+[dependencies]
+nfa = { path = "../nfa" }
+graph = { path = "../graph" }
+
+[dev-dependencies]
+grammar = { path = ".", features = ["test-helper"] }
diff --git a/chain/src/grammar.rs b/grammar/src/lib.rs
index 287fbca..55adc9a 100644
--- a/chain/src/grammar.rs
+++ b/grammar/src/lib.rs
@@ -8,21 +8,19 @@
// words, the current focus is not on the optimisations, whereas
// scanners are for optimisations only, so to speak.
-#![allow(unused_imports)]
+// TODO: Separate contents into modules.
+
use nfa::{
default::{
nfa::DefaultNFA,
- regex::{DefaultRegParser, DefaultRegex, ParseDirection, ParseError, RegexType},
+ regex::{DefaultRegex, ParseError, RegexType},
},
- DOption, DesRec, Nfa, Regex, SoC,
+ DOption, Nfa, Regex, SoC,
};
use graph::{adlist::ALGBuilder, builder::Builder, Graph};
-use std::{
- collections::HashSet,
- fmt::{Display, Write},
-};
+use std::{collections::HashSet, fmt::Display};
/// The type of a terminal.
///
@@ -118,7 +116,15 @@ impl Display for Error {
}
}
-impl std::error::Error for Error {}
+impl std::error::Error for Error {
+ fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+ if let Error::NFAFail(error) = self {
+ Some(error)
+ } else {
+ None
+ }
+ }
+}
/// A rule is a regular expression of terminals or non-terminals.
#[derive(Debug, Clone)]
@@ -143,11 +149,29 @@ impl Rule {
/// The type of a grammar.
#[derive(Debug, Clone, Default)]
pub struct Grammar {
+ /// A list of terminals.
ter: Vec<Terminal>,
+ /// A list of non-terminals.
non: Vec<Nonterminal>,
+ /// A list of rules.
+ ///
+ /// The length of the list must match that of the list of
+ /// non-terminals.
rules: Vec<Rule>,
+ // The following two attributes are empty until we call
+ // `compute_firsts` on the grammar.
+ /// The list of sets of "first terminals".
+ ///
+ /// The length must match that of the list of non-terminals.
firsts: Vec<HashSet<Option<usize>>>,
+ /// The list of lists of nodes that are reachable after a nullable
+ /// transition in the regular expression.
+ ///
+ /// The length must match that of the list of non-terminals.
first_nodes: Vec<Vec<usize>>,
+ // The following attribute is empty until we call `left_closure`
+ // on the grammar.
+ left_closure_branches: HashSet<usize>,
}
/// A private type to aid the recursive looping of regular
@@ -192,12 +216,15 @@ impl Grammar {
.map(|rule| Vec::with_capacity(rule.len()))
.collect();
+ let left_closure_branches = HashSet::default();
+
Self {
ter,
non,
rules,
firsts,
first_nodes,
+ left_closure_branches,
}
}
@@ -293,6 +320,15 @@ impl Grammar {
/// # Related
///
/// This is the inverse of [`pack_tnt`][Grammar::pack_tnt].
+ ///
+ /// # Errors
+ ///
+    /// This function is supposed to return only one type of error,
+ /// namely, the IndexOutOfBounds error that results from a bounds
+ /// check. Breaking this is breaking the guarantee of this
+ /// function, and is considered a bug. This behaviour can and
+ /// should be tested. But I have not found a convenient test
+ /// method for testing various grammars.
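+    ///
+    /// # Example (illustrative)
+    ///
+    /// Assuming the usual packing convention here, with terminals
+    /// packed before non-terminals, a grammar with exactly two
+    /// terminals would give `unpack_tnt(1) == Ok(TNT::Ter(1))` and
+    /// `unpack_tnt(2) == Ok(TNT::Non(0))`.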
#[inline]
pub fn unpack_tnt(&self, flat: usize) -> Result<TNT, Error> {
let ter_num = self.ter.len();
@@ -329,6 +365,13 @@ impl Grammar {
.iter())
}
+ /// Return a hash set that contains all nodes in the set of
+ /// left-closure regular languages that are added because of the
+ /// left-linear expansion.
+ pub fn left_closure_branches(&self) -> &HashSet<usize> {
+ &self.left_closure_branches
+ }
+
/// Compute the set of terminals that can appear as the first
/// terminal in some left-linear derivation of a non-terminal, for
/// every non-terminal.
@@ -865,11 +908,18 @@ impl Grammar {
builder.add_edge(0, non_lit_index, ()).unwrap();
+ // If this non-terminal is nullable, add an empty variant.
+ if self.is_nullable(n)? {
+ local_result.push(Empty);
+ let empty_index = builder.add_vertex();
+ builder.add_edge(0, empty_index, ()).unwrap();
+ }
+
for first_node in self.first_nodes_of(n)?.copied() {
assert!(first_node < parents.len());
let tnt = match label!(first_node) {
- Lit(tnt) => tnt,
+ Lit(tnt) => Lit(tnt),
_ => continue,
};
@@ -893,38 +943,40 @@ impl Grammar {
};
if parents_chain.is_empty() {
- local_result.push(Lit(tnt));
+ local_result.push(tnt);
let lit_index = builder.add_vertex();
builder.add_edge(0, lit_index, ()).unwrap();
continue;
}
- assert!(parents_chain.first().unwrap().0 == regex_root);
+ assert_eq!(parents_chain.first().unwrap().0, regex_root);
// A different, "more local", root.
- let mut root: usize;
+ let mut local_root: usize;
// Handle the direct parent
let (parent_node, parent_edge_index) = parents_chain.pop().unwrap();
match label!(parent_node) {
Kleene | Plus => {
- local_result.extend([Empty, Lit(tnt)]);
+ // TODO: If parent_edge_index is 0, make a
+ // Plus node instead.
+ local_result.extend([Empty, tnt]);
- root = builder.add_vertex();
+ local_root = builder.add_vertex();
let lit_index = builder.add_vertex();
- builder.add_edge(root, lit_index, ()).unwrap();
+ builder.add_edge(local_root, lit_index, ()).unwrap();
let iterator = children!(parent_node);
for index in iterator.clone().skip(parent_edge_index + 1) {
- df_copy!(root, index);
+ df_copy!(local_root, index);
}
local_result.push(Kleene);
let new_parent = builder.add_vertex();
- builder.add_edge(root, new_parent, ()).unwrap();
+ builder.add_edge(local_root, new_parent, ()).unwrap();
for index in iterator {
df_copy!(new_parent, index);
@@ -932,19 +984,19 @@ impl Grammar {
}
Or => {
- local_result.push(Lit(tnt));
- root = builder.add_vertex();
+ local_result.push(tnt);
+ local_root = builder.add_vertex();
}
Optional | Empty => {
// If this path is taken, it should not be
// optional.
- local_result.extend([Empty, Lit(tnt)]);
- root = builder.add_vertex();
+ local_result.extend([Empty, tnt]);
+ local_root = builder.add_vertex();
let lit_index = builder.add_vertex();
- builder.add_edge(root, lit_index, ()).unwrap();
+ builder.add_edge(local_root, lit_index, ()).unwrap();
for index in children!(parent_node).skip(parent_edge_index + 1) {
- df_copy!(root, index);
+ df_copy!(local_root, index);
}
}
Paren | Lit(_) => unreachable!(),
@@ -957,21 +1009,24 @@ impl Grammar {
match node_type {
Kleene | Plus => {
+ // TODO: If edge_index is 0, then just
+ // make this a Plus node.
+
local_result.push(Empty);
let new_index = builder.add_vertex();
- builder.add_edge(new_index, root, ()).unwrap();
+ builder.add_edge(new_index, local_root, ()).unwrap();
- root = new_index;
+ local_root = new_index;
let iterator = children!(node);
for index in iterator.clone().skip(edge_index + 1) {
- df_copy!(root, index);
+ df_copy!(local_root, index);
}
local_result.push(Kleene);
let new_parent = builder.add_vertex();
- builder.add_edge(root, new_parent, ()).unwrap();
+ builder.add_edge(local_root, new_parent, ()).unwrap();
for index in iterator {
df_copy!(new_parent, index);
@@ -981,18 +1036,18 @@ impl Grammar {
RegexType::Optional | RegexType::Empty => {
local_result.push(Empty);
let new_index = builder.add_vertex();
- builder.add_edge(new_index, root, ()).unwrap();
- root = new_index;
+ builder.add_edge(new_index, local_root, ()).unwrap();
+ local_root = new_index;
for index in children!(node).skip(edge_index + 1) {
- df_copy!(root, index);
+ df_copy!(local_root, index);
}
}
RegexType::Paren | RegexType::Lit(_) => unreachable!(),
}
}
- builder.add_edge(0, root, ()).unwrap();
+ builder.add_edge(0, local_root, ()).unwrap();
}
local_result.shrink_to_fit();
@@ -1040,7 +1095,9 @@ impl Grammar {
TNT::Non(n) => Ok(SoC::Sub(n)),
};
- DefaultNFA::to_nfa(closure, label_transform).map_err(Into::into)
+ let nfa = DefaultNFA::to_nfa(closure, label_transform, Some(TNT::Non(0)))?;
+
+ Ok(nfa)
}
}
@@ -1066,182 +1123,9 @@ impl Display for Grammar {
}
}
-#[cfg(test)]
-mod test_grammar_helper {
- use super::*;
- use nfa::default::regex::{DefaultRegParser, ParseDirection, ParseError, RegexType};
- use std::fmt::Write;
-
- // Construct a grammar to test
- pub fn new_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
- let ter = vec![Terminal::new("a".to_owned()), Terminal::new("b".to_owned())];
- let non = vec![
- Nonterminal("start".to_owned()),
- Nonterminal("end".to_owned()),
- ];
-
- /// A function to scan the inputs.
- fn scan_tnt(
- parser: &DefaultRegParser<TNT>,
- input: &str,
- ) -> Result<Option<(usize, RegexType<TNT>, ParseDirection)>, ParseError> {
- use ParseDirection::*;
- use RegexType::*;
- use TNT::*;
-
- let mut chars = input.chars();
-
- if let Some(first) = chars.next() {
- match first {
- '*' => Ok(Some((1, Kleene, Right))),
- '+' => Ok(Some((1, Plus, Right))),
- '?' => Ok(Some((1, Optional, Right))),
- '|' => Ok(Some((1, Empty, Up))),
- '(' => Ok(Some((1, Or, Down))),
- ')' => Ok(Some((1, Paren, Up))),
- 'T' => {
- let mut name = String::new();
- let mut len = 1;
-
- while let Some(c) = chars.next() {
- if ('a'..='z').contains(&c) {
- len += 1;
- write!(name, "{c}").map_err(|_| ParseError::InvalidCharacter(c))?;
- } else {
- break;
- }
- }
-
- if let Some(t) = parser.query(&name, true) {
- Ok(Some((len, Lit(Ter(t)), Right)))
- } else {
- Err(ParseError::InvalidCharacter(first))
- }
- }
- 'N' => {
- let mut name = String::new();
- let mut len = 1;
-
- while let Some(c) = chars.next() {
- if ('a'..='z').contains(&c) {
- len += 1;
- write!(name, "{c}").map_err(|_| ParseError::InvalidCharacter(c))?;
- } else {
- break;
- }
- }
-
- if let Some(n) = parser.query(&name, false) {
- Ok(Some((len, Lit(Non(n)), Right)))
- } else {
- Err(ParseError::InvalidCharacter(first))
- }
- }
- _ => Err(ParseError::InvalidCharacter(first)),
- }
- } else {
- Ok(None)
- }
- }
-
- let mut regex_parser: DefaultRegParser<TNT> = Default::default();
-
- regex_parser.add_tnt("a", true);
- regex_parser.add_tnt("b", true);
- regex_parser.add_tnt("start", false);
- regex_parser.add_tnt("end", false);
-
- let regex_parser = regex_parser;
-
- let rule1 = Rule {
- regex: regex_parser
- .parse("Ta*Tb+Nend+", Box::new(scan_tnt), true)?
- .ok_or(ParseError::Invalid)?
- .0,
- };
-
- let rule2 = Rule {
- regex: regex_parser
- .parse("Nstart?Nend*", Box::new(scan_tnt), true)?
- .ok_or(ParseError::Invalid)?
- .0,
- };
-
- let rules = vec![rule1, rule2];
-
- Ok(Grammar::new(ter, non, rules))
- }
-}
-
-#[cfg(test)]
-mod test_grammar_display {
- use super::test_grammar_helper::new_grammar;
-
- #[test]
- fn test_display() -> Result<(), Box<dyn std::error::Error>> {
- println!("{}", new_grammar()?);
-
- Ok(())
- }
-}
-
-#[cfg(test)]
-mod test_grammar_firsts {
- use super::test_grammar_helper::new_grammar;
- use super::*;
-
- #[test]
- fn test_firsts() -> Result<(), Box<dyn std::error::Error>> {
- let mut grammar = new_grammar()?;
-
- grammar.compute_firsts()?;
-
- println!("grammar firsts: {:?}", grammar.firsts);
- println!("grammar first nodes: {:?}", grammar.first_nodes);
-
- Ok(())
- }
-}
+/// A helper module that provides some grammars for testing.
+#[cfg(feature = "test-helper")]
+pub mod test_grammar_helper;
#[cfg(test)]
-mod test_grammar_left_closure {
- use super::test_grammar_helper::new_grammar;
- use super::*;
-
- pub fn new_closure_regex(
- grammar: &mut Grammar,
- ) -> Result<Vec<DefaultRegex<TNT>>, Box<dyn std::error::Error>> {
- grammar.compute_firsts()?;
-
- println!("grammar firsts: {:?}", grammar.firsts);
- println!("grammar first nodes: {:?}", grammar.first_nodes);
- println!("grammar:");
- println!("{grammar}");
-
- grammar.left_closure().map_err(Into::into)
- }
-
- #[test]
- fn test_regex() -> Result<(), Box<dyn std::error::Error>> {
- let mut grammar = new_grammar()?;
-
- let vec_of_regexps = new_closure_regex(&mut grammar)?;
-
- for regex in &vec_of_regexps {
- println!("regex: {}", regex.to_string_with(|tnt| format!("{tnt}"))?);
- // println!("regex: {regex:?}",);
- println!("regex len = {}", regex.nodes_len());
- }
-
- Ok(())
- }
-
- #[test]
- fn test_nfa() -> Result<(), Box<dyn std::error::Error>> {
- let mut grammar = new_grammar()?;
- let closure = new_closure_regex(&mut grammar)?;
- let nfa = grammar.left_closure_to_nfa(&closure)?;
- // TODO: print the nfa out
- Ok(())
- }
-}
+mod tests;
diff --git a/grammar/src/test_grammar_helper.rs b/grammar/src/test_grammar_helper.rs
new file mode 100644
index 0000000..c236952
--- /dev/null
+++ b/grammar/src/test_grammar_helper.rs
@@ -0,0 +1,368 @@
+//! This module provides some grammars for testing.
+
+use super::*;
+use nfa::{
+ default::regex::{DefaultRegParser, ParseDirection, ParseError, RegexType},
+ DesRec,
+};
+use std::fmt::Write;
+
+/// A helper function to compute the first sets of a grammar and
+/// return the left-closure of that grammar.
+pub fn new_closure_regex(
+ grammar: &mut Grammar,
+) -> Result<Vec<DefaultRegex<TNT>>, Box<dyn std::error::Error>> {
+ grammar.compute_firsts()?;
+
+ grammar.left_closure().map_err(Into::into)
+}
+
+/// A function to scan the inputs.
+fn scan_tnt(
+ parser: &DefaultRegParser<TNT>,
+ input: &str,
+) -> Result<Option<(usize, RegexType<TNT>, ParseDirection)>, ParseError> {
+ use ParseDirection::*;
+ use RegexType::*;
+ use TNT::*;
+
+ let mut chars = input.chars();
+
+ let mut len = 1;
+
+ while let Some(first) = chars.next() {
+ match first {
+ ' ' => {
+ // ignore whitespaces
+ len += 1;
+ }
+ '*' => return Ok(Some((len, Kleene, Right))),
+ '+' => return Ok(Some((len, Plus, Right))),
+ '?' => return Ok(Some((len, Optional, Right))),
+ '|' => return Ok(Some((len, Empty, Up))),
+ '(' => return Ok(Some((len, Or, Down))),
+ ')' => return Ok(Some((len, Paren, Up))),
+ 'T' => {
+ let mut name = String::new();
+
+ while let Some(c) = chars.next() {
+ if ('a'..='z').contains(&c) || ('A'..='Z').contains(&c) {
+ len += 1;
+ write!(name, "{c}").map_err(|_| ParseError::InvalidCharacter(c))?;
+ } else {
+ break;
+ }
+ }
+
+ if let Some(t) = parser.query(&name, true) {
+ return Ok(Some((len, Lit(Ter(t)), Right)));
+ } else {
+ return Err(ParseError::InvalidCharacter(first));
+ }
+ }
+ 'N' => {
+ let mut name = String::new();
+
+ while let Some(c) = chars.next() {
+ if ('a'..='z').contains(&c) || ('A'..='Z').contains(&c) {
+ len += 1;
+ write!(name, "{c}").map_err(|_| ParseError::InvalidCharacter(c))?;
+ } else {
+ break;
+ }
+ }
+
+ if let Some(n) = parser.query(&name, false) {
+ return Ok(Some((len, Lit(Non(n)), Right)));
+ } else {
+ return Err(ParseError::InvalidCharacter(first));
+ }
+ }
+ _ => {
+ return Err(ParseError::InvalidCharacter(first));
+ }
+ }
+ }
+
+ Ok(None)
+}
+
+/// Return a simple testing grammar.
+#[allow(dead_code)]
+pub fn new_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
+ let ter = vec![Terminal::new("a".to_owned()), Terminal::new("b".to_owned())];
+ let non = vec![
+ Nonterminal("start".to_owned()),
+ Nonterminal("end".to_owned()),
+ ];
+
+ let mut regex_parser: DefaultRegParser<TNT> = Default::default();
+
+ regex_parser.add_tnt("a", true);
+ regex_parser.add_tnt("b", true);
+ regex_parser.add_tnt("start", false);
+ regex_parser.add_tnt("end", false);
+
+ let regex_parser = regex_parser;
+
+ let rule1 = Rule {
+ regex: regex_parser
+ .parse("Ta*Tb+Nend+", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule2 = Rule {
+ regex: regex_parser
+ .parse("Nstart?Nend*", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rules = vec![rule1, rule2];
+
+ Ok(Grammar::new(ter, non, rules))
+}
+
+/// Return a grammar that might serve as the grammar for my notes,
+/// somehow.
+#[allow(dead_code)]
+pub fn new_notes_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
+ let ter = vec![
+ Terminal::new("NL".to_owned()),
+ Terminal::new("SP".to_owned()),
+ Terminal::new("CON".to_owned()),
+ Terminal::new("STAR".to_owned()),
+ Terminal::new("NOTE".to_owned()),
+ Terminal::new("PRICE".to_owned()),
+ Terminal::new("DIGIT".to_owned()),
+ ];
+ let non = vec![
+ Nonterminal("document".to_owned()),
+ Nonterminal("item".to_owned()),
+ Nonterminal("header".to_owned()),
+ Nonterminal("title".to_owned()),
+ Nonterminal("note".to_owned()),
+ Nonterminal("note-content".to_owned()),
+ Nonterminal("price".to_owned()),
+ ];
+
+ let mut regex_parser: DefaultRegParser<TNT> = Default::default();
+
+ regex_parser.add_tnt("NL", true);
+ regex_parser.add_tnt("SP", true);
+ regex_parser.add_tnt("CON", true);
+ regex_parser.add_tnt("STAR", true);
+ regex_parser.add_tnt("note", true);
+ regex_parser.add_tnt("price", true);
+ regex_parser.add_tnt("DIGIT", true);
+ regex_parser.add_tnt("document", false);
+ regex_parser.add_tnt("item", false);
+ regex_parser.add_tnt("header", false);
+ regex_parser.add_tnt("title", false);
+ regex_parser.add_tnt("note", false);
+ regex_parser.add_tnt("notecontent", false);
+ regex_parser.add_tnt("price", false);
+
+ let regex_parser = regex_parser;
+
+ let rule1 = Rule {
+ regex: regex_parser
+ .parse("Nitem+", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule2 = Rule {
+ regex: regex_parser
+ .parse("Nheader Nprice?Nnote?", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule3 = Rule {
+ regex: regex_parser
+ .parse("TSTAR?TSP Ntitle TNL (TSP|TNL)*", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule4 = Rule {
+ regex: regex_parser
+ .parse("TCON+", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule5 = Rule {
+ regex: regex_parser
+ .parse(
+ "Tnote Nnotecontent TNL (TSP|TNL)*",
+ Box::new(scan_tnt),
+ true,
+ )?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule6 = Rule {
+ regex: regex_parser
+ .parse("TCON+", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule7 = Rule {
+ regex: regex_parser
+ .parse(
+ "Tprice TSP TDIGIT+ TNL(TSP | TNL)+",
+ Box::new(scan_tnt),
+ true,
+ )?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rules = vec![rule1, rule2, rule3, rule4, rule5, rule6, rule7];
+
+ Ok(Grammar::new(ter, non, rules))
+}
+
+/// Return a grammar that can express parentheses.
+#[allow(dead_code)]
+pub fn new_paren_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
+ let ter = vec![
+ Terminal::new("LEFT".to_owned()),
+ Terminal::new("RIGHT".to_owned()),
+ Terminal::new("A".to_owned()),
+ ];
+ let non = vec![
+ Nonterminal("start".to_owned()),
+ Nonterminal("content".to_owned()),
+ ];
+
+ let mut regex_parser: DefaultRegParser<TNT> = Default::default();
+
+ regex_parser.add_tnt("LEFT", true);
+ regex_parser.add_tnt("RIGHT", true);
+ regex_parser.add_tnt("A", true);
+ regex_parser.add_tnt("start", false);
+ regex_parser.add_tnt("content", false);
+
+ let regex_parser = regex_parser;
+
+ let rule1 = Rule {
+ regex: regex_parser
+ .parse(
+ "TLEFT Nstart TRIGHT | Ncontent Nstart | ",
+ Box::new(scan_tnt),
+ true,
+ )?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule2 = Rule {
+ regex: regex_parser
+ .parse("TA +", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rules = vec![rule1, rule2];
+
+ Ok(Grammar::new(ter, non, rules))
+}
+
+/// Return a left recursive grammar.
+#[allow(dead_code)]
+pub fn new_left_recursive_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
+ let ter = vec![Terminal::new("B".to_owned()), Terminal::new("C".to_owned())];
+ let non = vec![
+ Nonterminal("start".to_owned()),
+ Nonterminal("S".to_owned()),
+ Nonterminal("A".to_owned()),
+ ];
+
+ let mut regex_parser: DefaultRegParser<TNT> = Default::default();
+
+ regex_parser.add_tnt("B", true);
+ regex_parser.add_tnt("C", true);
+ regex_parser.add_tnt("start", false);
+ regex_parser.add_tnt("S", false);
+ regex_parser.add_tnt("A", false);
+
+ let regex_parser = regex_parser;
+
+ let rule1 = Rule {
+ regex: regex_parser
+ .parse("NA NS TC", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule2 = Rule {
+ regex: regex_parser
+ .parse("TB | Nstart", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule3 = Rule {
+ regex: regex_parser
+ .parse("()", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rules = vec![rule1, rule2, rule3];
+
+ Ok(Grammar::new(ter, non, rules))
+}
+
+/// Return a right recursive grammar.
+#[allow(dead_code)]
+pub fn new_right_recursive_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
+ let ter = vec![Terminal::new("B".to_owned()), Terminal::new("C".to_owned())];
+ let non = vec![
+ Nonterminal("start".to_owned()),
+ Nonterminal("S".to_owned()),
+ Nonterminal("A".to_owned()),
+ ];
+
+ let mut regex_parser: DefaultRegParser<TNT> = Default::default();
+
+ regex_parser.add_tnt("B", true);
+ regex_parser.add_tnt("C", true);
+ regex_parser.add_tnt("start", false);
+ regex_parser.add_tnt("S", false);
+ regex_parser.add_tnt("A", false);
+
+ let regex_parser = regex_parser;
+
+ let rule1 = Rule {
+ regex: regex_parser
+ .parse("NS TC NA|TB Nstart", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule2 = Rule {
+ regex: regex_parser
+ .parse("TB", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule3 = Rule {
+ regex: regex_parser
+ .parse("NA|", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rules = vec![rule1, rule2, rule3];
+
+ Ok(Grammar::new(ter, non, rules))
+}
+// TODO: more grammars
diff --git a/grammar/src/tests/mod.rs b/grammar/src/tests/mod.rs
new file mode 100644
index 0000000..55eedff
--- /dev/null
+++ b/grammar/src/tests/mod.rs
@@ -0,0 +1,30 @@
+#[cfg(test)]
+mod test_grammar_display {
+ use crate::test_grammar_helper::new_grammar;
+
+ #[test]
+ fn test_display() -> Result<(), Box<dyn std::error::Error>> {
+ new_grammar().map(|g| println!("{g}"))
+ }
+}
+
+#[cfg(test)]
+mod test_grammar_firsts {
+ use crate::test_grammar_helper::new_grammar;
+ use std::io::Write;
+
+ #[test]
+ fn test_firsts() -> Result<(), Box<dyn std::error::Error>> {
+ let mut grammar = new_grammar()?;
+
+ grammar.compute_firsts()?;
+
+ let mut lock = std::io::stdout().lock();
+
+ writeln!(lock, "grammar firsts: {:?}", grammar.firsts)?;
+ writeln!(lock, "grammar first nodes: {:?}", grammar.first_nodes).map_err(Into::into)
+ }
+}
+
+#[cfg(test)]
+mod test_grammar_left_closure;
diff --git a/grammar/src/tests/test_grammar_left_closure.rs b/grammar/src/tests/test_grammar_left_closure.rs
new file mode 100644
index 0000000..0bc9f4d
--- /dev/null
+++ b/grammar/src/tests/test_grammar_left_closure.rs
@@ -0,0 +1,272 @@
+use crate::test_grammar_helper::*;
+use crate::*;
+use nfa::Nfa;
+use std::{
+ collections::HashSet,
+ io::{stdout, Write},
+};
+
+#[test]
+fn test_regex() -> Result<(), Box<dyn std::error::Error>> {
+ let mut grammar = new_grammar()?;
+
+ let vec_of_regexps = new_closure_regex(&mut grammar)?;
+
+ let mut lock = stdout().lock();
+
+ writeln!(lock, "grammar firsts: {:?}", grammar.firsts)?;
+ writeln!(lock, "grammar first nodes: {:?}", grammar.first_nodes)?;
+ writeln!(lock, "grammar:")?;
+ writeln!(lock, "{grammar}")?;
+
+ for regex in vec_of_regexps.into_iter().skip(1) {
+ writeln!(
+ lock,
+ "regex: {}",
+ regex.to_string_with(|tnt| format!("{tnt}"))?
+ )?;
+ // println!("regex: {regex:?}",);
+ writeln!(lock, "regex len = {}", regex.nodes_len())?;
+ }
+
+ Ok(())
+}
+
+// We ignore this test by default, as it may print to a graphviz
+// file.
+#[ignore]
+#[test]
+fn test_nfa() -> Result<(), Box<dyn std::error::Error>> {
+ let mut grammar = new_notes_grammar()?;
+ let closure = new_closure_regex(&mut grammar)?;
+
+ let mut lock = stdout().lock();
+
+ for regex in closure.iter() {
+ writeln!(
+ lock,
+ "regex: {}",
+ regex.to_string_with(|tnt| {
+ match tnt {
+ TNT::Ter(t) => {
+ format!(
+ "({})",
+ grammar.name_of_tnt(grammar.unpack_tnt(t).unwrap()).unwrap()
+ )
+ }
+ TNT::Non(_) => {
+ // hyper non-terminal
+ format!("H({})", grammar.name_of_tnt(tnt).unwrap())
+ }
+ }
+ })?
+ )?;
+ // println!("regex: {regex:?}",);
+ writeln!(lock, "regex len = {}", regex.nodes_len())?;
+ }
+
+ grammar
+ .left_closure_to_nfa(&closure)
+ .map(|_| ())
+ .map_err(Into::into)
+
+ // let _nfa = grammar.left_closure_to_nfa(&closure)?;
+
+ // writeln!(lock, "Not printing nfa to nfa.gv")?;
+
+ // nfa.print_viz("nfa.gv").map_err(Into::into)
+
+ // Ok(())
+}
+
+#[test]
+#[ignore]
+fn test_remove_epsilon() -> Result<(), Box<dyn std::error::Error>> {
+ let mut lock = stdout().lock();
+
+ let mut grammar = new_paren_grammar()?;
+
+ writeln!(lock, "grammar:")?;
+ writeln!(lock, "{grammar}")?;
+
+ let closure = new_closure_regex(&mut grammar)?;
+
+ let mut accumulator_value: usize = 0;
+
+ for regex in closure.iter() {
+ writeln!(
+ lock,
+ "regex: {}",
+ regex.to_string_with(|tnt| {
+ match tnt {
+ TNT::Ter(t) => {
+ format!(
+ "({})",
+ grammar.name_of_tnt(grammar.unpack_tnt(t).unwrap()).unwrap()
+ )
+ }
+ TNT::Non(_) => {
+ // hyper non-terminal
+ format!("({})", grammar.name_of_tnt(tnt).unwrap())
+ }
+ }
+ })?
+ )?;
+ writeln!(lock, "regex len = {}", regex.nodes_len())?;
+ writeln!(lock, "offset = {accumulator_value}")?;
+
+ accumulator_value += regex.nodes_len();
+ }
+
+ writeln!(lock, "total = {accumulator_value}")?;
+
+ let mut nfa = grammar.left_closure_to_nfa(&closure)?;
+
+ nfa.print_viz("nfa_orig.gv")?;
+
+ nfa.remove_epsilon(|label| label.is_none())?;
+
+ nfa.print_viz("nfa_no_epsilon.gv")?;
+
+ Ok(())
+}
+
+#[test]
+#[ignore]
+fn test_remove_dead() -> Result<(), Box<dyn std::error::Error>> {
+ let mut grammar = new_paren_grammar()?;
+ let closure = new_closure_regex(&mut grammar)?;
+
+ let mut lock = stdout().lock();
+
+ let mut accumulator = 0usize;
+ let mut accumulators = Vec::with_capacity(closure.len() + 1);
+ accumulators.push(accumulator);
+
+ for regex in closure.iter() {
+ writeln!(
+ lock,
+ "regex: {}",
+ regex.to_string_with(|tnt| {
+ match tnt {
+ TNT::Ter(t) => {
+ format!(
+ "({})",
+ grammar.name_of_tnt(grammar.unpack_tnt(t).unwrap()).unwrap()
+ )
+ }
+ TNT::Non(_) => {
+ // hyper non-terminal
+ format!("({})", grammar.name_of_tnt(tnt).unwrap())
+ }
+ }
+ })?
+ )?;
+ // println!("regex: {regex:?}",);
+ writeln!(lock, "regex len = {}", regex.nodes_len())?;
+
+ accumulator += regex.nodes_len() * 2;
+
+ accumulators.push(accumulator);
+ }
+
+ let mut nfa = grammar.left_closure_to_nfa(&closure)?;
+
+ nfa.print_viz("nfa_orig.gv")?;
+
+ nfa.remove_epsilon(|label| label.is_none())?;
+
+ let accumulators: HashSet<usize> = accumulators.into_iter().collect();
+
+ println!("accumulators = {accumulators:?}");
+
+ let grammar_reserve_node = |node| accumulators.contains(&node);
+
+ nfa.remove_dead(grammar_reserve_node)?;
+
+ nfa.print_viz("nfa_no_dead.gv")?;
+
+ Ok(())
+}
+
+#[test]
+#[ignore]
+fn test_nulling() -> Result<(), Box<dyn std::error::Error>> {
+ let mut grammar = new_left_recursive_grammar()?;
+ let closure = new_closure_regex(&mut grammar)?;
+
+ let mut lock = stdout().lock();
+
+ let mut accumulators = Vec::with_capacity(closure.len() + 1);
+ accumulators.push(0);
+
+ for regex in closure.iter() {
+ writeln!(
+ lock,
+ "regex: {}",
+ regex.to_string_with(|tnt| {
+ match tnt {
+ TNT::Ter(t) => {
+ format!(
+ "({})",
+ grammar.name_of_tnt(grammar.unpack_tnt(t).unwrap()).unwrap()
+ )
+ }
+ TNT::Non(_) => {
+ // hyper non-terminal
+ format!("H({})", grammar.name_of_tnt(tnt).unwrap())
+ }
+ }
+ })?
+ )?;
+ // println!("regex: {regex:?}",);
+ writeln!(lock, "regex len = {}", regex.nodes_len())?;
+
+ accumulators.push(regex.nodes_len() * 2 + accumulators.last().unwrap());
+ }
+
+ write!(lock, "nullables:")?;
+ let mut first_time = true;
+ for i in 0..(grammar.non_num()) {
+ if grammar.is_nullable(i)? {
+ if !first_time {
+ write!(lock, ",")?;
+ }
+ write!(lock, " {i}")?;
+ first_time = false;
+ }
+ }
+ writeln!(lock)?;
+
+ let accumulators: HashSet<usize> = accumulators.into_iter().collect();
+
+ let mut nfa = grammar.left_closure_to_nfa(&closure)?;
+
+ nfa.nulling(|label| {
+ if let Some(label) = *label {
+ match label {
+ TNT::Ter(_) => false,
+ // Panics if the non-terminal index refers to an
+ // invalid node.
+ TNT::Non(n) => grammar.is_nullable(n).unwrap(),
+ }
+ } else {
+ true
+ }
+ })?;
+
+ let grammar_reserve_nodes = |node| accumulators.contains(&node);
+
+ writeln!(lock, "accumulators are {accumulators:?}")?;
+
+ nfa.remove_epsilon(|label| label.is_none())?;
+ nfa.remove_dead(grammar_reserve_nodes)?;
+
+ writeln!(lock, "Printing nfa to nfa.gv")?;
+
+ nfa.print_viz("nfa.gv")?;
+
+ Ok(())
+}
diff --git a/graph/src/adlist.rs b/graph/src/adlist.rs
index 6c1dcd0..ba9afb8 100644
--- a/graph/src/adlist.rs
+++ b/graph/src/adlist.rs
@@ -155,6 +155,12 @@ impl Builder for ALGBuilder {
self.nodes.len() - 1
}
+ #[inline]
+ fn add_vertices(&mut self, n: usize) {
+ self.nodes
+ .extend(std::iter::repeat_with(ALNode::default).take(n));
+ }
+
fn add_edge(&mut self, source: usize, target: usize, _label: Self::Label) -> Result<(), Error> {
let nodes_len = self.nodes.len();
diff --git a/graph/src/adset.rs b/graph/src/adset.rs
index c225986..adf2aaf 100644
--- a/graph/src/adset.rs
+++ b/graph/src/adset.rs
@@ -190,6 +190,12 @@ impl Builder for ASGBuilder {
self.nodes.len() - 1
}
+ #[inline]
+ fn add_vertices(&mut self, n: usize) {
+ self.nodes
+ .extend(std::iter::repeat_with(ASNode::default).take(n));
+ }
+
fn add_edge(&mut self, source: usize, target: usize, _label: Self::Label) -> Result<(), Error> {
let nodes_len = self.nodes.len();
diff --git a/graph/src/builder.rs b/graph/src/builder.rs
index ce85cce..9ab5895 100644
--- a/graph/src/builder.rs
+++ b/graph/src/builder.rs
@@ -27,6 +27,9 @@ pub trait Builder: Default {
/// Add a vertex without children.
fn add_vertex(&mut self) -> usize;
+ /// Add a number of vertices at the same time.
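+ ///
+ /// A sketch of the intended use (illustrative; `SomeBuilder` is a
+ /// hypothetical implementor): reserve a known number of nodes in
+ /// one call instead of looping over `add_vertex`.
+ ///
+ /// ```ignore
+ /// let mut builder = SomeBuilder::default();
+ /// builder.add_vertices(10);
+ /// ```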
+ fn add_vertices(&mut self, n: usize);
+
/// Add an edge from the source to the target.
fn add_edge(&mut self, source: usize, target: usize, label: Self::Label) -> Result<(), Error>;
diff --git a/graph/src/labelled.rs b/graph/src/labelled.rs
index d0a02d0..6341787 100644
--- a/graph/src/labelled.rs
+++ b/graph/src/labelled.rs
@@ -142,10 +142,11 @@ impl<T: GraphLabel> Graph for DLGraph<T> {
match self.nodes.get(source) {
Some(source_node) => {
if self.nodes.get(target).is_none() {
- return Err(Error::IndexOutOfBounds(target, self.nodes.len()));
+ Err(Error::IndexOutOfBounds(target, self.nodes.len()))
+ } else {
+ Ok(source_node.by_target.contains_key(&target)
+ && !source_node.by_target.get(&target).unwrap().is_empty())
}
-
- Ok(source_node.by_target.contains_key(&target))
}
None => Err(Error::IndexOutOfBounds(source, self.nodes.len())),
}
@@ -198,7 +199,7 @@ impl<T: GraphLabel> Graph for DLGraph<T> {
/// A delegation of iterators.
///
/// This is used to avoid a boxed pointer to an iterator.
-#[derive(Default, Debug)]
+#[derive(Default, Debug, Clone)]
pub struct LabelIndexIter<'a> {
iter: Option<std::iter::Copied<Iter<'a, usize>>>,
}
@@ -279,25 +280,81 @@ impl<'a, T> Iterator for LabelIter<'a, T> {
}
}
+/// An iterator over the labels of an edge.
+///
+/// This is used to avoid a boxed pointer to an iterator.
+#[derive(Debug)]
+pub struct EdgeLabelIter<'a, T> {
+ iter: Option<Iter<'a, T>>,
+}
+
+impl<'a, T> Default for EdgeLabelIter<'a, T> {
+ #[inline]
+ fn default() -> Self {
+ Self { iter: None }
+ }
+}
+
+impl<'a, T: Copy + Clone> ExactSizeIterator for EdgeLabelIter<'a, T> {
+ #[inline]
+ fn len(&self) -> usize {
+ if let Some(iter) = &self.iter {
+ iter.len()
+ } else {
+ 0
+ }
+ }
+}
+
+impl<'a, T: Copy + Clone> EdgeLabelIter<'a, T> {
+ fn new(iter: Iter<'a, T>) -> Self {
+ Self { iter: Some(iter) }
+ }
+}
+
+impl<'a, T: Copy + Clone> Iterator for EdgeLabelIter<'a, T> {
+ type Item = T;
+
+ #[inline]
+ fn next(&mut self) -> Option<Self::Item> {
+ if let Some(iter) = &mut self.iter {
+ iter.next().copied()
+ } else {
+ None
+ }
+ }
+
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ if let Some(iter) = &self.iter {
+ iter.size_hint()
+ } else {
+ (0, Some(0))
+ }
+ }
+}
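+
+// A small usage sketch, mirroring the label test further below: the
+// iterator returned by `edge_label` can be consumed or collected
+// directly, with no intermediate `Vec`, e.g.
+//
+// assert_eq!(graph.edge_label(5, 2)?.collect::<Set<_>>(), set!(3));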
+
impl<T: GraphLabel> LabelGraph<T> for DLGraph<T> {
type Iter<'a> = LabelIndexIter<'a> where T: 'a;
type LabelIter<'a> = LabelIter<'a,T> where T: 'a;
- fn edge_label(&self, source: usize, target: usize) -> Result<Vec<T>, Error> {
+ type EdgeLabelIter<'a> = EdgeLabelIter<'a,T>
+ where
+ Self: 'a,
+ T: 'a + Copy + Clone;
+
+ fn edge_label(&self, source: usize, target: usize) -> Result<Self::EdgeLabelIter<'_>, Error> {
if self.has_edge(source, target)? {
- Ok(self
- .nodes
- .get(source)
- .unwrap()
- .by_target
- .get(&target)
- .unwrap()
- .iter()
- .copied()
- .collect())
+ Ok(EdgeLabelIter::new(
+ self.nodes
+ .get(source)
+ .unwrap()
+ .by_target
+ .get(&target)
+ .unwrap()
+ .iter(),
+ ))
} else {
- Ok(Vec::new())
+ Ok(Default::default())
}
}
@@ -335,11 +392,11 @@ impl<T: GraphLabel> LabelGraph<T> for DLGraph<T> {
return Err(Error::IndexOutOfBounds(target, nodes_len));
}
- if let Some(labels) = node.by_target.get(&target) {
- Ok(labels.contains(label))
- } else {
- Ok(false)
- }
+ Ok(node
+ .by_target
+ .get(&target)
+ .map(|labels| labels.contains(label))
+ .unwrap_or(false))
}
None => Err(Error::IndexOutOfBounds(node_id, nodes_len)),
}
@@ -443,6 +500,12 @@ impl<T: GraphLabel> Builder for DLGBuilder<T> {
self.nodes.len() - 1
}
+ #[inline]
+ fn add_vertices(&mut self, n: usize) {
+ self.nodes
+ .extend(std::iter::repeat_with(Default::default).take(n));
+ }
+
fn add_edge(&mut self, source: usize, target: usize, label: Self::Label) -> Result<(), Error> {
let nodes_len = self.nodes.len();
@@ -541,14 +604,29 @@ impl<T: GraphLabel> Builder for DLGBuilder<T> {
// If after removal no labels remain for the target,
// we remove the edge entirely, to avoid false empty
// edges.
- if source_node.flat.is_empty() {
- source_node.by_target.remove(&target);
- for label in to_remove.iter() {
- source_node.by_label.remove(label);
+ if let Some(set) = source_node.by_target.get(&target) {
+ if set.is_empty() {
+ source_node.by_target.remove(&target);
}
}
+ for label in to_remove.iter() {
+ if let Some(set) = source_node.by_label.get(label) {
+ if set.is_empty() {
+ source_node.by_label.remove(label);
+ }
+ }
+ }
+
+ // if source_node.flat.is_empty() {
+ // source_node.by_target.remove(&target);
+
+ // for label in to_remove.iter() {
+ // source_node.by_label.remove(label);
+ // }
+ // }
+
let new_children_set: Set<(T, usize)> = source_node.flat.iter().copied().collect();
// When source_node is in use, we cannot borrow self
@@ -639,10 +717,7 @@ mod label_test {
);
// testing edge_label
- assert_eq!(
- graph.edge_label(5, 2)?.into_iter().collect::<Set<_>>(),
- set!(3)
- );
+ assert_eq!(graph.edge_label(5, 2)?.collect::<Set<_>>(), set!(3));
assert!(matches!(
graph.edge_label(6, 2),
Err(Error::IndexOutOfBounds(6, 6))
@@ -683,8 +758,6 @@ mod label_test {
}
}
-// TODO: Test print_viz
-
#[cfg(test)]
mod test_dlgraph_builder {
use super::*;
diff --git a/graph/src/lib.rs b/graph/src/lib.rs
index 2d23af3..d4f6d7c 100644
--- a/graph/src/lib.rs
+++ b/graph/src/lib.rs
@@ -233,6 +233,12 @@ pub trait LabelGraph<T: GraphLabel>: Graph {
Self: 'a,
T: 'a;
+ /// A type that iterates over labels of an edge.
+ type EdgeLabelIter<'a>: Iterator<Item = T> + 'a
+ where
+ Self: 'a,
+ T: 'a;
+
#[inline]
/// Return the label of a vertex or an error if the node is
/// invalid.
@@ -247,15 +253,9 @@ pub trait LabelGraph<T: GraphLabel>: Graph {
}
}
- #[inline]
/// Return the label of an edge or an error if some node is
/// invalid.
- ///
- /// The default implementation always returns an empty vector for
- /// valid nodes.
- fn edge_label(&self, source: usize, target: usize) -> Result<Vec<T>, Error> {
- self.has_edge(source, target).map(|_| Vec::new())
- }
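+ ///
+ /// A hedged sketch of a call (names illustrative); the labels
+ /// come as an iterator rather than a collected `Vec`:
+ ///
+ /// ```ignore
+ /// for label in graph.edge_label(source, target)? {
+ /// // inspect each label of the edge
+ /// }
+ /// ```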
+ fn edge_label(&self, source: usize, target: usize) -> Result<Self::EdgeLabelIter<'_>, Error>;
/// Return an iterator of edges out of a node, whose associated
/// label is as given.
diff --git a/nfa/src/default/mod.rs b/nfa/src/default/mod.rs
index b9ee398..115bea5 100644
--- a/nfa/src/default/mod.rs
+++ b/nfa/src/default/mod.rs
@@ -1,5 +1,5 @@
//! This file provides a structure that implements the trait
-//! [`NFA`][super::Nfa] and another that umplements the trait
+//! [`NFA`][super::Nfa] and another that implements the trait
//! [`Regex`][super::Regex].
pub mod nfa;
diff --git a/nfa/src/default/nfa.rs b/nfa/src/default/nfa.rs
index 229ba4d..e642218 100644
--- a/nfa/src/default/nfa.rs
+++ b/nfa/src/default/nfa.rs
@@ -1,17 +1,8 @@
//! This file provides a default implementation of NFA.
-// TODO: The current focus of the project is to understand the growth
-// rate of the algorithm, to know whether I made a mistake in the
-// previous iteration of the implementation, or the algorithm is not
-// as fast as the author estimated, which is not quite likely, of
-// course.
-//
-// Thus I shall establish a friendly interface that allows me to view
-// and debug the atomic languages and the languages, transparently.
-
use graph::{
builder::Builder, error::Error as GError, labelled::DLGBuilder, DLGraph, Graph, GraphLabel,
- LabelGraph,
+ LabelExtGraph, LabelGraph,
};
use crate::{default::regex::RegexType, error::Error, DOption, Nfa, Regex, SoC};
@@ -32,6 +23,11 @@ impl<T: GraphLabel + Display> Default for DefaultNFA<T> {
}
}
+// Some boiler-plate delegation implementations.
+//
+// I deliberately avoid using [`Deref`][std::ops::Deref] here, as I do
+// not want to dereference an NFA to a Graph.
+
impl<T: GraphLabel + Display> Graph for DefaultNFA<T> {
type Iter<'a> = <DLGraph<T> as Graph>::Iter<'a> where T: 'a;
@@ -81,6 +77,11 @@ impl<T: GraphLabel + Display> LabelGraph<T> for DefaultNFA<T> {
type LabelIter<'a> = <DLGraph<T> as LabelGraph<T>>::LabelIter<'a> where T: 'a;
+ type EdgeLabelIter<'a> = <DLGraph<T> as LabelGraph<T>>::EdgeLabelIter<'a>
+ where
+ Self: 'a,
+ T: 'a;
+
#[inline]
fn vertex_label(&self, node_id: usize) -> Result<Option<T>, GError> {
if self.has_node(node_id) {
@@ -91,7 +92,7 @@ impl<T: GraphLabel + Display> LabelGraph<T> for DefaultNFA<T> {
}
#[inline]
- fn edge_label(&self, source: usize, target: usize) -> Result<Vec<T>, GError> {
+ fn edge_label(&self, source: usize, target: usize) -> Result<Self::EdgeLabelIter<'_>, GError> {
self.graph.edge_label(source, target)
}
@@ -115,12 +116,24 @@ impl<T: GraphLabel + Display> LabelGraph<T> for DefaultNFA<T> {
}
}
+impl<T: GraphLabel + Display> LabelExtGraph<T> for DefaultNFA<T> {
+ #[inline]
+ fn extend(&mut self, edges: impl IntoIterator<Item = (T, usize)>) -> Result<usize, GError> {
+ self.graph.extend(edges)
+ }
+}
+
+// TODO: Define a type for the labels of DefaultNFA.
+
impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
type FromRegex<S: GraphLabel + Display + Default> = DefaultNFA<S>;
+ // Reminder: This is an adapted version of Thompson's
+ // construction.
fn to_nfa(
regexps: &[impl Regex<RegexType<T>>],
sub_pred: impl Fn(T) -> Result<SoC<T>, Error>,
+ default: Option<T>,
) -> Result<Self::FromRegex<DOption<T>>, Error> {
let total_regexps_len: usize = regexps.iter().map(Graph::nodes_len).sum();
@@ -128,7 +141,7 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
return Ok(Default::default());
}
- // We reserve two more rooms for later uses.
+ // We reserve two extra nodes for the default edge.
let nfa_len = total_regexps_len * 2 + 2;
let mut builder: DLGBuilder<DOption<T>> = Builder::with_capacity(nfa_len);
@@ -141,20 +154,18 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
builder.add_vertex();
}
+ // add a default edge
+ builder.add_edge(nfa_len - 2, nfa_len - 1, DOption(default))?;
+
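+ // `accumulators[i]` is the starting index of the block of states
+ // allotted to the i-th regular expression: each node of a regexp
+ // contributes two states, so a regexp with n nodes occupies
+ // 2 * n consecutive states.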
let accumulators: Vec<usize> = {
- let mut result = Vec::with_capacity(regexps.len() + 1);
+ let mut result = Vec::with_capacity(regexps.len());
result.push(0);
- let mut accumulator = 0;
-
- for regexp in regexps.iter() {
- accumulator += regexp.nodes_len() * 2;
- result.push(accumulator);
+ for regexp in regexps.iter().take(regexps.len() - 1) {
+ result.push(result.last().unwrap() + regexp.nodes_len() * 2);
}
- result.pop();
-
result
};
@@ -313,9 +324,8 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
SoC::Sub(sub_non) => {
// a non-terminal
- let sub_offset = get_offset_safe!(sub_non);
- let sub_nfa_start = sub_offset + 2 * sub_non;
- let sub_nfa_end = sub_offset + 2 * sub_non + 1;
+ let sub_nfa_start = get_offset_safe!(sub_non);
+ let sub_nfa_end = sub_nfa_start + 1;
builder.add_edge(nfa_start, sub_nfa_start, empty_label)?;
builder.add_edge(sub_nfa_end, nfa_end, empty_label)?;
@@ -336,14 +346,102 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
Ok(DefaultNFA { graph })
}
- fn remove_epsilon(&mut self) -> Result<(), Error> {
- todo!()
+ fn remove_epsilon<F: Fn(T) -> bool>(&mut self, f: F) -> Result<(), Error> {
+ let mut builder = self.graph.builder_ref();
+
+ let mut updated = true;
+
+ let nodes_len = self.nodes_len();
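+
+ // Iterate to a fixed point: for every edge from `source` to
+ // `target` whose label satisfies `f`, copy each labelled edge
+ // going out of `target` onto `source`; repeat until no new edge
+ // appears.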
+
+ while updated {
+ updated = false;
+
+ let mut to_add: Vec<(usize, usize, T)> = Vec::with_capacity(nodes_len);
+
+ for source in builder.nodes() {
+ for (label, target_iter) in builder.labels_of(source)? {
+ if f(*label) {
+ // empty label found
+ for target in target_iter {
+ for (label, children_iter) in builder.labels_of(target)? {
+ for child in children_iter {
+ if !builder.has_edge_label(source, label, child)? {
+ updated = true;
+
+ to_add.push((source, child, *label));
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ for (source, target, label) in to_add.into_iter() {
+ builder.add_edge(source, target, label)?;
+ }
+ }
+
+ // Then remove all epsilon edges
+
+ let sources_and_targets: Vec<_> = builder.edges().collect();
+
+ for (source, target) in sources_and_targets.into_iter() {
+ builder.remove_edge(source, target, &f)?;
+ }
+
+ self.graph = builder.build();
+
+ Ok(())
}
- fn remove_dead(&mut self) -> Result<(), Error> {
- todo!()
+ fn remove_dead(&mut self, reserve: impl Fn(usize) -> bool) -> Result<(), Error> {
+ let mut builder = self.graph.builder_ref();
+
+ let mut changed = true;
+
+ // Use a counting vector to avoid calculating which nodes are
+ // dead at each iteration.
+ let mut target_counts: Vec<usize> =
+ std::iter::repeat(0).take(self.graph.nodes_len()).collect();
+
+ for (_, target) in self.graph.edges() {
+ *target_counts
+ .get_mut(target)
+ .ok_or(Error::UnknownNode(target))? += 1;
+ }
+
+ while changed {
+ changed = false;
+
+ for node in self.nodes() {
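+ // A node is dead when its remaining in-degree is zero and
+ // it is not reserved.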
+ let deadp = !matches!(target_counts.get(node).copied(), Some(n) if n > 0);
+
+ if deadp && !reserve(node) {
+ let to_remove: Vec<usize> = builder.children_of(node)?.collect();
+
+ if !to_remove.is_empty() {
+ changed = true;
+
+ for target in to_remove {
+ builder.remove_edge(node, target, |_| true)?;
+ *target_counts
+ .get_mut(target)
+ .ok_or(Error::UnknownNode(target))? -= 1;
+ }
+ }
+ }
+ }
+ }
+
+ self.graph = builder.build();
+
+ Ok(())
}
+ // REVIEW: Decide whether to combine nulling and remove_epsilon
+ // into a single method.
+
fn nulling(&mut self, f: impl Fn(T) -> bool) -> Result<(), Error> {
let mut updated = true;
let mut builder = self.graph.builder_ref();
@@ -396,7 +494,6 @@ impl<T: GraphLabel + Display + Debug> Display for DefaultNFA<T> {
#[cfg(test)]
mod test_to_nfa {
- #![allow(unused_imports)]
use super::*;
use crate::SoC;
@@ -446,8 +543,11 @@ mod test_to_nfa {
println!("regex = {regex}");
- let nfa: DefaultNFA<DOption<char>> =
- DefaultNFA::to_nfa(&[regex], |label| Ok(SoC::Carry(label)))?;
+ let nfa: DefaultNFA<DOption<char>> = DefaultNFA::to_nfa(
+ &[regex],
+ |label| Ok(SoC::Carry(label)),
+ Some(char::default()),
+ )?;
nfa.print_viz("nfa.gv")?;
diff --git a/nfa/src/default/regex.rs b/nfa/src/default/regex.rs
index 2670e32..c8ad370 100644
--- a/nfa/src/default/regex.rs
+++ b/nfa/src/default/regex.rs
@@ -260,9 +260,7 @@ impl<T: GraphLabel> DefaultRegex<T> {
stack.push(Unseen(0));
while let Some(top) = stack.pop() {
- let node_type = types[top.index()];
-
- // TODO: Do not use unwrap here.
+ let node_type = types.get(top.index()).unwrap();
match node_type {
RegexType::Kleene => {
@@ -350,8 +348,12 @@ impl<T: GraphLabel> DefaultRegex<T> {
.map(Unseen)
.rev(),
);
+
+ if self.graph.is_empty_node(top.index()).unwrap() {
+ write!(result, "ε")?;
+ }
}
- RegexType::Lit(label) => write!(result, "{}", f(label))?,
+ RegexType::Lit(label) => write!(result, "{}", f(*label))?,
}
}
@@ -452,7 +454,15 @@ impl Display for ParseError {
}
}
-impl std::error::Error for ParseError {}
+impl std::error::Error for ParseError {
+ fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+ if let ParseError::Graph(gerr) = self {
+ Some(gerr)
+ } else {
+ None
+ }
+ }
+}
impl From<GError> for ParseError {
fn from(ge: GError) -> Self {
diff --git a/nfa/src/error.rs b/nfa/src/error.rs
index ad75077..7160555 100644
--- a/nfa/src/error.rs
+++ b/nfa/src/error.rs
@@ -1,6 +1,6 @@
//! This file implements the error type for the crate.
-use graph::error::Error as GError;
+use graph::error::Error as GraphError;
use core::fmt::{Display, Formatter};
@@ -17,7 +17,7 @@ pub enum Error {
/// There is no root in the underlying regular expression.
NoRoot,
/// This error comes from some underlying graph operation.
- Graph(GError),
+ Graph(GraphError),
/// A cycle is found when constructing regular expressions.
Cycle,
}
@@ -34,10 +34,18 @@ impl Display for Error {
}
}
-impl std::error::Error for Error {}
+impl std::error::Error for Error {
+ fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+ if let Self::Graph(gerr) = self {
+ Some(gerr)
+ } else {
+ None
+ }
+ }
+}
-impl From<GError> for Error {
- fn from(e: GError) -> Self {
+impl From<GraphError> for Error {
+ fn from(e: GraphError) -> Self {
Self::Graph(e)
}
}
diff --git a/nfa/src/lib.rs b/nfa/src/lib.rs
index b1d62b3..31bd69a 100644
--- a/nfa/src/lib.rs
+++ b/nfa/src/lib.rs
@@ -14,7 +14,7 @@ use core::fmt::Display;
use std::ops::{Deref, DerefMut};
-use graph::{Graph, GraphLabel, LabelGraph};
+use graph::{Graph, GraphLabel, LabelExtGraph};
use error::Error;
@@ -52,14 +52,14 @@ pub trait Regex<T: GraphLabel>: Graph + Display + Clone {
}
}
- // TODO: add functions that determine if certain "positions" in a
+ // TODO: Add functions that determine if certain "positions" in a
// regular language satisfy some special properties, like at the
// end of a Kleene star, or at the end of a regular language, et
// cetera. These might be needed later.
}
-/// Since Option<T> does not inherit the Display from T, we wrap it to
-/// provide an automatic implementation of Display.
+/// Since `Option<T>` does not inherit the `Display` from `T`, we wrap
+/// it to provide an automatic implementation of `Display`.
#[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Hash)]
pub struct DOption<T>(Option<T>);
@@ -120,42 +120,98 @@ pub enum SoC<T> {
/// The expected behaviour of a nondeterministic finite automaton.
///
/// Every NFA is a special labelled graph.
-pub trait Nfa<T: GraphLabel>: LabelGraph<T> {
+pub trait Nfa<T: GraphLabel>: LabelExtGraph<T> {
+ // TODO: This trait should have a type for the labels.
+
/// Remove all empty transitions from the nondeterministic finite
/// automaton.
- fn remove_epsilon(&mut self) -> Result<(), Error>;
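+ ///
+ /// The predicate `f` decides which labels count as empty. For
+ /// instance, as in the tests:
+ ///
+ /// ```ignore
+ /// nfa.remove_epsilon(|label| label.is_none())?;
+ /// ```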
+ fn remove_epsilon<F>(&mut self, f: F) -> Result<(), Error>
+ where
+ F: Fn(T) -> bool;
/// Return a state-minimal NFA equivalent with the original one.
///
/// This is not required. It is just to allow me to experiment
/// with NFA optimization algorithms.
- fn minimize(&self) -> Result<Self, Error> {
+ fn minimize(&self) -> Result<Box<Self>, Error> {
Err(Error::UnsupportedOperation)
}
+ /// Check every node or edge by a given predicate.
+ ///
+ /// This should also verify that every node and edge has correct
+ /// indices, so that we can safely use `unwrap` later. A
+ /// consequence is that, if one only wants to check the validity
+ /// of nodes and edges, one can pass a function that always
+ /// returns `true`.
+ #[inline]
+ fn labels_satisfy(&self, f: impl Fn(T) -> bool) -> Result<bool, Error> {
+ let nodes_len = self.nodes_len();
+ let mut result = true;
+
+ for node in self.nodes() {
+ for (label, children_iter) in self.labels_of(node)? {
+ for child in children_iter {
+ if child >= nodes_len {
+ dbg!(node, label);
+ return Err(graph::error::Error::IndexOutOfBounds(child, nodes_len).into());
+ }
+ }
+
+ // NOTE: Avoid executing `f` if `result` is already
+ // false. But still proceed in verifying that nodes
+ // and edges are correct: the correctness of nodes and
+ // edges is more important than the function execution
+ // results, as the former directly affects the safety
+ // of many algorithms.
+ if result && !f(*label) {
+ dbg!(node, label);
+ result = false;
+ }
+ }
+ }
+
+ Ok(result)
+ }
+
/// When we convert a regular expression to a nondeterministic
/// finite automaton, the label should be optional, so we use a
/// different type for the result type.
type FromRegex<S: GraphLabel + Display + Default>;
- /// Build a nondeterministic finite automaton out of a set of
- /// regular expressions.
+ /// Build a nondeterministic finite automaton out of a set
+ /// `regexps` of regular expressions.
+ ///
+ /// The `sub_pred` is a predicate function that returns an
+ /// indication whether to carry the value around or to substitute
+ /// by another regular language in the given set.
+ ///
+ /// The `default` parameter specifies the label of a default edge,
+ /// that goes from a node to another, both of which are not
+ /// associated with the given regular languages.
///
- /// The SUB_PRED is a predicate function that returns an optional
- /// index for a label. If it returns some index, then this means
- /// we should substitute the index-th regular expression in the
- /// set, whereever that label occurs in the set of regular
- /// expressions.
+ /// This function should perform Thompson's construction,
+ /// basically.
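+ ///
+ /// A hedged sketch of a call, mirroring the test in the default
+ /// implementation, where every label is carried over unchanged:
+ ///
+ /// ```ignore
+ /// let nfa = DefaultNFA::to_nfa(
+ /// &[regex],
+ /// |label| Ok(SoC::Carry(label)),
+ /// None,
+ /// )?;
+ /// ```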
fn to_nfa(
regexps: &[impl Regex<RegexType<T>>],
+ // TODO: The transformation should produce more information.
sub_pred: impl Fn(T) -> Result<SoC<T>, Error>,
+ default: Option<T>,
) -> Result<Self::FromRegex<DOption<T>>, Error>;
/// Remove all dead states from the nondeterministic finite
/// automaton.
///
- /// A state is dead if there are no edges going to the state.
- fn remove_dead(&mut self) -> Result<(), Error>;
+ /// A state is dead if there are no edges going to the state, and
+ /// if it is not reserved.
+ ///
+ /// # Note
+ ///
+ /// Actually an implementation is allowed to just remove all edges
+ /// out of the dead nodes. Then these nodes are considered gone
+ /// from the graph, and we don't need to re-index the existing
+ /// edges. We can call this "a poor man's removal of nodes".
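+ ///
+ /// For example (a sketch; `reserved` is assumed to hold the
+ /// indices of states that must survive):
+ ///
+ /// ```ignore
+ /// nfa.remove_dead(|node| reserved.contains(&node))?;
+ /// ```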
+ fn remove_dead(&mut self, reserve: impl Fn(usize) -> bool) -> Result<(), Error>;
/// For each edge from A to B whose edge is considered nullable by
/// a function `f`, and for every edge from B to C, add an edge
@@ -169,6 +225,3 @@ pub trait Nfa<T: GraphLabel>: LabelGraph<T> {
pub mod default;
pub mod desrec;
-
-#[cfg(test)]
-mod nfa_tests {}
diff --git a/receme/src/lib.rs b/receme/src/lib.rs
index be1f028..d9e6249 100644
--- a/receme/src/lib.rs
+++ b/receme/src/lib.rs
@@ -48,7 +48,7 @@ pub mod ralgebra;
// The following modules are for default implementations.
pub mod tree;
-// TODO: benchmarks
+// REVIEW: Do we really need this crate?
#[cfg(test)]
mod test_expr_tree {
diff --git a/receme/src/tree.rs b/receme/src/tree.rs
index eb64f31..521f913 100644
--- a/receme/src/tree.rs
+++ b/receme/src/tree.rs
@@ -365,4 +365,4 @@ where
}
}
-// TODO: Para, Apo, Histo, and Futu await us.
+// REVIEW: Para, Apo, Histo, and Futu await us.
diff --git a/semiring/Cargo.toml b/semiring/Cargo.toml
new file mode 100644
index 0000000..0a82e32
--- /dev/null
+++ b/semiring/Cargo.toml
@@ -0,0 +1,8 @@
+[package]
+name = "semiring"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
diff --git a/semiring/src/lib.rs b/semiring/src/lib.rs
new file mode 100644
index 0000000..7d12d9a
--- /dev/null
+++ b/semiring/src/lib.rs
@@ -0,0 +1,14 @@
+pub fn add(left: usize, right: usize) -> usize {
+ left + right
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn it_works() {
+ let result = add(2, 2);
+ assert_eq!(result, 4);
+ }
+}