author    JSDurand <mmemmew@gmail.com>  2022-12-23 00:36:31 +0800
committer JSDurand <mmemmew@gmail.com>  2022-12-23 00:36:31 +0800
commit    8463dd24f815fe2b8f25fe9763e0a43023bfbb20 (patch)
tree      343eea3c634efbbf72c64ed5dd778ecce60c3eea
parent    9f1c88b863e247da3cd60d2792a7a13b18e25e53 (diff)
renaming core to chain and some other changes
Some changes:

- The core crate is renamed to "chain".
- The crate "viz" is added, which will provide layered graph drawing algorithms.
- A function is added to convert a grammar to the regular language of its left-linear closures.
- A function is added to convert a nondeterministic finite automaton to its "null" closure. A null closure is the same automaton with edges added, as if some edges were "null"; whether an edge counts as null is determined by a user-supplied function. Combined with the previous change, we can convert a grammar to the regular language of the null closure of its left-linear closures.

---

It now remains to test more grammars and to add an Atom trait before finishing the part about compilation.
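As a rough sketch of how the new pieces are meant to fit together (hedged: this is based on the APIs added in chain/src/grammar.rs below, not on code in this commit; the grammar data and the final null-closure call are placeholders, since that step lives in the nfa crate and is not shown in this diff):

    // Illustrative sketch only, not part of the patch.  `terminals`,
    // `nonterminals` and `rules` stand for whatever grammar one wishes
    // to process.
    let mut grammar = Grammar::new(terminals, nonterminals, rules);
    grammar.compute_firsts()?;                        // FIRST sets and "first nodes"
    let closure = grammar.left_closure()?;            // Vec<DefaultRegex<TNT>>
    let nfa = grammar.left_closure_to_nfa(&closure)?; // DefaultNFA<DOption<TNT>>
    // The last step, taking the null closure of `nfa`, is provided by
    // the nfa crate; its exact method name is not visible in this diff.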
-rw-r--r--  Cargo.toml                                               |   13
-rw-r--r--  benches/bench_receme.rs                                  |  293
-rw-r--r--  chain/Cargo.toml                                         |   10
-rw-r--r--  chain/src/grammar.rs                                     | 1247
-rw-r--r--  chain/src/lib.rs                                         |   25
-rw-r--r--  chain/src/plan.org (renamed from repcore/src/plan.org)   |   35
-rw-r--r--  graph/src/adlist.rs                                      |  242
-rw-r--r--  graph/src/adset.rs                                       |  216
-rw-r--r--  graph/src/builder.rs                                     |   53
-rw-r--r--  graph/src/error.rs                                       |    9
-rw-r--r--  graph/src/labelled.rs                                    |  390
-rw-r--r--  graph/src/lib.rs                                         |  133
-rw-r--r--  nfa/src/default/nfa.rs                                   |  374
-rw-r--r--  nfa/src/default/regex.rs                                 |  313
-rw-r--r--  nfa/src/desrec.rs                                        |   22
-rw-r--r--  nfa/src/error.rs                                         |   11
-rw-r--r--  nfa/src/lib.rs                                           |   94
-rw-r--r--  repcore/src/grammar.rs                                   |   59
-rw-r--r--  repcore/src/lib.rs                                       |   20
-rw-r--r--  viz/Cargo.toml (renamed from repcore/Cargo.toml)         |    3
-rw-r--r--  viz/src/lib.rs                                           |   31
21 files changed, 3121 insertions, 472 deletions
diff --git a/Cargo.toml b/Cargo.toml
index e672e74..77de27f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,24 +13,13 @@ rust-version = "1.65"
# testing the new resolver, even though this has no dependencies ;p
[workspace]
-members = ["graph", "receme", "nfa", "repcore"]
+members = ["graph", "receme", "nfa", "chain", "viz"]
resolver = "2"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
-receme = { path = "receme" }
-
-[dev-dependencies]
-criterion = "0.4"
[features]
default = []
tokenizer = []
-
-[profile.bench]
-debug = true
-
-[[bench]]
-name = "bench_receme"
-harness = false
diff --git a/benches/bench_receme.rs b/benches/bench_receme.rs
deleted file mode 100644
index c9583a9..0000000
--- a/benches/bench_receme.rs
+++ /dev/null
@@ -1,293 +0,0 @@
-use criterion::{black_box, criterion_group, criterion_main, Criterion};
-
-use receme::{
- catana::{Ana, Cata},
- functor::Functor,
- hylo::Hylo,
- tree::{DFTree, TEStrategy, Tree, TreeIndex},
-};
-
-#[derive(Debug, Clone)]
-enum Expr<T> {
- Add(T, T),
- Lit(isize),
-}
-
-impl<T> Functor<T> for Expr<T> {
- type Target<S> = Expr<S>;
-
- fn fmap<S>(self, mut f: impl FnMut(T) -> S) -> Self::Target<S> {
- match self {
- Expr::Add(a, b) => Expr::Add(f(a), f(b)),
- Expr::Lit(value) => Expr::Lit(value),
- }
- }
-}
-
-#[derive(Debug, Clone)]
-enum ExBox {
- Add(Box<ExBox>, Box<ExBox>),
- Lit(isize),
-}
-
-impl ExBox {
- fn lit(value: isize) -> Self {
- Self::Lit(value)
- }
-
- fn add(a: ExBox, b: ExBox) -> Self {
- Self::Add(Box::new(a), Box::new(b))
- }
-}
-
-pub fn bench(c: &mut Criterion) {
- fn construct_tree() -> Tree<Expr<TreeIndex>> {
- let strategy: TEStrategy = TEStrategy::UnsafeArena;
-
- let elements: Vec<Expr<TreeIndex>> = vec![
- Expr::Add(1, 2).fmap(TreeIndex::new),
- Expr::Lit(1),
- Expr::Add(3, 4).fmap(TreeIndex::new),
- Expr::Lit(3),
- Expr::Add(5, 6).fmap(TreeIndex::new),
- Expr::Lit(10),
- Expr::Lit(-14),
- ];
-
- Tree::new(elements, strategy)
- }
-
- fn construct_box_tree() -> Box<ExBox> {
- Box::new(ExBox::add(
- ExBox::lit(1),
- ExBox::add(ExBox::lit(3), ExBox::add(ExBox::lit(10), ExBox::lit(-14))),
- ))
- }
-
- let tree = construct_tree();
-
- let safe_tree = {
- let mut tree = tree.clone();
- tree.set_strategy(Default::default());
- tree
- };
-
- let depth_first_tree = {
- let mut tree = tree.clone();
- tree.set_strategy(TEStrategy::DepthFirst);
- tree
- };
-
- let boxtree = construct_box_tree();
-
- c.bench_function("bench cata", |b| {
- b.iter(|| {
- let result = tree.clone().cata(|expr: Expr<isize>| match expr {
- Expr::Add(a, b) => a + b,
- Expr::Lit(v) => v,
- });
-
- black_box(result);
- })
- });
-
- c.bench_function("bench cata safe", |b| {
- b.iter(|| {
- let result = safe_tree.clone().cata(|expr: Expr<isize>| match expr {
- Expr::Add(a, b) => a + b,
- Expr::Lit(v) => v,
- });
-
- black_box(result);
- })
- });
-
- c.bench_function("bench cata depth first", |b| {
- b.iter(|| {
- let result = depth_first_tree
- .clone()
- .cata(|expr: Expr<isize>| match expr {
- Expr::Add(a, b) => a + b,
- Expr::Lit(v) => v,
- });
-
- black_box(result);
- })
- });
-
- c.bench_function("bench cata loop", |b| {
- b.iter(|| {
- let tree = tree.clone();
-
- let mut stack: Vec<Result<usize, usize>> = vec![Ok(0)];
- let mut result_stack: Vec<isize> = vec![];
-
- while let Some(top) = stack.pop() {
- let top_is_ok_p = top.is_ok();
-
- let top = match top {
- Ok(top) => top,
- Err(top) => top,
- };
-
- let top_node = tree.nth(top).unwrap();
-
- match top_node {
- Expr::Add(a, b) => {
- if top_is_ok_p {
- stack.push(Err(top));
- stack.push(Ok(**b));
- stack.push(Ok(**a));
- } else {
- let a = result_stack.pop().unwrap();
- let b = result_stack.pop().unwrap();
-
- result_stack.push(a + b);
- }
- }
- Expr::Lit(v) => result_stack.push(*v),
- }
- }
-
- let result = result_stack.pop().unwrap();
-
- black_box(result);
- })
- });
-
- c.bench_function("bench ana box loop", |b| {
- b.iter(|| {
- let boxtree = boxtree.clone();
-
- let mut elements = vec![];
-
- let mut stack = vec![boxtree];
-
- while let Some(bt) = stack.pop() {
- match *bt {
- ExBox::Add(a, b) => {
- let len = elements.len();
-
- elements.push(Expr::Add(TreeIndex::new(len + 1), TreeIndex::new(len + 2)));
-
- stack.push(b);
- stack.push(a);
- }
- ExBox::Lit(v) => elements.push(Expr::Lit(v)),
- }
- }
-
- let tree: Tree<Expr<TreeIndex>> = Tree::new(elements, Default::default());
-
- black_box(tree);
- })
- });
-
- c.bench_function("bench ana boxed", |b| {
- b.iter(|| {
- let boxtree = boxtree.clone();
-
- let tree = Tree::ana(boxtree, |bt: Box<ExBox>| match *bt {
- ExBox::Add(a, b) => Expr::Add(a, b),
- ExBox::Lit(v) => Expr::Lit(v),
- });
-
- black_box(tree);
- })
- });
-
- c.bench_function("bench ana depth first boxed", |b| {
- b.iter(|| {
- let boxtree = boxtree.clone();
-
- let tree = DFTree::ana(boxtree, |bt: Box<ExBox>| match *bt {
- ExBox::Add(a, b) => Expr::Add(a, b),
- ExBox::Lit(v) => Expr::Lit(v),
- });
-
- black_box(tree);
- })
- });
-
- c.bench_function("bench hylo", |b| {
- b.iter(|| {
- let boxtree = boxtree.clone();
-
- black_box(Tree::hylo(
- boxtree,
- |expr| match expr {
- Expr::Add(a, b) => a + b,
- Expr::Lit(v) => v,
- },
- |bt: Box<ExBox>| match *bt {
- ExBox::Add(a, b) => Expr::Add(a, b),
- ExBox::Lit(v) => Expr::Lit(v),
- },
- ))
- })
- });
-
- c.bench_function("bench hylo loop", |b| {
- b.iter(|| {
- let boxtree = boxtree.clone();
-
- let mut elements = vec![];
-
- let mut stack = vec![boxtree];
-
- while let Some(bt) = stack.pop() {
- match *bt {
- ExBox::Add(a, b) => {
- let len = elements.len();
-
- elements.push(Expr::Add(TreeIndex::new(len + 1), TreeIndex::new(len + 2)));
-
- stack.push(b);
- stack.push(a);
- }
- ExBox::Lit(v) => elements.push(Expr::Lit(v)),
- }
- }
-
- let tree: Tree<Expr<TreeIndex>> = Tree::new(elements, Default::default());
-
- let mut stack: Vec<Result<usize, usize>> = vec![Ok(0)];
- let mut result_stack: Vec<isize> = vec![];
-
- while let Some(top) = stack.pop() {
- let top_is_ok_p = top.is_ok();
- let top = match top {
- Ok(top) => top,
- Err(top) => top,
- };
-
- let top_node = tree.nth(top).unwrap();
-
- match top_node {
- Expr::Add(a, b) => {
- if top_is_ok_p {
- stack.push(Err(top));
- stack.push(Ok(**b));
- stack.push(Ok(**a));
- } else {
- let a = result_stack.pop().unwrap();
- let b = result_stack.pop().unwrap();
-
- result_stack.push(a + b);
- }
- }
- Expr::Lit(v) => result_stack.push(*v),
- }
- }
-
- let result = result_stack.pop().unwrap();
-
- assert_eq!(result, 0);
-
- black_box(result);
- })
- });
-}
-
-criterion_group!(benches, bench);
-criterion_main!(benches);
diff --git a/chain/Cargo.toml b/chain/Cargo.toml
new file mode 100644
index 0000000..32eed8b
--- /dev/null
+++ b/chain/Cargo.toml
@@ -0,0 +1,10 @@
+[package]
+name = "chain"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+nfa = { path = "../nfa" }
+graph = { path = "../graph" }
\ No newline at end of file
diff --git a/chain/src/grammar.rs b/chain/src/grammar.rs
new file mode 100644
index 0000000..287fbca
--- /dev/null
+++ b/chain/src/grammar.rs
@@ -0,0 +1,1247 @@
+#![warn(missing_docs)]
+//! This file implements the expected behaviours of grammars.
+
+// NOTE: We shall first start with a parser that works at the level of
+// characters. The purpose is to first experiment with the workings
+// and the performance of the algorithms, before optimising by using
+// regular expressions to classify inputs into tokens. In other
+// words, the current focus is not on the optimisations, whereas
+// scanners are for optimisations only, so to speak.
+
+#![allow(unused_imports)]
+use nfa::{
+ default::{
+ nfa::DefaultNFA,
+ regex::{DefaultRegParser, DefaultRegex, ParseDirection, ParseError, RegexType},
+ },
+ DOption, DesRec, Nfa, Regex, SoC,
+};
+
+use graph::{adlist::ALGBuilder, builder::Builder, Graph};
+
+use std::{
+ collections::HashSet,
+ fmt::{Display, Write},
+};
+
+/// The type of a terminal.
+///
+/// For the time being this is a wrapper around a string, but in the
+/// future it may hold more information about scanners.
+#[derive(Debug, Clone, Eq, PartialEq)]
+pub struct Terminal {
+ // If we want to use scanners, per chance add them as a new field
+ // here.
+ name: String,
+}
+
+impl Terminal {
+ /// Create a terminal with the given name.
+ #[inline]
+ pub fn new(name: String) -> Self {
+ Self { name }
+ }
+
+ /// Return the name of the terminal.
+ #[inline]
+ pub fn name(&self) -> &str {
+ &self.name
+ }
+}
+
+/// The type of a non-terminal.
+///
+/// This is just a wrapper around a string.
+#[derive(Debug, Clone)]
+pub struct Nonterminal(String);
+
+impl Nonterminal {
+ /// Return the name of the nonterminal.
+ ///
+ /// Just to improve readability.
+ #[inline]
+ pub fn name(&self) -> &str {
+ &self.0
+ }
+}
+
+/// The type of a terminal or a non-terminal.
+///
+/// Only an index is stored here. Actual data are stored in two other
+/// arrays.
+#[derive(Debug, Hash, Eq, PartialEq, Clone, Copy, Ord, PartialOrd)]
+pub enum TNT {
+ /// Terminal variant
+ Ter(usize),
+ /// Nonterminal variant
+ Non(usize),
+}
+
+impl Display for TNT {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ match self {
+ TNT::Ter(t) => write!(f, "T({t})"),
+ TNT::Non(n) => write!(f, "N({n})"),
+ }
+ }
+}
+
+/// Errors related to grammar operations.
+#[derive(Debug, Copy, Clone)]
+#[non_exhaustive]
+pub enum Error {
+ /// The first component is the index, and the second the bound.
+ IndexOutOfBounds(usize, usize),
+ /// Fail to build the N-th regular expression, due to the
+ /// ParseError.
+ BuildFail(usize, ParseError),
+ /// Fail to build the NFA.
+ NFAFail(nfa::error::Error),
+}
+
+impl From<nfa::error::Error> for Error {
+ fn from(nfae: nfa::error::Error) -> Self {
+ Self::NFAFail(nfae)
+ }
+}
+
+impl Display for Error {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ match self {
+ Error::IndexOutOfBounds(i, b) => write!(f, "index {i} out of bounds {b}"),
+ Error::BuildFail(n, pe) => write!(
+ f,
+ "Failed to build the {n}-th regular expression due to error: {pe}"
+ ),
+ Error::NFAFail(nfae) => write!(f, "failed to build NFA because of {nfae}"),
+ }
+ }
+}
+
+impl std::error::Error for Error {}
+
+/// A rule is a regular expression of terminals or non-terminals.
+#[derive(Debug, Clone)]
+pub struct Rule {
+ regex: DefaultRegex<TNT>,
+}
+
+impl Rule {
+ /// Return true if and only if the rule is empty.
+ #[inline]
+ pub fn is_empty(&self) -> bool {
+ self.regex.is_empty()
+ }
+
+ /// Return the length of the rule.
+ #[inline]
+ pub fn len(&self) -> usize {
+ self.regex.len()
+ }
+}
+
+/// The type of a grammar.
+#[derive(Debug, Clone, Default)]
+pub struct Grammar {
+ ter: Vec<Terminal>,
+ non: Vec<Nonterminal>,
+ rules: Vec<Rule>,
+ firsts: Vec<HashSet<Option<usize>>>,
+ first_nodes: Vec<Vec<usize>>,
+}
+
+/// A private type to aid the recursive looping of regular
+/// expressions.
+#[derive(Copy, Clone)]
+enum StackElement {
+ Seen(usize),
+ Unseen(usize),
+}
+
+impl StackElement {
+ fn index(self) -> usize {
+ match self {
+ Self::Seen(index) => index,
+ Self::Unseen(index) => index,
+ }
+ }
+
+ fn is_seen(self) -> bool {
+ matches!(self, Self::Seen(_))
+ }
+}
+
+impl Grammar {
+ /// Construct a grammar from a vector of terminals, a vector of
+ /// non-terminals, and a vector of rules for the non-terminals.
+ ///
+ /// # Panics
+ ///
+ /// If the length of `non` is not equal to that of `rules`, then
+ /// the function panics.
+ pub fn new(ter: Vec<Terminal>, non: Vec<Nonterminal>, rules: Vec<Rule>) -> Self {
+ assert_eq!(non.len(), rules.len());
+
+ // One more room is reserved for the `None` value.
+ let firsts = std::iter::repeat_with(|| HashSet::with_capacity(ter.len() + 1))
+ .take(non.len())
+ .collect();
+
+ let first_nodes = rules
+ .iter()
+ .map(|rule| Vec::with_capacity(rule.len()))
+ .collect();
+
+ Self {
+ ter,
+ non,
+ rules,
+ firsts,
+ first_nodes,
+ }
+ }
+
+ /// Return the name of a terminal or a non-terminal.
+ pub fn name_of_tnt(&self, tnt: TNT) -> Result<String, Error> {
+ match tnt {
+ TNT::Ter(t) => Ok(format!(
+ "T{}",
+ self.ter
+ .get(t)
+ .ok_or(Error::IndexOutOfBounds(t, self.ter.len()))?
+ .name()
+ )),
+ TNT::Non(n) => Ok(format!(
+ "N{}",
+ self.non
+ .get(n)
+ .ok_or(Error::IndexOutOfBounds(n, self.non.len()))?
+ .name()
+ )),
+ }
+ }
+
+ /// Return true if and only if there are no non-terminals in the
+ /// grammar.
+ #[inline]
+ pub fn is_empty(&self) -> bool {
+ self.non.is_empty()
+ }
+
+ /// Return the total length of all rules.
+ #[inline]
+ pub fn total(&self) -> usize {
+ self.rules.iter().map(Rule::len).sum()
+ }
+
+ /// Return the number of terminals.
+ #[inline]
+ pub fn ter_num(&self) -> usize {
+ self.ter.len()
+ }
+
+ /// Return the number of non-terminals.
+ #[inline]
+ pub fn non_num(&self) -> usize {
+ self.non.len()
+ }
+
+ /// Convert a non-terminal `N` to `N + TER_NUM`, so that we use a
+ /// single number to represent terminals and non-terminals.
+ ///
+ /// # Bounds
+ ///
+ /// If a terminal index is greater than or equal to the number of
+ /// terminals, then this signals an error; mutatis mutandis for
+ /// non-terminals.
+ ///
+ /// # Related
+ ///
+ /// The inverse function is [`unpack_tnt`][Grammar::unpack_tnt].
+ #[inline]
+ pub fn pack_tnt(&self, tnt: TNT) -> Result<usize, Error> {
+ let ter_num = self.ter.len();
+ let non_num = self.non.len();
+
+ match tnt {
+ TNT::Ter(t) => {
+ if t >= ter_num {
+ Err(Error::IndexOutOfBounds(t, ter_num))
+ } else {
+ Ok(t)
+ }
+ }
+ TNT::Non(n) => {
+ if n >= non_num {
+ Err(Error::IndexOutOfBounds(n, non_num))
+ } else {
+ Ok(n + ter_num)
+ }
+ }
+ }
+ }
+
+ /// Convert a single number to either a terminal or a
+ /// non-terminal.
+ ///
+ /// # Bounds
+ ///
+ /// If the number is greater than or equal to the sum of the
+ /// numbers of terminals and of non-terminals, then this signals
+ /// an error.
+ ///
+ /// # Related
+ ///
+ /// This is the inverse of [`pack_tnt`][Grammar::pack_tnt].
+ #[inline]
+ pub fn unpack_tnt(&self, flat: usize) -> Result<TNT, Error> {
+ let ter_num = self.ter.len();
+ let non_num = self.non.len();
+
+ if flat < ter_num {
+ Ok(TNT::Ter(flat))
+ } else if flat < ter_num + non_num {
+ Ok(TNT::Non(flat - ter_num))
+ } else {
+ Err(Error::IndexOutOfBounds(flat, ter_num + non_num))
+ }
+ }
+
+ /// Return true if and only if the non-terminal is nullable.
+ #[inline]
+ pub fn is_nullable(&self, non_terminal: usize) -> Result<bool, Error> {
+ Ok(self
+ .firsts
+ .get(non_terminal)
+ .ok_or(Error::IndexOutOfBounds(non_terminal, self.non.len()))?
+ .contains(&None))
+ }
+
+ /// For a NON_TERMINAL, return an iterator that goes over the
+ /// nodes that are reachable from the non-terminal through an
+ /// empty transition of the nondeterministic finite automaton.
+ #[inline]
+ pub fn first_nodes_of(&self, non_terminal: usize) -> Result<std::slice::Iter<usize>, Error> {
+ Ok(self
+ .first_nodes
+ .get(non_terminal)
+ .ok_or(Error::IndexOutOfBounds(non_terminal, self.non.len()))?
+ .iter())
+ }
+
+ /// Compute the set of terminals that can appear as the first
+ /// terminal in some left-linear derivation of a non-terminal, for
+ /// every non-terminal.
+ ///
+ /// This is an algorithm that computes the transitive closure,
+ /// which is a common approach for this task. But perhaps there
+ /// are more efficient approaches?
+ ///
+ /// Also the function computes the set of "reachable nodes" in the
+ /// process, and records the information in the `first_nodes`
+ /// attribute.
+ pub fn compute_firsts(&mut self) -> Result<(), Error> {
+ let mut updated = true;
+
+ let non_len = self.non_num();
+
+ use StackElement::{Seen, Unseen};
+
+ while updated {
+ updated = false;
+
+ for (n, regex) in self.rules.iter().map(|rule| &rule.regex).enumerate() {
+ let root = if let Some(root) = regex.root() {
+ root
+ } else {
+ if !self.is_nullable(n)? {
+ updated = true;
+
+ self.firsts.get_mut(n).unwrap().insert(None);
+
+ // The default construction of a grammar
+ // reserves some space for each vector, so
+ // explicitly clearing and shrinking it here
+ // reclaims some minor memory overhead.
+ let pointer = self.first_nodes.get_mut(n).unwrap();
+
+ pointer.clear();
+ pointer.shrink_to_fit();
+ }
+
+ continue;
+ };
+
+ let regex_len = regex.len();
+
+ let mut stack: Vec<StackElement> = Vec::with_capacity(regex_len);
+
+ stack.push(Unseen(root));
+
+ let mut children_sets_stack: Vec<HashSet<Option<usize>>> =
+ Vec::with_capacity(regex_len);
+
+ let mut children_nodes_stack: Vec<HashSet<usize>> = Vec::with_capacity(regex_len);
+
+ while let Some(top) = stack.pop() {
+ let top_index = top.index();
+ let is_seen = top.is_seen();
+
+ match regex
+ .vertex_label(top_index)
+ .map_err(|_| Error::IndexOutOfBounds(top_index, regex_len))?
+ {
+ RegexType::Kleene => {
+ if !is_seen {
+ stack.push(Seen(top_index));
+
+ for child in regex.children_of(top_index).unwrap() {
+ stack.push(Unseen(child));
+ }
+ } else {
+ let degree = regex.degree(top_index).unwrap();
+ let children_stack_len = children_sets_stack.len();
+ let children_nodes_len = children_nodes_stack.len();
+
+ assert!(
+ children_stack_len >= degree,
+ "not enough stack elements for {top_index}"
+ );
+
+ assert!(
+ children_nodes_len >= degree,
+ "not enough stack elements for {top_index}"
+ );
+
+ let mut this_set = HashSet::new();
+
+ this_set.insert(None);
+
+ let mut this_nodes = HashSet::new();
+
+ this_nodes.insert(top_index);
+
+ if degree == 0 {
+ children_sets_stack.push(this_set);
+ children_nodes_stack.push(this_nodes);
+
+ continue;
+ }
+
+ let mut stop = false;
+
+ for (child_set, child_nodes) in children_sets_stack
+ .drain((children_stack_len - degree)..)
+ .zip(
+ children_nodes_stack.drain((children_nodes_len - degree)..),
+ )
+ {
+ if stop {
+ break;
+ }
+
+ if !child_set.contains(&None) {
+ stop = true;
+ }
+
+ this_set.extend(child_set);
+ this_nodes.extend(child_nodes);
+ }
+
+ children_sets_stack.push(this_set);
+ children_nodes_stack.push(this_nodes);
+ }
+ }
+ RegexType::Plus => {
+ if !is_seen {
+ stack.push(Seen(top_index));
+
+ for child in regex.children_of(top_index).unwrap() {
+ stack.push(Unseen(child));
+ }
+ } else {
+ let degree = regex.degree(top_index).unwrap();
+ let children_stack_len = children_sets_stack.len();
+ let children_nodes_len = children_nodes_stack.len();
+
+ assert!(
+ children_stack_len >= degree,
+ "not enough stack elements for {top_index}"
+ );
+
+ assert!(
+ children_nodes_len >= degree,
+ "not enough stack elements for {top_index}"
+ );
+
+ let mut this_set = HashSet::new();
+
+ let mut this_nodes = HashSet::new();
+
+ this_nodes.insert(top_index);
+
+ if degree == 0 {
+ this_set.insert(None);
+
+ children_sets_stack.push(this_set);
+ children_nodes_stack.push(this_nodes);
+
+ continue;
+ }
+
+ let mut stop = false;
+
+ for (child_set, child_nodes) in children_sets_stack
+ .drain((children_stack_len - degree)..)
+ .zip(
+ children_nodes_stack.drain((children_nodes_len - degree)..),
+ )
+ {
+ if stop {
+ break;
+ }
+
+ if !child_set.contains(&None) {
+ stop = true;
+ }
+
+ this_set.extend(child_set);
+ this_nodes.extend(child_nodes);
+ }
+
+ if stop {
+ this_set.remove(&None);
+ }
+
+ children_sets_stack.push(this_set);
+ children_nodes_stack.push(this_nodes);
+ }
+ }
+ RegexType::Optional => {
+ if !is_seen {
+ stack.push(Seen(top_index));
+
+ for child in regex.children_of(top_index).unwrap() {
+ stack.push(Unseen(child));
+ }
+ } else {
+ let degree = regex.degree(top_index).unwrap();
+ let children_stack_len = children_sets_stack.len();
+ let children_nodes_len = children_nodes_stack.len();
+
+ assert!(
+ children_stack_len >= degree,
+ "not enough stack elements for {top_index}"
+ );
+
+ assert!(
+ children_nodes_len >= degree,
+ "not enough stack elements for {top_index}"
+ );
+
+ let mut this_set = HashSet::new();
+
+ this_set.insert(None);
+
+ let mut this_nodes = HashSet::new();
+
+ this_nodes.insert(top_index);
+
+ if degree == 0 {
+ children_sets_stack.push(this_set);
+ children_nodes_stack.push(this_nodes);
+
+ continue;
+ }
+
+ let mut stop = false;
+
+ for (child_set, child_nodes) in children_sets_stack
+ .drain((children_stack_len - degree)..)
+ .zip(
+ children_nodes_stack.drain((children_nodes_len - degree)..),
+ )
+ {
+ if stop {
+ break;
+ }
+
+ if !child_set.contains(&None) {
+ stop = true;
+ }
+
+ this_set.extend(child_set.iter().copied());
+ this_nodes.extend(child_nodes.iter().copied());
+ }
+
+ children_sets_stack.push(this_set);
+ children_nodes_stack.push(this_nodes);
+ }
+ }
+ RegexType::Or => {
+ if !is_seen {
+ stack.push(Seen(top_index));
+
+ for child in regex.children_of(top_index).unwrap() {
+ stack.push(Unseen(child));
+ }
+ } else {
+ let degree = regex.degree(top_index).unwrap();
+ let children_stack_len = children_sets_stack.len();
+ let children_nodes_len = children_nodes_stack.len();
+
+ assert!(
+ children_stack_len >= degree,
+ "not enough stack elements for {top_index}"
+ );
+
+ assert!(
+ children_nodes_len >= degree,
+ "not enough stack elements for {top_index}"
+ );
+
+ let mut this_set = HashSet::new();
+
+ let mut this_nodes = HashSet::new();
+
+ this_nodes.insert(top_index);
+
+ if degree == 0 {
+ this_set.insert(None);
+
+ children_sets_stack.push(this_set);
+ children_nodes_stack.push(this_nodes);
+
+ continue;
+ }
+
+ for (child_set, child_nodes) in children_sets_stack
+ .drain((children_stack_len - degree)..)
+ .zip(
+ children_nodes_stack.drain((children_nodes_len - degree)..),
+ )
+ {
+ this_set.extend(child_set.iter().copied());
+ this_nodes.extend(child_nodes.iter().copied());
+ }
+
+ children_sets_stack.push(this_set);
+ children_nodes_stack.push(this_nodes);
+ }
+ }
+ RegexType::Paren => {
+ // Only for printing purposes
+ let mut this_set = HashSet::new();
+
+ this_set.insert(None);
+
+ children_sets_stack.push(this_set);
+
+ let mut this_nodes = HashSet::new();
+
+ this_nodes.insert(top_index);
+
+ children_nodes_stack.push(this_nodes);
+ }
+ RegexType::Empty => {
+ if !is_seen {
+ stack.push(Seen(top_index));
+
+ for child in regex.children_of(top_index).unwrap().rev() {
+ stack.push(Unseen(child));
+ }
+ } else {
+ let degree = regex.degree(top_index).unwrap();
+ let children_stack_len = children_sets_stack.len();
+ let children_nodes_len = children_nodes_stack.len();
+
+ assert!(
+ children_stack_len >= degree,
+ "not enough stack elements for {top_index}"
+ );
+
+ assert!(
+ children_nodes_len >= degree,
+ "not enough stack elements for {top_index}"
+ );
+
+ let mut this_set = HashSet::new();
+
+ let mut this_nodes = HashSet::new();
+
+ this_nodes.insert(top_index);
+
+ if degree == 0 {
+ this_set.insert(None);
+
+ children_sets_stack.push(this_set);
+ children_nodes_stack.push(this_nodes);
+
+ continue;
+ }
+
+ let mut stop = false;
+
+ for (child_set, child_nodes) in children_sets_stack
+ .drain((children_stack_len - degree)..)
+ .zip(
+ children_nodes_stack.drain((children_nodes_len - degree)..),
+ )
+ {
+ if stop {
+ break;
+ }
+
+ if !child_set.contains(&None) {
+ stop = true;
+ }
+
+ this_set.extend(child_set.iter().copied());
+ this_nodes.extend(child_nodes.iter().copied());
+ }
+
+ if stop {
+ this_set.remove(&None);
+ }
+
+ children_sets_stack.push(this_set);
+ children_nodes_stack.push(this_nodes);
+ }
+ }
+ RegexType::Lit(tnt) => {
+ match tnt {
+ TNT::Ter(t) => {
+ let mut this_set = HashSet::with_capacity(1);
+
+ this_set.insert(Some(t));
+
+ children_sets_stack.push(this_set);
+ }
+ TNT::Non(non) => {
+ let this_set = self
+ .firsts
+ .get(non)
+ .ok_or(Error::IndexOutOfBounds(non, non_len))?
+ .clone();
+
+ children_sets_stack.push(this_set);
+ }
+ }
+
+ let mut this_nodes = HashSet::with_capacity(1);
+ this_nodes.insert(top_index);
+
+ children_nodes_stack.push(this_nodes);
+ }
+ }
+ }
+
+ assert_eq!(
+ children_sets_stack.len(),
+ 1,
+ "Too many elements left at the end"
+ );
+
+ assert_eq!(
+ children_nodes_stack.len(),
+ 1,
+ "Too many elements left at the end"
+ );
+
+ for first in children_sets_stack.pop().unwrap().into_iter() {
+ if !self.firsts.get(n).unwrap().contains(&first) {
+ updated = true;
+
+ self.firsts.get_mut(n).unwrap().insert(first);
+ }
+ }
+
+ *self.first_nodes.get_mut(n).unwrap() =
+ children_nodes_stack.pop().unwrap().into_iter().collect();
+ }
+ }
+
+ Ok(())
+ }
+
+ /// Return the regular language of the left-linear closures of
+ /// non-terminals in the grammar.
+ ///
+ /// The resulting vector is guaranteed to be of the same length as
+ /// the number of non-terminals.
+ ///
+ /// The resulting regular language is not "self-contained". That
+ /// is to say, its terminal indices are packed indices and are
+ /// meaningless without the interpretation of the grammar. They
+ /// should be converted to a nondeterministic finite automaton and
+ /// then to its null closure later on.
+ pub fn left_closure(&self) -> Result<Vec<DefaultRegex<TNT>>, Error> {
+ let non_len = self.non_num();
+
+ let mut result = Vec::with_capacity(non_len);
+
+ for (n, rule) in self.rules.iter().enumerate() {
+ let regex = &rule.regex;
+
+ let regex_root = if let Some(root) = regex.root() {
+ root
+ } else {
+ result.push(Default::default());
+
+ continue;
+ };
+
+ let regex_len = regex.len();
+
+ /// A convenient macro to retrieve the children from the
+ /// original regular expression with error propagation.
+ macro_rules! children {
+ ($n:expr) => {
+ regex
+ .children_of($n)
+ .map_err(|_| Error::IndexOutOfBounds($n, regex_len))?
+ };
+ }
+
+ /// A convenient macro to retrieve the label from the
+ /// original regular expression with error propagation.
+ macro_rules! label {
+ ($n:expr) => {
+ regex
+ .vertex_label($n)
+ .map_err(|_| Error::IndexOutOfBounds($n, regex_len))?
+ };
+ }
+
+ let parents = regex.parents_array().map_err(|e| match e {
+ nfa::error::Error::UnknownNode(n) => Error::IndexOutOfBounds(n, regex_len),
+ nfa::error::Error::Cycle => Error::BuildFail(n, ParseError::Cycle),
+ _ => unreachable!(),
+ })?;
+
+ use RegexType::*;
+ use TNT::*;
+
+ let mut local_result: Vec<RegexType<TNT>> = Vec::with_capacity(regex_len * 2);
+ let mut builder = ALGBuilder::default();
+
+ /// Perform a depth-first copy
+ macro_rules! df_copy {
+ ($parent:expr, $index:expr) => {
+ match label!($index) {
+ Kleene | Plus | Optional | Or | Paren | Empty => {
+ let mut stack = vec![($parent, $index)];
+
+ while let Some((top_parent, top_index)) = stack.pop() {
+ let label = label!(top_index);
+ let label = match label {
+ Lit(top_tnt) => Lit(Ter(self.pack_tnt(top_tnt).unwrap())),
+ _ => label,
+ };
+
+ local_result.push(label);
+
+ let new = builder.add_vertex();
+
+ builder.add_edge(top_parent, new, ()).unwrap();
+
+ stack.extend(children!(top_index).map(|child| (new, child)));
+ }
+ }
+ Lit(remain_tnt) => {
+ local_result.push(Lit(Ter(self.pack_tnt(remain_tnt).unwrap())));
+ let new = builder.add_vertex();
+ builder.add_edge($parent, new, ()).unwrap();
+ }
+ }
+ };
+ }
+
+ local_result.push(Or);
+ builder.add_vertex();
+
+ local_result.push(Lit(Ter(self.pack_tnt(Non(n)).unwrap())));
+ let non_lit_index = builder.add_vertex();
+
+ builder.add_edge(0, non_lit_index, ()).unwrap();
+
+ for first_node in self.first_nodes_of(n)?.copied() {
+ assert!(first_node < parents.len());
+
+ let tnt = match label!(first_node) {
+ Lit(tnt) => tnt,
+ _ => continue,
+ };
+
+ let mut parents_chain = {
+ let mut result = Vec::new();
+ let mut stack = Vec::with_capacity(parents.len());
+
+ stack.push(first_node);
+
+ while let Some(top) = stack.pop() {
+ assert!(top < parents.len());
+ if let Some(parent) = parents.get(top).copied().unwrap() {
+ result.push(parent);
+ stack.push(parent.0);
+ }
+ }
+
+ result.reverse();
+
+ result
+ };
+
+ if parents_chain.is_empty() {
+ local_result.push(Lit(tnt));
+ let lit_index = builder.add_vertex();
+ builder.add_edge(0, lit_index, ()).unwrap();
+
+ continue;
+ }
+
+ assert!(parents_chain.first().unwrap().0 == regex_root);
+
+ // A different, "more local", root.
+ let mut root: usize;
+
+ // Handle the direct parent
+ let (parent_node, parent_edge_index) = parents_chain.pop().unwrap();
+
+ match label!(parent_node) {
+ Kleene | Plus => {
+ local_result.extend([Empty, Lit(tnt)]);
+
+ root = builder.add_vertex();
+ let lit_index = builder.add_vertex();
+ builder.add_edge(root, lit_index, ()).unwrap();
+
+ let iterator = children!(parent_node);
+
+ for index in iterator.clone().skip(parent_edge_index + 1) {
+ df_copy!(root, index);
+ }
+
+ local_result.push(Kleene);
+ let new_parent = builder.add_vertex();
+ builder.add_edge(root, new_parent, ()).unwrap();
+
+ for index in iterator {
+ df_copy!(new_parent, index);
+ }
+ }
+
+ Or => {
+ local_result.push(Lit(tnt));
+ root = builder.add_vertex();
+ }
+ Optional | Empty => {
+ // If this path is taken, it should not be
+ // optional.
+ local_result.extend([Empty, Lit(tnt)]);
+ root = builder.add_vertex();
+ let lit_index = builder.add_vertex();
+ builder.add_edge(root, lit_index, ()).unwrap();
+
+ for index in children!(parent_node).skip(parent_edge_index + 1) {
+ df_copy!(root, index);
+ }
+ }
+ Paren | Lit(_) => unreachable!(),
+ }
+
+ // Handle successive parents
+
+ for (node, edge_index) in parents_chain.into_iter() {
+ let node_type = label!(node);
+
+ match node_type {
+ Kleene | Plus => {
+ local_result.push(Empty);
+ let new_index = builder.add_vertex();
+ builder.add_edge(new_index, root, ()).unwrap();
+
+ root = new_index;
+
+ let iterator = children!(node);
+
+ for index in iterator.clone().skip(edge_index + 1) {
+ df_copy!(root, index);
+ }
+
+ local_result.push(Kleene);
+ let new_parent = builder.add_vertex();
+ builder.add_edge(root, new_parent, ()).unwrap();
+
+ for index in iterator {
+ df_copy!(new_parent, index);
+ }
+ }
+ RegexType::Or => {}
+ RegexType::Optional | RegexType::Empty => {
+ local_result.push(Empty);
+ let new_index = builder.add_vertex();
+ builder.add_edge(new_index, root, ()).unwrap();
+ root = new_index;
+
+ for index in children!(node).skip(edge_index + 1) {
+ df_copy!(root, index);
+ }
+ }
+ RegexType::Paren | RegexType::Lit(_) => unreachable!(),
+ }
+ }
+
+ builder.add_edge(0, root, ()).unwrap();
+ }
+
+ local_result.shrink_to_fit();
+
+ let graph = builder.build();
+
+ assert_eq!(graph.nodes_len(), local_result.len());
+
+ result.push(
+ DefaultRegex::new(graph, local_result)
+ .map_err(|_| Error::BuildFail(n, ParseError::Cycle))?,
+ );
+ }
+
+ assert_eq!(result.len(), non_len);
+
+ Ok(result)
+ }
+
+ /// Convert the regular language of left-linear closures to its
+ /// equivalent nondeterministic finite automaton.
+ ///
+ /// In the generation of the left-linear closure, the terminals
+ /// and non-terminals are packed into an unsigned integer. We
+ /// unpack them when converting to the nondeterministic finite
+ /// automaton.
+ ///
+ /// The resulting nondeterministic finite automaton should be
+ /// transformed to its null-closure for use in our algorithm.
+ pub fn left_closure_to_nfa(
+ &self,
+ closure: &[DefaultRegex<TNT>],
+ ) -> Result<DefaultNFA<DOption<TNT>>, Error> {
+ let label_transform = |tnt| match tnt {
+ TNT::Ter(t) => {
+ let new_tnt = self.unpack_tnt(t).map_err(|e| match e {
+ Error::IndexOutOfBounds(index, bound) => {
+ graph::error::Error::IndexOutOfBounds(index, bound)
+ }
+ _ => unreachable!(),
+ })?;
+
+ Ok(SoC::Carry(new_tnt))
+ }
+ TNT::Non(n) => Ok(SoC::Sub(n)),
+ };
+
+ DefaultNFA::to_nfa(closure, label_transform).map_err(Into::into)
+ }
+}
+
+impl Display for Grammar {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ assert_eq!(self.non.len(), self.rules.len());
+
+ for (nt, rule) in self.non.iter().zip(self.rules.iter()) {
+ write!(f, "{}: ", nt.name())?;
+
+ writeln!(
+ f,
+ "{}",
+ rule.regex.to_string_with(|tnt| format!(
+ "({})",
+ self.name_of_tnt(tnt)
+ .unwrap_or_else(|_| format!("Unknown {tnt:?}"))
+ ))?
+ )?;
+ }
+
+ Ok(())
+ }
+}
+
+#[cfg(test)]
+mod test_grammar_helper {
+ use super::*;
+ use nfa::default::regex::{DefaultRegParser, ParseDirection, ParseError, RegexType};
+ use std::fmt::Write;
+
+ // Construct a grammar to test
+ pub fn new_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
+ let ter = vec![Terminal::new("a".to_owned()), Terminal::new("b".to_owned())];
+ let non = vec![
+ Nonterminal("start".to_owned()),
+ Nonterminal("end".to_owned()),
+ ];
+
+ /// A function to scan the inputs.
+ fn scan_tnt(
+ parser: &DefaultRegParser<TNT>,
+ input: &str,
+ ) -> Result<Option<(usize, RegexType<TNT>, ParseDirection)>, ParseError> {
+ use ParseDirection::*;
+ use RegexType::*;
+ use TNT::*;
+
+ let mut chars = input.chars();
+
+ if let Some(first) = chars.next() {
+ match first {
+ '*' => Ok(Some((1, Kleene, Right))),
+ '+' => Ok(Some((1, Plus, Right))),
+ '?' => Ok(Some((1, Optional, Right))),
+ '|' => Ok(Some((1, Empty, Up))),
+ '(' => Ok(Some((1, Or, Down))),
+ ')' => Ok(Some((1, Paren, Up))),
+ 'T' => {
+ let mut name = String::new();
+ let mut len = 1;
+
+ while let Some(c) = chars.next() {
+ if ('a'..='z').contains(&c) {
+ len += 1;
+ write!(name, "{c}").map_err(|_| ParseError::InvalidCharacter(c))?;
+ } else {
+ break;
+ }
+ }
+
+ if let Some(t) = parser.query(&name, true) {
+ Ok(Some((len, Lit(Ter(t)), Right)))
+ } else {
+ Err(ParseError::InvalidCharacter(first))
+ }
+ }
+ 'N' => {
+ let mut name = String::new();
+ let mut len = 1;
+
+ while let Some(c) = chars.next() {
+ if ('a'..='z').contains(&c) {
+ len += 1;
+ write!(name, "{c}").map_err(|_| ParseError::InvalidCharacter(c))?;
+ } else {
+ break;
+ }
+ }
+
+ if let Some(n) = parser.query(&name, false) {
+ Ok(Some((len, Lit(Non(n)), Right)))
+ } else {
+ Err(ParseError::InvalidCharacter(first))
+ }
+ }
+ _ => Err(ParseError::InvalidCharacter(first)),
+ }
+ } else {
+ Ok(None)
+ }
+ }
+
+ let mut regex_parser: DefaultRegParser<TNT> = Default::default();
+
+ regex_parser.add_tnt("a", true);
+ regex_parser.add_tnt("b", true);
+ regex_parser.add_tnt("start", false);
+ regex_parser.add_tnt("end", false);
+
+ let regex_parser = regex_parser;
+
+ let rule1 = Rule {
+ regex: regex_parser
+ .parse("Ta*Tb+Nend+", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rule2 = Rule {
+ regex: regex_parser
+ .parse("Nstart?Nend*", Box::new(scan_tnt), true)?
+ .ok_or(ParseError::Invalid)?
+ .0,
+ };
+
+ let rules = vec![rule1, rule2];
+
+ Ok(Grammar::new(ter, non, rules))
+ }
+}
+
+#[cfg(test)]
+mod test_grammar_display {
+ use super::test_grammar_helper::new_grammar;
+
+ #[test]
+ fn test_display() -> Result<(), Box<dyn std::error::Error>> {
+ println!("{}", new_grammar()?);
+
+ Ok(())
+ }
+}
+
+#[cfg(test)]
+mod test_grammar_firsts {
+ use super::test_grammar_helper::new_grammar;
+ use super::*;
+
+ #[test]
+ fn test_firsts() -> Result<(), Box<dyn std::error::Error>> {
+ let mut grammar = new_grammar()?;
+
+ grammar.compute_firsts()?;
+
+ println!("grammar firsts: {:?}", grammar.firsts);
+ println!("grammar first nodes: {:?}", grammar.first_nodes);
+
+ Ok(())
+ }
+}
+
+#[cfg(test)]
+mod test_grammar_left_closure {
+ use super::test_grammar_helper::new_grammar;
+ use super::*;
+
+ pub fn new_closure_regex(
+ grammar: &mut Grammar,
+ ) -> Result<Vec<DefaultRegex<TNT>>, Box<dyn std::error::Error>> {
+ grammar.compute_firsts()?;
+
+ println!("grammar firsts: {:?}", grammar.firsts);
+ println!("grammar first nodes: {:?}", grammar.first_nodes);
+ println!("grammar:");
+ println!("{grammar}");
+
+ grammar.left_closure().map_err(Into::into)
+ }
+
+ #[test]
+ fn test_regex() -> Result<(), Box<dyn std::error::Error>> {
+ let mut grammar = new_grammar()?;
+
+ let vec_of_regexps = new_closure_regex(&mut grammar)?;
+
+ for regex in &vec_of_regexps {
+ println!("regex: {}", regex.to_string_with(|tnt| format!("{tnt}"))?);
+ // println!("regex: {regex:?}",);
+ println!("regex len = {}", regex.nodes_len());
+ }
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_nfa() -> Result<(), Box<dyn std::error::Error>> {
+ let mut grammar = new_grammar()?;
+ let closure = new_closure_regex(&mut grammar)?;
+ let nfa = grammar.left_closure_to_nfa(&closure)?;
+ // TODO: print the nfa out
+ Ok(())
+ }
+}
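For reference, the packing scheme documented on pack_tnt/unpack_tnt above is a plain offset by the number of terminals. A small sketch (not part of the patch), assuming the test grammar above with two terminals and two non-terminals, inside a test body that returns Result:

    // ter = ["a", "b"], non = ["start", "end"], so ter_num = 2.
    assert_eq!(grammar.pack_tnt(TNT::Ter(1))?, 1);   // terminals keep their index
    assert_eq!(grammar.pack_tnt(TNT::Non(0))?, 2);   // non-terminal n becomes n + ter_num
    assert_eq!(grammar.unpack_tnt(3)?, TNT::Non(1)); // unpack_tnt inverts the packing
    assert!(grammar.unpack_tnt(4).is_err());         // 4 >= ter_num + non_num is rejected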
diff --git a/chain/src/lib.rs b/chain/src/lib.rs
new file mode 100644
index 0000000..0ec4d4c
--- /dev/null
+++ b/chain/src/lib.rs
@@ -0,0 +1,25 @@
+//! This package implements the core algorithm of the entire
+//! workspace: parsing with derivatives by means of chain rule and
+//! regular nulling languages.
+//!
+//! Since I shall not name my crate "core" to avoid collisions with
+//! the Rust's own core, I decided to name this crate after what I
+//! think is the essence of this algorithm, the chain-rule for
+//! derivatives of languages.
+
+pub mod grammar;
+
+pub fn add(left: usize, right: usize) -> usize {
+ left + right
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn it_works() {
+ let result = add(2, 2);
+ assert_eq!(result, 4);
+ }
+}
diff --git a/repcore/src/plan.org b/chain/src/plan.org
index 13c2ee0..8c63492 100644
--- a/repcore/src/plan.org
+++ b/chain/src/plan.org
@@ -2,12 +2,47 @@
#+AUTHOR: Durand
#+DATE: <2022-11-18 Ven 19:57>
+* Things to do [2/7]
+
+- [X] Implement builders for graphs
+- [X] Find sets of the first terminals for each non-terminal, in the
+ grammar module.
+- [-] NFA [4/5]
+ + [X] Add regular expression into NFA.
+ + [X] Convert a set of regular expressions into a nondeterministic
+ finite automaton, where non-terminals denote the indices of
+ regular expressions in the set to substitute.
+ + [X] Convert a grammar into its grammar of left-linear closures of
+ non-terminals and their derivatives.
+ + [X] Convert nondeterministic finite automata to their null
+ closures.
+ + [ ] Test more grammars to ensure it works correctly.
+- [ ] Define the Atom trait.
+- [ ] Implement virtual nodes for each derivative of each atom: The
+ lack of this step might be the root cause of the failure of the
+ previous version of this package.
+- [ ] Implement languages. [0/2]
+ + [ ] First implement them as doubly labelled (directed acyclic)
+ graphs.
+ + [ ] Implement finding derivatives.
+- [ ] Implement semiring. [0/5]
+ + [ ] Define the trait.
+ + [ ] Implement the boolean semiring.
+ + [ ] Implement the natural number semiring.
+ + [ ] Implement the free semiring.
+ + [ ] Compute and record a semiring value when computing
+ derivatives.
+
* Atomic Languages
This describes the behaviours of atomic languages. The atomic
language consists of the null closure of any non-terminal symbol in
the grammar, and their derivatives by terminals and non-terminals.
+* Script for creating GIF animation
+
+[[https://gist.github.com/maelvls/5379127][a gist]]
+
* Derivative Languages
This is the main driving device of the algorithm. Basically, the
diff --git a/graph/src/adlist.rs b/graph/src/adlist.rs
index 18ad770..6c1dcd0 100644
--- a/graph/src/adlist.rs
+++ b/graph/src/adlist.rs
@@ -3,8 +3,9 @@
//! [`Graph`][super::Graph]. This data type represents graphs using
//! adjacency lists internally.
-use super::{ExtGraph, Graph};
-use crate::error::Error;
+use std::ops::{Deref, DerefMut};
+
+use crate::{builder::Builder, error::Error, ExtGraph, Graph};
// #[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd)]
// struct ALEdge {
@@ -80,6 +81,13 @@ impl Graph for ALGraph {
Ok(self.nodes.get(source).unwrap().children.contains(&target))
}
}
+
+ #[inline]
+ fn replace_by_builder(&mut self, builder: impl Builder<Result = Self>) {
+ let graph = builder.build();
+
+ self.nodes = graph.nodes;
+ }
}
impl ExtGraph for ALGraph {
@@ -102,11 +110,115 @@ impl ExtGraph for ALGraph {
}
}
-// TODO: Add a way to build a graph by its raw adjacency list representation.
impl From<Vec<Vec<usize>>> for ALGraph {
fn from(adlist: Vec<Vec<usize>>) -> Self {
- let nodes: Vec<ALNode> = adlist.iter().cloned().map(ALNode::new).collect();
- Self { nodes }
+ Self {
+ nodes: adlist.into_iter().map(ALNode::new).collect(),
+ }
+ }
+}
+
+// Builder for this implementation of graph
+
+/// A builder for adjacency list graphs.
+#[derive(Debug, Default, Clone)]
+pub struct ALGBuilder(ALGraph);
+
+impl Deref for ALGBuilder {
+ type Target = ALGraph;
+
+ fn deref(&self) -> &Self::Target {
+ &self.0
+ }
+}
+
+impl DerefMut for ALGBuilder {
+ fn deref_mut(&mut self) -> &mut Self::Target {
+ &mut self.0
+ }
+}
+
+impl Builder for ALGBuilder {
+ type Label = ();
+
+ type Result = ALGraph;
+
+ fn with_capacity(size: usize) -> Self {
+ Self(ALGraph {
+ nodes: Vec::with_capacity(size),
+ })
+ }
+
+ #[inline]
+ fn add_vertex(&mut self) -> usize {
+ self.nodes.push(ALNode::default());
+ self.nodes.len() - 1
+ }
+
+ fn add_edge(&mut self, source: usize, target: usize, _label: Self::Label) -> Result<(), Error> {
+ let nodes_len = self.nodes.len();
+
+ let source_children = self
+ .nodes
+ .get_mut(source)
+ .ok_or(Error::IndexOutOfBounds(source, nodes_len))?;
+
+ if !(0..nodes_len).contains(&target) {
+ return Err(Error::IndexOutOfBounds(target, nodes_len));
+ }
+
+ source_children.children.push(target);
+
+ Ok(())
+ }
+
+ /// Remove an edge from the source to the target.
+ ///
+ /// Since some graphs are labelled, the users are allowed to pass
+ /// a predicate to determine if an edge from the source to the
+ /// target should be removed.
+ ///
+ /// # Performance note
+ ///
+ /// It is possible that the target appears more than once in the
+ /// vector of children, so we have to remove every instance of
+ /// target.
+ ///
+ /// Since I do not think builders should be used for performance
+ /// critical tasks, this is fine.
+ ///
+ /// If one desires more performance, maybe
+ /// [`ASGraph`][crate::ASGraph] is a good choice.
+ ///
+ /// Of course, if one just wants to use adjacency list
+ /// representation and wants a performant builder, one can
+ /// implement a customized builder.
+ fn remove_edge<F>(&mut self, source: usize, target: usize, _predicate: F) -> Result<(), Error>
+ where
+ F: Fn(Self::Label) -> bool,
+ {
+ let nodes_len = self.nodes.len();
+
+ let source_children = self
+ .nodes
+ .get_mut(source)
+ .ok_or(Error::IndexOutOfBounds(source, nodes_len))?;
+
+ if !(0..nodes_len).contains(&target) {
+ return Err(Error::IndexOutOfBounds(target, nodes_len));
+ }
+
+ source_children.children.retain(|child| *child != target);
+
+ Ok(())
+ }
+
+ fn build(self) -> Self::Result {
+ self.0
+ }
+
+ fn build_ref(&self) -> Self::Result {
+ self.0.clone()
}
}
@@ -187,3 +299,123 @@ mod algraph_test {
Ok(())
}
}
+
+#[cfg(test)]
+mod test_algraph_builder {
+ use super::*;
+
+ #[test]
+ fn test_builder() -> Result<(), Box<dyn std::error::Error>> {
+ let mut builder = ALGBuilder::default();
+
+ // Add five nodes
+ builder.add_vertex();
+ builder.add_vertex();
+ builder.add_vertex();
+ builder.add_vertex();
+ builder.add_vertex();
+
+ println!("five empty nodes: {builder:?}");
+
+ // Link each node to its successor and link the last node with
+ // the first one to form a cycle.
+ for i in 0..5 {
+ builder.add_edge(i, if i == 4 { 0 } else { i + 1 }, ())?;
+ }
+
+ println!("a cycle of five nodes: {builder:?}");
+
+ // Remove the link from the last node to the first node.
+ builder.remove_edge(4, 0, |_| true)?;
+
+ println!("a line of five nodes: {builder:?}");
+
+ // build a graph
+
+ let graph = builder.build();
+
+ println!("final graph: {graph:?}");
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_errors() -> Result<(), Box<dyn std::error::Error>> {
+ let mut builder = ALGBuilder::default();
+
+ // Add five nodes
+ builder.add_vertex();
+ builder.add_vertex();
+ builder.add_vertex();
+ builder.add_vertex();
+ builder.add_vertex();
+
+ println!("five empty nodes: {builder:?}");
+
+ // Errors in add_edge
+
+ println!();
+ println!("Testing errors in add_edge:");
+ println!();
+
+ assert!(matches!(
+ builder.add_edge(0, 5, ()),
+ Err(Error::IndexOutOfBounds(5, 5))
+ ));
+
+ println!("Right error for an index out of bounds as the target");
+
+ assert!(matches!(
+ builder.add_edge(10, 5, ()),
+ Err(Error::IndexOutOfBounds(10, 5))
+ ));
+
+ println!("Right error for an index out of bounds as the source");
+
+ assert!(matches!(
+ builder.add_edge(10, 50, ()),
+ Err(Error::IndexOutOfBounds(10, 5))
+ ));
+
+ println!("Right error for both indices out of bounds");
+
+ // Errors in remove_edge
+
+ println!();
+ println!("Testing errors in remove_edge:");
+ println!();
+
+ assert!(matches!(
+ builder.remove_edge(0, 5, |_| true),
+ Err(Error::IndexOutOfBounds(5, 5))
+ ));
+
+ println!("Right error for an index out of bounds as the target");
+
+ assert!(matches!(
+ builder.remove_edge(10, 5, |_| true),
+ Err(Error::IndexOutOfBounds(10, 5))
+ ));
+
+ println!("Right error for an index out of bounds as the source");
+
+ assert!(matches!(
+ builder.remove_edge(10, 50, |_| true),
+ Err(Error::IndexOutOfBounds(10, 5))
+ ));
+
+ println!("Right error for both indices out of bounds");
+
+ assert!(matches!(builder.remove_edge(0, 1, |_| true), Ok(()),));
+
+ println!("No errors for removing a non-existing edge");
+
+ println!();
+
+ let graph = builder.build();
+
+ println!("final graph: {graph:?}");
+
+ Ok(())
+ }
+}
diff --git a/graph/src/adset.rs b/graph/src/adset.rs
index 58fed4c..c225986 100644
--- a/graph/src/adset.rs
+++ b/graph/src/adset.rs
@@ -7,8 +7,9 @@
//! duplications of languages, so it is more convenient if the
//! underlying graph type **cannot** represent duplicate edges.
-use super::{ExtGraph, Graph};
-use crate::error::Error;
+use std::ops::{Deref, DerefMut};
+
+use crate::{builder::Builder, error::Error, ExtGraph, Graph};
// If one wants to use another implementation for a set, import that
// as Set, and nothing else needs to be changed, ideally.
@@ -122,6 +123,13 @@ impl Graph for ASGraph {
.contains(&ASEdge::new(target)))
}
}
+
+ #[inline]
+ fn replace_by_builder(&mut self, builder: impl Builder<Result = Self>) {
+ let graph = builder.build();
+
+ self.nodes = graph.nodes;
+ }
}
impl ExtGraph for ASGraph {
@@ -144,6 +152,90 @@ impl ExtGraph for ASGraph {
}
}
+// Builder for this implementation of graph
+
+/// A builder for adjacency set graphs.
+#[derive(Debug, Default, Clone)]
+pub struct ASGBuilder(ASGraph);
+
+impl Deref for ASGBuilder {
+ type Target = ASGraph;
+
+ fn deref(&self) -> &Self::Target {
+ &self.0
+ }
+}
+
+impl DerefMut for ASGBuilder {
+ fn deref_mut(&mut self) -> &mut Self::Target {
+ &mut self.0
+ }
+}
+
+impl Builder for ASGBuilder {
+ type Label = ();
+
+ type Result = ASGraph;
+
+ #[inline]
+ fn with_capacity(size: usize) -> Self {
+ Self(ASGraph {
+ nodes: Vec::with_capacity(size),
+ })
+ }
+
+ #[inline]
+ fn add_vertex(&mut self) -> usize {
+ self.nodes.push(ASNode::default());
+ self.nodes.len() - 1
+ }
+
+ fn add_edge(&mut self, source: usize, target: usize, _label: Self::Label) -> Result<(), Error> {
+ let nodes_len = self.nodes.len();
+
+ let source_children = self
+ .nodes
+ .get_mut(source)
+ .ok_or(Error::IndexOutOfBounds(source, nodes_len))?;
+
+ if !(0..nodes_len).contains(&target) {
+ return Err(Error::IndexOutOfBounds(target, nodes_len));
+ }
+
+ source_children.children.insert(ASEdge::new(target));
+
+ Ok(())
+ }
+
+ fn remove_edge<F>(&mut self, source: usize, target: usize, _predicate: F) -> Result<(), Error>
+ where
+ F: Fn(Self::Label) -> bool,
+ {
+ let nodes_len = self.nodes.len();
+
+ let source_children = self
+ .nodes
+ .get_mut(source)
+ .ok_or(Error::IndexOutOfBounds(source, nodes_len))?;
+
+ if !(0..nodes_len).contains(&target) {
+ return Err(Error::IndexOutOfBounds(target, nodes_len));
+ }
+
+ source_children.children.remove(&ASEdge::new(target));
+
+ Ok(())
+ }
+
+ fn build(self) -> Self::Result {
+ self.0
+ }
+
+ fn build_ref(&self) -> Self::Result {
+ self.0.clone()
+ }
+}
+
#[cfg(test)]
mod asgraph_test {
use super::*;
@@ -227,3 +319,123 @@ mod asgraph_test {
Ok(())
}
}
+
+#[cfg(test)]
+mod test_asgraph_builder {
+ use super::*;
+
+ #[test]
+ fn test_builder() -> Result<(), Box<dyn std::error::Error>> {
+ let mut builder = ASGBuilder::default();
+
+ // Add five nodes
+ builder.add_vertex();
+ builder.add_vertex();
+ builder.add_vertex();
+ builder.add_vertex();
+ builder.add_vertex();
+
+ println!("five empty nodes: {builder:?}");
+
+ // Link each node to its successor and link the last node with
+ // the first one to form a cycle.
+ for i in 0..5 {
+ builder.add_edge(i, if i == 4 { 0 } else { i + 1 }, ())?;
+ }
+
+ println!("a cycle of five nodes: {builder:?}");
+
+ // Remove the link from the last node to the first node.
+ builder.remove_edge(4, 0, |_| true)?;
+
+ println!("a line of five nodes: {builder:?}");
+
+ // build a graph
+
+ let graph = builder.build();
+
+ println!("final graph: {graph:?}");
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_errors() -> Result<(), Box<dyn std::error::Error>> {
+ let mut builder = ASGBuilder::default();
+
+ // Add five nodes
+ builder.add_vertex();
+ builder.add_vertex();
+ builder.add_vertex();
+ builder.add_vertex();
+ builder.add_vertex();
+
+ println!("five empty nodes: {builder:?}");
+
+ // Errors in add_edge
+
+ println!();
+ println!("Testing errors in add_edge:");
+ println!();
+
+ assert!(matches!(
+ builder.add_edge(0, 5, ()),
+ Err(Error::IndexOutOfBounds(5, 5))
+ ));
+
+ println!("Right error for an index out of bounds as the target");
+
+ assert!(matches!(
+ builder.add_edge(10, 5, ()),
+ Err(Error::IndexOutOfBounds(10, 5))
+ ));
+
+ println!("Right error for an index out of bounds as the source");
+
+ assert!(matches!(
+ builder.add_edge(10, 50, ()),
+ Err(Error::IndexOutOfBounds(10, 5))
+ ));
+
+ println!("Right error for both indices out of bounds");
+
+ // Errors in remove_edge
+
+ println!();
+ println!("Testing errors in remove_edge:");
+ println!();
+
+ assert!(matches!(
+ builder.remove_edge(0, 5, |_| true),
+ Err(Error::IndexOutOfBounds(5, 5))
+ ));
+
+ println!("Right error for an index out of bounds as the target");
+
+ assert!(matches!(
+ builder.remove_edge(10, 5, |_| true),
+ Err(Error::IndexOutOfBounds(10, 5))
+ ));
+
+ println!("Right error for an index out of bounds as the source");
+
+ assert!(matches!(
+ builder.remove_edge(10, 50, |_| true),
+ Err(Error::IndexOutOfBounds(10, 5))
+ ));
+
+ println!("Right error for both indices out of bounds");
+
+ assert!(matches!(builder.remove_edge(0, 1, |_| true), Ok(()),));
+
+ println!("No errors for removing a non-existing edge");
+
+ println!();
+
+ let graph = builder.build();
+
+ println!("final graph: {graph:?}");
+
+ Ok(())
+ }
+}
diff --git a/graph/src/builder.rs b/graph/src/builder.rs
new file mode 100644
index 0000000..ce85cce
--- /dev/null
+++ b/graph/src/builder.rs
@@ -0,0 +1,53 @@
+//! This file defines the expected behaviours of a builder of graphs.
+
+use crate::{error::Error, Graph};
+
+/// A builder is actually just a graph that can be altered in more
+/// ways than an extension graph can. It should not have any methods
+/// from the Graph trait, though, as we shall convert a builder to a
+/// normal final graph before using it.
+pub trait Builder: Default {
+ /// Some graphs are labelled. This type should be the type of the
+ /// labels.
+ type Label;
+
+ /// The type of the result graph.
+ type Result: Graph;
+
+ /// Construct an empty builder with the capacity to hold a given
+ /// number of nodes.
+ ///
+ /// Implementations may ignore this method, where the default
+ /// implementation just calls `Default::default`.
+ #[inline]
+ fn with_capacity(_size: usize) -> Self {
+ Self::default()
+ }
+
+ /// Add a vertex without children.
+ fn add_vertex(&mut self) -> usize;
+
+ /// Add an edge from the source to the target.
+ fn add_edge(&mut self, source: usize, target: usize, label: Self::Label) -> Result<(), Error>;
+
+ /// Remove an edge from the source to the target.
+ ///
+ /// Since some graphs are labelled, the users are allowed to pass
+ /// a predicate to determine if an edge from the source to the
+ /// target should be removed.
+ fn remove_edge<F>(&mut self, source: usize, target: usize, predicate: F) -> Result<(), Error>
+ where
+ F: Fn(Self::Label) -> bool;
+
+ /// Convert the builder into a graph.
+ ///
+ /// This is the purpose of having a builder.
+ fn build(self) -> Self::Result;
+
+ /// Convert the builder into a graph using a reference.
+ ///
+ /// This is similar to [`build`][Builder::build], but takes an
+ /// immutable reference of the builder, so that the builder can
+ /// still be used later on.
+ fn build_ref(&self) -> Self::Result;
+}
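A minimal usage sketch for this trait (not part of the patch), using the ALGBuilder from graph/src/adlist.rs above, whose Label type is ():

    let mut builder = ALGBuilder::with_capacity(2);
    let a = builder.add_vertex();
    let b = builder.add_vertex();
    builder.add_edge(a, b, ())?;
    let snapshot = builder.build_ref(); // clones out a graph; the builder stays usable
    builder.remove_edge(a, b, |_| true)?;
    let graph = builder.build();        // consumes the builder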
diff --git a/graph/src/error.rs b/graph/src/error.rs
index 2162685..3600005 100644
--- a/graph/src/error.rs
+++ b/graph/src/error.rs
@@ -1,16 +1,20 @@
#![warn(missing_docs)]
//! This file implements the error data type of the graph library.
-use std::fmt::{self, Display};
+use core::fmt::{self, Display};
/// The error type for methods of the trait [`Graph`][`super::Graph`].
-#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd)]
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd)]
+#[non_exhaustive]
pub enum Error {
/// The index is out of bounds.
///
/// The first component is the index that is out of bounds, and
/// the second component is the current length of nodes.
IndexOutOfBounds(usize, usize),
+ /// The graph does not permit duplicate nodes but encounters a
+ /// repeated node
+ DuplicatedNode,
}
impl Display for Error {
@@ -19,6 +23,7 @@ impl Display for Error {
Error::IndexOutOfBounds(index, len) => {
write!(f, "index {index} out of bounds {len} ")
}
+ Error::DuplicatedNode => write!(f, "No duplicate nodes permitted, but found one"),
}
}
}
diff --git a/graph/src/labelled.rs b/graph/src/labelled.rs
index d02e301..d0a02d0 100644
--- a/graph/src/labelled.rs
+++ b/graph/src/labelled.rs
@@ -7,10 +7,9 @@
//! needs to be implemented efficiently, we store the mappings between
//! labels and edges in both directions.
-#[allow(unused_imports)]
-use super::{Graph, GraphLabel, LabelExtGraph, LabelGraph};
-#[allow(unused_imports)]
-use crate::error::Error;
+use std::ops::{Deref, DerefMut};
+
+use crate::{builder::Builder, error::Error, Graph, GraphLabel, LabelExtGraph, LabelGraph};
// We use BTreeMap and BTreeSet here as we need to exclude duplicate
// edge sets, while an ordinary hashmap and hashset do not allow
@@ -21,13 +20,23 @@ use std::collections::{
BTreeMap as Map, BTreeSet as Set, HashMap as HMap,
};
-#[derive(Debug, Clone, Default)]
+#[derive(Debug, Clone)]
struct DLNode<T: GraphLabel> {
by_target: Map<usize, Set<T>>,
by_label: Map<T, Set<usize>>,
flat: Vec<(T, usize)>,
}
+impl<T: GraphLabel> Default for DLNode<T> {
+ fn default() -> Self {
+ Self {
+ by_target: Default::default(),
+ by_label: Default::default(),
+ flat: Default::default(),
+ }
+ }
+}
+
impl<T: GraphLabel> DLNode<T> {
fn new(
by_target: Map<usize, Set<T>>,
@@ -66,6 +75,18 @@ impl<T: GraphLabel> DLGraph<T> {
edges_table: HMap::default(),
}
}
+
+ /// Return a builder.
+ #[inline]
+ pub fn builder(self) -> DLGBuilder<T> {
+ DLGBuilder(self)
+ }
+
+ /// Return a builder from a reference.
+ #[inline]
+ pub fn builder_ref(&self) -> DLGBuilder<T> {
+ DLGBuilder(self.clone())
+ }
}
impl<T: GraphLabel> Default for DLGraph<T> {
@@ -129,6 +150,49 @@ impl<T: GraphLabel> Graph for DLGraph<T> {
None => Err(Error::IndexOutOfBounds(source, self.nodes.len())),
}
}
+
+ fn print_viz(&self, filename: &str) -> Result<(), std::io::Error> {
+ let preamble = "digraph nfa {
+ fontname=\"Helvetica,Arial,sans-serif\"
+ node [fontname=\"Helvetica,Arial,sans-serif\"]
+ edge [fontname=\"Helvetica,Arial,sans-serif\"]
+ rankdir=LR;\n";
+
+ let mut post = String::new();
+
+ use std::fmt::Write as FWrite;
+
+ for (source, target) in self.edges() {
+ for label in self.edge_label(source, target).unwrap() {
+ let _ = writeln!(post, " {source} -> {target} [label = \"{label}\"]");
+ }
+ }
+
+ post.push_str("}\n");
+
+ let result = format!("{preamble}{post}");
+
+ if std::fs::metadata(filename).is_ok() {
+ std::fs::remove_file(filename)?;
+ }
+
+ let mut file = std::fs::File::options()
+ .write(true)
+ .create(true)
+ .open(filename)?;
+
+ use std::io::Write;
+
+ file.write_all(result.as_bytes())
+ }
+
+ #[inline]
+ fn replace_by_builder(&mut self, builder: impl Builder<Result = Self>) {
+ let new_graph = builder.build();
+
+ self.nodes = new_graph.nodes;
+ self.edges_table = new_graph.edges_table;
+ }
}
/// A delegation of iterators.
@@ -261,6 +325,25 @@ impl<T: GraphLabel> LabelGraph<T> for DLGraph<T> {
None => Err(Error::IndexOutOfBounds(node_id, self.nodes.len())),
}
}
+
+ fn has_edge_label(&self, node_id: usize, label: &T, target: usize) -> Result<bool, Error> {
+ let nodes_len = self.nodes.len();
+
+ match self.nodes.get(node_id) {
+ Some(node) => {
+ if target >= nodes_len {
+ return Err(Error::IndexOutOfBounds(target, nodes_len));
+ }
+
+ if let Some(labels) = node.by_target.get(&target) {
+ Ok(labels.contains(label))
+ } else {
+ Ok(false)
+ }
+ }
+ None => Err(Error::IndexOutOfBounds(node_id, nodes_len)),
+ }
+ }
}
impl<T: GraphLabel> LabelExtGraph<T> for DLGraph<T> {
@@ -315,6 +398,181 @@ impl<T: GraphLabel> LabelExtGraph<T> for DLGraph<T> {
}
}
+// Builder for this implementation of graph
+
+/// A builder for labelled adjacency doubly linked graphs.
+#[derive(Debug, Clone)]
+pub struct DLGBuilder<T: GraphLabel>(DLGraph<T>);
+
+impl<T: GraphLabel> Default for DLGBuilder<T> {
+ fn default() -> Self {
+ Self(Default::default())
+ }
+}
+
+impl<T: GraphLabel> Deref for DLGBuilder<T> {
+ type Target = DLGraph<T>;
+
+ fn deref(&self) -> &Self::Target {
+ &self.0
+ }
+}
+
+impl<T: GraphLabel> DerefMut for DLGBuilder<T> {
+ fn deref_mut(&mut self) -> &mut Self::Target {
+ &mut self.0
+ }
+}
+
+impl<T: GraphLabel> Builder for DLGBuilder<T> {
+ type Label = T;
+
+ type Result = DLGraph<T>;
+
+ #[inline]
+ fn with_capacity(size: usize) -> Self {
+ Self(DLGraph {
+ nodes: Vec::with_capacity(size),
+ edges_table: Default::default(),
+ })
+ }
+
+ #[inline]
+ fn add_vertex(&mut self) -> usize {
+ self.nodes.push(DLNode::default());
+ self.nodes.len() - 1
+ }
+
+ fn add_edge(&mut self, source: usize, target: usize, label: Self::Label) -> Result<(), Error> {
+ let nodes_len = self.nodes.len();
+
+ let source_node = self
+ .nodes
+ .get_mut(source)
+ .ok_or(Error::IndexOutOfBounds(source, nodes_len))?;
+
+ if !(0..nodes_len).contains(&target) {
+ return Err(Error::IndexOutOfBounds(target, nodes_len));
+ }
+
+ let new_edge_p = !matches!(
+ source_node
+ .by_target
+ .get(&target)
+ .map(|set| set.contains(&label)),
+ Some(true)
+ );
+
+ if new_edge_p {
+ let old_children_set: Set<(T, usize)> = source_node.flat.iter().copied().collect();
+
+ source_node
+ .by_target
+ .entry(target)
+ .or_insert_with(Set::default)
+ .insert(label);
+
+ source_node
+ .by_label
+ .entry(label)
+ .or_insert_with(Set::default)
+ .insert(target);
+
+ source_node.flat.push((label, target));
+
+ let new_children_set: Set<(T, usize)> = source_node.flat.iter().copied().collect();
+
+ // When source_node is in use, we cannot borrow self
+ // mutably again, so we move the following two statements
+ // to after all uses of source_node.
+
+ self.edges_table.remove(&old_children_set);
+
+ self.edges_table.insert(new_children_set, source);
+ }
+
+ Ok(())
+ }
+
+ fn remove_edge<F>(&mut self, source: usize, target: usize, predicate: F) -> Result<(), Error>
+ where
+ F: Fn(Self::Label) -> bool,
+ {
+ let nodes_len = self.nodes.len();
+
+ let source_node = self
+ .nodes
+ .get_mut(source)
+ .ok_or(Error::IndexOutOfBounds(source, nodes_len))?;
+
+ if !(0..nodes_len).contains(&target) {
+ return Err(Error::IndexOutOfBounds(target, nodes_len));
+ }
+
+ if let Some(target_labels_set) = source_node.by_target.get(&target) {
+ let mut to_remove = Vec::new();
+
+ for label in target_labels_set.iter() {
+ if predicate(*label) {
+ to_remove.push(*label);
+ }
+ }
+
+ if !to_remove.is_empty() {
+ let old_children_set: Set<(T, usize)> = source_node.flat.iter().copied().collect();
+
+ for label in to_remove.iter().copied() {
+ // This must be "Some", as the outer "if" checks
+ source_node
+ .by_target
+ .get_mut(&target)
+ .map(|set| set.remove(&label));
+
+ source_node
+ .by_label
+ .get_mut(&label)
+ .map(|set| set.remove(&target));
+
+ source_node.flat.retain(|(child_label, child_target)| {
+ (*child_label, *child_target) != (label, target)
+ });
+ }
+
+ // If after removal no labels remain for the target,
+ // we remove the edge entirely, to avoid false empty
+ // edges.
+ if source_node.flat.is_empty() {
+ source_node.by_target.remove(&target);
+
+ for label in to_remove.iter() {
+ source_node.by_label.remove(label);
+ }
+ }
+
+ let new_children_set: Set<(T, usize)> = source_node.flat.iter().copied().collect();
+
+ // When source_node is in use, we cannot borrow self
+ // mutably again, so we move the following two
+ // statements to after all uses of source_node.
+
+ self.edges_table.remove(&old_children_set);
+
+ self.edges_table.insert(new_children_set, source);
+ }
+ }
+
+ Ok(())
+ }
+
+ fn build_ref(&self) -> Self::Result {
+ self.0.clone()
+ }
+
+ fn build(self) -> Self::Result {
+ self.0
+ }
+}
+
#[cfg(test)]
mod label_test {
use super::*;
@@ -424,3 +682,125 @@ mod label_test {
Ok(())
}
}
+
+// TODO: Test print_viz
+
+#[cfg(test)]
+mod test_dlgraph_builder {
+ use super::*;
+
+ #[test]
+ fn test_builder() -> Result<(), Box<dyn std::error::Error>> {
+ let mut builder = DLGBuilder::<usize>::default();
+
+ // Add five nodes
+ builder.add_vertex();
+ builder.add_vertex();
+ builder.add_vertex();
+ builder.add_vertex();
+ builder.add_vertex();
+
+ println!("five empty nodes: {builder:?}");
+
+ // Link each node to its successor and link the last node with
+ // the first one to form a cycle.
+ for i in 0..5 {
+ builder.add_edge(i, if i < 4 { i + 1 } else { 0 }, i)?;
+ }
+
+ println!("a cycle of five nodes: {builder:?}");
+
+ // Remove the link from the last node to the first node.
+ builder.remove_edge(4, 0, |_| true)?;
+
+ println!("a line of five nodes: {builder:?}");
+
+ // build a graph
+
+ let graph = builder.build();
+
+ println!("final graph: {graph:?}");
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_errors() -> Result<(), Box<dyn std::error::Error>> {
+ let mut builder = DLGBuilder::default();
+
+ // Add five nodes
+ builder.add_vertex();
+ builder.add_vertex();
+ builder.add_vertex();
+ builder.add_vertex();
+ builder.add_vertex();
+
+ println!("five empty nodes: {builder:?}");
+
+ // Errors in add_edge
+
+ println!();
+ println!("Testing errors in add_edge:");
+ println!();
+
+ assert!(matches!(
+ builder.add_edge(0, 5, 0),
+ Err(Error::IndexOutOfBounds(5, 5))
+ ));
+
+ println!("Right error for an index out of bounds as the target");
+
+ assert!(matches!(
+ builder.add_edge(10, 5, 0),
+ Err(Error::IndexOutOfBounds(10, 5))
+ ));
+
+ println!("Right error for an index out of bounds as the source");
+
+ assert!(matches!(
+ builder.add_edge(10, 50, 0),
+ Err(Error::IndexOutOfBounds(10, 5))
+ ));
+
+ println!("Right error for both indices out of bounds");
+
+ // Errors in remove_edge
+
+ println!();
+ println!("Testing errors in remove_edge:");
+ println!();
+
+ assert!(matches!(
+ builder.remove_edge(0, 5, |_| true),
+ Err(Error::IndexOutOfBounds(5, 5))
+ ));
+
+ println!("Right error for an index out of bounds as the target");
+
+ assert!(matches!(
+ builder.remove_edge(10, 5, |_| true),
+ Err(Error::IndexOutOfBounds(10, 5))
+ ));
+
+ println!("Right error for an index out of bounds as the source");
+
+ assert!(matches!(
+ builder.remove_edge(10, 50, |_| true),
+ Err(Error::IndexOutOfBounds(10, 5))
+ ));
+
+ println!("Right error for both indices out of bounds");
+
+ assert!(matches!(builder.remove_edge(0, 1, |_| true), Ok(()),));
+
+ println!("No errors for removing a non-existing edge");
+
+ println!();
+
+ let graph = builder.build();
+
+ println!("final graph: {graph:?}");
+
+ Ok(())
+ }
+}
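As a rough sketch of how builder_ref and replace_by_builder are meant to work together, the following hypothetical helper clones an existing DLGraph into a builder, adds a self-loop on a fresh node, and swaps the rebuilt graph back in:

use graph::{builder::Builder, error::Error, DLGraph, Graph};

fn add_self_loop(graph: &mut DLGraph<char>) -> Result<(), Error> {
    // Clone the current graph into a builder, leaving the original usable.
    let mut builder = graph.builder_ref();

    let node = builder.add_vertex();
    builder.add_edge(node, node, 'x')?;

    // Replace the contents of the graph by the builder's result.
    graph.replace_by_builder(builder);

    Ok(())
}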
diff --git a/graph/src/lib.rs b/graph/src/lib.rs
index 7b74ee1..2d23af3 100644
--- a/graph/src/lib.rs
+++ b/graph/src/lib.rs
@@ -8,6 +8,8 @@
use std::hash::Hash;
+use core::fmt::{Debug, Display};
+
pub mod error;
pub mod adset;
@@ -22,6 +24,10 @@ pub mod labelled;
pub use labelled::DLGraph;
+pub mod builder;
+
+pub use builder::Builder;
+
use error::Error;
/// The expected behaviour of an immutable graph.
@@ -101,6 +107,83 @@ pub trait Graph: Default {
/// Return an error if either the source or the target is an
/// invalid node.
fn has_edge(&self, source: usize, target: usize) -> Result<bool, Error>;
+
+ /// Output the edges into a graphviz file.
+ fn print_viz(&self, filename: &str) -> Result<(), std::io::Error> {
+ let preamble = "digraph nfa {
+ fontname=\"Helvetica,Arial,sans-serif\"
+ node [fontname=\"Helvetica,Arial,sans-serif\"]
+ edge [fontname=\"Helvetica,Arial,sans-serif\"]
+ rankdir=LR;\n";
+
+ let mut post = String::new();
+
+ use std::fmt::Write as FWrite;
+
+ for (source, target) in self.edges() {
+ let _ = writeln!(post, "{source} -> {target}");
+ }
+
+ post.push_str("}\n");
+
+ let result = format!("{preamble}{post}");
+
+ if std::fs::metadata(filename).is_ok() {
+ std::fs::remove_file(filename)?;
+ }
+
+ let mut file = std::fs::File::options()
+ .write(true)
+ .create(true)
+ .open(filename)?;
+
+ use std::io::Write;
+
+ file.write_all(result.as_bytes())
+ }
+
+ /// Returns whether or not the graph contains cycles.
+ ///
+ /// If, and only if, the graph contains invalid nodes, an error
+ /// will be signalled.
+ fn contains_cycles(&self) -> Result<bool, Error> {
+ use std::collections::HashSet;
+
+ let mut seen_nodes: HashSet<usize> = HashSet::with_capacity(self.nodes_len());
+
+ for node in self.nodes() {
+ if seen_nodes.contains(&node) {
+ continue;
+ }
+
+ let mut edges_seen_nodes: HashSet<usize> = HashSet::with_capacity(self.nodes_len());
+
+ let mut stack = Vec::with_capacity(self.nodes_len());
+ stack.push(node);
+
+ while let Some(top) = stack.pop() {
+ if edges_seen_nodes.contains(&top) {
+ // a cycle is found
+ return Ok(true);
+ }
+
+ edges_seen_nodes.insert(top);
+
+ let mut local_stack: Vec<usize> = self.children_of(top)?.collect();
+
+ local_stack.reverse();
+
+ stack.extend(local_stack);
+ }
+
+ seen_nodes.extend(edges_seen_nodes);
+ }
+
+ Ok(false)
+ }
+
+ /// Replace the contents of the graph by a builder.
+ fn replace_by_builder(&mut self, builder: impl Builder<Result = Self>);
}
/// A graph that can be extended, but not mutated, in the sense that
@@ -121,9 +204,15 @@ pub trait ExtGraph: Graph {
}
/// The type of labels should be comparable and hashable.
-pub trait GraphLabel: Hash + Eq + PartialEq + Clone + Copy + Ord + PartialOrd {}
+pub trait GraphLabel:
+ Hash + Eq + PartialEq + Clone + Copy + Ord + PartialOrd + Display + Debug
+{
+}
-impl<T: Hash + Eq + PartialEq + Clone + Copy + Ord + PartialOrd> GraphLabel for T {}
+impl<T: Hash + Eq + PartialEq + Clone + Copy + Ord + PartialOrd + Display + Debug> GraphLabel
+ for T
+{
+}
/// A labelled graph is just a graph with labels associated to
/// vertices and / or edges.
@@ -182,6 +271,10 @@ pub trait LabelGraph<T: GraphLabel>: Graph {
///
/// The efficiency of this method matters in implementations.
fn labels_of(&self, node_id: usize) -> Result<Self::LabelIter<'_>, Error>;
+
+ /// Return true if and only if the node has an edge with the given
+ /// label and target.
+ fn has_edge_label(&self, node_id: usize, label: &T, target: usize) -> Result<bool, Error>;
}
/// A labelled graph that can be extended, but not mutated, in the
@@ -201,3 +294,39 @@ pub trait LabelExtGraph<T: GraphLabel>: LabelGraph<T> {
/// an error.
fn extend(&mut self, edges: impl IntoIterator<Item = (T, usize)>) -> Result<usize, Error>;
}
+
+#[cfg(test)]
+mod test_cycle_detection {
+ use super::{builder::Builder, labelled::DLGBuilder, Graph};
+
+ #[test]
+ fn test() -> Result<(), Box<dyn std::error::Error>> {
+ let mut builder: DLGBuilder<usize> = DLGBuilder::default();
+
+ // Add five nodes
+ builder.add_vertex();
+ builder.add_vertex();
+ builder.add_vertex();
+ builder.add_vertex();
+ builder.add_vertex();
+
+ // Link each node to its successor and link the last node with
+ // the first one to form a cycle.
+ for i in 0..5 {
+ builder.add_edge(i, if i < 4 { i + 1 } else { 0 }, i)?;
+ }
+
+ let graph = builder.build_ref();
+
+ assert_eq!(graph.contains_cycles()?, true);
+
+ // Remove the link from the last node to the first node.
+ builder.remove_edge(4, 0, |_| true)?;
+
+ let graph = builder.build();
+
+ assert_eq!(graph.contains_cycles()?, false);
+
+ Ok(())
+ }
+}
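A short sketch of the new default print_viz and contains_cycles methods in use, assuming the DLGBuilder from the labelled module; the file name is arbitrary, and the generated file can be rendered with Graphviz, for example via dot -Tsvg graph.gv -o graph.svg:

use graph::{builder::Builder, labelled::DLGBuilder, Graph};

fn demo() -> Result<(), Box<dyn std::error::Error>> {
    let mut builder: DLGBuilder<usize> = Default::default();

    let a = builder.add_vertex();
    let b = builder.add_vertex();
    builder.add_edge(a, b, 0)?;

    let graph = builder.build();

    // A single edge between two distinct nodes cannot form a cycle.
    assert!(!graph.contains_cycles()?);

    // Write the edges in Graphviz format to "graph.gv".
    graph.print_viz("graph.gv")?;

    Ok(())
}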
diff --git a/nfa/src/default/nfa.rs b/nfa/src/default/nfa.rs
index 3c2bd83..229ba4d 100644
--- a/nfa/src/default/nfa.rs
+++ b/nfa/src/default/nfa.rs
@@ -1,25 +1,26 @@
//! This file provides a default implementation of NFA.
-// TODO: Store the regular expression in the NFA as well.
-//
-// The current focus of the project is to understand the growth rate
-// of the algorithm, to know whether I made a mistake in the previous
-// iteration of the implementation, or the algorithm is not as fast as
-// the author estimated, which is not quite likely, of course.
+// TODO: The current focus of the project is to understand the growth
+// rate of the algorithm, to know whether I made a mistake in the
+// previous iteration of the implementation, or the algorithm is not
+// as fast as the author estimated, which is not quite likely, of
+// course.
//
// Thus I shall establish a friendly interface that allows me to view
// and debug the atomic languages and the languages, transparently.
-use graph::{error::Error as GError, DLGraph, Graph, GraphLabel, LabelGraph};
+use graph::{
+ builder::Builder, error::Error as GError, labelled::DLGBuilder, DLGraph, Graph, GraphLabel,
+ LabelGraph,
+};
-use crate::{error::Error, Nfa, Regex};
+use crate::{default::regex::RegexType, error::Error, DOption, Nfa, Regex, SoC};
-use std::fmt::Display;
+use core::fmt::{Debug, Display};
-#[non_exhaustive]
-#[derive(Debug)]
/// Default NFA implementation.
-pub struct DefaultNFA<T: GraphLabel + Display> {
+#[derive(Debug, Clone)]
+pub struct DefaultNFA<T: GraphLabel> {
graph: DLGraph<T>,
}
@@ -63,6 +64,16 @@ impl<T: GraphLabel + Display> Graph for DefaultNFA<T> {
fn has_edge(&self, source: usize, target: usize) -> Result<bool, GError> {
self.graph.has_edge(source, target)
}
+
+ #[inline]
+ fn print_viz(&self, filename: &str) -> Result<(), std::io::Error> {
+ self.graph.print_viz(filename)
+ }
+
+ #[inline]
+ fn replace_by_builder(&mut self, _builder: impl Builder<Result = Self>) {
+ unimplemented!()
+ }
}
impl<T: GraphLabel + Display> LabelGraph<T> for DefaultNFA<T> {
@@ -70,11 +81,10 @@ impl<T: GraphLabel + Display> LabelGraph<T> for DefaultNFA<T> {
type LabelIter<'a> = <DLGraph<T> as LabelGraph<T>>::LabelIter<'a> where T: 'a;
- // TODO: Return the label from the contained regular language.
#[inline]
fn vertex_label(&self, node_id: usize) -> Result<Option<T>, GError> {
if self.has_node(node_id) {
- todo!()
+ unimplemented!()
} else {
Err(GError::IndexOutOfBounds(node_id, self.nodes_len()))
}
@@ -98,12 +108,232 @@ impl<T: GraphLabel + Display> LabelGraph<T> for DefaultNFA<T> {
fn labels_of(&self, node_id: usize) -> Result<Self::LabelIter<'_>, GError> {
self.graph.labels_of(node_id)
}
+
+ #[inline]
+ fn has_edge_label(&self, node_id: usize, label: &T, target: usize) -> Result<bool, GError> {
+ self.graph.has_edge_label(node_id, label, target)
+ }
}
impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
- #[allow(unused)]
- fn to_nfa(regex: impl Regex<T>) -> Self {
- todo!()
+ type FromRegex<S: GraphLabel + Display + Default> = DefaultNFA<S>;
+
+ fn to_nfa(
+ regexps: &[impl Regex<RegexType<T>>],
+ sub_pred: impl Fn(T) -> Result<SoC<T>, Error>,
+ ) -> Result<Self::FromRegex<DOption<T>>, Error> {
+ let total_regexps_len: usize = regexps.iter().map(Graph::nodes_len).sum();
+
+ if total_regexps_len == 0 {
+ return Ok(Default::default());
+ }
+
+ // We reserve two more rooms for later uses.
+ let nfa_len = total_regexps_len * 2 + 2;
+
+ let mut builder: DLGBuilder<DOption<T>> = Builder::with_capacity(nfa_len);
+
+ // Since DOption<T> implements Copy when T does, we can use a
+ // variable to hold the empty label to avoid repetitions.
+ let empty_label: DOption<T> = Default::default();
+
+ for _ in 0..nfa_len {
+ builder.add_vertex();
+ }
+
+ let accumulators: Vec<usize> = {
+ let mut result = Vec::with_capacity(regexps.len() + 1);
+
+ result.push(0);
+
+ let mut accumulator = 0;
+
+ for regexp in regexps.iter() {
+ accumulator += regexp.nodes_len() * 2;
+ result.push(accumulator);
+ }
+
+ result.pop();
+
+ result
+ };
+
+ assert_eq!(accumulators.len(), regexps.len());
+
+ /// Get offset from `accumulators` safely.
+ macro_rules! get_offset_safe (
+ ($num:expr) => {
+ *accumulators.get($num).ok_or(Error::UnknownNode($num))?
+ }
+ );
+
+ /// Get offset from `accumulators` without checking bounds.
+ macro_rules! get_offset_unsafe (
+ ($num:expr) => {
+ { unsafe { *accumulators.get_unchecked($num) } }
+ }
+ );
+
+ for (index, regex) in regexps.iter().enumerate() {
+ let root = if let Some(root) = regex.root() {
+ root
+ } else {
+ // A regular expression without roots is empty, so we
+ // skip it.
+ continue;
+ };
+
+ let regex_len = regex.nodes_len();
+
+ // It is safe here to call `get_offset_unsafe`, as `index`
+ // is guaranteed to be strictly less than the length of
+ // `accumulators` by an assertion above.
+ let offset = get_offset_unsafe!(index);
+
+ let mut stack: Vec<usize> = Vec::with_capacity(regex_len);
+
+ stack.push(root);
+
+ while let Some(top_index) = stack.pop() {
+ let top_label = regex.vertex_label(top_index)?;
+
+ let nfa_start = offset + 2 * top_index;
+ let nfa_end = offset + 2 * top_index + 1;
+
+ match top_label {
+ RegexType::Kleene => {
+ builder.add_edge(nfa_start, nfa_end, empty_label)?;
+
+ let mut source = nfa_start;
+
+ if !regex.is_empty_node(top_index)? {
+ for child in regex.children_of(top_index)? {
+ stack.push(child);
+
+ let child_start = offset + 2 * child;
+ let child_end = offset + 2 * child + 1;
+
+ builder.add_edge(source, child_start, empty_label)?;
+
+ source = child_end;
+ }
+
+ builder.add_edge(source, nfa_end, empty_label)?;
+
+ builder.add_edge(nfa_end, nfa_start, empty_label)?;
+ }
+ }
+ RegexType::Plus => {
+ let mut source = nfa_start;
+
+ if !regex.is_empty_node(top_index)? {
+ for child in regex.children_of(top_index)? {
+ stack.push(child);
+
+ let child_start = offset + 2 * child;
+ let child_end = offset + 2 * child + 1;
+
+ builder.add_edge(source, child_start, empty_label)?;
+
+ source = child_end;
+ }
+
+ builder.add_edge(source, nfa_end, empty_label)?;
+
+ builder.add_edge(nfa_end, nfa_start, empty_label)?;
+ } else {
+ builder.add_edge(nfa_start, nfa_end, empty_label)?;
+ }
+ }
+ RegexType::Optional => {
+ builder.add_edge(nfa_start, nfa_end, empty_label)?;
+
+ let mut source = nfa_start;
+
+ if !regex.is_empty_node(top_index)? {
+ for child in regex.children_of(top_index)? {
+ stack.push(child);
+
+ let child_start = offset + 2 * child;
+ let child_end = offset + 2 * child + 1;
+
+ builder.add_edge(source, child_start, empty_label)?;
+
+ source = child_end;
+ }
+
+ builder.add_edge(source, nfa_end, empty_label)?;
+ }
+ }
+ RegexType::Or => {
+ if !regex.is_empty_node(top_index)? {
+ for child in regex.children_of(top_index)? {
+ stack.push(child);
+
+ let child_start = offset + 2 * child;
+ let child_end = offset + 2 * child + 1;
+
+ builder.add_edge(nfa_start, child_start, empty_label)?;
+ builder.add_edge(child_end, nfa_end, empty_label)?;
+ }
+ } else {
+ builder.add_edge(nfa_start, nfa_end, empty_label)?;
+ }
+ }
+ RegexType::Paren => {
+ // Ignore Paren nodes since currently these
+ // are used only for printing purposes.
+ }
+ RegexType::Empty => {
+ let mut source = nfa_start;
+
+ if !regex.is_empty_node(top_index)? {
+ for child in regex.children_of(top_index)? {
+ stack.push(child);
+
+ let child_start = offset + 2 * child;
+ let child_end = offset + 2 * child + 1;
+
+ builder.add_edge(source, child_start, empty_label)?;
+
+ source = child_end;
+ }
+
+ builder.add_edge(source, nfa_end, empty_label)?;
+ } else {
+ builder.add_edge(nfa_start, nfa_end, empty_label)?;
+ }
+ }
+ RegexType::Lit(value) => {
+ // The children would be ignored even if for
+ // some reason this literal node had some
+ // children.
+
+ match sub_pred(value)? {
+ SoC::Sub(sub_non) => {
+ // a non-terminal
+
+ let sub_offset = get_offset_safe!(sub_non);
+ let sub_nfa_start = sub_offset + 2 * sub_non;
+ let sub_nfa_end = sub_offset + 2 * sub_non + 1;
+
+ builder.add_edge(nfa_start, sub_nfa_start, empty_label)?;
+ builder.add_edge(sub_nfa_end, nfa_end, empty_label)?;
+ }
+ SoC::Carry(new_value) => {
+ // a terminal
+
+ builder.add_edge(nfa_start, nfa_end, DOption(Some(new_value)))?;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ let graph = builder.build();
+
+ Ok(DefaultNFA { graph })
}
fn remove_epsilon(&mut self) -> Result<(), Error> {
@@ -114,7 +344,113 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
todo!()
}
- fn nulling(&mut self) -> Result<(), Error> {
- todo!()
+ fn nulling(&mut self, f: impl Fn(T) -> bool) -> Result<(), Error> {
+ let mut updated = true;
+ let mut builder = self.graph.builder_ref();
+
+ while updated {
+ updated = false;
+
+ let mut nullable = false;
+
+ let mut to_add = Vec::new();
+
+ for (source, target) in builder.edges() {
+ for label in builder.edge_label(source, target)? {
+ if f(label) {
+ nullable = true;
+
+ break;
+ }
+ }
+
+ if nullable {
+ for (label, child_iter) in builder.labels_of(target)? {
+ for child in child_iter {
+ if !builder.has_edge_label(source, label, child)? {
+ updated = true;
+
+ to_add.push((source, child, *label));
+ }
+ }
+ }
+ }
+ }
+
+ for (source, child, label) in to_add {
+ builder.add_edge(source, child, label)?;
+ }
+ }
+
+ self.graph.replace_by_builder(builder);
+
+ Ok(())
+ }
+}
+
+impl<T: GraphLabel + Display + Debug> Display for DefaultNFA<T> {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ Debug::fmt(self, f)
+ }
+}
+
+#[cfg(test)]
+mod test_to_nfa {
+ #![allow(unused_imports)]
+ use super::*;
+ use crate::SoC;
+
+ use crate::{
+ default::regex::{DefaultRegParser, DefaultRegex, ParseDirection, ParseError, RegexType},
+ desrec::DesRec,
+ };
+
+ fn new_regex() -> Result<DefaultRegex<char>, ParseError> {
+ let parser = DefaultRegParser::<char>::default();
+
+ fn test_scanner<T: GraphLabel + Display + Debug>(
+ _parser: &DefaultRegParser<T>,
+ input: &str,
+ ) -> Result<Option<(usize, RegexType<char>, ParseDirection)>, ParseError> {
+ use ParseDirection::*;
+ use RegexType::*;
+
+ if let Some(first) = input.chars().next() {
+ match first {
+ '*' => Ok(Some((1, Kleene, Right))),
+ '+' => Ok(Some((1, Plus, Right))),
+ '?' => Ok(Some((1, Optional, Right))),
+ '|' => Ok(Some((1, Empty, Up))),
+ '(' => Ok(Some((1, Or, Down))),
+ ')' => Ok(Some((1, Paren, Up))),
+ ' '..='~' => Ok(Some((1, Lit(first), Right))),
+ _ => Err(ParseError::InvalidCharacter(first)),
+ }
+ } else {
+ Ok(None)
+ }
+ }
+
+ let input_string = "a*b?c+|(d*| +)?".to_owned();
+
+ Ok(
+ DefaultRegParser::<char>::parse(&parser, &input_string, Box::new(test_scanner), true)?
+ .unwrap_or(Default::default())
+ .0,
+ )
+ }
+
+ #[test]
+ fn test_to_nfa() -> Result<(), Box<dyn std::error::Error>> {
+ let regex = new_regex()?;
+
+ println!("regex = {regex}");
+
+ let nfa: DefaultNFA<DOption<char>> =
+ DefaultNFA::to_nfa(&[regex], |label| Ok(SoC::Carry(label)))?;
+
+ nfa.print_viz("nfa.gv")?;
+
+ Ok(())
}
}
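Building on the test_to_nfa module above, a hedged sketch of how nulling is intended to be invoked on the constructed automaton, treating the ε-edges produced by to_nfa (those whose DOption label is empty) as null; the test name and output file are only illustrative, and this assumes the code sits in the same module as new_regex:

    #[test]
    fn test_nulling() -> Result<(), Box<dyn std::error::Error>> {
        let regex = new_regex()?;

        let mut nfa: DefaultNFA<DOption<char>> =
            DefaultNFA::to_nfa(&[regex], |label| Ok(SoC::Carry(label)))?;

        // An edge is considered null exactly when its label is the empty DOption.
        nfa.nulling(|label| label.is_none())?;

        nfa.print_viz("nfa_nulled.gv")?;

        Ok(())
    }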
diff --git a/nfa/src/default/regex.rs b/nfa/src/default/regex.rs
index a60f801..2670e32 100644
--- a/nfa/src/default/regex.rs
+++ b/nfa/src/default/regex.rs
@@ -6,7 +6,11 @@ use crate::{desrec::DesRec, error::Error, Regex};
use receme::{algebra::Algebra, catana::Cata};
-use std::fmt::{Debug, Display};
+use std::{
+ collections::HashMap,
+ fmt::{Debug, Display, Write},
+ marker::PhantomData,
+};
/// The type of a node in a regular expression.
///
@@ -25,7 +29,7 @@ use std::fmt::{Debug, Display};
/// This is possible because a regular expression has a root node.
/// For the sake of convenience, the root node has type "Or".
#[derive(Debug, Hash, Default, Eq, PartialEq, Clone, Copy, Ord, PartialOrd)]
-pub enum RegexType<T: GraphLabel + Display> {
+pub enum RegexType<T: GraphLabel> {
/// A star node is a node with an arbitrary number of repetitions.
Kleene,
/// A plus node is a node with at least one repetition: a+ equals
@@ -44,13 +48,31 @@ pub enum RegexType<T: GraphLabel + Display> {
Lit(T),
}
+impl<T: GraphLabel> Display for RegexType<T> {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ match self {
+ RegexType::Kleene => write!(f, "*"),
+ RegexType::Plus => write!(f, "+"),
+ RegexType::Optional => write!(f, "?"),
+ RegexType::Or => write!(f, "|"),
+ RegexType::Paren => write!(f, "()"),
+ RegexType::Empty => write!(f, "empty"),
+ RegexType::Lit(value) => write!(f, "{value}"),
+ }
+ }
+}
+
/// A default implementation of regular expressions.
-#[derive(Debug)]
+#[derive(Debug, Clone)]
pub struct DefaultRegex<T: GraphLabel + Display> {
/// The underlying graph is stored using adjacency lists.
graph: ALGraph,
/// The types of the underlying nodes.
types: Vec<RegexType<T>>,
+ /// The root of the graph.
+ ///
+ /// If it is None, it indicates the regular expression is empty.
+ root: Option<usize>,
}
impl<T: GraphLabel + Display> Default for DefaultRegex<T> {
@@ -58,11 +80,39 @@ impl<T: GraphLabel + Display> Default for DefaultRegex<T> {
Self {
graph: Default::default(),
types: Default::default(),
+ root: Default::default(),
}
}
}
-impl<T: GraphLabel + Display> DefaultRegex<T> {
+impl<T: GraphLabel> DefaultRegex<T> {
+ /// Set the root of the regular expression.
+ #[inline]
+ pub fn set_root(&mut self, root: Option<usize>) {
+ self.root = root;
+ }
+
+ /// Construct a regular expression from a raw adjacency list graph
+ /// and an array of labels.
+ ///
+ /// # Error
+ ///
+ /// If the graph contains cycles, this function will return an
+ /// error. This check is added to make sure that the regular
+ /// expression contains no cycles, otherwise operations with
+ /// regular expressions might enter infinite loops later.
+ pub fn new(graph: ALGraph, types: Vec<RegexType<T>>) -> Result<Self, Error> {
+ if graph.contains_cycles()? {
+ Err(Error::Cycle)
+ } else {
+ Ok(Self {
+ graph,
+ types,
+ root: Some(0),
+ })
+ }
+ }
+
/// Return the number of elements in this regular expression,
/// counting nodes.
pub fn len(&self) -> usize {
@@ -86,10 +136,65 @@ impl<T: GraphLabel + Display> DefaultRegex<T> {
Ok(())
}
+
+ /// Return the internal graph.
+ ///
+ /// Normally this should not be used.
+ pub fn internal_graph(&self) -> ALGraph {
+ self.graph.clone()
+ }
+
+ /// Return the internal types array.
+ ///
+ /// Normally this should not be used.
+ pub fn internal_types(&self) -> Vec<RegexType<T>> {
+ self.types.clone()
+ }
+
+ /// Return the array of parents.
+ ///
+ /// The element at index N of the returned array is the parent of
+ /// the N-th node, along with the position of that node among the
+ /// parent's children.  If a node has no parent, a `None` is placed
+ /// at that index.
+ ///
+ /// # Error
+ ///
+ /// If some edge points to an invalid node, an error will be
+ /// returned.
+ ///
+ /// Also, if the graph contains cycles, an error is also returned.
+ pub fn parents_array(&self) -> Result<Vec<Option<(usize, usize)>>, Error> {
+ if self.graph.contains_cycles().map_err(|e| match e {
+ GError::IndexOutOfBounds(n, _) => Error::UnknownNode(n),
+ _ => unreachable!(),
+ })? {
+ return Err(Error::Cycle);
+ }
+
+ let len = self.len();
+
+ let mut result: Vec<_> = std::iter::repeat_with(|| None).take(len).collect();
+
+ for source in self.graph.nodes() {
+ for (index, target) in self
+ .graph
+ .children_of(source)
+ .map_err(|_| Error::UnknownNode(source))?
+ .enumerate()
+ {
+ result
+ .get_mut(target)
+ .ok_or(Error::UnknownNode(target))?
+ .replace((source, index));
+ }
+ }
+
+ Ok(result)
+ }
}
// REVIEW: This may not be needed.
-impl<S: GraphLabel + Display, T, A> Cata<T, Vec<T>, A> for &DefaultRegex<S>
+impl<S: GraphLabel, T, A> Cata<T, Vec<T>, A> for &DefaultRegex<S>
where
A: Algebra<T, Vec<T>>,
{
@@ -118,8 +223,13 @@ where
}
}
-impl<T: GraphLabel + Display> Display for DefaultRegex<T> {
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+impl<T: GraphLabel> DefaultRegex<T> {
+ /// Use a function to format the labels of the regular expressions
+ /// and format the entire regular expression with this aid.
+ pub fn to_string_with<F>(&self, mut f: F) -> Result<String, std::fmt::Error>
+ where
+ F: FnMut(T) -> String,
+ {
#[derive(Copy, Clone)]
enum StackElement {
Seen(usize),
@@ -134,16 +244,15 @@ impl<T: GraphLabel + Display> Display for DefaultRegex<T> {
}
}
- fn is_seen(&self) -> bool {
- match self {
- Seen(_) => true,
- Unseen(_) => false,
- }
+ fn is_seen(self) -> bool {
+ matches!(self, Seen(_))
}
}
use StackElement::{Seen, Unseen};
+ let mut result = String::new();
+
let mut stack: Vec<StackElement> = Vec::new();
let mut types = self.types.clone();
types.push(RegexType::Paren);
@@ -159,6 +268,12 @@ impl<T: GraphLabel + Display> Display for DefaultRegex<T> {
RegexType::Kleene => {
if !top.is_seen() {
stack.push(Seen(top.index()));
+
+ if self.degree(top.index()).unwrap() > 1 {
+ write!(result, "(")?;
+ stack.push(Unseen(types.len() - 1));
+ }
+
stack.extend(
self.graph
.children_of(top.index())
@@ -167,7 +282,7 @@ impl<T: GraphLabel + Display> Display for DefaultRegex<T> {
.rev(),
);
} else {
- write!(f, "*")?;
+ write!(result, "*")?;
}
}
RegexType::Plus => {
@@ -181,7 +296,7 @@ impl<T: GraphLabel + Display> Display for DefaultRegex<T> {
.rev(),
);
} else {
- write!(f, "+")?;
+ write!(result, "+")?;
}
}
RegexType::Optional => {
@@ -195,12 +310,12 @@ impl<T: GraphLabel + Display> Display for DefaultRegex<T> {
.rev(),
);
} else {
- write!(f, "?")?;
+ write!(result, "?")?;
}
}
RegexType::Or => {
if !top.is_seen() {
- write!(f, "(")?;
+ write!(result, "(")?;
let len = self.len();
@@ -221,11 +336,11 @@ impl<T: GraphLabel + Display> Display for DefaultRegex<T> {
}
}
} else {
- write!(f, "|")?;
+ write!(result, "|")?;
}
}
RegexType::Paren => {
- write!(f, ")")?;
+ write!(result, ")")?;
}
RegexType::Empty => {
stack.extend(
@@ -236,11 +351,17 @@ impl<T: GraphLabel + Display> Display for DefaultRegex<T> {
.rev(),
);
}
- RegexType::Lit(label) => write!(f, "{label}")?,
+ RegexType::Lit(label) => write!(result, "{}", f(label))?,
}
}
- Ok(())
+ Ok(result)
+ }
+}
+
+impl<T: GraphLabel + Display + Debug> Display for DefaultRegex<T> {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ write!(f, "{}", self.to_string_with(|t| format!("{t}"))?)
}
}
@@ -278,9 +399,20 @@ impl<T: GraphLabel + Display> Graph for DefaultRegex<T> {
fn has_edge(&self, source: usize, target: usize) -> Result<bool, GError> {
self.graph.has_edge(source, target)
}
+
+ #[inline]
+ fn replace_by_builder(&mut self, _builder: impl graph::Builder<Result = Self>) {
+ unimplemented!()
+ }
}
-impl<T: GraphLabel + Display> Regex<RegexType<T>> for DefaultRegex<T> {
+impl<T: GraphLabel + Display + Debug> Regex<RegexType<T>> for DefaultRegex<T> {
+ /// Return the root of the regular expression.
+ #[inline]
+ fn root(&self) -> Option<usize> {
+ self.root
+ }
+
#[inline]
fn vertex_label(&self, node_id: usize) -> Result<RegexType<T>, Error> {
self.types
@@ -291,8 +423,10 @@ impl<T: GraphLabel + Display> Regex<RegexType<T>> for DefaultRegex<T> {
}
/// An error type for holding parsing errors.
-#[derive(Debug)]
+#[derive(Debug, Copy, Clone)]
pub enum ParseError {
+ /// A cycle is encountered.
+ Cycle,
/// Encounter an invalid state.
Invalid,
/// An error from graph operations.
@@ -354,23 +488,77 @@ impl Display for ParseDirection {
}
}
-impl<T: GraphLabel + Display + Debug> DesRec for DefaultRegex<T> {
+/// A default recursive descent parser for regular expressions of
+/// terminals or non-terminals.
+#[derive(Debug, Clone)]
+pub struct DefaultRegParser<T: GraphLabel + Display> {
+ ter_map: HashMap<String, usize>,
+ non_map: HashMap<String, usize>,
+ _phantom: PhantomData<T>,
+}
+
+impl<T: GraphLabel + Display> DefaultRegParser<T> {
+ /// Query if a terminal or a non-terminal is already found.
+ ///
+ /// If found, return the associated index of the terminal or
+ /// non-terminal.
+ pub fn query(&self, tnt: &str, terminal_p: bool) -> Option<usize> {
+ if terminal_p {
+ self.ter_map.get(tnt).copied()
+ } else {
+ self.non_map.get(tnt).copied()
+ }
+ }
+
+ /// Add a terminal or a non-terminal.
+ pub fn add_tnt(&mut self, tnt: &str, terminal_p: bool) {
+ if terminal_p {
+ let ter_len = self.ter_map.len();
+
+ self.ter_map.entry(tnt.to_owned()).or_insert(ter_len);
+ } else {
+ let non_len = self.non_map.len();
+
+ self.non_map.entry(tnt.to_owned()).or_insert(non_len);
+ }
+ }
+}
+
+impl<T: GraphLabel + Display> Default for DefaultRegParser<T> {
+ fn default() -> Self {
+ Self {
+ ter_map: Default::default(),
+ non_map: Default::default(),
+ _phantom: PhantomData,
+ }
+ }
+}
+
+impl<T: GraphLabel + Display + Debug> DesRec for DefaultRegParser<T> {
type Label = RegexType<T>;
- type Regex = Self;
+ type Regex = DefaultRegex<T>;
type Error = ParseError;
- type Aux = ParseDirection;
+ type Inter = ParseDirection;
- type Scanner<'a> =
- Box<dyn FnMut(&'a str) -> Result<Option<(usize, Self::Label, Self::Aux)>, Self::Error>>;
+ type Scanner<'a, 'b> = Box<
+ dyn FnMut(
+ &'b Self,
+ &'a str,
+ ) -> Result<Option<(usize, Self::Label, Self::Inter)>, Self::Error>,
+ > where RegexType<T>:'b;
- fn parse<'a>(
+ fn parse<'a, 'b>(
+ &'b self,
mut input: &'a str,
- mut scanner: Self::Scanner<'a>,
+ mut scanner: Self::Scanner<'a, 'b>,
post_p: bool,
- ) -> Result<Option<(DefaultRegex<T>, &'a str)>, Self::Error> {
+ ) -> Result<Option<(DefaultRegex<T>, &'a str)>, Self::Error>
+ where
+ Self::Label: 'b,
+ {
use ParseDirection::*;
use RegexType::*;
@@ -380,7 +568,7 @@ impl<T: GraphLabel + Display + Debug> DesRec for DefaultRegex<T> {
// Classifies the input into a sequence of tokens with
// directions.
while !input.is_empty() {
- if let Some((len, label, direction)) = scanner(input)? {
+ if let Some((len, label, direction)) = scanner(self, input)? {
if len == 0 {
break;
}
@@ -523,7 +711,12 @@ impl<T: GraphLabel + Display + Debug> DesRec for DefaultRegex<T> {
let graph = list_of_children.into();
- let result = DefaultRegex { graph, types };
+ // Check there are no cycles
+ let result = DefaultRegex::new(graph, types).map_err(|e| match e {
+ Error::Graph(ge) => ParseError::Graph(ge),
+ Error::Cycle => ParseError::Cycle,
+ _ => unreachable!(),
+ })?;
Ok(Some((result, input)))
}
@@ -533,9 +726,17 @@ impl<T: GraphLabel + Display + Debug> DesRec for DefaultRegex<T> {
mod test_des_rec {
use super::*;
- fn test_scanner(
- input: &str,
- ) -> Result<Option<(usize, RegexType<char>, ParseDirection)>, ParseError> {
+ use crate::desrec::DesRec;
+
+ #[allow(dead_code, unused)]
+ fn test_scanner<'a, 'b, T>(
+ parser: &'b DefaultRegParser<T>,
+ input: &'a str,
+ ) -> Result<Option<(usize, RegexType<char>, ParseDirection)>, ParseError>
+ where
+ T: GraphLabel + Display + Debug,
+ T: 'b,
+ {
use ParseDirection::*;
use RegexType::*;
@@ -557,19 +758,57 @@ mod test_des_rec {
#[test]
fn test_des_rec() -> Result<(), Box<dyn std::error::Error>> {
- let input_string = "a*b?c+|(d*| +)?".to_owned();
+ let input_string = "(ade)*b?c+|(d*| +)?".to_owned();
+
+ let parser: DefaultRegParser<char> = Default::default();
if let Some((regex, remain)) =
- DefaultRegex::<char>::parse(&input_string, Box::new(test_scanner), true)?
+ DefaultRegParser::<char>::parse(&parser, &input_string, Box::new(test_scanner), true)?
{
println!("regex = {regex}");
println!("remain = {remain}");
println!("regex length = {}", regex.len());
+ let parents = regex.parents_array()?;
+
+ println!("parents = {parents:?}");
+
Ok(())
} else {
unreachable!()
}
}
+
+ #[test]
+ fn test_display() -> Result<(), Box<dyn std::error::Error>> {
+ use graph::builder::Builder;
+ use RegexType::*;
+
+ let mut builder = graph::adlist::ALGBuilder::default();
+ let mut types: Vec<RegexType<usize>> = Vec::with_capacity(4);
+
+ types.push(Kleene);
+ builder.add_vertex();
+
+ types.push(Lit(0));
+ builder.add_vertex();
+ builder.add_edge(0, 1, ())?;
+
+ types.push(Lit(1));
+ builder.add_vertex();
+ builder.add_edge(0, 2, ())?;
+
+ types.push(Lit(2));
+ builder.add_vertex();
+ builder.add_edge(0, 3, ())?;
+
+ let graph = builder.build();
+
+ let regex = DefaultRegex::new(graph, types)?;
+
+ println!("regex = {regex}");
+
+ Ok(())
+ }
}
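The new to_string_with method allows rendering the same regular expression with custom label formatting, which becomes useful once labels are mere indices into a table of terminals and non-terminals. A hedged sketch of a helper built on it (the angle-bracket formatting is just for illustration):

// Render a regular expression, wrapping every literal label in angle
// brackets instead of relying only on the label's Display output.
fn pretty<T: graph::GraphLabel>(
    regex: &DefaultRegex<T>,
) -> Result<String, std::fmt::Error> {
    regex.to_string_with(|label| format!("<{label}>"))
}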
diff --git a/nfa/src/desrec.rs b/nfa/src/desrec.rs
index c57d313..ac45c2d 100644
--- a/nfa/src/desrec.rs
+++ b/nfa/src/desrec.rs
@@ -20,8 +20,8 @@ pub trait DesRec {
/// The type of errors encountered when parsing.
type Error: std::error::Error;
- /// Auxiliary data when parsing
- type Aux;
+ /// Intermediate data when parsing
+ type Inter;
/// The type of a scanner that classifies inputs into tokens.
///
@@ -34,7 +34,14 @@ pub trait DesRec {
///
/// - `Ok(Some(_))`: the classification succeeds and the parsing
/// should continue.
- type Scanner<'a>: FnMut(&'a str) -> Result<Option<(usize, Self::Label, Self::Aux)>, Self::Error>;
+ type Scanner<'a, 'b>: FnMut(
+ &'b Self,
+ &'a str,
+ )
+ -> Result<Option<(usize, Self::Label, Self::Inter)>, Self::Error>
+ where
+ Self: 'b,
+ Self::Label: 'b;
/// Parse a string into a regular expression with the aid of this
/// type.
@@ -42,9 +49,12 @@ pub trait DesRec {
/// Accept a slice of string and return either a parsing error, or
/// a pair of correctly parsed regular expression and the
/// remaining slice.
- fn parse<'a>(
+ fn parse<'a, 'b>(
+ &'b self,
input: &'a str,
- scanner: Self::Scanner<'a>,
+ scanner: Self::Scanner<'a, 'b>,
post_p: bool,
- ) -> Result<ParseOutput<'a, Self::Regex>, Self::Error>;
+ ) -> Result<ParseOutput<'a, Self::Regex>, Self::Error>
+ where
+ Self::Label: 'b;
}
diff --git a/nfa/src/error.rs b/nfa/src/error.rs
index 0c6bb3c..ad75077 100644
--- a/nfa/src/error.rs
+++ b/nfa/src/error.rs
@@ -2,10 +2,11 @@
use graph::error::Error as GError;
-use std::fmt::{Display, Formatter};
+use core::fmt::{Display, Formatter};
-#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd)]
/// The error type for NFA operations.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd)]
+#[non_exhaustive]
pub enum Error {
/// An unknown node id is encountered.
UnknownNode(usize),
@@ -13,8 +14,12 @@ pub enum Error {
///
/// Everything is a trade-off, wink wink.
UnsupportedOperation,
+ /// There is no root in the underlying regular expression.
+ NoRoot,
/// This error comes from some underlying graph operation.
Graph(GError),
+ /// A cycle is found when constructing regular expressions.
+ Cycle,
}
impl Display for Error {
@@ -23,6 +28,8 @@ impl Display for Error {
Error::UnknownNode(id) => write!(f, "unknown node: {id}"),
Error::UnsupportedOperation => write!(f, "unsupported operation"),
Error::Graph(e) => write!(f, "graph error: {e}"),
+ Error::NoRoot => write!(f, "no root found"),
+ Error::Cycle => write!(f, "a cycle is found when constructing regular expressions"),
}
}
}
diff --git a/nfa/src/lib.rs b/nfa/src/lib.rs
index 1e2a30c..b1d62b3 100644
--- a/nfa/src/lib.rs
+++ b/nfa/src/lib.rs
@@ -12,17 +12,23 @@ extern crate graph;
use core::fmt::Display;
+use std::ops::{Deref, DerefMut};
+
use graph::{Graph, GraphLabel, LabelGraph};
use error::Error;
+pub use desrec::DesRec;
+
+use default::regex::RegexType;
+
/// The expected behaviour of a regular language.
///
/// Nondeterministic finite automata are equivalent to regular
/// languages. Since regular languages are easier to understand for a
/// human being, nondeterministic finite automata include the data for
/// the equivalent regular languages.
-pub trait Regex<T: GraphLabel>: Graph + Display {
+pub trait Regex<T: GraphLabel>: Graph + Display + Clone {
/// Return the label of a vertex, or an error if the node is
/// invalid.
fn vertex_label(&self, node_id: usize) -> Result<T, Error>;
@@ -52,6 +58,65 @@ pub trait Regex<T: GraphLabel>: Graph + Display {
// cetera. These might be needed later.
}
+/// Since Option<T> does not implement Display even when T does, we
+/// wrap it in a newtype to provide an implementation of Display.
+#[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Hash)]
+pub struct DOption<T>(Option<T>);
+
+impl<T> Default for DOption<T> {
+ fn default() -> Self {
+ Self(None)
+ }
+}
+
+impl<T> Deref for DOption<T> {
+ type Target = Option<T>;
+
+ fn deref(&self) -> &Self::Target {
+ &self.0
+ }
+}
+
+impl<T> DerefMut for DOption<T> {
+ fn deref_mut(&mut self) -> &mut Self::Target {
+ &mut self.0
+ }
+}
+
+impl<T: Display> Display for DOption<T> {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ match self.deref() {
+ Some(value) => Display::fmt(value, f),
+ None => write!(f, "ε"),
+ }
+ }
+}
+
+/// Substitute or Carry
+///
+/// This enumeration indicates whether a label from a regular
+/// expression should be substituted by another regular expression or
+/// carried over into the resulting nondeterministic finite automaton
+/// during the [`to_nfa`][Nfa::to_nfa] conversion.
+///
+/// # Transform labels
+///
+/// The label that is returned to be carried can be different from the
+/// original label, as a way to transform the labels.
+///
+/// # Remark on the abbreviation
+///
+/// It happens "by chance" that this abbreviation coincides with the
+/// abbreviation of "system on chip". Since I know nothing about this
+/// topic, this is just a meaningless pun.
+#[derive(Debug, Copy, Clone)]
+pub enum SoC<T> {
+ /// To be substituted by another regular expression.
+ Sub(usize),
+ /// To carry around this label.
+ Carry(T),
+}
+
/// The expected behaviour of a nondeterministic finite automaton.
///
/// Every NFA is a special labelled graph.
@@ -68,9 +133,23 @@ pub trait Nfa<T: GraphLabel>: LabelGraph<T> {
Err(Error::UnsupportedOperation)
}
- /// Build a nondeterministic finite automaton out of a regular
- /// language.
- fn to_nfa(regex: impl Regex<T>) -> Self;
+ /// When we convert a regular expression to a nondeterministic
+ /// finite automaton, the label should be optional, so we use a
+ /// different type for the result type.
+ type FromRegex<S: GraphLabel + Display + Default>;
+
+ /// Build a nondeterministic finite automaton out of a set of
+ /// regular expressions.
+ ///
+ /// The function `sub_pred` decides, for each label, whether the
+ /// label should be substituted by one of the regular expressions
+ /// in the set or carried into the resulting automaton.  If it
+ /// returns `SoC::Sub(n)`, the n-th regular expression of the set
+ /// is substituted wherever the label occurs; if it returns
+ /// `SoC::Carry(label)`, the (possibly transformed) label becomes
+ /// an edge label of the automaton.
+ fn to_nfa(
+ regexps: &[impl Regex<RegexType<T>>],
+ sub_pred: impl Fn(T) -> Result<SoC<T>, Error>,
+ ) -> Result<Self::FromRegex<DOption<T>>, Error>;
/// Remove all dead states from the nondeterministic finite
/// automaton.
@@ -78,13 +157,14 @@ pub trait Nfa<T: GraphLabel>: LabelGraph<T> {
/// A state is dead if there are no edges going to the state.
fn remove_dead(&mut self) -> Result<(), Error>;
- /// For each empty transition from A to B, and for every edge from
- /// B to C, say, add an edge from A to C.
+ /// For each edge from A to B whose label is considered nullable by
+ /// a function `f`, and for every edge from B to C, add an edge
+ /// from A to C.
///
/// This is needed specifically by the algorithm to produce a set
/// of atomic languages that represent "null-closed" derived
/// languages.
- fn nulling(&mut self) -> Result<(), Error>;
+ fn nulling(&mut self, f: impl Fn(T) -> bool) -> Result<(), Error>;
}
pub mod default;
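To illustrate the shape of the sub_pred argument of to_nfa, here is a hedged sketch of a substitution function over usize labels, written as it might appear inside this crate; the convention that labels below NON_COUNT denote non-terminals is purely an assumption of this example, not something fixed by the trait:

use crate::{error::Error, SoC};

// Hypothetical number of non-terminals in the grammar at hand.
const NON_COUNT: usize = 3;

fn sub_pred(label: usize) -> Result<SoC<usize>, Error> {
    if label < NON_COUNT {
        // Substitute the label-th regular expression of the given set.
        Ok(SoC::Sub(label))
    } else {
        // Carry the terminal label into the resulting automaton.
        Ok(SoC::Carry(label))
    }
}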
diff --git a/repcore/src/grammar.rs b/repcore/src/grammar.rs
deleted file mode 100644
index ee9f033..0000000
--- a/repcore/src/grammar.rs
+++ /dev/null
@@ -1,59 +0,0 @@
-//! This file implements the extected behaviours of grammars.
-
-// NOTE: We shall first start with a parser that works at the level of
-// characters. The purpose is to first experiment with the workings
-// and the performance of the algorithms, before optimising by using
-// regular expressions to classify inputs into tokens. In other
-// words, the current focus is not on the optimisations, whereas
-// scanners are for optimisations only, so to speak.
-
-/// The type of a terminal.
-///
-/// For the time being this is a wrapper around a string, but in the
-/// future it may hold more information of scanners.
-pub struct Terminal {
- // If we want to use scanners, per chance add them as a new field
- // here.
- name: String,
-}
-
-impl Terminal {
- /// Create a terminal with the given name.
- #[inline]
- pub fn new(name: String) -> Self {
- Self { name }
- }
-
- /// Return the name of the terminal.
- #[inline]
- pub fn name(&self) -> &str {
- &self.name
- }
-}
-
-/// The type of a non-terminal.
-///
-/// This is just a wrapper around a string.
-pub struct Nonterminal(String);
-
-impl Nonterminal {
- /// Return the name of the nonterminal.
- ///
- /// Just to improve readability.
- #[inline]
- pub fn name(&self) -> &str {
- &self.0
- }
-}
-
-/// The type of a terminal or a non-terminal.
-///
-/// Only an index is stored here. Actual data are stored in two other
-/// arrays.
-#[derive(Debug, Hash, Eq, PartialEq, Clone, Copy, Ord, PartialOrd)]
-pub enum TNT {
- /// Terminal variant
- Ter(usize),
- /// Nonterminal variant
- Non(usize),
-}
diff --git a/repcore/src/lib.rs b/repcore/src/lib.rs
deleted file mode 100644
index 589b61c..0000000
--- a/repcore/src/lib.rs
+++ /dev/null
@@ -1,20 +0,0 @@
-//! This package implements the core algorithm of the entire
-//! workspace: parsing with derivatives by means of regular nulling
-//! languages.
-
-pub mod grammar;
-
-pub fn add(left: usize, right: usize) -> usize {
- left + right
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- #[test]
- fn it_works() {
- let result = add(2, 2);
- assert_eq!(result, 4);
- }
-}
diff --git a/repcore/Cargo.toml b/viz/Cargo.toml
index 7416ad5..2894c21 100644
--- a/repcore/Cargo.toml
+++ b/viz/Cargo.toml
@@ -1,8 +1,9 @@
[package]
-name = "repcore"
+name = "viz"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
+graph = { path = "../graph" } \ No newline at end of file
diff --git a/viz/src/lib.rs b/viz/src/lib.rs
new file mode 100644
index 0000000..b40c78a
--- /dev/null
+++ b/viz/src/lib.rs
@@ -0,0 +1,31 @@
+//! This library defines a binary format to store graph data such that
+//! a client which understands the format can easily print the graph,
+//! centered at any specific node, with any scope. The library also
+//! provides functions to print graphs in this format.
+//!
+//! This way, one does not need a server to print and interact with
+//! graphs.
+//!
+//! # Graph representation
+//!
+//! The library uses the [`Graph`][graph::Graph] trait from the crate
+//! [graph]. Only the functions exposed from the trait are used by
+//! this library. So the users can also implement the trait and then
+//! print the graph data represented by other graph crates.
+
+extern crate graph;
+
+pub fn add(left: usize, right: usize) -> usize {
+ left + right
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn it_works() {
+ let result = add(2, 2);
+ assert_eq!(result, 4);
+ }
+}