summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2023-01-28 10:17:24 +0800
committerJSDurand <mmemmew@gmail.com>2023-01-28 10:22:57 +0800
commitf28155105134b90fd86049c65478d307e0d8dbbc (patch)
tree72b3b4872d5dba89413eca70bcaae9e421def7ee
parente8ea01319b3a9032a3f4f69f65e9ca96562b87b9 (diff)
a prototype of an item derivation forest
It seems to be complete now, but still awaits more tests to see where the errors are, which should be plenty, haha.
-rw-r--r--ChangeLog19
-rw-r--r--chain/Cargo.toml1
-rw-r--r--chain/src/default.rs128
-rw-r--r--chain/src/item/default.rs547
-rw-r--r--chain/src/item/genins.rs231
-rw-r--r--chain/src/item/mod.rs252
-rw-r--r--chain/src/lib.rs26
-rw-r--r--chain/src/plan.org44
-rw-r--r--forest/src/lib.rs4
-rw-r--r--grammar/src/label.rs75
-rw-r--r--grammar/src/lib.rs3
-rw-r--r--graph/src/labelled/double.rs126
-rw-r--r--graph/src/labelled/mod.rs2
-rw-r--r--nfa/src/default/nfa.rs8
-rw-r--r--nfa/src/default/regex.rs4
-rw-r--r--semiring/src/lib.rs51
16 files changed, 1324 insertions, 197 deletions
diff --git a/ChangeLog b/ChangeLog
index 44e241a..057ef5d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,22 @@
+2023-01-28 Jean Sévère Durand <durand@jsdurand.xyz>
+
+ * chain: Move the forest crate to the item module in the chain
+ crate. That crate used to provide a kind of forests. This turns
+ out to be an item derivation forest for the chain-rule machine.
+ So it seems appropriate to implement this in the chain crate
+ instead.
+
+ * chain: Also the chain rule machine seems to be complete now. It
+ remains to test the machine to find the must-be errors.
+
+ * graph: Now the doubly linked graphs do not exclude nodes with
+ duplicate edge sets. That is, previously, when adding a node
+ whose children set is equal to that of another existing node, the
+ graph just reuses that old node. Now this is not the case
+ anymore. The reason is that this only saves some minor memory
+ usage, at the cost of more expensive graph operations, and might
+ affect the time complixity, so I just remove this functionality.
+
2023-01-22 Jean Sévère Durand <durand@jsdurand.xyz>
* forest: Correctly clone nodes.
diff --git a/chain/Cargo.toml b/chain/Cargo.toml
index c55586d..496f5dd 100644
--- a/chain/Cargo.toml
+++ b/chain/Cargo.toml
@@ -13,7 +13,6 @@ test-print-viz = []
nfa = { path = "../nfa" }
graph = { path = "../graph" }
grammar = { path = "../grammar" }
-forest = { path = "../forest" }
[dev-dependencies]
grammar = { path = "../grammar", features = ["test-helper"] }
diff --git a/chain/src/default.rs b/chain/src/default.rs
index 22befff..77a64ca 100644
--- a/chain/src/default.rs
+++ b/chain/src/default.rs
@@ -7,8 +7,10 @@
use super::*;
use crate::atom::{Atom, DefaultAtom};
+use crate::item::{
+ default::DefaultForest, generate_fragment, Forest, ForestLabel, ForestLabelError,
+};
use core::fmt::Display;
-use forest::{default::DefaultForest, Forest};
use grammar::{Error as GrammarError, GrammarLabel, GrammarLabelType, TNT};
use graph::{labelled::DLGBuilder, Builder, DLGraph, Graph, LabelExtGraph, LabelGraph};
@@ -29,6 +31,10 @@ pub enum Error {
NodeNoLabel(usize),
/// Reserving memory fails.
CannotReserve(TryReserveError),
+ /// Cannot create label for cloning nodes.
+ CannotClone(ForestLabelError),
+ /// Cannot find a suitable node to plant the new forest fragment.
+ CannotPlant,
/// An invalid situation happens.
Invalid,
}
@@ -44,6 +50,8 @@ impl Display for Error {
),
Self::NodeNoLabel(n) => write!(f, "node {n} has no labels while it should have one"),
Self::CannotReserve(e) => write!(f, "cannot reserve memory: {e}"),
+ Self::CannotClone(fe) => write!(f, "cannot clone due to {fe}"),
+ Self::CannotPlant => write!(f, "cannot plant a new forest fragment"),
Self::Invalid => write!(f, "invalid"),
}
}
@@ -69,6 +77,7 @@ impl From<ForestError> for Error {
ForestError::DuplicatedNode(n) => Error::DuplicateNode(n),
ForestError::InvalidGraphError(ge) => ge.into(),
ForestError::NodeNoLabel(n) => Error::NodeNoLabel(n),
+ ForestError::LabelConversion(ce) => Error::CannotClone(ce),
}
}
}
@@ -93,7 +102,7 @@ impl Default for DerIterIndex {
}
/// A complex type used for storing values of edges with two layers.
-type SecondTypeValue = (Parent, bool, Vec<(Edge, usize)>);
+type SecondTypeValue = (PaSe, bool, Vec<(Edge, usize)>);
/// An iterator of TwoLayers.
#[derive(Debug, Default)]
@@ -120,7 +129,7 @@ impl DerIter {
fn add_second_layer(
&mut self,
label: usize,
- forest_source: Parent,
+ forest_source: PaSe,
accepting: bool,
edges: Vec<(Edge, usize)>,
) {
@@ -186,7 +195,7 @@ pub struct DefaultChain {
atom: DefaultAtom,
current: usize,
history: Vec<usize>,
- forest: DefaultForest<GrammarLabel>,
+ forest: DefaultForest<ForestLabel<GrammarLabel>>,
accepting_vec: Vec<bool>,
}
@@ -205,7 +214,7 @@ impl DefaultChain {
/// Return a reference to the associated forest.
#[inline]
- pub fn forest(&self) -> &DefaultForest<GrammarLabel> {
+ pub fn forest(&self) -> &DefaultForest<ForestLabel<GrammarLabel>> {
&self.forest
}
@@ -316,6 +325,7 @@ impl LabelExtGraph<Edge> for DefaultChain {
let new = self.graph.extend(edges)?;
let accepting_len = self.accepting_vec.len();
+ // Update the acceptace of the new node, if any.
if self.accepting_vec.get(new).is_none() {
// assert it can only grow by one node at a time.
#[cfg(debug_assertions)]
@@ -377,14 +387,17 @@ impl Chain for DefaultChain {
let empty_state = atom.empty();
+ // The zero-th non-terminal is assumed to be the starting
+ // non-terminal, by convention.
let initial_nullable = atom
.is_nullable(0)
.map_err(|_| Error::IndexOutOfBounds(0, atom.non_num()))?;
+ // NOTE: Is it really correct to use `Parent::new(0, 0)` here?
builder.add_edge(
first,
root,
- Edge::new(empty_state, Parent::new(0, 0), initial_nullable),
+ Edge::new(empty_state, Default::default(), initial_nullable),
)?;
let graph = builder.build();
@@ -431,7 +444,7 @@ impl Chain for DefaultChain {
type DerIter = DerIter;
- fn derive(&mut self, t: usize, _pos: usize) -> Result<Self::DerIter, Self::Error> {
+ fn derive(&mut self, t: usize, pos: usize) -> Result<Self::DerIter, Self::Error> {
use TNT::*;
/// Convert an error telling us that an index is out of bounds.
@@ -445,7 +458,7 @@ impl Chain for DefaultChain {
GrammarError::IndexOutOfBounds(index, bound) => {
Error::IndexOutOfBounds(index, bound)
}
- _ => panic!("wrong error kind"),
+ _ => Error::Invalid,
}
}
@@ -459,11 +472,11 @@ impl Chain for DefaultChain {
///
/// The generated edges will be pushed to `output` directly,
/// to save some allocations.
- // TODO: Handle forests as well.
fn generate_edges(
chain: &DefaultChain,
child_iter: impl Iterator<Item = usize> + ExactSizeIterator + Clone,
atom_child_iter: impl Iterator<Item = usize> + Clone,
+ pase: PaSe,
mut output: impl AsMut<Vec<(Edge, usize)>>,
) -> Result<(), Error> {
// First check the values from iterators are all valid.
@@ -515,13 +528,11 @@ impl Chain for DefaultChain {
// now push into output
- let parent = Parent::new(0, 0);
-
for atom_child in atom_child_iter {
let atom_child_accepting = chain.atom.is_accepting(atom_child).unwrap();
let atom_child_empty_node = chain.atom.is_empty_node(atom_child).unwrap();
- let edge = Edge::new(atom_child, parent, atom_child_accepting);
+ let edge = Edge::new(atom_child, pase, atom_child_accepting);
if !atom_child_empty_node {
output.extend(child_iter.clone().map(|child| (edge, child)));
@@ -549,16 +560,41 @@ impl Chain for DefaultChain {
continue;
}
+ let atom_moved = atom_label.get_moved();
+
match *atom_label.get_value() {
Some(Ter(ter)) if ter == t => {
+ // prepare forest fragment
+
+ let fragment =
+ generate_fragment([atom_moved.into(), Ter(ter).into()], pos)?;
+
+ let new_pase = self.forest.insert_item(
+ *label,
+ fragment,
+ atom_child_iter.clone(),
+ &self.atom,
+ )?;
+
generate_edges(
self,
child_iter.clone(),
atom_child_iter.clone(),
+ new_pase,
&mut der_iter.singles,
)?;
}
Some(Non(non)) => {
+ let first_fragment =
+ generate_fragment([atom_moved.into(), Non(non).into()], pos)?;
+
+ let first_segment_pase = self.forest.insert_item(
+ *label,
+ first_fragment,
+ atom_child_iter.clone(),
+ &self.atom,
+ )?;
+
let virtual_node = self
.atom
.atom(non, t)
@@ -570,12 +606,66 @@ impl Chain for DefaultChain {
.is_accepting(virtual_node)
.map_err(index_out_of_bounds_conversion)?;
+ let virtual_fragment = {
+ // `non` is valid, as checked above
+ let non_start = self.atom.nth_accumulator(non).unwrap() * 2;
+
+ let mut result = DefaultForest::default();
+
+ for (label, child_iter) in self.atom.labels_of(non_start)? {
+ if matches!(*label.get_value(),
+ Some(Ter(ter)) if ter == t)
+ {
+ for child in child_iter {
+ let mut line: Vec<_> = self
+ .atom
+ .query_expansion(non_start, child)
+ .map_err(index_out_of_bounds_conversion)?
+ .iter()
+ .copied()
+ .flatten()
+ .map(|(nt, rule)| [(*rule).into(), Non(*nt).into()])
+ .collect();
+
+ line.push([label.get_moved().into(), Ter(t).into()]);
+
+ let line: Vec<GrammarLabelType> =
+ line.into_iter().flatten().collect();
+
+ if result.is_empty() {
+ result = generate_fragment(line, pos)?;
+ } else {
+ result.plant(
+ 0,
+ generate_fragment(line, pos)?,
+ false,
+ )?;
+ }
+ }
+ }
+ }
+
+ result
+ };
+
+ // NOTE: We only need the PaSe from the
+ // first segment, so we pass an empty
+ // iterator, in which case the passed
+ // label is only used for the PaSe.
+ let virtual_pase = self.forest.insert_item(
+ Edge::new(0, first_segment_pase, true),
+ virtual_fragment,
+ std::iter::empty(),
+ &self.atom,
+ )?;
+
let mut new_edges = Vec::new();
generate_edges(
self,
child_iter.clone(),
atom_child_iter.clone(),
+ first_segment_pase,
&mut new_edges,
)?;
@@ -583,12 +673,10 @@ impl Chain for DefaultChain {
der_iter.singles.extend(new_edges.clone());
}
- let parent = Parent::new(0, 0);
-
if !self.atom.is_empty_node(virtual_node).unwrap() {
der_iter.add_second_layer(
virtual_node,
- parent,
+ virtual_pase,
accepting,
new_edges,
);
@@ -601,7 +689,10 @@ impl Chain for DefaultChain {
// `generate_edges`
if self.atom.is_empty_node(atom_child).unwrap() {
der_iter.singles.extend(child_iter.clone().map(|child| {
- (Edge::new(virtual_node, parent, accepting), child)
+ (
+ Edge::new(virtual_node, virtual_pase, accepting),
+ child,
+ )
}));
}
}
@@ -610,7 +701,8 @@ impl Chain for DefaultChain {
// this has been checked in
// `generate_edges`
if self.atom.is_empty_node(atom_child).unwrap() {
- // flat flat map, hmm...
+ // REVIEW: flat flat map,
+ // hmm...
der_iter.singles.extend(child_iter.clone().flat_map(
|child| {
self.graph.labels_of(child).unwrap().flat_map(
@@ -646,7 +738,7 @@ mod test_der_iter {
fn test() -> Result<(), Box<dyn std::error::Error>> {
let mut der_iter = DerIter::default();
- let parent = Parent::new(0, 0);
+ let parent = Default::default();
der_iter.singles.push((Edge::new(0, parent, true), 0));
diff --git a/chain/src/item/default.rs b/chain/src/item/default.rs
new file mode 100644
index 0000000..b71f940
--- /dev/null
+++ b/chain/src/item/default.rs
@@ -0,0 +1,547 @@
+//! This module provides a default implementation of iten derivation
+//! forest.
+
+use super::*;
+use graph::{
+ builder::BuilderMut, labelled::binary::PLGBuilderMut, Graph, LabelGraph, PLGraph, RedirectGraph,
+};
+
+use core::fmt::Display;
+
+/// The type of errors for forest operations.
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd)]
+pub enum Error {
+ /// An index is out of bounds.
+ ///
+ /// The first component is the index that is out of bounds, and
+ /// the second component is the current length of nodes.
+ IndexOutOfBounds(usize, usize),
+ /// The forest does not permit duplicate nodes but encounters a
+ /// repeated node.
+ DuplicatedNode(usize),
+ /// A node has no labels while it is required to have one.
+ NodeNoLabel(usize),
+ /// Encounter an invalid error in converting from an error of
+ /// graphs.
+ InvalidGraphError(GError),
+ /// Encounter an error when converting forest labels.
+ LabelConversion(ForestLabelError),
+}
+
+impl Display for Error {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ match self {
+ Error::IndexOutOfBounds(index, bound) => {
+ write!(f, "index {index} is out of bound {bound}")
+ }
+ Error::DuplicatedNode(n) => write!(f, "node {n} is duplicated"),
+ Error::InvalidGraphError(ge) => write!(f, "invalid error: {ge}"),
+ Error::NodeNoLabel(n) => write!(f, "node {n} has no labels, but it should have one"),
+ Error::LabelConversion(le) => write!(f, "fail to convert labels: {le}"),
+ }
+ }
+}
+
+impl std::error::Error for Error {}
+
+impl From<GError> for Error {
+ fn from(ge: GError) -> Self {
+ match ge {
+ GError::IndexOutOfBounds(index, bound) => Self::IndexOutOfBounds(index, bound),
+ GError::DuplicatedNode(n) => Self::DuplicatedNode(n),
+ _ => Self::InvalidGraphError(ge),
+ }
+ }
+}
+
+impl From<ForestLabelError> for Error {
+ fn from(le: ForestLabelError) -> Self {
+ Self::LabelConversion(le)
+ }
+}
+
+/// A default implementation of forest.
+#[derive(Debug, Clone)]
+pub struct DefaultForest<T: GraphLabel> {
+ graph: PLGraph<T>,
+ root: Option<usize>,
+}
+
+impl<T: GraphLabel> Default for DefaultForest<T> {
+ fn default() -> Self {
+ let graph = Default::default();
+ let root = None;
+
+ Self { graph, root }
+ }
+}
+
+impl<T: GraphLabel> AsRef<DefaultForest<T>> for DefaultForest<T> {
+ fn as_ref(&self) -> &DefaultForest<T> {
+ self
+ }
+}
+
+impl<T: GraphLabel> Graph for DefaultForest<T> {
+ type Iter<'a> = <PLGraph<T> as Graph>::Iter<'a>
+ where
+ Self: 'a;
+
+ #[inline]
+ fn is_empty(&self) -> bool {
+ self.graph.is_empty()
+ }
+
+ #[inline]
+ fn nodes_len(&self) -> usize {
+ self.graph.nodes_len()
+ }
+
+ #[inline]
+ fn edges_len(&self) -> Option<usize> {
+ self.graph.edges_len()
+ }
+
+ #[inline]
+ fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, GError> {
+ self.graph.children_of(node_id)
+ }
+
+ #[inline]
+ fn degree(&self, node_id: usize) -> Result<usize, GError> {
+ self.graph.degree(node_id)
+ }
+
+ #[inline]
+ fn is_empty_node(&self, node_id: usize) -> Result<bool, GError> {
+ self.graph.is_empty_node(node_id)
+ }
+
+ #[inline]
+ fn has_edge(&self, source: usize, target: usize) -> Result<bool, GError> {
+ self.graph.has_edge(source, target)
+ }
+
+ fn replace_by_builder(&mut self, _builder: impl graph::Builder<Result = Self>) {
+ unimplemented!()
+ }
+}
+
+impl<T: GraphLabel> ParentsGraph for DefaultForest<T> {
+ type Iter<'a>= <PLGraph<T> as ParentsGraph>::Iter<'a>
+ where
+ Self:'a;
+
+ #[inline]
+ fn parents_of(&self, node_id: usize) -> Result<<Self as ParentsGraph>::Iter<'_>, GError> {
+ self.graph.parents_of(node_id)
+ }
+
+ #[inline]
+ fn nth_child(&self, node_id: usize, n: usize) -> Result<usize, GError> {
+ self.graph.nth_child(node_id, n)
+ }
+
+ #[inline]
+ fn edge_to_parent(
+ &self,
+ source: usize,
+ target: usize,
+ ) -> Result<Option<graph::Parent>, GError> {
+ self.graph.edge_to_parent(source, target)
+ }
+}
+
+impl<T: GraphLabel> LabelGraph<T> for DefaultForest<T> {
+ type Iter<'a> = std::iter::Empty<usize>
+ where
+ Self: 'a;
+
+ type LabelIter<'a> = std::iter::Empty<(&'a T, <Self as LabelGraph<T>>::Iter<'a>)>
+ where
+ Self: 'a,
+ T: 'a;
+
+ type EdgeLabelIter<'a> = std::iter::Empty<T>
+ where
+ Self: 'a,
+ T: 'a;
+
+ #[inline]
+ fn query_label(&self, label: T) -> Option<usize> {
+ self.graph.query_label(label)
+ }
+
+ #[inline]
+ fn vertex_label(&self, node_id: usize) -> Result<Option<T>, GError> {
+ self.graph.vertex_label(node_id)
+ }
+
+ fn edge_label(
+ &self,
+ _source: usize,
+ _target: usize,
+ ) -> Result<Self::EdgeLabelIter<'_>, GError> {
+ unimplemented!("edges have no labels")
+ }
+
+ fn find_children_with_label(
+ &self,
+ _node_id: usize,
+ _label: &T,
+ ) -> Result<<Self as LabelGraph<T>>::Iter<'_>, GError> {
+ unimplemented!("edges have no labels")
+ }
+
+ fn labels_of(&self, _node_id: usize) -> Result<Self::LabelIter<'_>, GError> {
+ unimplemented!("edges have no labels")
+ }
+
+ fn has_edge_label(&self, _node_id: usize, _label: &T, _target: usize) -> Result<bool, GError> {
+ unimplemented!("edges have no labels")
+ }
+}
+
+impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> {
+ type Error = Error;
+
+ fn root(&self) -> Option<usize> {
+ self.root
+ }
+
+ fn new_leaf(label: T) -> Self {
+ let mut graph = PLGraph::default();
+
+ let mut builder = PLGBuilderMut::from_graph_mut(&mut graph);
+
+ let root = Some(builder.add_vertex(ForestLabel::from(label)));
+
+ Self { graph, root }
+ }
+
+ fn is_prefix<F>(&self, node_id: usize, fragment: F) -> Result<bool, Self::Error>
+ where
+ F: Borrow<Self>,
+ {
+ if !self.has_node(node_id) {
+ return Err(Error::IndexOutOfBounds(node_id, self.nodes_len()));
+ }
+
+ // We do a depth-first traversal to determine if every node
+ // encountered has the same set of children (labels taken into
+ // the consideration).
+
+ let fragment = fragment.borrow();
+
+ let mut frag_stack = Vec::with_capacity(fragment.nodes_len());
+
+ let mut self_stack = Vec::with_capacity(fragment.nodes_len());
+
+ let frag_root = if let Some(root) = fragment.root() {
+ root
+ } else {
+ // an empty forest is a prefix of any forest.
+ return Ok(true);
+ };
+
+ frag_stack.push(frag_root);
+ self_stack.push(node_id);
+
+ // defer popping
+ while let (Some(frag_top), Some(self_top)) =
+ (frag_stack.last().copied(), self_stack.last().copied())
+ {
+ frag_stack.pop();
+ self_stack.pop();
+
+ if fragment.vertex_label(frag_top)? != self.vertex_label(self_top)? {
+ // not a prefix
+ return Ok(false);
+ }
+
+ let mut self_children = self.children_of(self_top)?;
+
+ for child in fragment.children_of(frag_top)? {
+ if let Some(self_child) = self_children.next() {
+ frag_stack.push(child);
+ self_stack.push(self_child);
+ } else {
+ // too few children
+ return Ok(false);
+ }
+ }
+ }
+
+ // Check both stacks are empty at the end.
+ Ok(frag_stack.is_empty() && self_stack.is_empty())
+ }
+
+ fn plant<F>(&mut self, node_id: usize, fragment: F, planted: bool) -> Result<(), Self::Error>
+ where
+ F: Borrow<Self>,
+ {
+ // Convert self to a builder_mut, and traverse fragment in a
+ // depth-first manner and adjoin corresponding nodes along the
+ // way.
+
+ if !self.has_node(node_id) {
+ return Err(Error::IndexOutOfBounds(node_id, self.nodes_len()));
+ }
+
+ let fragment = fragment.borrow();
+
+ let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph);
+
+ let root = if let Some(root) = fragment.root() {
+ root
+ } else {
+ // Nothing to do to plant an empty forest.
+ return Ok(());
+ };
+
+ // Just a dummy label for use in adding edges.
+ //
+ // REVIEW: I probably should refactor the API for builder_mut.
+ let root_label = fragment
+ .vertex_label(root)?
+ .ok_or(Error::NodeNoLabel(root))?;
+
+ let nodes_len = fragment.nodes_len();
+
+ /// If the fragment root has a duplicate label, the forest
+ /// will not grow, so we use the label to find the adjoined
+ /// node index.
+ ///
+ /// The nodes hava already been added to the forest, so it is
+ /// safe to call unwrap.
+ macro_rules! conversion (
+ ($node:expr) => {
+ {
+ builder
+ .query_label(
+ fragment
+ .vertex_label($node)?
+ .ok_or(Error::NodeNoLabel($node))?
+ ).unwrap()
+ }
+ }
+ );
+
+ // If the fragment has been planted before, we just add an
+ // edge.
+
+ if planted {
+ builder.add_edge(node_id, conversion!(root), root_label)?;
+
+ return Ok(());
+ }
+
+ // First adjoin those nodes and join the edges later.
+
+ for node in 0..nodes_len {
+ let label = fragment
+ .vertex_label(node)?
+ .ok_or(Error::NodeNoLabel(node))?;
+
+ builder.add_vertex(label);
+ }
+
+ // Don't forget to join the new sub-forest to the original
+ // forest, at the specified position.
+
+ builder.add_edge(node_id, conversion!(root), root_label)?;
+
+ // We can try to calculate the depth of fragments, if we need
+ // to lower the memory usage. But in our use cases, we
+ // usually deal with fragments where each node has at most one
+ // child, so the depth is supposed to be equal to the length
+ // in this case.
+ let mut stack = Vec::with_capacity(fragment.nodes_len());
+
+ stack.push(root);
+
+ while let Some(top) = stack.pop() {
+ for child in fragment.children_of(top)? {
+ builder.add_edge(conversion!(top), conversion!(child), root_label)?;
+ }
+ }
+
+ Ok(())
+ }
+
+ fn clone_node(&mut self, node_id: usize) -> Result<usize, Self::Error> {
+ let builder = PLGBuilderMut::from_graph_mut(&mut self.graph);
+
+ let old_label = builder
+ .vertex_label(node_id)?
+ .ok_or(Error::NodeNoLabel(node_id))?;
+
+ if old_label.is_packed() {
+ return Err(ForestLabelError::ClonePack.into());
+ }
+
+ // We are sure old_label is not packed here, so it is safe to
+ // unwrap.
+ let new_label = old_label.clone(self).unwrap();
+
+ if old_label.clone_index().is_some() {
+ let mut parents = self.parents_of(node_id)?;
+
+ // Make sure it only has one parent, which is the
+ // representative node.
+ assert_eq!(parents.len(), 1);
+
+ let rep_node = parents.next().unwrap().node();
+
+ let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph);
+
+ let new_clone = builder.add_vertex(new_label);
+
+ builder.add_edge(rep_node, new_clone, new_label)?;
+
+ // We checked its length is 1, so it is safe to unwrap
+ // here.
+ return Ok(new_clone);
+ }
+
+ let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph);
+
+ // We are sure old_label is not a clone here, so it is safe to
+ // unwrap.
+ let rep_label = old_label.pack().unwrap();
+
+ // Make a new node
+ let new_index = builder.add_vertex(rep_label);
+
+ // Re-direct parents to the new node.
+ //
+ // This must be done before pointing the new node to the old
+ // node, otherwise that edge will be redirected as well.
+
+ // Unfortunately I cannot loop through parents and mutate them
+ // at the same time, so I first collect them into a vector.
+ let parents: Vec<_> = builder.parents_of(node_id)?.collect();
+
+ for parent in parents.into_iter() {
+ builder.redirect(parent.node(), parent.edge(), new_index)?;
+ }
+
+ // Point the new node to the old node. OLD_LABEL is just a
+ // place holder.
+
+ builder.add_edge(new_index, node_id, old_label)?;
+
+ // Modify the label of the old node.
+
+ builder.set_label(node_id, new_label)?;
+
+ // Make another clone
+
+ // new_label is cloned, so is guaranteed not to be packed, and
+ // we are safe to unwrap.
+ let new_label = new_label.clone(self).unwrap();
+
+ let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph);
+
+ let new_clone = builder.add_vertex(new_label);
+
+ builder.add_edge(new_index, new_clone, new_label)?;
+
+ Ok(new_clone)
+ }
+}
+
+#[cfg(test)]
+mod item_test {
+ use super::*;
+
+ macro_rules! leaf (
+ ($label:expr, $type:tt) =>{
+ DefaultForest::<ForestLabel<$type>>::new_leaf($label)
+ };
+ ($label:expr) => {
+ DefaultForest::<ForestLabel<usize>>::new_leaf($label)
+ }
+ );
+
+ #[test]
+ fn test_forest_api() -> Result<(), Box<dyn std::error::Error>> {
+ let forest: DefaultForest<usize> = Default::default();
+
+ // empty forest
+
+ assert!(forest.is_empty());
+
+ // leaf forest
+
+ let mut forest = leaf!(0, usize);
+
+ assert_eq!(forest.nodes_len(), 1);
+ assert_eq!(forest.root(), Some(0));
+
+ // add some child
+
+ forest.plant(0, leaf!(1), false)?;
+
+ assert_eq!(forest.nodes_len(), 2);
+ let mut children = forest.children_of(0)?;
+ assert_eq!(children.next(), Some(1));
+ assert_eq!(children.next(), None);
+
+ // add more children
+
+ forest.plant(0, leaf!(2), false)?;
+ forest.plant(0, leaf!(3), false)?;
+ forest.plant(0, leaf!(4), false)?;
+ forest.plant(2, leaf!(5), false)?;
+
+ assert_eq!(forest.nodes_len(), 6);
+ let mut children = forest.children_of(0)?;
+ assert_eq!(children.next(), Some(1));
+ assert_eq!(children.next(), Some(2));
+ assert_eq!(children.next(), Some(3));
+ assert_eq!(children.next(), Some(4));
+ let mut children = forest.children_of(2)?;
+ assert_eq!(children.next(), Some(5));
+ assert_eq!(children.next(), None);
+
+ let mut test_forest = leaf!(0);
+ test_forest.plant(0, leaf!(1), false)?;
+ test_forest.plant(0, leaf!(2), false)?;
+ test_forest.plant(0, leaf!(3), false)?;
+ test_forest.plant(2, leaf!(5), false)?;
+
+ assert!(forest.is_prefix(0, &test_forest)?);
+
+ let mut test_forest = leaf!(0);
+ test_forest.plant(0, leaf!(1), false)?;
+ test_forest.plant(0, leaf!(2), false)?;
+ // this child of the root should have label 3 in order to be a
+ // prefix.
+ test_forest.plant(0, leaf!(4), false)?;
+ test_forest.plant(2, leaf!(5), false)?;
+
+ assert!(!forest.is_prefix(0, &test_forest)?);
+
+ let mut test_forest = leaf!(2);
+ test_forest.plant(0, leaf!(5), false)?;
+
+ assert!(forest.is_prefix(2, &test_forest)?);
+
+ // now test cloning
+
+ // add a duplicate label
+ forest.plant(3, leaf!(5), false)?;
+
+ let _len = forest.nodes_len();
+
+ forest.clone_node(5)?;
+
+ assert_eq!(forest.nodes_len(), 7);
+
+ #[cfg(feature = "test-print-viz")]
+ forest.print_viz("forest.gv")?;
+
+ Ok(())
+ }
+}
diff --git a/chain/src/item/genins.rs b/chain/src/item/genins.rs
new file mode 100644
index 0000000..99d9202
--- /dev/null
+++ b/chain/src/item/genins.rs
@@ -0,0 +1,231 @@
+//! This module implements the generation and insertion of item
+//! derivation forests.
+//!
+//! This is used for the chain-rule machine to conveniently produce
+//! item derivations into a forest. This forest can serve as a rough
+//! approximation of the parse forests, and can be executed in other
+//! semirings later on.
+
+use super::*;
+use crate::{atom::DefaultAtom, default::Error, item::default::DefaultForest, Edge};
+use grammar::{Error as GrammarError, GrammarLabel, GrammarLabelType, TNT};
+use graph::Graph;
+
+use core::borrow::Borrow;
+use std::collections::HashSet as Set;
+
+/// A helper function to generate a fragment of forest.
+///
+/// It simply constructs a root node and then appends
+/// successive nodes as successive children of the previous
+/// node. Also the starting positions will all be set to the
+/// same position.
+///
+/// If the input is empty, this returns an empty forest;
+/// otherwise the result is not empty.
+pub fn generate_fragment(
+ labels: impl AsRef<[GrammarLabelType]>,
+ pos: usize,
+) -> Result<DefaultForest<ForestLabel<GrammarLabel>>, crate::default::Error> {
+ let mut labels_iter = labels.as_ref().iter();
+ let mut result: DefaultForest<ForestLabel<GrammarLabel>>;
+
+ if let Some(first_label) = labels_iter.next() {
+ result = DefaultForest::new_leaf(GrammarLabel::new(*first_label, pos));
+
+ let mut index = 0;
+
+ for label in labels_iter {
+ result.plant(
+ index,
+ DefaultForest::new_leaf(GrammarLabel::new(*label, pos)),
+ false,
+ )?;
+
+ // To prevent duplication of labels causing
+ // panics, we query the index of the new node.
+ index = result
+ .query_label(GrammarLabel::new(*label, pos).into())
+ // REVIEW: Perhaps a LabelNoNode error?
+ .ok_or(Error::Invalid)?;
+ }
+ } else {
+ result = Default::default();
+ }
+
+ Ok(result)
+}
+
+impl DefaultForest<ForestLabel<GrammarLabel>> {
+ /// Insert an item derivation forest into a recording forest.
+ ///
+ /// We need the help of other things just for finding the correct
+ /// places to insert these item fragments.
+ pub fn insert_item(
+ &mut self,
+ label: Edge,
+ fragment: impl Borrow<DefaultForest<ForestLabel<GrammarLabel>>>,
+ atom_child_iter: impl Iterator<Item = usize>,
+ atom: &DefaultAtom,
+ ) -> Result<PaSe, Error> {
+ /// Convert an error telling us that an index is out of bounds.
+ ///
+ /// # Panics
+ ///
+ /// The function panics if the error is not of the expected
+ /// kind.
+ fn index_out_of_bounds_conversion(ge: GrammarError) -> Error {
+ match ge {
+ GrammarError::IndexOutOfBounds(index, bound) => {
+ Error::IndexOutOfBounds(index, bound)
+ }
+ _ => Error::Invalid,
+ }
+ }
+
+ let fragment = fragment.borrow();
+
+ let pase = label.forest_source();
+
+ let parents: Vec<Parent> = {
+ let mut result = Vec::new();
+
+ if let Some(parent) = pase.parent() {
+ result.push(parent);
+ } else {
+ let (root, leaf) = pase.segment().unwrap();
+ let mut seen_nodes = Set::new();
+
+ let mut stack = vec![root];
+
+ while let Some(top) = stack.pop() {
+ if seen_nodes.contains(&top) {
+ continue;
+ }
+
+ seen_nodes.insert(top);
+
+ for (index, child) in self.children_of(top)?.enumerate() {
+ if child == leaf {
+ result.push(Parent::new(top, index));
+ } else {
+ stack.push(child);
+ }
+ }
+ }
+ }
+
+ result
+ };
+
+ for atom_child in atom_child_iter {
+ // Find reduction information.
+ let reduction_info = atom
+ .query_reduction(label.label(), atom_child)
+ .map_err(index_out_of_bounds_conversion)?;
+
+ let mut stack = parents.clone();
+ let mut second_stack = Vec::new();
+
+ // locate the nodes
+ for reduction_nt in reduction_info.iter().copied().flatten().rev() {
+ while let Some(node) = stack.pop() {
+ // REVIEW: A lot of labels appear here.
+ // Perhaps I need to refactor something?
+ if matches!(
+ self
+ .vertex_label(node.node())?
+ .ok_or_else(|| Error::NodeNoLabel(node.node()))?
+ .label()
+ .label(),
+ GrammarLabelType::TNT(TNT::Non(nt)) if nt == *reduction_nt
+ ) {
+ for parent in self.parents_of(node.node())? {
+ debug_assert!(matches!(
+ self.vertex_label(parent.node()),
+ Ok(Some(label)) if
+ label
+ .label()
+ .label()
+ .rule()
+ .is_some()));
+
+ second_stack.extend(self.parents_of(parent.node())?.filter(|n| {
+ matches!(self.vertex_label(n.node()),
+ Ok(Some(label))
+ if matches!(
+ label.label().label().tnt(),
+ Some(TNT::Non(_))))
+ }));
+ }
+ }
+ }
+
+ std::mem::swap(&mut stack, &mut second_stack);
+
+ if stack.is_empty() {
+ break;
+ }
+ }
+
+ if stack.is_empty() {
+ return Err(Error::CannotPlant);
+ }
+
+ for parent in stack.into_iter() {
+ if parent.edge() + 1 >= self.degree(parent.node())? {
+ self.plant(parent.node(), fragment, false)?;
+ } else {
+ let nth_child = self.nth_child(parent.node(), parent.edge() + 1)?;
+
+ if self.is_prefix(nth_child, fragment)? {
+ // do nothing
+ continue;
+ }
+
+ let cloned_node = self.clone_node(nth_child)?;
+
+ self.plant(cloned_node, fragment, false)?;
+ }
+ }
+ }
+
+ let result = if fragment.nodes_len() == 2 {
+ let root_label = fragment.vertex_label(0)?.unwrap();
+ let leaf_label = fragment.vertex_label(1)?.unwrap();
+
+ // it has been planted, so should be safe.
+ let node = self.query_label(root_label).unwrap();
+
+ let edge = {
+ let mut result = None;
+
+ for (index, child) in self.children_of(node)?.enumerate() {
+ if matches!(self.vertex_label(child)?, Some(child_label) if child_label == leaf_label)
+ {
+ result = Some(index);
+ break;
+ }
+ }
+
+ if let Some(result) = result {
+ result
+ } else {
+ unreachable!("the forest is wrongly planted");
+ }
+ };
+
+ PaSe::Parent(node, edge)
+ } else {
+ let root_label = fragment.vertex_label(0)?.unwrap();
+ let leaf_label = fragment.vertex_label(fragment.nodes_len() - 1)?.unwrap();
+
+ PaSe::Segment(
+ self.query_label(root_label).unwrap(),
+ self.query_label(leaf_label).unwrap(),
+ )
+ };
+
+ Ok(result)
+ }
+}
diff --git a/chain/src/item/mod.rs b/chain/src/item/mod.rs
new file mode 100644
index 0000000..7fafcc8
--- /dev/null
+++ b/chain/src/item/mod.rs
@@ -0,0 +1,252 @@
+#![warn(missing_docs)]
+//! This module implements the type of items for the chain-rule
+//! machine.
+//!
+//! More specifically, it implements the iten derivation forests for
+//! the machine.
+
+// TODO: Implement an enumeration for Parent or Segment, perhaps
+// called PaSe.
+
+// TODO: Move functions for handling forests, currently contained
+// within the method derive to this module, and make them aware of the
+// enumeration PaSe.
+
+// TODO: Implement the Item trait from semirings for the item
+// derivation forest, and then we can easily execute items later on.
+
+use graph::{error::Error as GError, GraphLabel, LabelGraph, Parent, ParentsGraph};
+
+use core::borrow::Borrow;
+
+/// A parent or a segment.
+///
+/// A parent is a node with an edge index, which represents a certain
+/// edge.
+///
+/// A segment represents every edge from the root node to the single
+/// terminating node.
+#[allow(unused)]
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
+pub enum PaSe {
+ /// An edge from a node, as the n-th child.
+ Parent(usize, usize),
+ /// A segment from a root to the single terminating node.
+ Segment(usize, usize),
+}
+
+impl From<Parent> for PaSe {
+ fn from(value: Parent) -> Self {
+ Self::Parent(value.node(), value.edge())
+ }
+}
+
+impl Default for PaSe {
+ fn default() -> Self {
+ Self::Segment(0, 0)
+ }
+}
+
+impl core::fmt::Display for PaSe {
+ fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+ match self {
+ Self::Parent(node, edge) => write!(f, "the {edge}-th edge from {node}"),
+ Self::Segment(root, leaf) => write!(f, "a segment from {root} to {leaf}"),
+ }
+ }
+}
+
+impl PaSe {
+ fn parent(self) -> Option<Parent> {
+ if let Self::Parent(node, edge) = self {
+ Some(Parent::new(node, edge))
+ } else {
+ None
+ }
+ }
+
+ fn segment(self) -> Option<(usize, usize)> {
+ if let Self::Segment(root, leaf) = self {
+ Some((root, leaf))
+ } else {
+ None
+ }
+ }
+}
+
+/// An internal type that indicates the status of a node as either a
+/// packed, cloned, or plain node.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Default)]
+enum ForestLabelType {
+ Packed,
+ #[default]
+ Plain,
+ Cloned(usize),
+}
+
+impl core::fmt::Display for ForestLabelType {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ match self {
+ Self::Packed => write!(f, "packed"),
+ Self::Plain => write!(f, "plain"),
+ Self::Cloned(index) => write!(f, "the {index}-th clone"),
+ }
+ }
+}
+
+/// A type that encodes the properties demanded by a forest.
+#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
+pub struct ForestLabel<T: GraphLabel> {
+ label: T,
+ status: ForestLabelType,
+}
+
+impl<T: GraphLabel> core::fmt::Display for ForestLabel<T> {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ writeln!(f, "label: {}, status: {}", self.label, self.status)
+ }
+}
+
+/// The type of erros for converting forest labels.
+#[non_exhaustive]
+#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)]
+pub enum ForestLabelError {
+ /// Try to pack a cloned node.
+ PackClone,
+ /// Try to clone a packed node.
+ ClonePack,
+}
+
+impl core::fmt::Display for ForestLabelError {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ match self {
+ Self::PackClone => write!(f, "cannot pack a cloned node"),
+ Self::ClonePack => write!(f, "cannot clone a packed node"),
+ }
+ }
+}
+
+impl std::error::Error for ForestLabelError {}
+
+impl<T: GraphLabel> ForestLabel<T> {
+ /// Retrieve the label.
+ pub fn label(&self) -> T {
+ self.label
+ }
+
+ /// Return true if and only if this node is packed.
+ pub fn is_packed(&self) -> bool {
+ matches!(self.status, ForestLabelType::Packed)
+ }
+
+ /// Retrieve the optional clone index.
+ pub fn clone_index(&self) -> Option<usize> {
+ match self.status {
+ ForestLabelType::Cloned(index) => Some(index),
+ _ => None,
+ }
+ }
+
+ /// Try to clone a node.
+ pub fn clone<F>(self, forest: &F) -> Result<Self, ForestLabelError>
+ where
+ F: Forest<T>,
+ {
+ if self.is_packed() {
+ Err(ForestLabelError::ClonePack)
+ } else {
+ let clone_index = if let Some(old_index) = self.clone_index() {
+ let mut new_index: usize = old_index + 1;
+
+ let mut new_label = Self {
+ status: ForestLabelType::Cloned(new_index),
+ ..self
+ };
+
+ while forest.query_label(new_label).is_some() {
+ new_index += 1;
+ new_label = Self {
+ status: ForestLabelType::Cloned(new_index),
+ ..self
+ };
+ }
+
+ new_index
+ } else {
+ 0
+ };
+
+ Ok(Self {
+ status: ForestLabelType::Cloned(clone_index),
+ ..self
+ })
+ }
+ }
+
+ /// Try to pack a node.
+ pub fn pack(self) -> Result<Self, ForestLabelError> {
+ if self.clone_index().is_some() {
+ Err(ForestLabelError::PackClone)
+ } else {
+ let new_label = Self {
+ status: ForestLabelType::Packed,
+ ..self
+ };
+
+ Ok(new_label)
+ }
+ }
+}
+
+impl<T: GraphLabel> From<T> for ForestLabel<T> {
+ fn from(label: T) -> Self {
+ let status = ForestLabelType::default();
+ Self { label, status }
+ }
+}
+
+/// The expected behaviours of an item derivation forest.
+///
+/// Note that it requires only a subset of the functionalities of
+/// labelled graphs.
+pub trait Forest<T: GraphLabel>: ParentsGraph + LabelGraph<ForestLabel<T>> {
+ /// The type of errors for operations on the forest.
+ type Error: std::error::Error + From<GError> + From<ForestLabelError>;
+
+ /// Return the root of the forest.
+ ///
+ /// A forest without a root is regarded as empty.
+ fn root(&self) -> Option<usize>;
+
+ /// Construct a forest consisting of one leaf node with the given
+ /// label.
+ fn new_leaf(label: T) -> Self;
+
+ /// Detect if the fragment is a prefix of the sub-forest rooted at
+ /// `node_id`.
+ fn is_prefix<F>(&self, node_id: usize, fragment: F) -> Result<bool, Self::Error>
+ where
+ F: Borrow<Self>;
+
+ /// Extend the forest by adjoining another forest at a given node.
+ fn plant<F>(&mut self, node_id: usize, fragment: F, planted: bool) -> Result<(), Self::Error>
+ where
+ F: Borrow<Self>;
+
+ /// Clone a node by making a new node and making all the nodes
+ /// that previously pointed to the old node now point to the new
+ /// node, and the new node points to the old node. Return the
+ /// index of the new node. In addition, if the old node had
+ /// already been cloned, just return the index of its
+ /// representative.
+ ///
+ /// The labels of the representing node and of the cloned node
+ /// will be handled automatically.
+ fn clone_node(&mut self, node_id: usize) -> Result<usize, Self::Error>;
+}
+
+pub mod default;
+
+pub mod genins;
+
+pub use genins::generate_fragment;
diff --git a/chain/src/lib.rs b/chain/src/lib.rs
index a3d420b..916678f 100644
--- a/chain/src/lib.rs
+++ b/chain/src/lib.rs
@@ -10,9 +10,16 @@
pub mod atom;
-use graph::{error::Error as GError, LabelExtGraph, Parent};
+pub mod item;
-use forest::Error as ForestError;
+// TODO: We need a module for items, that serve the role of the
+// forests now.
+
+use graph::{error::Error as GError, LabelExtGraph};
+
+use item::default::Error as ForestError;
+
+use item::PaSe;
/// An edge in the Chain-Rule machine.
#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
@@ -20,14 +27,14 @@ pub struct Edge {
/// The position in the atomic languages.
label: usize,
/// The source of the associated forest edge.
- forest_source: Parent,
+ forest_source: PaSe,
/// Whether or not this edge is "accepting".
accepting: bool,
}
impl Edge {
/// Construct a new edge.
- pub fn new(label: usize, forest_source: Parent, accepting: bool) -> Self {
+ pub fn new(label: usize, forest_source: PaSe, accepting: bool) -> Self {
Self {
label,
forest_source,
@@ -46,7 +53,7 @@ impl Edge {
}
/// Return the associated forest edge of the edge.
- pub fn forest_source(&self) -> Parent {
+ pub fn forest_source(&self) -> PaSe {
self.forest_source
}
}
@@ -56,12 +63,7 @@ impl core::fmt::Display for Edge {
let label = self.label();
let forest_source = self.forest_source();
- write!(
- f,
- "edge label {label} with forest source {} and edge index {}",
- forest_source.node(),
- forest_source.edge()
- )
+ write!(f, "edge label {label} with item {}", forest_source)
}
}
@@ -84,7 +86,7 @@ pub enum TwoLayers {
/// the source of the associated forest edge of the second layer,
/// and the third is a list of edges, which are the common first
/// layers.
- Two(usize, Parent, bool, Vec<(Edge, usize)>),
+ Two(usize, PaSe, bool, Vec<(Edge, usize)>),
}
/// The type of a collapsed iterator.
diff --git a/chain/src/plan.org b/chain/src/plan.org
index c2a9037..1da33cc 100644
--- a/chain/src/plan.org
+++ b/chain/src/plan.org
@@ -2,7 +2,7 @@
#+AUTHOR: Durand
#+DATE: <2022-11-18 Ven 19:57>
-* Things to do [7/10]
+* Things to do [6/9]
- [X] Implement builders for graphs
- [X] Find sets of the first terminals for each non-terminal, in the
@@ -79,15 +79,13 @@
lack of this step might be the root cause of the failure of the
previous version of this project.
+ [X] Test atoms
-- [X] Implement forests [4/4]
- + [X] Design a format of forests. This should be the most crucial
- thing to do, in order to have a better understanding of the whole
- picture. I need to have a clear understanding of the details of
- the binarised shared packed parsed forests, that reflects the
- regular expressions in the grammar equations.
- + [X] Define a trait with the expected behaviours.
- + [X] Implement them as parents-knowing graphs.
- + [X] Test
+- [ ] Implement semiring. [0/5]
+ + [ ] Define the trait.
+ + [ ] Define items and rules for the chain-rule parser, as an
+ item-based description.
+ + [ ] Implement the boolean semiring.
+ + [ ] Implement the natural number semiring.
+ + [ ] Implement the free semiring.
- [-] Implement languages. [4/6]
+ [X] First define a trait with the expected behaviours.
+ [X] Then implement them as doubly labelled graphs.
@@ -95,19 +93,29 @@
with a position in the forest. This perfectly solves the problem
of those "plugs"!
+ [X] Then implement finding derivatives by use of the chain rule.
- + [ ] Handle Forests
+ + [ ] Handle Semirings
+ [-] Tests
-- [ ] Implement semiring. [0/5]
- + [ ] Define the trait.
- + [ ] Implement the boolean semiring.
- + [ ] Implement the natural number semiring.
- + [ ] Implement the free semiring.
- + [ ] Compute and record a semiring value when computing
- derivatives.
- [X] Miscellaneous [1/1]
+ [X] Implement a function for NFA that checks if a given predicate
is satisfied for each edge label.
+** Forests
+
+This was a failed attempt to handle forests. I decided to handle
+forests as a special case of semi-rings. This is not only more
+systematic, but has also the potential of handling more general
+semi-rings later, like the Viterbi semirings, for example.
+
+- [X] Implement forests [4/4]
+ + [X] Design a format of forests. This should be the most crucial
+ thing to do, in order to have a better understanding of the whole
+ picture. I need to have a clear understanding of the details of
+ the binarised shared packed parsed forests, that reflects the
+ regular expressions in the grammar equations.
+ + [X] Define a trait with the expected behaviours.
+ + [X] Implement them as parents-knowing graphs.
+ + [X] Test
+
* Atomic Languages
This describes the behaviours of atomic languages. The atomic
diff --git a/forest/src/lib.rs b/forest/src/lib.rs
index 02c5fcd..0b1092f 100644
--- a/forest/src/lib.rs
+++ b/forest/src/lib.rs
@@ -6,10 +6,6 @@
//! parse forest, or SPPF. It packs sub-trees with the same parents
//! under the same branch, and lets nodes from different branches
//! share the same children, and hence the name.
-//!
-//! What is special here is that the forests are marked with some
-//! out-coming and in-coming plugs. These plugs are used to join
-//! different fragments of forests together.
use graph::{error::Error as GError, GraphLabel, LabelGraph, ParentsGraph};
diff --git a/grammar/src/label.rs b/grammar/src/label.rs
index 05b0b1e..3f89d9a 100644
--- a/grammar/src/label.rs
+++ b/grammar/src/label.rs
@@ -12,15 +12,50 @@ pub enum GrammarLabelType {
Rule(usize),
}
+// Some convenient conversions
+
+impl From<usize> for GrammarLabelType {
+ #[inline]
+ fn from(r: usize) -> Self {
+ Self::Rule(r)
+ }
+}
+
+impl From<TNT> for GrammarLabelType {
+ #[inline]
+ fn from(tnt: TNT) -> Self {
+ Self::TNT(tnt)
+ }
+}
+
impl GrammarLabelType {
/// Return the name of this label with the help of the associated
/// grammar.
+ #[inline]
pub fn name(&self, grammar: &Grammar) -> Result<String, Error> {
match self {
Self::TNT(tnt) => grammar.name_of_tnt(*tnt),
Self::Rule(pos) => grammar.rule_pos_to_string(*pos),
}
}
+
+ /// Return the contained TNT, if any.
+ #[inline]
+ pub fn tnt(&self) -> Option<TNT> {
+ match self {
+ Self::TNT(tnt) => Some(*tnt),
+ Self::Rule(_) => None,
+ }
+ }
+
+ /// Return the contained rule position, if any.
+ #[inline]
+ pub fn rule(&self) -> Option<usize> {
+ match self {
+ Self::TNT(_) => None,
+ Self::Rule(pos) => Some(*pos),
+ }
+ }
}
/// The label to be used in a forest.
@@ -32,8 +67,6 @@ pub struct GrammarLabel {
start: usize,
/// The end in the input that this label correponds to.
end: Option<usize>,
- /// A node in the forest might be a packed node.
- packed_p: bool,
}
impl core::fmt::Display for GrammarLabel {
@@ -48,54 +81,44 @@ impl core::fmt::Display for GrammarLabel {
impl GrammarLabel {
/// Construct a new label.
- pub fn new(label: GrammarLabelType, start: usize) -> Self {
+ #[inline]
+ pub fn new(label: impl Into<GrammarLabelType>, start: usize) -> Self {
+ let label = label.into();
let end = None;
- let packed_p = false;
- Self {
- label,
- start,
- end,
- packed_p,
- }
+ Self { label, start, end }
}
/// Return the end in the input.
+ #[inline]
pub fn end(&self) -> Option<usize> {
self.end
}
/// Return the start in the input.
+ #[inline]
pub fn start(&self) -> usize {
self.start
}
/// Return the actual label.
+ #[inline]
pub fn label(&self) -> GrammarLabelType {
self.label
}
/// Update the end.
+ #[inline]
pub fn set_end(&mut self, end: usize) {
self.end = Some(end);
}
- /// Check whether the node is a packed node.
- pub fn is_packed(&self) -> bool {
- self.packed_p
- }
-
- /// Update the packed status.
- pub fn set_packed_p(&mut self, packed_p: bool) {
- self.packed_p = packed_p;
- }
-
/// Return a string description with the help of the associated
/// grammar.
pub fn to_string(&self, grammar: &Grammar) -> Result<String, Error> {
// First calculate the length of the resulting string.
- let mut num = 2 + 7 + 14 + 6;
+ let mut num = 16 + 6;
num += self.label().name(grammar)?.len();
@@ -111,15 +134,7 @@ impl GrammarLabel {
let mut s = String::with_capacity(num);
- s.push_str("a ");
-
- if self.is_packed() {
- s.push_str("packed ");
- } else {
- s.push_str("normal ");
- }
-
- s.push_str("node labelled ");
+ s.push_str("a node labelled ");
s.push_str(&self.label().name(grammar)?);
diff --git a/grammar/src/lib.rs b/grammar/src/lib.rs
index b29fc69..2c17a5f 100644
--- a/grammar/src/lib.rs
+++ b/grammar/src/lib.rs
@@ -486,6 +486,9 @@ impl Grammar {
.contains(&None))
}
+ // FIXME: We shall use a label to query this information as well,
+ // probably.
+
/// Query the expansion information from the position `pos1` to
/// the position `pos2` .
#[inline]
diff --git a/graph/src/labelled/double.rs b/graph/src/labelled/double.rs
index 174c8ef..ab2b27c 100644
--- a/graph/src/labelled/double.rs
+++ b/graph/src/labelled/double.rs
@@ -8,13 +8,10 @@
use super::*;
-// We use BTreeMap and BTreeSet here as we need to exclude duplicate
-// edge sets, while an ordinary hashmap and hashset do not allow
-// hashing.
use std::collections::{
- btree_map::{Iter as MapIter, Keys},
- btree_set::Iter,
- BTreeMap as Map, BTreeSet as Set, HashMap as HMap,
+ hash_map::{Iter as MapIter, Keys},
+ hash_set::Iter,
+ HashMap as Map, HashSet as Set,
};
#[derive(Debug, Clone)]
@@ -48,31 +45,13 @@ impl<T: GraphLabel> DLNode<T> {
}
}
-/// Mapping a set of edges to an index of node.
-type EdgeMap<T> = HMap<Set<(T, usize)>, usize>;
-
/// Double direction Labelled Graph.
-///
-/// Each node is supposed to have a unique edge set. Constructing
-/// methods such as from the trait
-/// [`LabelExtGraph`][super::LabelExtGraph] already handles the
-/// elimination of duplication.
#[derive(Debug, Clone)]
pub struct DLGraph<T: GraphLabel> {
nodes: Vec<DLNode<T>>,
- edges_table: EdgeMap<T>,
}
impl<T: GraphLabel> DLGraph<T> {
- #[inline]
- /// Return an empty graph.
- pub fn new() -> Self {
- Self {
- nodes: Vec::new(),
- edges_table: HMap::default(),
- }
- }
-
/// Return a builder.
#[inline]
pub fn builder(self) -> DLGBuilder<T> {
@@ -89,7 +68,9 @@ impl<T: GraphLabel> DLGraph<T> {
impl<T: GraphLabel> Default for DLGraph<T> {
#[inline]
fn default() -> Self {
- Self::new()
+ let nodes = Vec::new();
+
+ Self { nodes }
}
}
@@ -160,11 +141,9 @@ impl<T: GraphLabel> Graph for DLGraph<T> {
let mut post = String::new();
- use std::fmt::Write as FWrite;
-
for (source, target) in self.edges() {
for label in self.edge_label(source, target).unwrap() {
- let _ = writeln!(post, " {source} -> {target} [label = \"{label}\"]");
+ post.push_str(&format!(" {source} -> {target} [label = \"{label}\"]\n"));
}
}
@@ -191,7 +170,6 @@ impl<T: GraphLabel> Graph for DLGraph<T> {
let new_graph = builder.build();
self.nodes = new_graph.nodes;
- self.edges_table = new_graph.edges_table;
}
}
@@ -336,10 +314,10 @@ impl<T: GraphLabel> LabelGraph<T> for DLGraph<T> {
type LabelIter<'a> = LabelIter<'a,T> where T: 'a;
- type EdgeLabelIter<'a> = EdgeLabelIter<'a,T>
+ type EdgeLabelIter<'a> = EdgeLabelIter<'a, T>
where
Self: 'a,
- T: 'a + Copy + Clone;
+ T: 'a;
fn edge_label(&self, source: usize, target: usize) -> Result<Self::EdgeLabelIter<'_>, Error> {
if self.has_edge(source, target)? {
@@ -438,19 +416,12 @@ impl<T: GraphLabel> LabelExtGraph<T> for DLGraph<T> {
}
}
- match self.edges_table.get(&edges_set) {
- Some(old_index) => Ok(*old_index),
- None => {
- let new_node = DLNode::new(by_target, by_label, flat);
- let new_index = self.nodes_len();
-
- self.edges_table.insert(edges_set, new_index);
+ let new_node = DLNode::new(by_target, by_label, flat);
+ let new_index = self.nodes_len();
- self.nodes.push(new_node);
+ self.nodes.push(new_node);
- Ok(new_index)
- }
- }
+ Ok(new_index)
}
}
@@ -489,7 +460,6 @@ impl<T: GraphLabel> Builder for DLGBuilder<T> {
fn with_capacity(size: usize) -> Self {
Self(DLGraph {
nodes: Vec::with_capacity(size),
- edges_table: Default::default(),
})
}
@@ -526,8 +496,6 @@ impl<T: GraphLabel> Builder for DLGBuilder<T> {
);
if new_edge_p {
- let old_children_set: Set<(T, usize)> = source_node.flat.iter().copied().collect();
-
source_node
.by_target
.entry(target)
@@ -541,16 +509,6 @@ impl<T: GraphLabel> Builder for DLGBuilder<T> {
.insert(target);
source_node.flat.push((label, target));
-
- let new_children_set: Set<(T, usize)> = source_node.flat.iter().copied().collect();
-
- // When source_node is in use, we cannot borrow self
- // mutably again, so we move the following two statements
- // to after all uses of source_node.
-
- self.edges_table.remove(&old_children_set);
-
- self.edges_table.insert(new_children_set, source);
}
Ok(())
@@ -586,8 +544,6 @@ impl<T: GraphLabel> Builder for DLGBuilder<T> {
}
if !to_remove.is_empty() {
- let old_children_set: Set<(T, usize)> = source_node.flat.iter().copied().collect();
-
for label in to_remove.iter().copied() {
// This must be "Some", as the outer "if" checks
source_node
@@ -605,41 +561,19 @@ impl<T: GraphLabel> Builder for DLGBuilder<T> {
});
}
- // If after removal no labels remain for the target,
- // we remove the edge entirely, to avoid false empty
- // edges.
-
- if let Some(set) = source_node.by_target.get(&target) {
- if set.is_empty() {
- source_node.by_target.remove(&target);
- }
+ if matches!(source_node.by_target.get(&target),
+ Some(set) if set.is_empty())
+ {
+ source_node.by_target.remove(&target);
}
for label in to_remove.iter() {
- if let Some(set) = source_node.by_label.get(label) {
- if set.is_empty() {
- source_node.by_label.remove(label);
- }
+ if matches!(source_node.by_label.get(label),
+ Some(set) if set.is_empty())
+ {
+ source_node.by_label.remove(label);
}
}
-
- // if source_node.flat.is_empty() {
- // source_node.by_target.remove(&target);
-
- // for label in to_remove.iter() {
- // source_node.by_label.remove(label);
- // }
- // }
-
- let new_children_set: Set<(T, usize)> = source_node.flat.iter().copied().collect();
-
- // When source_node is in use, we cannot borrow self
- // mutably again, so we move the following two
- // statements to after all uses of source_node.
-
- self.edges_table.remove(&old_children_set);
-
- self.edges_table.insert(new_children_set, source);
}
}
@@ -703,13 +637,13 @@ mod label_test {
assert_eq!(graph.extend([(2, 1), (3, 2), (2, 3)].iter().copied())?, 5);
// testing adding a duplicated edge set
- assert_eq!(graph.extend([(2, 1), (2, 3), (3, 2)].iter().copied())?, 5);
- assert_eq!(graph.extend([(3, 2), (3, 0)].iter().copied())?, 3);
+ assert_eq!(graph.extend([(2, 1), (2, 3), (3, 2)].iter().copied())?, 6);
+ assert_eq!(graph.extend([(3, 2), (3, 0)].iter().copied())?, 7);
let graph = graph;
// ensuring the correct length
- assert_eq!(graph.nodes_len(), 6);
+ assert_eq!(graph.nodes_len(), 8);
// testing children_of
assert_eq!(graph.children_of(5)?.collect::<Set<_>>(), set!(1, 3, 2));
@@ -723,8 +657,8 @@ mod label_test {
// testing edge_label
assert_eq!(graph.edge_label(5, 2)?.collect::<Set<_>>(), set!(3));
assert!(matches!(
- graph.edge_label(6, 2),
- Err(Error::IndexOutOfBounds(6, 6))
+ graph.edge_label(8, 2),
+ Err(Error::IndexOutOfBounds(8, 8))
));
// testing degree
@@ -738,8 +672,8 @@ mod label_test {
assert!(graph.has_edge(3, 2)?);
assert!(!graph.has_edge(3, 1)?);
assert!(matches!(
- graph.has_edge(3, 6),
- Err(Error::IndexOutOfBounds(6, 6))
+ graph.has_edge(3, 8),
+ Err(Error::IndexOutOfBounds(8, 8))
));
// testing labels_of
@@ -754,8 +688,8 @@ mod label_test {
assert_eq!(label_map, compare_map);
assert!(matches!(
- graph.labels_of(6),
- Err(Error::IndexOutOfBounds(6, 6))
+ graph.labels_of(8),
+ Err(Error::IndexOutOfBounds(8, 8))
));
Ok(())
diff --git a/graph/src/labelled/mod.rs b/graph/src/labelled/mod.rs
index 2bbc7ec..fb7b405 100644
--- a/graph/src/labelled/mod.rs
+++ b/graph/src/labelled/mod.rs
@@ -17,5 +17,3 @@ pub use double::{DLGBuilder, DLGraph};
pub mod binary;
pub use binary::{PLGBuilderMut, PLGraph};
-
-// pub use binary::BLGraph;
diff --git a/nfa/src/default/nfa.rs b/nfa/src/default/nfa.rs
index 8d657d5..6b1e56f 100644
--- a/nfa/src/default/nfa.rs
+++ b/nfa/src/default/nfa.rs
@@ -147,9 +147,11 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> {
let mut builder: DLGBuilder<LabelType<T>> = Builder::with_capacity(nfa_len);
- for _ in 0..nfa_len {
- builder.add_vertex();
- }
+ builder.add_vertices(nfa_len);
+
+ // for _ in 0..nfa_len {
+ // builder.add_vertex();
+ // }
let default = LabelType::new(DOption(default), total_regexps_len, false);
diff --git a/nfa/src/default/regex.rs b/nfa/src/default/regex.rs
index 1e3e87b..9e1ed5c 100644
--- a/nfa/src/default/regex.rs
+++ b/nfa/src/default/regex.rs
@@ -879,9 +879,9 @@ mod test_des_rec {
use crate::desrec::DesRec;
- #[allow(dead_code, unused)]
+ #[allow(dead_code)]
fn test_scanner<'a, 'b, T>(
- parser: &'b DefaultRegParser<T>,
+ _parser: &'b DefaultRegParser<T>,
input: &'a str,
) -> Result<Option<(usize, RegexType<char>, ParseDirection)>, ParseError>
where
diff --git a/semiring/src/lib.rs b/semiring/src/lib.rs
index 7d12d9a..9d096db 100644
--- a/semiring/src/lib.rs
+++ b/semiring/src/lib.rs
@@ -1,14 +1,43 @@
-pub fn add(left: usize, right: usize) -> usize {
- left + right
+#![warn(missing_docs)]
+//! This crate defines the expected behaviours of semirings and
+//! semiring parsers. Some standard semirings are also implemented,
+//! such as the boolean, the counting, and the derivation forest
+//! semirings. Moreover, this crate also defines the behaviours of a
+//! parser, with the formalism of an "item-based description". See
+//! the paper [Parsing
+//! inside-out](https://arxiv.org/abs/cmp-lg/9805007) by Joshua
+//! Goodman. To be more precise, it defines the behaviours of an
+//! "item interpreter" that executes "items" in a chosen semiring.
+//! Finally, it provides a default implementation of an item
+//! interpreter.
+//!
+//! The crate *chain* will implement a parser using the item-based
+//! description. Specifically, it will produce items as it parses the
+//! input string. Then we can execute these items by any interpreter.
+
+/// A semiring is equipped with two operations: the addition and the
+/// multiplication. It also has additive and multiplicative
+/// identities. Also the two operations are supposed to be
+/// associative, and the multiplication is supposed to be distributive
+/// over the addition. But the addition is not required to be
+/// commutative, and is usually not indeed commutative.
+pub trait Semiring {
+ /// The additive identity.
+ fn zero() -> Self;
+
+ /// The multiplicative identity.
+ fn one() -> Self;
+
+ /// The addition.
+ fn add(&mut self, other: impl AsRef<Self>);
+
+ /// The multiplication.
+ fn mul(&mut self, other: impl AsRef<Self>);
}
+// TODO: Implement boolean semiring
+// TODO: Implement counting semiring
+// TODO: Implement derivation forest semiring
+
#[cfg(test)]
-mod tests {
- use super::*;
-
- #[test]
- fn it_works() {
- let result = add(2, 2);
- assert_eq!(result, 4);
- }
-}
+mod tests {}