diff options
author | JSDurand <mmemmew@gmail.com> | 2023-01-28 10:17:24 +0800 |
---|---|---|
committer | JSDurand <mmemmew@gmail.com> | 2023-01-28 10:22:57 +0800 |
commit | f28155105134b90fd86049c65478d307e0d8dbbc (patch) | |
tree | 72b3b4872d5dba89413eca70bcaae9e421def7ee | |
parent | e8ea01319b3a9032a3f4f69f65e9ca96562b87b9 (diff) |
a prototype of an item derivation forest
It seems to be complete now, but still awaits more tests to see where
the errors are, which should be plenty, haha.
-rw-r--r-- | ChangeLog | 19 | ||||
-rw-r--r-- | chain/Cargo.toml | 1 | ||||
-rw-r--r-- | chain/src/default.rs | 128 | ||||
-rw-r--r-- | chain/src/item/default.rs | 547 | ||||
-rw-r--r-- | chain/src/item/genins.rs | 231 | ||||
-rw-r--r-- | chain/src/item/mod.rs | 252 | ||||
-rw-r--r-- | chain/src/lib.rs | 26 | ||||
-rw-r--r-- | chain/src/plan.org | 44 | ||||
-rw-r--r-- | forest/src/lib.rs | 4 | ||||
-rw-r--r-- | grammar/src/label.rs | 75 | ||||
-rw-r--r-- | grammar/src/lib.rs | 3 | ||||
-rw-r--r-- | graph/src/labelled/double.rs | 126 | ||||
-rw-r--r-- | graph/src/labelled/mod.rs | 2 | ||||
-rw-r--r-- | nfa/src/default/nfa.rs | 8 | ||||
-rw-r--r-- | nfa/src/default/regex.rs | 4 | ||||
-rw-r--r-- | semiring/src/lib.rs | 51 |
16 files changed, 1324 insertions, 197 deletions
@@ -1,3 +1,22 @@ +2023-01-28 Jean Sévère Durand <durand@jsdurand.xyz> + + * chain: Move the forest crate to the item module in the chain + crate. That crate used to provide a kind of forests. This turns + out to be an item derivation forest for the chain-rule machine. + So it seems appropriate to implement this in the chain crate + instead. + + * chain: Also the chain rule machine seems to be complete now. It + remains to test the machine to find the must-be errors. + + * graph: Now the doubly linked graphs do not exclude nodes with + duplicate edge sets. That is, previously, when adding a node + whose children set is equal to that of another existing node, the + graph just reuses that old node. Now this is not the case + anymore. The reason is that this only saves some minor memory + usage, at the cost of more expensive graph operations, and might + affect the time complixity, so I just remove this functionality. + 2023-01-22 Jean Sévère Durand <durand@jsdurand.xyz> * forest: Correctly clone nodes. diff --git a/chain/Cargo.toml b/chain/Cargo.toml index c55586d..496f5dd 100644 --- a/chain/Cargo.toml +++ b/chain/Cargo.toml @@ -13,7 +13,6 @@ test-print-viz = [] nfa = { path = "../nfa" } graph = { path = "../graph" } grammar = { path = "../grammar" } -forest = { path = "../forest" } [dev-dependencies] grammar = { path = "../grammar", features = ["test-helper"] } diff --git a/chain/src/default.rs b/chain/src/default.rs index 22befff..77a64ca 100644 --- a/chain/src/default.rs +++ b/chain/src/default.rs @@ -7,8 +7,10 @@ use super::*; use crate::atom::{Atom, DefaultAtom}; +use crate::item::{ + default::DefaultForest, generate_fragment, Forest, ForestLabel, ForestLabelError, +}; use core::fmt::Display; -use forest::{default::DefaultForest, Forest}; use grammar::{Error as GrammarError, GrammarLabel, GrammarLabelType, TNT}; use graph::{labelled::DLGBuilder, Builder, DLGraph, Graph, LabelExtGraph, LabelGraph}; @@ -29,6 +31,10 @@ pub enum Error { NodeNoLabel(usize), /// Reserving memory fails. CannotReserve(TryReserveError), + /// Cannot create label for cloning nodes. + CannotClone(ForestLabelError), + /// Cannot find a suitable node to plant the new forest fragment. + CannotPlant, /// An invalid situation happens. Invalid, } @@ -44,6 +50,8 @@ impl Display for Error { ), Self::NodeNoLabel(n) => write!(f, "node {n} has no labels while it should have one"), Self::CannotReserve(e) => write!(f, "cannot reserve memory: {e}"), + Self::CannotClone(fe) => write!(f, "cannot clone due to {fe}"), + Self::CannotPlant => write!(f, "cannot plant a new forest fragment"), Self::Invalid => write!(f, "invalid"), } } @@ -69,6 +77,7 @@ impl From<ForestError> for Error { ForestError::DuplicatedNode(n) => Error::DuplicateNode(n), ForestError::InvalidGraphError(ge) => ge.into(), ForestError::NodeNoLabel(n) => Error::NodeNoLabel(n), + ForestError::LabelConversion(ce) => Error::CannotClone(ce), } } } @@ -93,7 +102,7 @@ impl Default for DerIterIndex { } /// A complex type used for storing values of edges with two layers. -type SecondTypeValue = (Parent, bool, Vec<(Edge, usize)>); +type SecondTypeValue = (PaSe, bool, Vec<(Edge, usize)>); /// An iterator of TwoLayers. #[derive(Debug, Default)] @@ -120,7 +129,7 @@ impl DerIter { fn add_second_layer( &mut self, label: usize, - forest_source: Parent, + forest_source: PaSe, accepting: bool, edges: Vec<(Edge, usize)>, ) { @@ -186,7 +195,7 @@ pub struct DefaultChain { atom: DefaultAtom, current: usize, history: Vec<usize>, - forest: DefaultForest<GrammarLabel>, + forest: DefaultForest<ForestLabel<GrammarLabel>>, accepting_vec: Vec<bool>, } @@ -205,7 +214,7 @@ impl DefaultChain { /// Return a reference to the associated forest. #[inline] - pub fn forest(&self) -> &DefaultForest<GrammarLabel> { + pub fn forest(&self) -> &DefaultForest<ForestLabel<GrammarLabel>> { &self.forest } @@ -316,6 +325,7 @@ impl LabelExtGraph<Edge> for DefaultChain { let new = self.graph.extend(edges)?; let accepting_len = self.accepting_vec.len(); + // Update the acceptace of the new node, if any. if self.accepting_vec.get(new).is_none() { // assert it can only grow by one node at a time. #[cfg(debug_assertions)] @@ -377,14 +387,17 @@ impl Chain for DefaultChain { let empty_state = atom.empty(); + // The zero-th non-terminal is assumed to be the starting + // non-terminal, by convention. let initial_nullable = atom .is_nullable(0) .map_err(|_| Error::IndexOutOfBounds(0, atom.non_num()))?; + // NOTE: Is it really correct to use `Parent::new(0, 0)` here? builder.add_edge( first, root, - Edge::new(empty_state, Parent::new(0, 0), initial_nullable), + Edge::new(empty_state, Default::default(), initial_nullable), )?; let graph = builder.build(); @@ -431,7 +444,7 @@ impl Chain for DefaultChain { type DerIter = DerIter; - fn derive(&mut self, t: usize, _pos: usize) -> Result<Self::DerIter, Self::Error> { + fn derive(&mut self, t: usize, pos: usize) -> Result<Self::DerIter, Self::Error> { use TNT::*; /// Convert an error telling us that an index is out of bounds. @@ -445,7 +458,7 @@ impl Chain for DefaultChain { GrammarError::IndexOutOfBounds(index, bound) => { Error::IndexOutOfBounds(index, bound) } - _ => panic!("wrong error kind"), + _ => Error::Invalid, } } @@ -459,11 +472,11 @@ impl Chain for DefaultChain { /// /// The generated edges will be pushed to `output` directly, /// to save some allocations. - // TODO: Handle forests as well. fn generate_edges( chain: &DefaultChain, child_iter: impl Iterator<Item = usize> + ExactSizeIterator + Clone, atom_child_iter: impl Iterator<Item = usize> + Clone, + pase: PaSe, mut output: impl AsMut<Vec<(Edge, usize)>>, ) -> Result<(), Error> { // First check the values from iterators are all valid. @@ -515,13 +528,11 @@ impl Chain for DefaultChain { // now push into output - let parent = Parent::new(0, 0); - for atom_child in atom_child_iter { let atom_child_accepting = chain.atom.is_accepting(atom_child).unwrap(); let atom_child_empty_node = chain.atom.is_empty_node(atom_child).unwrap(); - let edge = Edge::new(atom_child, parent, atom_child_accepting); + let edge = Edge::new(atom_child, pase, atom_child_accepting); if !atom_child_empty_node { output.extend(child_iter.clone().map(|child| (edge, child))); @@ -549,16 +560,41 @@ impl Chain for DefaultChain { continue; } + let atom_moved = atom_label.get_moved(); + match *atom_label.get_value() { Some(Ter(ter)) if ter == t => { + // prepare forest fragment + + let fragment = + generate_fragment([atom_moved.into(), Ter(ter).into()], pos)?; + + let new_pase = self.forest.insert_item( + *label, + fragment, + atom_child_iter.clone(), + &self.atom, + )?; + generate_edges( self, child_iter.clone(), atom_child_iter.clone(), + new_pase, &mut der_iter.singles, )?; } Some(Non(non)) => { + let first_fragment = + generate_fragment([atom_moved.into(), Non(non).into()], pos)?; + + let first_segment_pase = self.forest.insert_item( + *label, + first_fragment, + atom_child_iter.clone(), + &self.atom, + )?; + let virtual_node = self .atom .atom(non, t) @@ -570,12 +606,66 @@ impl Chain for DefaultChain { .is_accepting(virtual_node) .map_err(index_out_of_bounds_conversion)?; + let virtual_fragment = { + // `non` is valid, as checked above + let non_start = self.atom.nth_accumulator(non).unwrap() * 2; + + let mut result = DefaultForest::default(); + + for (label, child_iter) in self.atom.labels_of(non_start)? { + if matches!(*label.get_value(), + Some(Ter(ter)) if ter == t) + { + for child in child_iter { + let mut line: Vec<_> = self + .atom + .query_expansion(non_start, child) + .map_err(index_out_of_bounds_conversion)? + .iter() + .copied() + .flatten() + .map(|(nt, rule)| [(*rule).into(), Non(*nt).into()]) + .collect(); + + line.push([label.get_moved().into(), Ter(t).into()]); + + let line: Vec<GrammarLabelType> = + line.into_iter().flatten().collect(); + + if result.is_empty() { + result = generate_fragment(line, pos)?; + } else { + result.plant( + 0, + generate_fragment(line, pos)?, + false, + )?; + } + } + } + } + + result + }; + + // NOTE: We only need the PaSe from the + // first segment, so we pass an empty + // iterator, in which case the passed + // label is only used for the PaSe. + let virtual_pase = self.forest.insert_item( + Edge::new(0, first_segment_pase, true), + virtual_fragment, + std::iter::empty(), + &self.atom, + )?; + let mut new_edges = Vec::new(); generate_edges( self, child_iter.clone(), atom_child_iter.clone(), + first_segment_pase, &mut new_edges, )?; @@ -583,12 +673,10 @@ impl Chain for DefaultChain { der_iter.singles.extend(new_edges.clone()); } - let parent = Parent::new(0, 0); - if !self.atom.is_empty_node(virtual_node).unwrap() { der_iter.add_second_layer( virtual_node, - parent, + virtual_pase, accepting, new_edges, ); @@ -601,7 +689,10 @@ impl Chain for DefaultChain { // `generate_edges` if self.atom.is_empty_node(atom_child).unwrap() { der_iter.singles.extend(child_iter.clone().map(|child| { - (Edge::new(virtual_node, parent, accepting), child) + ( + Edge::new(virtual_node, virtual_pase, accepting), + child, + ) })); } } @@ -610,7 +701,8 @@ impl Chain for DefaultChain { // this has been checked in // `generate_edges` if self.atom.is_empty_node(atom_child).unwrap() { - // flat flat map, hmm... + // REVIEW: flat flat map, + // hmm... der_iter.singles.extend(child_iter.clone().flat_map( |child| { self.graph.labels_of(child).unwrap().flat_map( @@ -646,7 +738,7 @@ mod test_der_iter { fn test() -> Result<(), Box<dyn std::error::Error>> { let mut der_iter = DerIter::default(); - let parent = Parent::new(0, 0); + let parent = Default::default(); der_iter.singles.push((Edge::new(0, parent, true), 0)); diff --git a/chain/src/item/default.rs b/chain/src/item/default.rs new file mode 100644 index 0000000..b71f940 --- /dev/null +++ b/chain/src/item/default.rs @@ -0,0 +1,547 @@ +//! This module provides a default implementation of iten derivation +//! forest. + +use super::*; +use graph::{ + builder::BuilderMut, labelled::binary::PLGBuilderMut, Graph, LabelGraph, PLGraph, RedirectGraph, +}; + +use core::fmt::Display; + +/// The type of errors for forest operations. +#[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd)] +pub enum Error { + /// An index is out of bounds. + /// + /// The first component is the index that is out of bounds, and + /// the second component is the current length of nodes. + IndexOutOfBounds(usize, usize), + /// The forest does not permit duplicate nodes but encounters a + /// repeated node. + DuplicatedNode(usize), + /// A node has no labels while it is required to have one. + NodeNoLabel(usize), + /// Encounter an invalid error in converting from an error of + /// graphs. + InvalidGraphError(GError), + /// Encounter an error when converting forest labels. + LabelConversion(ForestLabelError), +} + +impl Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Error::IndexOutOfBounds(index, bound) => { + write!(f, "index {index} is out of bound {bound}") + } + Error::DuplicatedNode(n) => write!(f, "node {n} is duplicated"), + Error::InvalidGraphError(ge) => write!(f, "invalid error: {ge}"), + Error::NodeNoLabel(n) => write!(f, "node {n} has no labels, but it should have one"), + Error::LabelConversion(le) => write!(f, "fail to convert labels: {le}"), + } + } +} + +impl std::error::Error for Error {} + +impl From<GError> for Error { + fn from(ge: GError) -> Self { + match ge { + GError::IndexOutOfBounds(index, bound) => Self::IndexOutOfBounds(index, bound), + GError::DuplicatedNode(n) => Self::DuplicatedNode(n), + _ => Self::InvalidGraphError(ge), + } + } +} + +impl From<ForestLabelError> for Error { + fn from(le: ForestLabelError) -> Self { + Self::LabelConversion(le) + } +} + +/// A default implementation of forest. +#[derive(Debug, Clone)] +pub struct DefaultForest<T: GraphLabel> { + graph: PLGraph<T>, + root: Option<usize>, +} + +impl<T: GraphLabel> Default for DefaultForest<T> { + fn default() -> Self { + let graph = Default::default(); + let root = None; + + Self { graph, root } + } +} + +impl<T: GraphLabel> AsRef<DefaultForest<T>> for DefaultForest<T> { + fn as_ref(&self) -> &DefaultForest<T> { + self + } +} + +impl<T: GraphLabel> Graph for DefaultForest<T> { + type Iter<'a> = <PLGraph<T> as Graph>::Iter<'a> + where + Self: 'a; + + #[inline] + fn is_empty(&self) -> bool { + self.graph.is_empty() + } + + #[inline] + fn nodes_len(&self) -> usize { + self.graph.nodes_len() + } + + #[inline] + fn edges_len(&self) -> Option<usize> { + self.graph.edges_len() + } + + #[inline] + fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, GError> { + self.graph.children_of(node_id) + } + + #[inline] + fn degree(&self, node_id: usize) -> Result<usize, GError> { + self.graph.degree(node_id) + } + + #[inline] + fn is_empty_node(&self, node_id: usize) -> Result<bool, GError> { + self.graph.is_empty_node(node_id) + } + + #[inline] + fn has_edge(&self, source: usize, target: usize) -> Result<bool, GError> { + self.graph.has_edge(source, target) + } + + fn replace_by_builder(&mut self, _builder: impl graph::Builder<Result = Self>) { + unimplemented!() + } +} + +impl<T: GraphLabel> ParentsGraph for DefaultForest<T> { + type Iter<'a>= <PLGraph<T> as ParentsGraph>::Iter<'a> + where + Self:'a; + + #[inline] + fn parents_of(&self, node_id: usize) -> Result<<Self as ParentsGraph>::Iter<'_>, GError> { + self.graph.parents_of(node_id) + } + + #[inline] + fn nth_child(&self, node_id: usize, n: usize) -> Result<usize, GError> { + self.graph.nth_child(node_id, n) + } + + #[inline] + fn edge_to_parent( + &self, + source: usize, + target: usize, + ) -> Result<Option<graph::Parent>, GError> { + self.graph.edge_to_parent(source, target) + } +} + +impl<T: GraphLabel> LabelGraph<T> for DefaultForest<T> { + type Iter<'a> = std::iter::Empty<usize> + where + Self: 'a; + + type LabelIter<'a> = std::iter::Empty<(&'a T, <Self as LabelGraph<T>>::Iter<'a>)> + where + Self: 'a, + T: 'a; + + type EdgeLabelIter<'a> = std::iter::Empty<T> + where + Self: 'a, + T: 'a; + + #[inline] + fn query_label(&self, label: T) -> Option<usize> { + self.graph.query_label(label) + } + + #[inline] + fn vertex_label(&self, node_id: usize) -> Result<Option<T>, GError> { + self.graph.vertex_label(node_id) + } + + fn edge_label( + &self, + _source: usize, + _target: usize, + ) -> Result<Self::EdgeLabelIter<'_>, GError> { + unimplemented!("edges have no labels") + } + + fn find_children_with_label( + &self, + _node_id: usize, + _label: &T, + ) -> Result<<Self as LabelGraph<T>>::Iter<'_>, GError> { + unimplemented!("edges have no labels") + } + + fn labels_of(&self, _node_id: usize) -> Result<Self::LabelIter<'_>, GError> { + unimplemented!("edges have no labels") + } + + fn has_edge_label(&self, _node_id: usize, _label: &T, _target: usize) -> Result<bool, GError> { + unimplemented!("edges have no labels") + } +} + +impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> { + type Error = Error; + + fn root(&self) -> Option<usize> { + self.root + } + + fn new_leaf(label: T) -> Self { + let mut graph = PLGraph::default(); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut graph); + + let root = Some(builder.add_vertex(ForestLabel::from(label))); + + Self { graph, root } + } + + fn is_prefix<F>(&self, node_id: usize, fragment: F) -> Result<bool, Self::Error> + where + F: Borrow<Self>, + { + if !self.has_node(node_id) { + return Err(Error::IndexOutOfBounds(node_id, self.nodes_len())); + } + + // We do a depth-first traversal to determine if every node + // encountered has the same set of children (labels taken into + // the consideration). + + let fragment = fragment.borrow(); + + let mut frag_stack = Vec::with_capacity(fragment.nodes_len()); + + let mut self_stack = Vec::with_capacity(fragment.nodes_len()); + + let frag_root = if let Some(root) = fragment.root() { + root + } else { + // an empty forest is a prefix of any forest. + return Ok(true); + }; + + frag_stack.push(frag_root); + self_stack.push(node_id); + + // defer popping + while let (Some(frag_top), Some(self_top)) = + (frag_stack.last().copied(), self_stack.last().copied()) + { + frag_stack.pop(); + self_stack.pop(); + + if fragment.vertex_label(frag_top)? != self.vertex_label(self_top)? { + // not a prefix + return Ok(false); + } + + let mut self_children = self.children_of(self_top)?; + + for child in fragment.children_of(frag_top)? { + if let Some(self_child) = self_children.next() { + frag_stack.push(child); + self_stack.push(self_child); + } else { + // too few children + return Ok(false); + } + } + } + + // Check both stacks are empty at the end. + Ok(frag_stack.is_empty() && self_stack.is_empty()) + } + + fn plant<F>(&mut self, node_id: usize, fragment: F, planted: bool) -> Result<(), Self::Error> + where + F: Borrow<Self>, + { + // Convert self to a builder_mut, and traverse fragment in a + // depth-first manner and adjoin corresponding nodes along the + // way. + + if !self.has_node(node_id) { + return Err(Error::IndexOutOfBounds(node_id, self.nodes_len())); + } + + let fragment = fragment.borrow(); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let root = if let Some(root) = fragment.root() { + root + } else { + // Nothing to do to plant an empty forest. + return Ok(()); + }; + + // Just a dummy label for use in adding edges. + // + // REVIEW: I probably should refactor the API for builder_mut. + let root_label = fragment + .vertex_label(root)? + .ok_or(Error::NodeNoLabel(root))?; + + let nodes_len = fragment.nodes_len(); + + /// If the fragment root has a duplicate label, the forest + /// will not grow, so we use the label to find the adjoined + /// node index. + /// + /// The nodes hava already been added to the forest, so it is + /// safe to call unwrap. + macro_rules! conversion ( + ($node:expr) => { + { + builder + .query_label( + fragment + .vertex_label($node)? + .ok_or(Error::NodeNoLabel($node))? + ).unwrap() + } + } + ); + + // If the fragment has been planted before, we just add an + // edge. + + if planted { + builder.add_edge(node_id, conversion!(root), root_label)?; + + return Ok(()); + } + + // First adjoin those nodes and join the edges later. + + for node in 0..nodes_len { + let label = fragment + .vertex_label(node)? + .ok_or(Error::NodeNoLabel(node))?; + + builder.add_vertex(label); + } + + // Don't forget to join the new sub-forest to the original + // forest, at the specified position. + + builder.add_edge(node_id, conversion!(root), root_label)?; + + // We can try to calculate the depth of fragments, if we need + // to lower the memory usage. But in our use cases, we + // usually deal with fragments where each node has at most one + // child, so the depth is supposed to be equal to the length + // in this case. + let mut stack = Vec::with_capacity(fragment.nodes_len()); + + stack.push(root); + + while let Some(top) = stack.pop() { + for child in fragment.children_of(top)? { + builder.add_edge(conversion!(top), conversion!(child), root_label)?; + } + } + + Ok(()) + } + + fn clone_node(&mut self, node_id: usize) -> Result<usize, Self::Error> { + let builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let old_label = builder + .vertex_label(node_id)? + .ok_or(Error::NodeNoLabel(node_id))?; + + if old_label.is_packed() { + return Err(ForestLabelError::ClonePack.into()); + } + + // We are sure old_label is not packed here, so it is safe to + // unwrap. + let new_label = old_label.clone(self).unwrap(); + + if old_label.clone_index().is_some() { + let mut parents = self.parents_of(node_id)?; + + // Make sure it only has one parent, which is the + // representative node. + assert_eq!(parents.len(), 1); + + let rep_node = parents.next().unwrap().node(); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let new_clone = builder.add_vertex(new_label); + + builder.add_edge(rep_node, new_clone, new_label)?; + + // We checked its length is 1, so it is safe to unwrap + // here. + return Ok(new_clone); + } + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + // We are sure old_label is not a clone here, so it is safe to + // unwrap. + let rep_label = old_label.pack().unwrap(); + + // Make a new node + let new_index = builder.add_vertex(rep_label); + + // Re-direct parents to the new node. + // + // This must be done before pointing the new node to the old + // node, otherwise that edge will be redirected as well. + + // Unfortunately I cannot loop through parents and mutate them + // at the same time, so I first collect them into a vector. + let parents: Vec<_> = builder.parents_of(node_id)?.collect(); + + for parent in parents.into_iter() { + builder.redirect(parent.node(), parent.edge(), new_index)?; + } + + // Point the new node to the old node. OLD_LABEL is just a + // place holder. + + builder.add_edge(new_index, node_id, old_label)?; + + // Modify the label of the old node. + + builder.set_label(node_id, new_label)?; + + // Make another clone + + // new_label is cloned, so is guaranteed not to be packed, and + // we are safe to unwrap. + let new_label = new_label.clone(self).unwrap(); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let new_clone = builder.add_vertex(new_label); + + builder.add_edge(new_index, new_clone, new_label)?; + + Ok(new_clone) + } +} + +#[cfg(test)] +mod item_test { + use super::*; + + macro_rules! leaf ( + ($label:expr, $type:tt) =>{ + DefaultForest::<ForestLabel<$type>>::new_leaf($label) + }; + ($label:expr) => { + DefaultForest::<ForestLabel<usize>>::new_leaf($label) + } + ); + + #[test] + fn test_forest_api() -> Result<(), Box<dyn std::error::Error>> { + let forest: DefaultForest<usize> = Default::default(); + + // empty forest + + assert!(forest.is_empty()); + + // leaf forest + + let mut forest = leaf!(0, usize); + + assert_eq!(forest.nodes_len(), 1); + assert_eq!(forest.root(), Some(0)); + + // add some child + + forest.plant(0, leaf!(1), false)?; + + assert_eq!(forest.nodes_len(), 2); + let mut children = forest.children_of(0)?; + assert_eq!(children.next(), Some(1)); + assert_eq!(children.next(), None); + + // add more children + + forest.plant(0, leaf!(2), false)?; + forest.plant(0, leaf!(3), false)?; + forest.plant(0, leaf!(4), false)?; + forest.plant(2, leaf!(5), false)?; + + assert_eq!(forest.nodes_len(), 6); + let mut children = forest.children_of(0)?; + assert_eq!(children.next(), Some(1)); + assert_eq!(children.next(), Some(2)); + assert_eq!(children.next(), Some(3)); + assert_eq!(children.next(), Some(4)); + let mut children = forest.children_of(2)?; + assert_eq!(children.next(), Some(5)); + assert_eq!(children.next(), None); + + let mut test_forest = leaf!(0); + test_forest.plant(0, leaf!(1), false)?; + test_forest.plant(0, leaf!(2), false)?; + test_forest.plant(0, leaf!(3), false)?; + test_forest.plant(2, leaf!(5), false)?; + + assert!(forest.is_prefix(0, &test_forest)?); + + let mut test_forest = leaf!(0); + test_forest.plant(0, leaf!(1), false)?; + test_forest.plant(0, leaf!(2), false)?; + // this child of the root should have label 3 in order to be a + // prefix. + test_forest.plant(0, leaf!(4), false)?; + test_forest.plant(2, leaf!(5), false)?; + + assert!(!forest.is_prefix(0, &test_forest)?); + + let mut test_forest = leaf!(2); + test_forest.plant(0, leaf!(5), false)?; + + assert!(forest.is_prefix(2, &test_forest)?); + + // now test cloning + + // add a duplicate label + forest.plant(3, leaf!(5), false)?; + + let _len = forest.nodes_len(); + + forest.clone_node(5)?; + + assert_eq!(forest.nodes_len(), 7); + + #[cfg(feature = "test-print-viz")] + forest.print_viz("forest.gv")?; + + Ok(()) + } +} diff --git a/chain/src/item/genins.rs b/chain/src/item/genins.rs new file mode 100644 index 0000000..99d9202 --- /dev/null +++ b/chain/src/item/genins.rs @@ -0,0 +1,231 @@ +//! This module implements the generation and insertion of item +//! derivation forests. +//! +//! This is used for the chain-rule machine to conveniently produce +//! item derivations into a forest. This forest can serve as a rough +//! approximation of the parse forests, and can be executed in other +//! semirings later on. + +use super::*; +use crate::{atom::DefaultAtom, default::Error, item::default::DefaultForest, Edge}; +use grammar::{Error as GrammarError, GrammarLabel, GrammarLabelType, TNT}; +use graph::Graph; + +use core::borrow::Borrow; +use std::collections::HashSet as Set; + +/// A helper function to generate a fragment of forest. +/// +/// It simply constructs a root node and then appends +/// successive nodes as successive children of the previous +/// node. Also the starting positions will all be set to the +/// same position. +/// +/// If the input is empty, this returns an empty forest; +/// otherwise the result is not empty. +pub fn generate_fragment( + labels: impl AsRef<[GrammarLabelType]>, + pos: usize, +) -> Result<DefaultForest<ForestLabel<GrammarLabel>>, crate::default::Error> { + let mut labels_iter = labels.as_ref().iter(); + let mut result: DefaultForest<ForestLabel<GrammarLabel>>; + + if let Some(first_label) = labels_iter.next() { + result = DefaultForest::new_leaf(GrammarLabel::new(*first_label, pos)); + + let mut index = 0; + + for label in labels_iter { + result.plant( + index, + DefaultForest::new_leaf(GrammarLabel::new(*label, pos)), + false, + )?; + + // To prevent duplication of labels causing + // panics, we query the index of the new node. + index = result + .query_label(GrammarLabel::new(*label, pos).into()) + // REVIEW: Perhaps a LabelNoNode error? + .ok_or(Error::Invalid)?; + } + } else { + result = Default::default(); + } + + Ok(result) +} + +impl DefaultForest<ForestLabel<GrammarLabel>> { + /// Insert an item derivation forest into a recording forest. + /// + /// We need the help of other things just for finding the correct + /// places to insert these item fragments. + pub fn insert_item( + &mut self, + label: Edge, + fragment: impl Borrow<DefaultForest<ForestLabel<GrammarLabel>>>, + atom_child_iter: impl Iterator<Item = usize>, + atom: &DefaultAtom, + ) -> Result<PaSe, Error> { + /// Convert an error telling us that an index is out of bounds. + /// + /// # Panics + /// + /// The function panics if the error is not of the expected + /// kind. + fn index_out_of_bounds_conversion(ge: GrammarError) -> Error { + match ge { + GrammarError::IndexOutOfBounds(index, bound) => { + Error::IndexOutOfBounds(index, bound) + } + _ => Error::Invalid, + } + } + + let fragment = fragment.borrow(); + + let pase = label.forest_source(); + + let parents: Vec<Parent> = { + let mut result = Vec::new(); + + if let Some(parent) = pase.parent() { + result.push(parent); + } else { + let (root, leaf) = pase.segment().unwrap(); + let mut seen_nodes = Set::new(); + + let mut stack = vec![root]; + + while let Some(top) = stack.pop() { + if seen_nodes.contains(&top) { + continue; + } + + seen_nodes.insert(top); + + for (index, child) in self.children_of(top)?.enumerate() { + if child == leaf { + result.push(Parent::new(top, index)); + } else { + stack.push(child); + } + } + } + } + + result + }; + + for atom_child in atom_child_iter { + // Find reduction information. + let reduction_info = atom + .query_reduction(label.label(), atom_child) + .map_err(index_out_of_bounds_conversion)?; + + let mut stack = parents.clone(); + let mut second_stack = Vec::new(); + + // locate the nodes + for reduction_nt in reduction_info.iter().copied().flatten().rev() { + while let Some(node) = stack.pop() { + // REVIEW: A lot of labels appear here. + // Perhaps I need to refactor something? + if matches!( + self + .vertex_label(node.node())? + .ok_or_else(|| Error::NodeNoLabel(node.node()))? + .label() + .label(), + GrammarLabelType::TNT(TNT::Non(nt)) if nt == *reduction_nt + ) { + for parent in self.parents_of(node.node())? { + debug_assert!(matches!( + self.vertex_label(parent.node()), + Ok(Some(label)) if + label + .label() + .label() + .rule() + .is_some())); + + second_stack.extend(self.parents_of(parent.node())?.filter(|n| { + matches!(self.vertex_label(n.node()), + Ok(Some(label)) + if matches!( + label.label().label().tnt(), + Some(TNT::Non(_)))) + })); + } + } + } + + std::mem::swap(&mut stack, &mut second_stack); + + if stack.is_empty() { + break; + } + } + + if stack.is_empty() { + return Err(Error::CannotPlant); + } + + for parent in stack.into_iter() { + if parent.edge() + 1 >= self.degree(parent.node())? { + self.plant(parent.node(), fragment, false)?; + } else { + let nth_child = self.nth_child(parent.node(), parent.edge() + 1)?; + + if self.is_prefix(nth_child, fragment)? { + // do nothing + continue; + } + + let cloned_node = self.clone_node(nth_child)?; + + self.plant(cloned_node, fragment, false)?; + } + } + } + + let result = if fragment.nodes_len() == 2 { + let root_label = fragment.vertex_label(0)?.unwrap(); + let leaf_label = fragment.vertex_label(1)?.unwrap(); + + // it has been planted, so should be safe. + let node = self.query_label(root_label).unwrap(); + + let edge = { + let mut result = None; + + for (index, child) in self.children_of(node)?.enumerate() { + if matches!(self.vertex_label(child)?, Some(child_label) if child_label == leaf_label) + { + result = Some(index); + break; + } + } + + if let Some(result) = result { + result + } else { + unreachable!("the forest is wrongly planted"); + } + }; + + PaSe::Parent(node, edge) + } else { + let root_label = fragment.vertex_label(0)?.unwrap(); + let leaf_label = fragment.vertex_label(fragment.nodes_len() - 1)?.unwrap(); + + PaSe::Segment( + self.query_label(root_label).unwrap(), + self.query_label(leaf_label).unwrap(), + ) + }; + + Ok(result) + } +} diff --git a/chain/src/item/mod.rs b/chain/src/item/mod.rs new file mode 100644 index 0000000..7fafcc8 --- /dev/null +++ b/chain/src/item/mod.rs @@ -0,0 +1,252 @@ +#![warn(missing_docs)] +//! This module implements the type of items for the chain-rule +//! machine. +//! +//! More specifically, it implements the iten derivation forests for +//! the machine. + +// TODO: Implement an enumeration for Parent or Segment, perhaps +// called PaSe. + +// TODO: Move functions for handling forests, currently contained +// within the method derive to this module, and make them aware of the +// enumeration PaSe. + +// TODO: Implement the Item trait from semirings for the item +// derivation forest, and then we can easily execute items later on. + +use graph::{error::Error as GError, GraphLabel, LabelGraph, Parent, ParentsGraph}; + +use core::borrow::Borrow; + +/// A parent or a segment. +/// +/// A parent is a node with an edge index, which represents a certain +/// edge. +/// +/// A segment represents every edge from the root node to the single +/// terminating node. +#[allow(unused)] +#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum PaSe { + /// An edge from a node, as the n-th child. + Parent(usize, usize), + /// A segment from a root to the single terminating node. + Segment(usize, usize), +} + +impl From<Parent> for PaSe { + fn from(value: Parent) -> Self { + Self::Parent(value.node(), value.edge()) + } +} + +impl Default for PaSe { + fn default() -> Self { + Self::Segment(0, 0) + } +} + +impl core::fmt::Display for PaSe { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Parent(node, edge) => write!(f, "the {edge}-th edge from {node}"), + Self::Segment(root, leaf) => write!(f, "a segment from {root} to {leaf}"), + } + } +} + +impl PaSe { + fn parent(self) -> Option<Parent> { + if let Self::Parent(node, edge) = self { + Some(Parent::new(node, edge)) + } else { + None + } + } + + fn segment(self) -> Option<(usize, usize)> { + if let Self::Segment(root, leaf) = self { + Some((root, leaf)) + } else { + None + } + } +} + +/// An internal type that indicates the status of a node as either a +/// packed, cloned, or plain node. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Default)] +enum ForestLabelType { + Packed, + #[default] + Plain, + Cloned(usize), +} + +impl core::fmt::Display for ForestLabelType { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + Self::Packed => write!(f, "packed"), + Self::Plain => write!(f, "plain"), + Self::Cloned(index) => write!(f, "the {index}-th clone"), + } + } +} + +/// A type that encodes the properties demanded by a forest. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct ForestLabel<T: GraphLabel> { + label: T, + status: ForestLabelType, +} + +impl<T: GraphLabel> core::fmt::Display for ForestLabel<T> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + writeln!(f, "label: {}, status: {}", self.label, self.status) + } +} + +/// The type of erros for converting forest labels. +#[non_exhaustive] +#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] +pub enum ForestLabelError { + /// Try to pack a cloned node. + PackClone, + /// Try to clone a packed node. + ClonePack, +} + +impl core::fmt::Display for ForestLabelError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + Self::PackClone => write!(f, "cannot pack a cloned node"), + Self::ClonePack => write!(f, "cannot clone a packed node"), + } + } +} + +impl std::error::Error for ForestLabelError {} + +impl<T: GraphLabel> ForestLabel<T> { + /// Retrieve the label. + pub fn label(&self) -> T { + self.label + } + + /// Return true if and only if this node is packed. + pub fn is_packed(&self) -> bool { + matches!(self.status, ForestLabelType::Packed) + } + + /// Retrieve the optional clone index. + pub fn clone_index(&self) -> Option<usize> { + match self.status { + ForestLabelType::Cloned(index) => Some(index), + _ => None, + } + } + + /// Try to clone a node. + pub fn clone<F>(self, forest: &F) -> Result<Self, ForestLabelError> + where + F: Forest<T>, + { + if self.is_packed() { + Err(ForestLabelError::ClonePack) + } else { + let clone_index = if let Some(old_index) = self.clone_index() { + let mut new_index: usize = old_index + 1; + + let mut new_label = Self { + status: ForestLabelType::Cloned(new_index), + ..self + }; + + while forest.query_label(new_label).is_some() { + new_index += 1; + new_label = Self { + status: ForestLabelType::Cloned(new_index), + ..self + }; + } + + new_index + } else { + 0 + }; + + Ok(Self { + status: ForestLabelType::Cloned(clone_index), + ..self + }) + } + } + + /// Try to pack a node. + pub fn pack(self) -> Result<Self, ForestLabelError> { + if self.clone_index().is_some() { + Err(ForestLabelError::PackClone) + } else { + let new_label = Self { + status: ForestLabelType::Packed, + ..self + }; + + Ok(new_label) + } + } +} + +impl<T: GraphLabel> From<T> for ForestLabel<T> { + fn from(label: T) -> Self { + let status = ForestLabelType::default(); + Self { label, status } + } +} + +/// The expected behaviours of an item derivation forest. +/// +/// Note that it requires only a subset of the functionalities of +/// labelled graphs. +pub trait Forest<T: GraphLabel>: ParentsGraph + LabelGraph<ForestLabel<T>> { + /// The type of errors for operations on the forest. + type Error: std::error::Error + From<GError> + From<ForestLabelError>; + + /// Return the root of the forest. + /// + /// A forest without a root is regarded as empty. + fn root(&self) -> Option<usize>; + + /// Construct a forest consisting of one leaf node with the given + /// label. + fn new_leaf(label: T) -> Self; + + /// Detect if the fragment is a prefix of the sub-forest rooted at + /// `node_id`. + fn is_prefix<F>(&self, node_id: usize, fragment: F) -> Result<bool, Self::Error> + where + F: Borrow<Self>; + + /// Extend the forest by adjoining another forest at a given node. + fn plant<F>(&mut self, node_id: usize, fragment: F, planted: bool) -> Result<(), Self::Error> + where + F: Borrow<Self>; + + /// Clone a node by making a new node and making all the nodes + /// that previously pointed to the old node now point to the new + /// node, and the new node points to the old node. Return the + /// index of the new node. In addition, if the old node had + /// already been cloned, just return the index of its + /// representative. + /// + /// The labels of the representing node and of the cloned node + /// will be handled automatically. + fn clone_node(&mut self, node_id: usize) -> Result<usize, Self::Error>; +} + +pub mod default; + +pub mod genins; + +pub use genins::generate_fragment; diff --git a/chain/src/lib.rs b/chain/src/lib.rs index a3d420b..916678f 100644 --- a/chain/src/lib.rs +++ b/chain/src/lib.rs @@ -10,9 +10,16 @@ pub mod atom; -use graph::{error::Error as GError, LabelExtGraph, Parent}; +pub mod item; -use forest::Error as ForestError; +// TODO: We need a module for items, that serve the role of the +// forests now. + +use graph::{error::Error as GError, LabelExtGraph}; + +use item::default::Error as ForestError; + +use item::PaSe; /// An edge in the Chain-Rule machine. #[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] @@ -20,14 +27,14 @@ pub struct Edge { /// The position in the atomic languages. label: usize, /// The source of the associated forest edge. - forest_source: Parent, + forest_source: PaSe, /// Whether or not this edge is "accepting". accepting: bool, } impl Edge { /// Construct a new edge. - pub fn new(label: usize, forest_source: Parent, accepting: bool) -> Self { + pub fn new(label: usize, forest_source: PaSe, accepting: bool) -> Self { Self { label, forest_source, @@ -46,7 +53,7 @@ impl Edge { } /// Return the associated forest edge of the edge. - pub fn forest_source(&self) -> Parent { + pub fn forest_source(&self) -> PaSe { self.forest_source } } @@ -56,12 +63,7 @@ impl core::fmt::Display for Edge { let label = self.label(); let forest_source = self.forest_source(); - write!( - f, - "edge label {label} with forest source {} and edge index {}", - forest_source.node(), - forest_source.edge() - ) + write!(f, "edge label {label} with item {}", forest_source) } } @@ -84,7 +86,7 @@ pub enum TwoLayers { /// the source of the associated forest edge of the second layer, /// and the third is a list of edges, which are the common first /// layers. - Two(usize, Parent, bool, Vec<(Edge, usize)>), + Two(usize, PaSe, bool, Vec<(Edge, usize)>), } /// The type of a collapsed iterator. diff --git a/chain/src/plan.org b/chain/src/plan.org index c2a9037..1da33cc 100644 --- a/chain/src/plan.org +++ b/chain/src/plan.org @@ -2,7 +2,7 @@ #+AUTHOR: Durand #+DATE: <2022-11-18 Ven 19:57> -* Things to do [7/10] +* Things to do [6/9] - [X] Implement builders for graphs - [X] Find sets of the first terminals for each non-terminal, in the @@ -79,15 +79,13 @@ lack of this step might be the root cause of the failure of the previous version of this project. + [X] Test atoms -- [X] Implement forests [4/4] - + [X] Design a format of forests. This should be the most crucial - thing to do, in order to have a better understanding of the whole - picture. I need to have a clear understanding of the details of - the binarised shared packed parsed forests, that reflects the - regular expressions in the grammar equations. - + [X] Define a trait with the expected behaviours. - + [X] Implement them as parents-knowing graphs. - + [X] Test +- [ ] Implement semiring. [0/5] + + [ ] Define the trait. + + [ ] Define items and rules for the chain-rule parser, as an + item-based description. + + [ ] Implement the boolean semiring. + + [ ] Implement the natural number semiring. + + [ ] Implement the free semiring. - [-] Implement languages. [4/6] + [X] First define a trait with the expected behaviours. + [X] Then implement them as doubly labelled graphs. @@ -95,19 +93,29 @@ with a position in the forest. This perfectly solves the problem of those "plugs"! + [X] Then implement finding derivatives by use of the chain rule. - + [ ] Handle Forests + + [ ] Handle Semirings + [-] Tests -- [ ] Implement semiring. [0/5] - + [ ] Define the trait. - + [ ] Implement the boolean semiring. - + [ ] Implement the natural number semiring. - + [ ] Implement the free semiring. - + [ ] Compute and record a semiring value when computing - derivatives. - [X] Miscellaneous [1/1] + [X] Implement a function for NFA that checks if a given predicate is satisfied for each edge label. +** Forests + +This was a failed attempt to handle forests. I decided to handle +forests as a special case of semi-rings. This is not only more +systematic, but has also the potential of handling more general +semi-rings later, like the Viterbi semirings, for example. + +- [X] Implement forests [4/4] + + [X] Design a format of forests. This should be the most crucial + thing to do, in order to have a better understanding of the whole + picture. I need to have a clear understanding of the details of + the binarised shared packed parsed forests, that reflects the + regular expressions in the grammar equations. + + [X] Define a trait with the expected behaviours. + + [X] Implement them as parents-knowing graphs. + + [X] Test + * Atomic Languages This describes the behaviours of atomic languages. The atomic diff --git a/forest/src/lib.rs b/forest/src/lib.rs index 02c5fcd..0b1092f 100644 --- a/forest/src/lib.rs +++ b/forest/src/lib.rs @@ -6,10 +6,6 @@ //! parse forest, or SPPF. It packs sub-trees with the same parents //! under the same branch, and lets nodes from different branches //! share the same children, and hence the name. -//! -//! What is special here is that the forests are marked with some -//! out-coming and in-coming plugs. These plugs are used to join -//! different fragments of forests together. use graph::{error::Error as GError, GraphLabel, LabelGraph, ParentsGraph}; diff --git a/grammar/src/label.rs b/grammar/src/label.rs index 05b0b1e..3f89d9a 100644 --- a/grammar/src/label.rs +++ b/grammar/src/label.rs @@ -12,15 +12,50 @@ pub enum GrammarLabelType { Rule(usize), } +// Some convenient conversions + +impl From<usize> for GrammarLabelType { + #[inline] + fn from(r: usize) -> Self { + Self::Rule(r) + } +} + +impl From<TNT> for GrammarLabelType { + #[inline] + fn from(tnt: TNT) -> Self { + Self::TNT(tnt) + } +} + impl GrammarLabelType { /// Return the name of this label with the help of the associated /// grammar. + #[inline] pub fn name(&self, grammar: &Grammar) -> Result<String, Error> { match self { Self::TNT(tnt) => grammar.name_of_tnt(*tnt), Self::Rule(pos) => grammar.rule_pos_to_string(*pos), } } + + /// Return the contained TNT, if any. + #[inline] + pub fn tnt(&self) -> Option<TNT> { + match self { + Self::TNT(tnt) => Some(*tnt), + Self::Rule(_) => None, + } + } + + /// Return the contained rule position, if any. + #[inline] + pub fn rule(&self) -> Option<usize> { + match self { + Self::TNT(_) => None, + Self::Rule(pos) => Some(*pos), + } + } } /// The label to be used in a forest. @@ -32,8 +67,6 @@ pub struct GrammarLabel { start: usize, /// The end in the input that this label correponds to. end: Option<usize>, - /// A node in the forest might be a packed node. - packed_p: bool, } impl core::fmt::Display for GrammarLabel { @@ -48,54 +81,44 @@ impl core::fmt::Display for GrammarLabel { impl GrammarLabel { /// Construct a new label. - pub fn new(label: GrammarLabelType, start: usize) -> Self { + #[inline] + pub fn new(label: impl Into<GrammarLabelType>, start: usize) -> Self { + let label = label.into(); let end = None; - let packed_p = false; - Self { - label, - start, - end, - packed_p, - } + Self { label, start, end } } /// Return the end in the input. + #[inline] pub fn end(&self) -> Option<usize> { self.end } /// Return the start in the input. + #[inline] pub fn start(&self) -> usize { self.start } /// Return the actual label. + #[inline] pub fn label(&self) -> GrammarLabelType { self.label } /// Update the end. + #[inline] pub fn set_end(&mut self, end: usize) { self.end = Some(end); } - /// Check whether the node is a packed node. - pub fn is_packed(&self) -> bool { - self.packed_p - } - - /// Update the packed status. - pub fn set_packed_p(&mut self, packed_p: bool) { - self.packed_p = packed_p; - } - /// Return a string description with the help of the associated /// grammar. pub fn to_string(&self, grammar: &Grammar) -> Result<String, Error> { // First calculate the length of the resulting string. - let mut num = 2 + 7 + 14 + 6; + let mut num = 16 + 6; num += self.label().name(grammar)?.len(); @@ -111,15 +134,7 @@ impl GrammarLabel { let mut s = String::with_capacity(num); - s.push_str("a "); - - if self.is_packed() { - s.push_str("packed "); - } else { - s.push_str("normal "); - } - - s.push_str("node labelled "); + s.push_str("a node labelled "); s.push_str(&self.label().name(grammar)?); diff --git a/grammar/src/lib.rs b/grammar/src/lib.rs index b29fc69..2c17a5f 100644 --- a/grammar/src/lib.rs +++ b/grammar/src/lib.rs @@ -486,6 +486,9 @@ impl Grammar { .contains(&None)) } + // FIXME: We shall use a label to query this information as well, + // probably. + /// Query the expansion information from the position `pos1` to /// the position `pos2` . #[inline] diff --git a/graph/src/labelled/double.rs b/graph/src/labelled/double.rs index 174c8ef..ab2b27c 100644 --- a/graph/src/labelled/double.rs +++ b/graph/src/labelled/double.rs @@ -8,13 +8,10 @@ use super::*; -// We use BTreeMap and BTreeSet here as we need to exclude duplicate -// edge sets, while an ordinary hashmap and hashset do not allow -// hashing. use std::collections::{ - btree_map::{Iter as MapIter, Keys}, - btree_set::Iter, - BTreeMap as Map, BTreeSet as Set, HashMap as HMap, + hash_map::{Iter as MapIter, Keys}, + hash_set::Iter, + HashMap as Map, HashSet as Set, }; #[derive(Debug, Clone)] @@ -48,31 +45,13 @@ impl<T: GraphLabel> DLNode<T> { } } -/// Mapping a set of edges to an index of node. -type EdgeMap<T> = HMap<Set<(T, usize)>, usize>; - /// Double direction Labelled Graph. -/// -/// Each node is supposed to have a unique edge set. Constructing -/// methods such as from the trait -/// [`LabelExtGraph`][super::LabelExtGraph] already handles the -/// elimination of duplication. #[derive(Debug, Clone)] pub struct DLGraph<T: GraphLabel> { nodes: Vec<DLNode<T>>, - edges_table: EdgeMap<T>, } impl<T: GraphLabel> DLGraph<T> { - #[inline] - /// Return an empty graph. - pub fn new() -> Self { - Self { - nodes: Vec::new(), - edges_table: HMap::default(), - } - } - /// Return a builder. #[inline] pub fn builder(self) -> DLGBuilder<T> { @@ -89,7 +68,9 @@ impl<T: GraphLabel> DLGraph<T> { impl<T: GraphLabel> Default for DLGraph<T> { #[inline] fn default() -> Self { - Self::new() + let nodes = Vec::new(); + + Self { nodes } } } @@ -160,11 +141,9 @@ impl<T: GraphLabel> Graph for DLGraph<T> { let mut post = String::new(); - use std::fmt::Write as FWrite; - for (source, target) in self.edges() { for label in self.edge_label(source, target).unwrap() { - let _ = writeln!(post, " {source} -> {target} [label = \"{label}\"]"); + post.push_str(&format!(" {source} -> {target} [label = \"{label}\"]\n")); } } @@ -191,7 +170,6 @@ impl<T: GraphLabel> Graph for DLGraph<T> { let new_graph = builder.build(); self.nodes = new_graph.nodes; - self.edges_table = new_graph.edges_table; } } @@ -336,10 +314,10 @@ impl<T: GraphLabel> LabelGraph<T> for DLGraph<T> { type LabelIter<'a> = LabelIter<'a,T> where T: 'a; - type EdgeLabelIter<'a> = EdgeLabelIter<'a,T> + type EdgeLabelIter<'a> = EdgeLabelIter<'a, T> where Self: 'a, - T: 'a + Copy + Clone; + T: 'a; fn edge_label(&self, source: usize, target: usize) -> Result<Self::EdgeLabelIter<'_>, Error> { if self.has_edge(source, target)? { @@ -438,19 +416,12 @@ impl<T: GraphLabel> LabelExtGraph<T> for DLGraph<T> { } } - match self.edges_table.get(&edges_set) { - Some(old_index) => Ok(*old_index), - None => { - let new_node = DLNode::new(by_target, by_label, flat); - let new_index = self.nodes_len(); - - self.edges_table.insert(edges_set, new_index); + let new_node = DLNode::new(by_target, by_label, flat); + let new_index = self.nodes_len(); - self.nodes.push(new_node); + self.nodes.push(new_node); - Ok(new_index) - } - } + Ok(new_index) } } @@ -489,7 +460,6 @@ impl<T: GraphLabel> Builder for DLGBuilder<T> { fn with_capacity(size: usize) -> Self { Self(DLGraph { nodes: Vec::with_capacity(size), - edges_table: Default::default(), }) } @@ -526,8 +496,6 @@ impl<T: GraphLabel> Builder for DLGBuilder<T> { ); if new_edge_p { - let old_children_set: Set<(T, usize)> = source_node.flat.iter().copied().collect(); - source_node .by_target .entry(target) @@ -541,16 +509,6 @@ impl<T: GraphLabel> Builder for DLGBuilder<T> { .insert(target); source_node.flat.push((label, target)); - - let new_children_set: Set<(T, usize)> = source_node.flat.iter().copied().collect(); - - // When source_node is in use, we cannot borrow self - // mutably again, so we move the following two statements - // to after all uses of source_node. - - self.edges_table.remove(&old_children_set); - - self.edges_table.insert(new_children_set, source); } Ok(()) @@ -586,8 +544,6 @@ impl<T: GraphLabel> Builder for DLGBuilder<T> { } if !to_remove.is_empty() { - let old_children_set: Set<(T, usize)> = source_node.flat.iter().copied().collect(); - for label in to_remove.iter().copied() { // This must be "Some", as the outer "if" checks source_node @@ -605,41 +561,19 @@ impl<T: GraphLabel> Builder for DLGBuilder<T> { }); } - // If after removal no labels remain for the target, - // we remove the edge entirely, to avoid false empty - // edges. - - if let Some(set) = source_node.by_target.get(&target) { - if set.is_empty() { - source_node.by_target.remove(&target); - } + if matches!(source_node.by_target.get(&target), + Some(set) if set.is_empty()) + { + source_node.by_target.remove(&target); } for label in to_remove.iter() { - if let Some(set) = source_node.by_label.get(label) { - if set.is_empty() { - source_node.by_label.remove(label); - } + if matches!(source_node.by_label.get(label), + Some(set) if set.is_empty()) + { + source_node.by_label.remove(label); } } - - // if source_node.flat.is_empty() { - // source_node.by_target.remove(&target); - - // for label in to_remove.iter() { - // source_node.by_label.remove(label); - // } - // } - - let new_children_set: Set<(T, usize)> = source_node.flat.iter().copied().collect(); - - // When source_node is in use, we cannot borrow self - // mutably again, so we move the following two - // statements to after all uses of source_node. - - self.edges_table.remove(&old_children_set); - - self.edges_table.insert(new_children_set, source); } } @@ -703,13 +637,13 @@ mod label_test { assert_eq!(graph.extend([(2, 1), (3, 2), (2, 3)].iter().copied())?, 5); // testing adding a duplicated edge set - assert_eq!(graph.extend([(2, 1), (2, 3), (3, 2)].iter().copied())?, 5); - assert_eq!(graph.extend([(3, 2), (3, 0)].iter().copied())?, 3); + assert_eq!(graph.extend([(2, 1), (2, 3), (3, 2)].iter().copied())?, 6); + assert_eq!(graph.extend([(3, 2), (3, 0)].iter().copied())?, 7); let graph = graph; // ensuring the correct length - assert_eq!(graph.nodes_len(), 6); + assert_eq!(graph.nodes_len(), 8); // testing children_of assert_eq!(graph.children_of(5)?.collect::<Set<_>>(), set!(1, 3, 2)); @@ -723,8 +657,8 @@ mod label_test { // testing edge_label assert_eq!(graph.edge_label(5, 2)?.collect::<Set<_>>(), set!(3)); assert!(matches!( - graph.edge_label(6, 2), - Err(Error::IndexOutOfBounds(6, 6)) + graph.edge_label(8, 2), + Err(Error::IndexOutOfBounds(8, 8)) )); // testing degree @@ -738,8 +672,8 @@ mod label_test { assert!(graph.has_edge(3, 2)?); assert!(!graph.has_edge(3, 1)?); assert!(matches!( - graph.has_edge(3, 6), - Err(Error::IndexOutOfBounds(6, 6)) + graph.has_edge(3, 8), + Err(Error::IndexOutOfBounds(8, 8)) )); // testing labels_of @@ -754,8 +688,8 @@ mod label_test { assert_eq!(label_map, compare_map); assert!(matches!( - graph.labels_of(6), - Err(Error::IndexOutOfBounds(6, 6)) + graph.labels_of(8), + Err(Error::IndexOutOfBounds(8, 8)) )); Ok(()) diff --git a/graph/src/labelled/mod.rs b/graph/src/labelled/mod.rs index 2bbc7ec..fb7b405 100644 --- a/graph/src/labelled/mod.rs +++ b/graph/src/labelled/mod.rs @@ -17,5 +17,3 @@ pub use double::{DLGBuilder, DLGraph}; pub mod binary; pub use binary::{PLGBuilderMut, PLGraph}; - -// pub use binary::BLGraph; diff --git a/nfa/src/default/nfa.rs b/nfa/src/default/nfa.rs index 8d657d5..6b1e56f 100644 --- a/nfa/src/default/nfa.rs +++ b/nfa/src/default/nfa.rs @@ -147,9 +147,11 @@ impl<T: GraphLabel + Display> Nfa<T> for DefaultNFA<T> { let mut builder: DLGBuilder<LabelType<T>> = Builder::with_capacity(nfa_len); - for _ in 0..nfa_len { - builder.add_vertex(); - } + builder.add_vertices(nfa_len); + + // for _ in 0..nfa_len { + // builder.add_vertex(); + // } let default = LabelType::new(DOption(default), total_regexps_len, false); diff --git a/nfa/src/default/regex.rs b/nfa/src/default/regex.rs index 1e3e87b..9e1ed5c 100644 --- a/nfa/src/default/regex.rs +++ b/nfa/src/default/regex.rs @@ -879,9 +879,9 @@ mod test_des_rec { use crate::desrec::DesRec; - #[allow(dead_code, unused)] + #[allow(dead_code)] fn test_scanner<'a, 'b, T>( - parser: &'b DefaultRegParser<T>, + _parser: &'b DefaultRegParser<T>, input: &'a str, ) -> Result<Option<(usize, RegexType<char>, ParseDirection)>, ParseError> where diff --git a/semiring/src/lib.rs b/semiring/src/lib.rs index 7d12d9a..9d096db 100644 --- a/semiring/src/lib.rs +++ b/semiring/src/lib.rs @@ -1,14 +1,43 @@ -pub fn add(left: usize, right: usize) -> usize { - left + right +#![warn(missing_docs)] +//! This crate defines the expected behaviours of semirings and +//! semiring parsers. Some standard semirings are also implemented, +//! such as the boolean, the counting, and the derivation forest +//! semirings. Moreover, this crate also defines the behaviours of a +//! parser, with the formalism of an "item-based description". See +//! the paper [Parsing +//! inside-out](https://arxiv.org/abs/cmp-lg/9805007) by Joshua +//! Goodman. To be more precise, it defines the behaviours of an +//! "item interpreter" that executes "items" in a chosen semiring. +//! Finally, it provides a default implementation of an item +//! interpreter. +//! +//! The crate *chain* will implement a parser using the item-based +//! description. Specifically, it will produce items as it parses the +//! input string. Then we can execute these items by any interpreter. + +/// A semiring is equipped with two operations: the addition and the +/// multiplication. It also has additive and multiplicative +/// identities. Also the two operations are supposed to be +/// associative, and the multiplication is supposed to be distributive +/// over the addition. But the addition is not required to be +/// commutative, and is usually not indeed commutative. +pub trait Semiring { + /// The additive identity. + fn zero() -> Self; + + /// The multiplicative identity. + fn one() -> Self; + + /// The addition. + fn add(&mut self, other: impl AsRef<Self>); + + /// The multiplication. + fn mul(&mut self, other: impl AsRef<Self>); } +// TODO: Implement boolean semiring +// TODO: Implement counting semiring +// TODO: Implement derivation forest semiring + #[cfg(test)] -mod tests { - use super::*; - - #[test] - fn it_works() { - let result = add(2, 2); - assert_eq!(result, 4); - } -} +mod tests {} |