diff options
author | JSDurand <mmemmew@gmail.com> | 2023-01-28 10:17:24 +0800 |
---|---|---|
committer | JSDurand <mmemmew@gmail.com> | 2023-01-28 10:22:57 +0800 |
commit | f28155105134b90fd86049c65478d307e0d8dbbc (patch) | |
tree | 72b3b4872d5dba89413eca70bcaae9e421def7ee /chain | |
parent | e8ea01319b3a9032a3f4f69f65e9ca96562b87b9 (diff) |
a prototype of an item derivation forest
It seems to be complete now, but still awaits more tests to see where
the errors are, which should be plenty, haha.
Diffstat (limited to 'chain')
-rw-r--r-- | chain/Cargo.toml | 1 | ||||
-rw-r--r-- | chain/src/default.rs | 128 | ||||
-rw-r--r-- | chain/src/item/default.rs | 547 | ||||
-rw-r--r-- | chain/src/item/genins.rs | 231 | ||||
-rw-r--r-- | chain/src/item/mod.rs | 252 | ||||
-rw-r--r-- | chain/src/lib.rs | 26 | ||||
-rw-r--r-- | chain/src/plan.org | 44 |
7 files changed, 1180 insertions, 49 deletions
diff --git a/chain/Cargo.toml b/chain/Cargo.toml index c55586d..496f5dd 100644 --- a/chain/Cargo.toml +++ b/chain/Cargo.toml @@ -13,7 +13,6 @@ test-print-viz = [] nfa = { path = "../nfa" } graph = { path = "../graph" } grammar = { path = "../grammar" } -forest = { path = "../forest" } [dev-dependencies] grammar = { path = "../grammar", features = ["test-helper"] } diff --git a/chain/src/default.rs b/chain/src/default.rs index 22befff..77a64ca 100644 --- a/chain/src/default.rs +++ b/chain/src/default.rs @@ -7,8 +7,10 @@ use super::*; use crate::atom::{Atom, DefaultAtom}; +use crate::item::{ + default::DefaultForest, generate_fragment, Forest, ForestLabel, ForestLabelError, +}; use core::fmt::Display; -use forest::{default::DefaultForest, Forest}; use grammar::{Error as GrammarError, GrammarLabel, GrammarLabelType, TNT}; use graph::{labelled::DLGBuilder, Builder, DLGraph, Graph, LabelExtGraph, LabelGraph}; @@ -29,6 +31,10 @@ pub enum Error { NodeNoLabel(usize), /// Reserving memory fails. CannotReserve(TryReserveError), + /// Cannot create label for cloning nodes. + CannotClone(ForestLabelError), + /// Cannot find a suitable node to plant the new forest fragment. + CannotPlant, /// An invalid situation happens. Invalid, } @@ -44,6 +50,8 @@ impl Display for Error { ), Self::NodeNoLabel(n) => write!(f, "node {n} has no labels while it should have one"), Self::CannotReserve(e) => write!(f, "cannot reserve memory: {e}"), + Self::CannotClone(fe) => write!(f, "cannot clone due to {fe}"), + Self::CannotPlant => write!(f, "cannot plant a new forest fragment"), Self::Invalid => write!(f, "invalid"), } } @@ -69,6 +77,7 @@ impl From<ForestError> for Error { ForestError::DuplicatedNode(n) => Error::DuplicateNode(n), ForestError::InvalidGraphError(ge) => ge.into(), ForestError::NodeNoLabel(n) => Error::NodeNoLabel(n), + ForestError::LabelConversion(ce) => Error::CannotClone(ce), } } } @@ -93,7 +102,7 @@ impl Default for DerIterIndex { } /// A complex type used for storing values of edges with two layers. -type SecondTypeValue = (Parent, bool, Vec<(Edge, usize)>); +type SecondTypeValue = (PaSe, bool, Vec<(Edge, usize)>); /// An iterator of TwoLayers. #[derive(Debug, Default)] @@ -120,7 +129,7 @@ impl DerIter { fn add_second_layer( &mut self, label: usize, - forest_source: Parent, + forest_source: PaSe, accepting: bool, edges: Vec<(Edge, usize)>, ) { @@ -186,7 +195,7 @@ pub struct DefaultChain { atom: DefaultAtom, current: usize, history: Vec<usize>, - forest: DefaultForest<GrammarLabel>, + forest: DefaultForest<ForestLabel<GrammarLabel>>, accepting_vec: Vec<bool>, } @@ -205,7 +214,7 @@ impl DefaultChain { /// Return a reference to the associated forest. #[inline] - pub fn forest(&self) -> &DefaultForest<GrammarLabel> { + pub fn forest(&self) -> &DefaultForest<ForestLabel<GrammarLabel>> { &self.forest } @@ -316,6 +325,7 @@ impl LabelExtGraph<Edge> for DefaultChain { let new = self.graph.extend(edges)?; let accepting_len = self.accepting_vec.len(); + // Update the acceptace of the new node, if any. if self.accepting_vec.get(new).is_none() { // assert it can only grow by one node at a time. #[cfg(debug_assertions)] @@ -377,14 +387,17 @@ impl Chain for DefaultChain { let empty_state = atom.empty(); + // The zero-th non-terminal is assumed to be the starting + // non-terminal, by convention. let initial_nullable = atom .is_nullable(0) .map_err(|_| Error::IndexOutOfBounds(0, atom.non_num()))?; + // NOTE: Is it really correct to use `Parent::new(0, 0)` here? builder.add_edge( first, root, - Edge::new(empty_state, Parent::new(0, 0), initial_nullable), + Edge::new(empty_state, Default::default(), initial_nullable), )?; let graph = builder.build(); @@ -431,7 +444,7 @@ impl Chain for DefaultChain { type DerIter = DerIter; - fn derive(&mut self, t: usize, _pos: usize) -> Result<Self::DerIter, Self::Error> { + fn derive(&mut self, t: usize, pos: usize) -> Result<Self::DerIter, Self::Error> { use TNT::*; /// Convert an error telling us that an index is out of bounds. @@ -445,7 +458,7 @@ impl Chain for DefaultChain { GrammarError::IndexOutOfBounds(index, bound) => { Error::IndexOutOfBounds(index, bound) } - _ => panic!("wrong error kind"), + _ => Error::Invalid, } } @@ -459,11 +472,11 @@ impl Chain for DefaultChain { /// /// The generated edges will be pushed to `output` directly, /// to save some allocations. - // TODO: Handle forests as well. fn generate_edges( chain: &DefaultChain, child_iter: impl Iterator<Item = usize> + ExactSizeIterator + Clone, atom_child_iter: impl Iterator<Item = usize> + Clone, + pase: PaSe, mut output: impl AsMut<Vec<(Edge, usize)>>, ) -> Result<(), Error> { // First check the values from iterators are all valid. @@ -515,13 +528,11 @@ impl Chain for DefaultChain { // now push into output - let parent = Parent::new(0, 0); - for atom_child in atom_child_iter { let atom_child_accepting = chain.atom.is_accepting(atom_child).unwrap(); let atom_child_empty_node = chain.atom.is_empty_node(atom_child).unwrap(); - let edge = Edge::new(atom_child, parent, atom_child_accepting); + let edge = Edge::new(atom_child, pase, atom_child_accepting); if !atom_child_empty_node { output.extend(child_iter.clone().map(|child| (edge, child))); @@ -549,16 +560,41 @@ impl Chain for DefaultChain { continue; } + let atom_moved = atom_label.get_moved(); + match *atom_label.get_value() { Some(Ter(ter)) if ter == t => { + // prepare forest fragment + + let fragment = + generate_fragment([atom_moved.into(), Ter(ter).into()], pos)?; + + let new_pase = self.forest.insert_item( + *label, + fragment, + atom_child_iter.clone(), + &self.atom, + )?; + generate_edges( self, child_iter.clone(), atom_child_iter.clone(), + new_pase, &mut der_iter.singles, )?; } Some(Non(non)) => { + let first_fragment = + generate_fragment([atom_moved.into(), Non(non).into()], pos)?; + + let first_segment_pase = self.forest.insert_item( + *label, + first_fragment, + atom_child_iter.clone(), + &self.atom, + )?; + let virtual_node = self .atom .atom(non, t) @@ -570,12 +606,66 @@ impl Chain for DefaultChain { .is_accepting(virtual_node) .map_err(index_out_of_bounds_conversion)?; + let virtual_fragment = { + // `non` is valid, as checked above + let non_start = self.atom.nth_accumulator(non).unwrap() * 2; + + let mut result = DefaultForest::default(); + + for (label, child_iter) in self.atom.labels_of(non_start)? { + if matches!(*label.get_value(), + Some(Ter(ter)) if ter == t) + { + for child in child_iter { + let mut line: Vec<_> = self + .atom + .query_expansion(non_start, child) + .map_err(index_out_of_bounds_conversion)? + .iter() + .copied() + .flatten() + .map(|(nt, rule)| [(*rule).into(), Non(*nt).into()]) + .collect(); + + line.push([label.get_moved().into(), Ter(t).into()]); + + let line: Vec<GrammarLabelType> = + line.into_iter().flatten().collect(); + + if result.is_empty() { + result = generate_fragment(line, pos)?; + } else { + result.plant( + 0, + generate_fragment(line, pos)?, + false, + )?; + } + } + } + } + + result + }; + + // NOTE: We only need the PaSe from the + // first segment, so we pass an empty + // iterator, in which case the passed + // label is only used for the PaSe. + let virtual_pase = self.forest.insert_item( + Edge::new(0, first_segment_pase, true), + virtual_fragment, + std::iter::empty(), + &self.atom, + )?; + let mut new_edges = Vec::new(); generate_edges( self, child_iter.clone(), atom_child_iter.clone(), + first_segment_pase, &mut new_edges, )?; @@ -583,12 +673,10 @@ impl Chain for DefaultChain { der_iter.singles.extend(new_edges.clone()); } - let parent = Parent::new(0, 0); - if !self.atom.is_empty_node(virtual_node).unwrap() { der_iter.add_second_layer( virtual_node, - parent, + virtual_pase, accepting, new_edges, ); @@ -601,7 +689,10 @@ impl Chain for DefaultChain { // `generate_edges` if self.atom.is_empty_node(atom_child).unwrap() { der_iter.singles.extend(child_iter.clone().map(|child| { - (Edge::new(virtual_node, parent, accepting), child) + ( + Edge::new(virtual_node, virtual_pase, accepting), + child, + ) })); } } @@ -610,7 +701,8 @@ impl Chain for DefaultChain { // this has been checked in // `generate_edges` if self.atom.is_empty_node(atom_child).unwrap() { - // flat flat map, hmm... + // REVIEW: flat flat map, + // hmm... der_iter.singles.extend(child_iter.clone().flat_map( |child| { self.graph.labels_of(child).unwrap().flat_map( @@ -646,7 +738,7 @@ mod test_der_iter { fn test() -> Result<(), Box<dyn std::error::Error>> { let mut der_iter = DerIter::default(); - let parent = Parent::new(0, 0); + let parent = Default::default(); der_iter.singles.push((Edge::new(0, parent, true), 0)); diff --git a/chain/src/item/default.rs b/chain/src/item/default.rs new file mode 100644 index 0000000..b71f940 --- /dev/null +++ b/chain/src/item/default.rs @@ -0,0 +1,547 @@ +//! This module provides a default implementation of iten derivation +//! forest. + +use super::*; +use graph::{ + builder::BuilderMut, labelled::binary::PLGBuilderMut, Graph, LabelGraph, PLGraph, RedirectGraph, +}; + +use core::fmt::Display; + +/// The type of errors for forest operations. +#[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd)] +pub enum Error { + /// An index is out of bounds. + /// + /// The first component is the index that is out of bounds, and + /// the second component is the current length of nodes. + IndexOutOfBounds(usize, usize), + /// The forest does not permit duplicate nodes but encounters a + /// repeated node. + DuplicatedNode(usize), + /// A node has no labels while it is required to have one. + NodeNoLabel(usize), + /// Encounter an invalid error in converting from an error of + /// graphs. + InvalidGraphError(GError), + /// Encounter an error when converting forest labels. + LabelConversion(ForestLabelError), +} + +impl Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Error::IndexOutOfBounds(index, bound) => { + write!(f, "index {index} is out of bound {bound}") + } + Error::DuplicatedNode(n) => write!(f, "node {n} is duplicated"), + Error::InvalidGraphError(ge) => write!(f, "invalid error: {ge}"), + Error::NodeNoLabel(n) => write!(f, "node {n} has no labels, but it should have one"), + Error::LabelConversion(le) => write!(f, "fail to convert labels: {le}"), + } + } +} + +impl std::error::Error for Error {} + +impl From<GError> for Error { + fn from(ge: GError) -> Self { + match ge { + GError::IndexOutOfBounds(index, bound) => Self::IndexOutOfBounds(index, bound), + GError::DuplicatedNode(n) => Self::DuplicatedNode(n), + _ => Self::InvalidGraphError(ge), + } + } +} + +impl From<ForestLabelError> for Error { + fn from(le: ForestLabelError) -> Self { + Self::LabelConversion(le) + } +} + +/// A default implementation of forest. +#[derive(Debug, Clone)] +pub struct DefaultForest<T: GraphLabel> { + graph: PLGraph<T>, + root: Option<usize>, +} + +impl<T: GraphLabel> Default for DefaultForest<T> { + fn default() -> Self { + let graph = Default::default(); + let root = None; + + Self { graph, root } + } +} + +impl<T: GraphLabel> AsRef<DefaultForest<T>> for DefaultForest<T> { + fn as_ref(&self) -> &DefaultForest<T> { + self + } +} + +impl<T: GraphLabel> Graph for DefaultForest<T> { + type Iter<'a> = <PLGraph<T> as Graph>::Iter<'a> + where + Self: 'a; + + #[inline] + fn is_empty(&self) -> bool { + self.graph.is_empty() + } + + #[inline] + fn nodes_len(&self) -> usize { + self.graph.nodes_len() + } + + #[inline] + fn edges_len(&self) -> Option<usize> { + self.graph.edges_len() + } + + #[inline] + fn children_of(&self, node_id: usize) -> Result<Self::Iter<'_>, GError> { + self.graph.children_of(node_id) + } + + #[inline] + fn degree(&self, node_id: usize) -> Result<usize, GError> { + self.graph.degree(node_id) + } + + #[inline] + fn is_empty_node(&self, node_id: usize) -> Result<bool, GError> { + self.graph.is_empty_node(node_id) + } + + #[inline] + fn has_edge(&self, source: usize, target: usize) -> Result<bool, GError> { + self.graph.has_edge(source, target) + } + + fn replace_by_builder(&mut self, _builder: impl graph::Builder<Result = Self>) { + unimplemented!() + } +} + +impl<T: GraphLabel> ParentsGraph for DefaultForest<T> { + type Iter<'a>= <PLGraph<T> as ParentsGraph>::Iter<'a> + where + Self:'a; + + #[inline] + fn parents_of(&self, node_id: usize) -> Result<<Self as ParentsGraph>::Iter<'_>, GError> { + self.graph.parents_of(node_id) + } + + #[inline] + fn nth_child(&self, node_id: usize, n: usize) -> Result<usize, GError> { + self.graph.nth_child(node_id, n) + } + + #[inline] + fn edge_to_parent( + &self, + source: usize, + target: usize, + ) -> Result<Option<graph::Parent>, GError> { + self.graph.edge_to_parent(source, target) + } +} + +impl<T: GraphLabel> LabelGraph<T> for DefaultForest<T> { + type Iter<'a> = std::iter::Empty<usize> + where + Self: 'a; + + type LabelIter<'a> = std::iter::Empty<(&'a T, <Self as LabelGraph<T>>::Iter<'a>)> + where + Self: 'a, + T: 'a; + + type EdgeLabelIter<'a> = std::iter::Empty<T> + where + Self: 'a, + T: 'a; + + #[inline] + fn query_label(&self, label: T) -> Option<usize> { + self.graph.query_label(label) + } + + #[inline] + fn vertex_label(&self, node_id: usize) -> Result<Option<T>, GError> { + self.graph.vertex_label(node_id) + } + + fn edge_label( + &self, + _source: usize, + _target: usize, + ) -> Result<Self::EdgeLabelIter<'_>, GError> { + unimplemented!("edges have no labels") + } + + fn find_children_with_label( + &self, + _node_id: usize, + _label: &T, + ) -> Result<<Self as LabelGraph<T>>::Iter<'_>, GError> { + unimplemented!("edges have no labels") + } + + fn labels_of(&self, _node_id: usize) -> Result<Self::LabelIter<'_>, GError> { + unimplemented!("edges have no labels") + } + + fn has_edge_label(&self, _node_id: usize, _label: &T, _target: usize) -> Result<bool, GError> { + unimplemented!("edges have no labels") + } +} + +impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> { + type Error = Error; + + fn root(&self) -> Option<usize> { + self.root + } + + fn new_leaf(label: T) -> Self { + let mut graph = PLGraph::default(); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut graph); + + let root = Some(builder.add_vertex(ForestLabel::from(label))); + + Self { graph, root } + } + + fn is_prefix<F>(&self, node_id: usize, fragment: F) -> Result<bool, Self::Error> + where + F: Borrow<Self>, + { + if !self.has_node(node_id) { + return Err(Error::IndexOutOfBounds(node_id, self.nodes_len())); + } + + // We do a depth-first traversal to determine if every node + // encountered has the same set of children (labels taken into + // the consideration). + + let fragment = fragment.borrow(); + + let mut frag_stack = Vec::with_capacity(fragment.nodes_len()); + + let mut self_stack = Vec::with_capacity(fragment.nodes_len()); + + let frag_root = if let Some(root) = fragment.root() { + root + } else { + // an empty forest is a prefix of any forest. + return Ok(true); + }; + + frag_stack.push(frag_root); + self_stack.push(node_id); + + // defer popping + while let (Some(frag_top), Some(self_top)) = + (frag_stack.last().copied(), self_stack.last().copied()) + { + frag_stack.pop(); + self_stack.pop(); + + if fragment.vertex_label(frag_top)? != self.vertex_label(self_top)? { + // not a prefix + return Ok(false); + } + + let mut self_children = self.children_of(self_top)?; + + for child in fragment.children_of(frag_top)? { + if let Some(self_child) = self_children.next() { + frag_stack.push(child); + self_stack.push(self_child); + } else { + // too few children + return Ok(false); + } + } + } + + // Check both stacks are empty at the end. + Ok(frag_stack.is_empty() && self_stack.is_empty()) + } + + fn plant<F>(&mut self, node_id: usize, fragment: F, planted: bool) -> Result<(), Self::Error> + where + F: Borrow<Self>, + { + // Convert self to a builder_mut, and traverse fragment in a + // depth-first manner and adjoin corresponding nodes along the + // way. + + if !self.has_node(node_id) { + return Err(Error::IndexOutOfBounds(node_id, self.nodes_len())); + } + + let fragment = fragment.borrow(); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let root = if let Some(root) = fragment.root() { + root + } else { + // Nothing to do to plant an empty forest. + return Ok(()); + }; + + // Just a dummy label for use in adding edges. + // + // REVIEW: I probably should refactor the API for builder_mut. + let root_label = fragment + .vertex_label(root)? + .ok_or(Error::NodeNoLabel(root))?; + + let nodes_len = fragment.nodes_len(); + + /// If the fragment root has a duplicate label, the forest + /// will not grow, so we use the label to find the adjoined + /// node index. + /// + /// The nodes hava already been added to the forest, so it is + /// safe to call unwrap. + macro_rules! conversion ( + ($node:expr) => { + { + builder + .query_label( + fragment + .vertex_label($node)? + .ok_or(Error::NodeNoLabel($node))? + ).unwrap() + } + } + ); + + // If the fragment has been planted before, we just add an + // edge. + + if planted { + builder.add_edge(node_id, conversion!(root), root_label)?; + + return Ok(()); + } + + // First adjoin those nodes and join the edges later. + + for node in 0..nodes_len { + let label = fragment + .vertex_label(node)? + .ok_or(Error::NodeNoLabel(node))?; + + builder.add_vertex(label); + } + + // Don't forget to join the new sub-forest to the original + // forest, at the specified position. + + builder.add_edge(node_id, conversion!(root), root_label)?; + + // We can try to calculate the depth of fragments, if we need + // to lower the memory usage. But in our use cases, we + // usually deal with fragments where each node has at most one + // child, so the depth is supposed to be equal to the length + // in this case. + let mut stack = Vec::with_capacity(fragment.nodes_len()); + + stack.push(root); + + while let Some(top) = stack.pop() { + for child in fragment.children_of(top)? { + builder.add_edge(conversion!(top), conversion!(child), root_label)?; + } + } + + Ok(()) + } + + fn clone_node(&mut self, node_id: usize) -> Result<usize, Self::Error> { + let builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let old_label = builder + .vertex_label(node_id)? + .ok_or(Error::NodeNoLabel(node_id))?; + + if old_label.is_packed() { + return Err(ForestLabelError::ClonePack.into()); + } + + // We are sure old_label is not packed here, so it is safe to + // unwrap. + let new_label = old_label.clone(self).unwrap(); + + if old_label.clone_index().is_some() { + let mut parents = self.parents_of(node_id)?; + + // Make sure it only has one parent, which is the + // representative node. + assert_eq!(parents.len(), 1); + + let rep_node = parents.next().unwrap().node(); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let new_clone = builder.add_vertex(new_label); + + builder.add_edge(rep_node, new_clone, new_label)?; + + // We checked its length is 1, so it is safe to unwrap + // here. + return Ok(new_clone); + } + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + // We are sure old_label is not a clone here, so it is safe to + // unwrap. + let rep_label = old_label.pack().unwrap(); + + // Make a new node + let new_index = builder.add_vertex(rep_label); + + // Re-direct parents to the new node. + // + // This must be done before pointing the new node to the old + // node, otherwise that edge will be redirected as well. + + // Unfortunately I cannot loop through parents and mutate them + // at the same time, so I first collect them into a vector. + let parents: Vec<_> = builder.parents_of(node_id)?.collect(); + + for parent in parents.into_iter() { + builder.redirect(parent.node(), parent.edge(), new_index)?; + } + + // Point the new node to the old node. OLD_LABEL is just a + // place holder. + + builder.add_edge(new_index, node_id, old_label)?; + + // Modify the label of the old node. + + builder.set_label(node_id, new_label)?; + + // Make another clone + + // new_label is cloned, so is guaranteed not to be packed, and + // we are safe to unwrap. + let new_label = new_label.clone(self).unwrap(); + + let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); + + let new_clone = builder.add_vertex(new_label); + + builder.add_edge(new_index, new_clone, new_label)?; + + Ok(new_clone) + } +} + +#[cfg(test)] +mod item_test { + use super::*; + + macro_rules! leaf ( + ($label:expr, $type:tt) =>{ + DefaultForest::<ForestLabel<$type>>::new_leaf($label) + }; + ($label:expr) => { + DefaultForest::<ForestLabel<usize>>::new_leaf($label) + } + ); + + #[test] + fn test_forest_api() -> Result<(), Box<dyn std::error::Error>> { + let forest: DefaultForest<usize> = Default::default(); + + // empty forest + + assert!(forest.is_empty()); + + // leaf forest + + let mut forest = leaf!(0, usize); + + assert_eq!(forest.nodes_len(), 1); + assert_eq!(forest.root(), Some(0)); + + // add some child + + forest.plant(0, leaf!(1), false)?; + + assert_eq!(forest.nodes_len(), 2); + let mut children = forest.children_of(0)?; + assert_eq!(children.next(), Some(1)); + assert_eq!(children.next(), None); + + // add more children + + forest.plant(0, leaf!(2), false)?; + forest.plant(0, leaf!(3), false)?; + forest.plant(0, leaf!(4), false)?; + forest.plant(2, leaf!(5), false)?; + + assert_eq!(forest.nodes_len(), 6); + let mut children = forest.children_of(0)?; + assert_eq!(children.next(), Some(1)); + assert_eq!(children.next(), Some(2)); + assert_eq!(children.next(), Some(3)); + assert_eq!(children.next(), Some(4)); + let mut children = forest.children_of(2)?; + assert_eq!(children.next(), Some(5)); + assert_eq!(children.next(), None); + + let mut test_forest = leaf!(0); + test_forest.plant(0, leaf!(1), false)?; + test_forest.plant(0, leaf!(2), false)?; + test_forest.plant(0, leaf!(3), false)?; + test_forest.plant(2, leaf!(5), false)?; + + assert!(forest.is_prefix(0, &test_forest)?); + + let mut test_forest = leaf!(0); + test_forest.plant(0, leaf!(1), false)?; + test_forest.plant(0, leaf!(2), false)?; + // this child of the root should have label 3 in order to be a + // prefix. + test_forest.plant(0, leaf!(4), false)?; + test_forest.plant(2, leaf!(5), false)?; + + assert!(!forest.is_prefix(0, &test_forest)?); + + let mut test_forest = leaf!(2); + test_forest.plant(0, leaf!(5), false)?; + + assert!(forest.is_prefix(2, &test_forest)?); + + // now test cloning + + // add a duplicate label + forest.plant(3, leaf!(5), false)?; + + let _len = forest.nodes_len(); + + forest.clone_node(5)?; + + assert_eq!(forest.nodes_len(), 7); + + #[cfg(feature = "test-print-viz")] + forest.print_viz("forest.gv")?; + + Ok(()) + } +} diff --git a/chain/src/item/genins.rs b/chain/src/item/genins.rs new file mode 100644 index 0000000..99d9202 --- /dev/null +++ b/chain/src/item/genins.rs @@ -0,0 +1,231 @@ +//! This module implements the generation and insertion of item +//! derivation forests. +//! +//! This is used for the chain-rule machine to conveniently produce +//! item derivations into a forest. This forest can serve as a rough +//! approximation of the parse forests, and can be executed in other +//! semirings later on. + +use super::*; +use crate::{atom::DefaultAtom, default::Error, item::default::DefaultForest, Edge}; +use grammar::{Error as GrammarError, GrammarLabel, GrammarLabelType, TNT}; +use graph::Graph; + +use core::borrow::Borrow; +use std::collections::HashSet as Set; + +/// A helper function to generate a fragment of forest. +/// +/// It simply constructs a root node and then appends +/// successive nodes as successive children of the previous +/// node. Also the starting positions will all be set to the +/// same position. +/// +/// If the input is empty, this returns an empty forest; +/// otherwise the result is not empty. +pub fn generate_fragment( + labels: impl AsRef<[GrammarLabelType]>, + pos: usize, +) -> Result<DefaultForest<ForestLabel<GrammarLabel>>, crate::default::Error> { + let mut labels_iter = labels.as_ref().iter(); + let mut result: DefaultForest<ForestLabel<GrammarLabel>>; + + if let Some(first_label) = labels_iter.next() { + result = DefaultForest::new_leaf(GrammarLabel::new(*first_label, pos)); + + let mut index = 0; + + for label in labels_iter { + result.plant( + index, + DefaultForest::new_leaf(GrammarLabel::new(*label, pos)), + false, + )?; + + // To prevent duplication of labels causing + // panics, we query the index of the new node. + index = result + .query_label(GrammarLabel::new(*label, pos).into()) + // REVIEW: Perhaps a LabelNoNode error? + .ok_or(Error::Invalid)?; + } + } else { + result = Default::default(); + } + + Ok(result) +} + +impl DefaultForest<ForestLabel<GrammarLabel>> { + /// Insert an item derivation forest into a recording forest. + /// + /// We need the help of other things just for finding the correct + /// places to insert these item fragments. + pub fn insert_item( + &mut self, + label: Edge, + fragment: impl Borrow<DefaultForest<ForestLabel<GrammarLabel>>>, + atom_child_iter: impl Iterator<Item = usize>, + atom: &DefaultAtom, + ) -> Result<PaSe, Error> { + /// Convert an error telling us that an index is out of bounds. + /// + /// # Panics + /// + /// The function panics if the error is not of the expected + /// kind. + fn index_out_of_bounds_conversion(ge: GrammarError) -> Error { + match ge { + GrammarError::IndexOutOfBounds(index, bound) => { + Error::IndexOutOfBounds(index, bound) + } + _ => Error::Invalid, + } + } + + let fragment = fragment.borrow(); + + let pase = label.forest_source(); + + let parents: Vec<Parent> = { + let mut result = Vec::new(); + + if let Some(parent) = pase.parent() { + result.push(parent); + } else { + let (root, leaf) = pase.segment().unwrap(); + let mut seen_nodes = Set::new(); + + let mut stack = vec![root]; + + while let Some(top) = stack.pop() { + if seen_nodes.contains(&top) { + continue; + } + + seen_nodes.insert(top); + + for (index, child) in self.children_of(top)?.enumerate() { + if child == leaf { + result.push(Parent::new(top, index)); + } else { + stack.push(child); + } + } + } + } + + result + }; + + for atom_child in atom_child_iter { + // Find reduction information. + let reduction_info = atom + .query_reduction(label.label(), atom_child) + .map_err(index_out_of_bounds_conversion)?; + + let mut stack = parents.clone(); + let mut second_stack = Vec::new(); + + // locate the nodes + for reduction_nt in reduction_info.iter().copied().flatten().rev() { + while let Some(node) = stack.pop() { + // REVIEW: A lot of labels appear here. + // Perhaps I need to refactor something? + if matches!( + self + .vertex_label(node.node())? + .ok_or_else(|| Error::NodeNoLabel(node.node()))? + .label() + .label(), + GrammarLabelType::TNT(TNT::Non(nt)) if nt == *reduction_nt + ) { + for parent in self.parents_of(node.node())? { + debug_assert!(matches!( + self.vertex_label(parent.node()), + Ok(Some(label)) if + label + .label() + .label() + .rule() + .is_some())); + + second_stack.extend(self.parents_of(parent.node())?.filter(|n| { + matches!(self.vertex_label(n.node()), + Ok(Some(label)) + if matches!( + label.label().label().tnt(), + Some(TNT::Non(_)))) + })); + } + } + } + + std::mem::swap(&mut stack, &mut second_stack); + + if stack.is_empty() { + break; + } + } + + if stack.is_empty() { + return Err(Error::CannotPlant); + } + + for parent in stack.into_iter() { + if parent.edge() + 1 >= self.degree(parent.node())? { + self.plant(parent.node(), fragment, false)?; + } else { + let nth_child = self.nth_child(parent.node(), parent.edge() + 1)?; + + if self.is_prefix(nth_child, fragment)? { + // do nothing + continue; + } + + let cloned_node = self.clone_node(nth_child)?; + + self.plant(cloned_node, fragment, false)?; + } + } + } + + let result = if fragment.nodes_len() == 2 { + let root_label = fragment.vertex_label(0)?.unwrap(); + let leaf_label = fragment.vertex_label(1)?.unwrap(); + + // it has been planted, so should be safe. + let node = self.query_label(root_label).unwrap(); + + let edge = { + let mut result = None; + + for (index, child) in self.children_of(node)?.enumerate() { + if matches!(self.vertex_label(child)?, Some(child_label) if child_label == leaf_label) + { + result = Some(index); + break; + } + } + + if let Some(result) = result { + result + } else { + unreachable!("the forest is wrongly planted"); + } + }; + + PaSe::Parent(node, edge) + } else { + let root_label = fragment.vertex_label(0)?.unwrap(); + let leaf_label = fragment.vertex_label(fragment.nodes_len() - 1)?.unwrap(); + + PaSe::Segment( + self.query_label(root_label).unwrap(), + self.query_label(leaf_label).unwrap(), + ) + }; + + Ok(result) + } +} diff --git a/chain/src/item/mod.rs b/chain/src/item/mod.rs new file mode 100644 index 0000000..7fafcc8 --- /dev/null +++ b/chain/src/item/mod.rs @@ -0,0 +1,252 @@ +#![warn(missing_docs)] +//! This module implements the type of items for the chain-rule +//! machine. +//! +//! More specifically, it implements the iten derivation forests for +//! the machine. + +// TODO: Implement an enumeration for Parent or Segment, perhaps +// called PaSe. + +// TODO: Move functions for handling forests, currently contained +// within the method derive to this module, and make them aware of the +// enumeration PaSe. + +// TODO: Implement the Item trait from semirings for the item +// derivation forest, and then we can easily execute items later on. + +use graph::{error::Error as GError, GraphLabel, LabelGraph, Parent, ParentsGraph}; + +use core::borrow::Borrow; + +/// A parent or a segment. +/// +/// A parent is a node with an edge index, which represents a certain +/// edge. +/// +/// A segment represents every edge from the root node to the single +/// terminating node. +#[allow(unused)] +#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum PaSe { + /// An edge from a node, as the n-th child. + Parent(usize, usize), + /// A segment from a root to the single terminating node. + Segment(usize, usize), +} + +impl From<Parent> for PaSe { + fn from(value: Parent) -> Self { + Self::Parent(value.node(), value.edge()) + } +} + +impl Default for PaSe { + fn default() -> Self { + Self::Segment(0, 0) + } +} + +impl core::fmt::Display for PaSe { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Parent(node, edge) => write!(f, "the {edge}-th edge from {node}"), + Self::Segment(root, leaf) => write!(f, "a segment from {root} to {leaf}"), + } + } +} + +impl PaSe { + fn parent(self) -> Option<Parent> { + if let Self::Parent(node, edge) = self { + Some(Parent::new(node, edge)) + } else { + None + } + } + + fn segment(self) -> Option<(usize, usize)> { + if let Self::Segment(root, leaf) = self { + Some((root, leaf)) + } else { + None + } + } +} + +/// An internal type that indicates the status of a node as either a +/// packed, cloned, or plain node. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Default)] +enum ForestLabelType { + Packed, + #[default] + Plain, + Cloned(usize), +} + +impl core::fmt::Display for ForestLabelType { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + Self::Packed => write!(f, "packed"), + Self::Plain => write!(f, "plain"), + Self::Cloned(index) => write!(f, "the {index}-th clone"), + } + } +} + +/// A type that encodes the properties demanded by a forest. +#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub struct ForestLabel<T: GraphLabel> { + label: T, + status: ForestLabelType, +} + +impl<T: GraphLabel> core::fmt::Display for ForestLabel<T> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + writeln!(f, "label: {}, status: {}", self.label, self.status) + } +} + +/// The type of erros for converting forest labels. +#[non_exhaustive] +#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] +pub enum ForestLabelError { + /// Try to pack a cloned node. + PackClone, + /// Try to clone a packed node. + ClonePack, +} + +impl core::fmt::Display for ForestLabelError { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + Self::PackClone => write!(f, "cannot pack a cloned node"), + Self::ClonePack => write!(f, "cannot clone a packed node"), + } + } +} + +impl std::error::Error for ForestLabelError {} + +impl<T: GraphLabel> ForestLabel<T> { + /// Retrieve the label. + pub fn label(&self) -> T { + self.label + } + + /// Return true if and only if this node is packed. + pub fn is_packed(&self) -> bool { + matches!(self.status, ForestLabelType::Packed) + } + + /// Retrieve the optional clone index. + pub fn clone_index(&self) -> Option<usize> { + match self.status { + ForestLabelType::Cloned(index) => Some(index), + _ => None, + } + } + + /// Try to clone a node. + pub fn clone<F>(self, forest: &F) -> Result<Self, ForestLabelError> + where + F: Forest<T>, + { + if self.is_packed() { + Err(ForestLabelError::ClonePack) + } else { + let clone_index = if let Some(old_index) = self.clone_index() { + let mut new_index: usize = old_index + 1; + + let mut new_label = Self { + status: ForestLabelType::Cloned(new_index), + ..self + }; + + while forest.query_label(new_label).is_some() { + new_index += 1; + new_label = Self { + status: ForestLabelType::Cloned(new_index), + ..self + }; + } + + new_index + } else { + 0 + }; + + Ok(Self { + status: ForestLabelType::Cloned(clone_index), + ..self + }) + } + } + + /// Try to pack a node. + pub fn pack(self) -> Result<Self, ForestLabelError> { + if self.clone_index().is_some() { + Err(ForestLabelError::PackClone) + } else { + let new_label = Self { + status: ForestLabelType::Packed, + ..self + }; + + Ok(new_label) + } + } +} + +impl<T: GraphLabel> From<T> for ForestLabel<T> { + fn from(label: T) -> Self { + let status = ForestLabelType::default(); + Self { label, status } + } +} + +/// The expected behaviours of an item derivation forest. +/// +/// Note that it requires only a subset of the functionalities of +/// labelled graphs. +pub trait Forest<T: GraphLabel>: ParentsGraph + LabelGraph<ForestLabel<T>> { + /// The type of errors for operations on the forest. + type Error: std::error::Error + From<GError> + From<ForestLabelError>; + + /// Return the root of the forest. + /// + /// A forest without a root is regarded as empty. + fn root(&self) -> Option<usize>; + + /// Construct a forest consisting of one leaf node with the given + /// label. + fn new_leaf(label: T) -> Self; + + /// Detect if the fragment is a prefix of the sub-forest rooted at + /// `node_id`. + fn is_prefix<F>(&self, node_id: usize, fragment: F) -> Result<bool, Self::Error> + where + F: Borrow<Self>; + + /// Extend the forest by adjoining another forest at a given node. + fn plant<F>(&mut self, node_id: usize, fragment: F, planted: bool) -> Result<(), Self::Error> + where + F: Borrow<Self>; + + /// Clone a node by making a new node and making all the nodes + /// that previously pointed to the old node now point to the new + /// node, and the new node points to the old node. Return the + /// index of the new node. In addition, if the old node had + /// already been cloned, just return the index of its + /// representative. + /// + /// The labels of the representing node and of the cloned node + /// will be handled automatically. + fn clone_node(&mut self, node_id: usize) -> Result<usize, Self::Error>; +} + +pub mod default; + +pub mod genins; + +pub use genins::generate_fragment; diff --git a/chain/src/lib.rs b/chain/src/lib.rs index a3d420b..916678f 100644 --- a/chain/src/lib.rs +++ b/chain/src/lib.rs @@ -10,9 +10,16 @@ pub mod atom; -use graph::{error::Error as GError, LabelExtGraph, Parent}; +pub mod item; -use forest::Error as ForestError; +// TODO: We need a module for items, that serve the role of the +// forests now. + +use graph::{error::Error as GError, LabelExtGraph}; + +use item::default::Error as ForestError; + +use item::PaSe; /// An edge in the Chain-Rule machine. #[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] @@ -20,14 +27,14 @@ pub struct Edge { /// The position in the atomic languages. label: usize, /// The source of the associated forest edge. - forest_source: Parent, + forest_source: PaSe, /// Whether or not this edge is "accepting". accepting: bool, } impl Edge { /// Construct a new edge. - pub fn new(label: usize, forest_source: Parent, accepting: bool) -> Self { + pub fn new(label: usize, forest_source: PaSe, accepting: bool) -> Self { Self { label, forest_source, @@ -46,7 +53,7 @@ impl Edge { } /// Return the associated forest edge of the edge. - pub fn forest_source(&self) -> Parent { + pub fn forest_source(&self) -> PaSe { self.forest_source } } @@ -56,12 +63,7 @@ impl core::fmt::Display for Edge { let label = self.label(); let forest_source = self.forest_source(); - write!( - f, - "edge label {label} with forest source {} and edge index {}", - forest_source.node(), - forest_source.edge() - ) + write!(f, "edge label {label} with item {}", forest_source) } } @@ -84,7 +86,7 @@ pub enum TwoLayers { /// the source of the associated forest edge of the second layer, /// and the third is a list of edges, which are the common first /// layers. - Two(usize, Parent, bool, Vec<(Edge, usize)>), + Two(usize, PaSe, bool, Vec<(Edge, usize)>), } /// The type of a collapsed iterator. diff --git a/chain/src/plan.org b/chain/src/plan.org index c2a9037..1da33cc 100644 --- a/chain/src/plan.org +++ b/chain/src/plan.org @@ -2,7 +2,7 @@ #+AUTHOR: Durand #+DATE: <2022-11-18 Ven 19:57> -* Things to do [7/10] +* Things to do [6/9] - [X] Implement builders for graphs - [X] Find sets of the first terminals for each non-terminal, in the @@ -79,15 +79,13 @@ lack of this step might be the root cause of the failure of the previous version of this project. + [X] Test atoms -- [X] Implement forests [4/4] - + [X] Design a format of forests. This should be the most crucial - thing to do, in order to have a better understanding of the whole - picture. I need to have a clear understanding of the details of - the binarised shared packed parsed forests, that reflects the - regular expressions in the grammar equations. - + [X] Define a trait with the expected behaviours. - + [X] Implement them as parents-knowing graphs. - + [X] Test +- [ ] Implement semiring. [0/5] + + [ ] Define the trait. + + [ ] Define items and rules for the chain-rule parser, as an + item-based description. + + [ ] Implement the boolean semiring. + + [ ] Implement the natural number semiring. + + [ ] Implement the free semiring. - [-] Implement languages. [4/6] + [X] First define a trait with the expected behaviours. + [X] Then implement them as doubly labelled graphs. @@ -95,19 +93,29 @@ with a position in the forest. This perfectly solves the problem of those "plugs"! + [X] Then implement finding derivatives by use of the chain rule. - + [ ] Handle Forests + + [ ] Handle Semirings + [-] Tests -- [ ] Implement semiring. [0/5] - + [ ] Define the trait. - + [ ] Implement the boolean semiring. - + [ ] Implement the natural number semiring. - + [ ] Implement the free semiring. - + [ ] Compute and record a semiring value when computing - derivatives. - [X] Miscellaneous [1/1] + [X] Implement a function for NFA that checks if a given predicate is satisfied for each edge label. +** Forests + +This was a failed attempt to handle forests. I decided to handle +forests as a special case of semi-rings. This is not only more +systematic, but has also the potential of handling more general +semi-rings later, like the Viterbi semirings, for example. + +- [X] Implement forests [4/4] + + [X] Design a format of forests. This should be the most crucial + thing to do, in order to have a better understanding of the whole + picture. I need to have a clear understanding of the details of + the binarised shared packed parsed forests, that reflects the + regular expressions in the grammar equations. + + [X] Define a trait with the expected behaviours. + + [X] Implement them as parents-knowing graphs. + + [X] Test + * Atomic Languages This describes the behaviours of atomic languages. The atomic |