summaryrefslogtreecommitdiff
path: root/chain/src/lib.rs
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2023-01-20 13:48:26 +0800
committerJSDurand <mmemmew@gmail.com>2023-01-20 13:48:26 +0800
commit18d7955b7d84c00467ede38baae53f4ce1fb6908 (patch)
tree97d0746b82816a21d980636e50f8cdbeb804b518 /chain/src/lib.rs
parent8f8d3d1a3c276be4be2e5d2e767ada564c47279a (diff)
chain: a prototype is added.
I have an ostensibly working prototype now. Further tests are needed to make sure that the algorithm meets the time complexity requirement, though.
Diffstat (limited to 'chain/src/lib.rs')
-rw-r--r--chain/src/lib.rs194
1 files changed, 171 insertions, 23 deletions
diff --git a/chain/src/lib.rs b/chain/src/lib.rs
index 4e21e1d..91c37f7 100644
--- a/chain/src/lib.rs
+++ b/chain/src/lib.rs
@@ -10,41 +10,189 @@
pub mod atom;
-// TODO: Define errors.
+use graph::{error::Error as GError, LabelExtGraph, Parent};
+
+use forest::Error as ForestError;
+
+/// An edge in the Chain-Rule machine.
+#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
+pub struct Edge {
+ /// The position in the atomic languages.
+ label: usize,
+ /// The source of the associated forest edge.
+ forest_source: Parent,
+ /// Whether or not this edge is "accepting".
+ accepting: bool,
+}
+
+impl Edge {
+ /// Construct a new edge.
+ pub fn new(label: usize, forest_source: Parent, accepting: bool) -> Self {
+ Self {
+ label,
+ forest_source,
+ accepting,
+ }
+ }
+
+ /// Return the label of the edge.
+ pub fn label(&self) -> usize {
+ self.label
+ }
+
+ /// Tell whether or not the edge is accepting.
+ pub fn is_accepting(&self) -> bool {
+ self.accepting
+ }
+
+ /// Return the associated forest edge of the edge.
+ pub fn forest_source(&self) -> Parent {
+ self.forest_source
+ }
+}
+
+impl core::fmt::Display for Edge {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ let label = self.label();
+ let forest_source = self.forest_source();
+
+ write!(
+ f,
+ "edge label {label} with forest source {} and edge index {}",
+ forest_source.node(),
+ forest_source.edge()
+ )
+ }
+}
+
+/// Each derivation is a concatenation of two items, so there are two
+/// layers. But some items have no children and are accepting, in
+/// which case we just skip that item completely, for performance
+/// reasons, and hence there could be only one layer as well.
+///
+/// It might even happen that both layers have no children, in which
+/// case we shall just put all previous edges here.
+#[derive(Debug, Clone, Eq, PartialEq)]
+pub enum TwoLayers {
+ /// One layer has no children.
+ One(Edge, usize),
+ // REVIEW: Maybe we do not need to store an edge in the forest: we
+ // only need the source of the edge?
+ /// Both layers have children.
+ ///
+ /// The first element is the label of the second layer, the second
+ /// the source of the associated forest edge of the second layer,
+ /// and the third is a list of edges, which are the common first
+ /// layers.
+ Two(usize, Parent, bool, Vec<(Edge, usize)>),
+}
+
+/// The type of a collapsed iterator.
+pub struct Collapsed<'a, I, C>
+where
+ I: Iterator<Item = TwoLayers>,
+ C: Chain<DerIter = I>,
+{
+ iter: I,
+ chain: &'a mut C,
+ stop: bool,
+}
+
+impl<'a, I, C> Collapsed<'a, I, C>
+where
+ I: Iterator<Item = TwoLayers>,
+ C: Chain<DerIter = I>,
+{
+ /// Collapse an iterator.
+ pub fn collapse(iter: I, chain: &'a mut C) -> Self {
+ let stop = false;
+
+ Self { iter, chain, stop }
+ }
+}
+
+impl<'a, I, C> Iterator for Collapsed<'a, I, C>
+where
+ I: Iterator<Item = TwoLayers>,
+ C: Chain<DerIter = I>,
+{
+ type Item = Result<(Edge, usize), <C as Chain>::Error>;
+
+ fn next(&mut self) -> Option<Self::Item> {
+ if self.stop {
+ return None;
+ }
+
+ if let Some(two_layer) = self.iter.next() {
+ match two_layer {
+ TwoLayers::One(edge, to) => Some(Ok((edge, to))),
+ TwoLayers::Two(label, forest_source, accepting, edges) => {
+ let new_index = match self.chain.extend(edges.into_iter()) {
+ Ok(new) => new,
+ Err(error) => {
+ // Prevent further iterations.
+
+ return Some(Err(error.into()));
+ }
+ };
+
+ Some(Ok((Edge::new(label, forest_source, accepting), new_index)))
+ }
+ }
+ } else {
+ None
+ }
+ }
+}
/// The expected behaviours of a language which can take derivatives
/// by chain rule.
-pub trait Chain: Default {
+pub trait Chain: LabelExtGraph<Edge> {
/// The implementations should choose a type to represent errors.
- type Error: std::error::Error;
+ type Error: std::error::Error + From<GError> + From<ForestError>;
+
+ /// A type of atomic languages that is chosen for this chain rule
+ /// machine.
+ type Atom: atom::Atom;
/// Represents the language that is present after we parse the
/// empty string, that is the initial configuration of the
/// language. This may or may not be different from what
/// `Default::default` gives.
- fn unit() -> Self;
-
- /// Take the derivative by a terminal symbol.
- ///
- /// This takes care of the union and the prepending operations.
- ///
- /// # A little remark about the design
- ///
- /// I have thought to separate different operations (like the
- /// union, the prepending, and single derivatives) and then define
- /// a function to put everything together. But I think that
- /// design is not convenient to use. Also, I really do not need
- /// those operations other than to provide this derivative
- /// operation, so why define them? And putting all things
- /// together may reduce the number of bugs caused by wrong uses of
- /// those component functions, and can reduce the amount of
- /// documentation strings a library user needs to read, in order
- /// to make use of this trait. So I ended up with this design.
- fn chain(&mut self, t: usize);
+ fn unit(atom: Self::Atom) -> Result<Self, Self::Error>;
/// Return true if and only if the language contains the empty
/// string.
- fn epsilon(&self) -> bool;
+ fn epsilon(&self) -> Result<bool, Self::Error>;
+
+ /// Update the history
+ fn update_history(&mut self, new: usize);
+
+ /// An iterator that iterates all layers that need to be merged.
+ type DerIter: Iterator<Item = TwoLayers>;
+
+ /// Take the derivative by a terminal symbol at position `POS`.
+ fn derive(&mut self, t: usize, pos: usize) -> Result<Self::DerIter, Self::Error>;
+
+ /// Take the union of all derivatives.
+ fn union(&mut self, der_iter: Self::DerIter) -> Result<Vec<(Edge, usize)>, Self::Error> {
+ // REVIEW: Find a way to avoid allocations.
+ Collapsed::<_, Self>::collapse(der_iter, self).collect()
+ }
+
+ /// Use chain rule to compute the derivative with respect to a
+ /// terminal.
+ fn chain(&mut self, t: usize, pos: usize) -> Result<(), Self::Error> {
+ let der_iter = self.derive(t, pos)?;
+
+ let edges = self.union(der_iter)?;
+
+ let new_index = self.extend(edges.into_iter())?;
+
+ self.update_history(new_index);
+
+ Ok(())
+ }
}
pub mod default;