summaryrefslogtreecommitdiff
path: root/chain/src/lib.rs
blob: a3d420b91e2594be22bf7f606f1136abe7bd483d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
#![warn(missing_docs)]
//! This package implements the core algorithm of the entire
//! workspace: parsing with derivatives by means of chain rule and
//! regular nulling languages.
//!
//! Since I shall not name my crate "core" to avoid collisions with
//! the Rust's own core, I decided to name this crate after what I
//! think is the essence of this algorithm, the chain-rule for
//! derivatives of languages.

pub mod atom;

use graph::{error::Error as GError, LabelExtGraph, Parent};

use forest::Error as ForestError;

/// An edge in the Chain-Rule machine.
#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
pub struct Edge {
    /// The position in the atomic languages.
    label: usize,
    /// The source of the associated forest edge.
    forest_source: Parent,
    /// Whether or not this edge is "accepting".
    accepting: bool,
}

impl Edge {
    /// Construct a new edge.
    pub fn new(label: usize, forest_source: Parent, accepting: bool) -> Self {
        Self {
            label,
            forest_source,
            accepting,
        }
    }

    /// Return the label of the edge.
    pub fn label(&self) -> usize {
        self.label
    }

    /// Tell whether or not the edge is accepting.
    pub fn is_accepting(&self) -> bool {
        self.accepting
    }

    /// Return the associated forest edge of the edge.
    pub fn forest_source(&self) -> Parent {
        self.forest_source
    }
}

impl core::fmt::Display for Edge {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let label = self.label();
        let forest_source = self.forest_source();

        write!(
            f,
            "edge label {label} with forest source {} and edge index {}",
            forest_source.node(),
            forest_source.edge()
        )
    }
}

/// Each derivation is a concatenation of two items, so there are two
/// layers.  But some items have no children and are accepting, in
/// which case we just skip that item completely, for performance
/// reasons, and hence there could be only one layer as well.
///
/// It might even happen that both layers have no children, in which
/// case we shall just put all previous edges here.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum TwoLayers {
    /// One layer has no children.
    One(Edge, usize),
    // REVIEW: Maybe we do not need to store an edge in the forest: we
    // only need the source of the edge?
    /// Both layers have children.
    ///
    /// The first element is the label of the second layer, the second
    /// the source of the associated forest edge of the second layer,
    /// and the third is a list of edges, which are the common first
    /// layers.
    Two(usize, Parent, bool, Vec<(Edge, usize)>),
}

/// The type of a collapsed iterator.
pub struct Collapsed<'a, I, C>
where
    I: Iterator<Item = TwoLayers>,
    C: Chain<DerIter = I>,
{
    iter: I,
    chain: &'a mut C,
    stop: bool,
}

impl<'a, I, C> Collapsed<'a, I, C>
where
    I: Iterator<Item = TwoLayers>,
    C: Chain<DerIter = I>,
{
    /// Collapse an iterator.
    pub fn collapse(iter: I, chain: &'a mut C) -> Self {
        let stop = false;

        Self { iter, chain, stop }
    }
}

impl<'a, I, C> Iterator for Collapsed<'a, I, C>
where
    I: Iterator<Item = TwoLayers>,
    C: Chain<DerIter = I>,
{
    type Item = Result<(Edge, usize), <C as Chain>::Error>;

    fn next(&mut self) -> Option<Self::Item> {
        if self.stop {
            return None;
        }

        if let Some(two_layer) = self.iter.next() {
            match two_layer {
                TwoLayers::One(edge, to) => Some(Ok((edge, to))),
                TwoLayers::Two(label, forest_source, accepting, edges) => {
                    let new_index = match self.chain.extend(edges.into_iter()) {
                        Ok(new) => new,
                        Err(error) => {
                            // Prevent further iterations.
                            self.stop = true;

                            return Some(Err(error.into()));
                        }
                    };

                    Some(Ok((Edge::new(label, forest_source, accepting), new_index)))
                }
            }
        } else {
            None
        }
    }
}

/// The expected behaviours of a language which can take derivatives
/// by chain rule.
pub trait Chain: LabelExtGraph<Edge> {
    /// The implementations should choose a type to represent errors.
    type Error: std::error::Error + From<GError> + From<ForestError>;

    /// A type of atomic languages that is chosen for this chain rule
    /// machine.
    type Atom: atom::Atom;

    /// Represents the language that is present after we parse the
    /// empty string, that is the initial configuration of the
    /// language.
    fn unit(atom: Self::Atom) -> Result<Self, Self::Error>;

    /// Return true if and only if the language contains the empty
    /// string.
    fn epsilon(&self) -> Result<bool, Self::Error>;

    /// Update the history
    fn update_history(&mut self, new: usize);

    /// An iterator that iterates all layers that need to be merged.
    type DerIter: Iterator<Item = TwoLayers>;

    /// Take the derivative by a terminal `t` at position `pos`.
    fn derive(&mut self, t: usize, pos: usize) -> Result<Self::DerIter, Self::Error>;

    /// Take the union of all derivatives.
    fn union(&mut self, der_iter: Self::DerIter) -> Result<Vec<(Edge, usize)>, Self::Error> {
        // REVIEW: Find a way to avoid allocations.
        Collapsed::<_, Self>::collapse(der_iter, self).collect()
    }

    /// Use chain rule to compute the derivative with respect to a
    /// terminal.
    fn chain(&mut self, t: usize, pos: usize) -> Result<(), Self::Error> {
        let der_iter = self.derive(t, pos)?;

        let edges = self.union(der_iter)?;

        let new_index = self.extend(edges)?;

        self.update_history(new_index);

        Ok(())
    }
}

pub mod default;