chain/src/lib.rs


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345

#![warn(missing_docs)]
//! This package implements the core algorithm of the entire
//! workspace: parsing with derivatives by means of chain rule and
//! regular nulling languages.
//!
//! Since I shall not name my crate "core" to avoid collisions with
//! the Rust's own core, I decided to name this crate after what I
//! think is the essence of this algorithm, the chain-rule for
//! derivatives of languages.

pub mod atom;

pub mod item;

use graph::{error::Error as GError, LabelExtGraph};

use item::default::Error as ForestError;

use item::PaVi;

/// An edge in the Chain-Rule machine.
#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
pub struct Edge {
    /// The position in the atomic languages.
    label: usize,
    /// The source of the associated forest edge.
    forest_source: PaVi,
    /// The bottom source on which we shall perform reduction.
    ///
    /// If this equals `forest_source`, no extra reduction needs to be
    /// done.
    true_source: PaVi,
    /// Whether or not this edge is "accepting".
    accepting: bool,
}

impl Edge {
    /// Construct a new edge.
    pub fn new(label: usize, forest_source: PaVi, accepting: bool) -> Self {
        let true_source = forest_source;
        Self {
            label,
            forest_source,
            accepting,
            true_source,
        }
    }

    /// Return the label of the edge.
    pub fn label(&self) -> usize {
        self.label
    }

    /// Tell whether or not the edge is accepting.
    pub fn is_accepting(&self) -> bool {
        self.accepting
    }

    /// Return the associated forest edge of the edge.
    pub fn forest_source(&self) -> PaVi {
        self.forest_source
    }

    /// Set the associated forest edge of the edge of the edge.
    pub fn set_forest_source(&mut self, source: PaVi) {
        self.forest_source = source;
    }

    /// Set the associated bottom edge of the edge from which onwards
    /// we shall perform the reduction.
    pub fn set_true_source(&mut self, true_source: PaVi) {
        self.true_source = true_source;
    }

    /// Return the associated bottom edge of the edge from which
    /// onwards we shall perform the reduction.
    pub fn true_source(&self) -> PaVi {
        self.true_source
    }
}

impl core::fmt::Display for Edge {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let label = self.label();
        let forest_source = self.forest_source();

        write!(f, "edge label {label} with item {}", forest_source)
    }
}

/// A Real Or Imaginary edge.
///
/// An imaginary edge will be ignored when adding edges.  The
/// imaginary edges will be used to determine the acceptance of a
/// node, when and only when that new node turns out to have no
/// children.
///
/// # Note
///
/// Non, je ne suis pas un roi.  Je ne sais pas qu'est-ce que vous
/// parlez.
#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
pub enum Roi {
    /// A real edge is an ordinary edge.
    Re(Edge),
    /// The label of an imaginary edge is not important, so we only
    /// record its target.
    Im(usize),
}

// Some convenient conversions

impl From<Edge> for Roi {
    fn from(edge: Edge) -> Self {
        Self::Re(edge)
    }
}

impl From<usize> for Roi {
    fn from(target: usize) -> Self {
        Self::Im(target)
    }
}

impl Roi {
    /// Return the real part if it is a real edge.
    fn real_part(self) -> Option<Edge> {
        if let Self::Re(edge) = self {
            Some(edge)
        } else {
            None
        }
    }

    /// Return the imaginary part if it is an imaginary edge.
    fn imaginary_part(self) -> Option<usize> {
        if let Self::Im(target) = self {
            Some(target)
        } else {
            None
        }
    }
}

/// Each derivation is a concatenation of two items, so there are two
/// layers.  But some items have no children and are accepting, in
/// which case we just skip that item completely, for performance
/// reasons, and hence there could be only one layer as well.
///
/// It might even happen that both layers have no children, in which
/// case we shall just put all previous edges here.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum TwoLayers {
    /// One layer has no children.
    One(Roi, usize),
    // REVIEW: Maybe we do not need to store an edge in the forest: we
    // only need the source of the edge?
    /// Both layers have children.
    ///
    /// The first element is the label of the second layer, the second
    /// the source of the associated forest edge of the second layer,
    /// and the third is a list of edges, which are the common first
    /// layers.
    Two(usize, PaVi, bool, Vec<(Roi, usize)>),
}

/// The type of a collapsed iterator.
pub struct Collapsed<'a, I, C>
where
    I: Iterator<Item = TwoLayers>,
    C: Chain<DerIter = I>,
{
    iter: I,
    chain: &'a mut C,
    stop: bool,
}

impl<'a, I, C> Collapsed<'a, I, C>
where
    I: Iterator<Item = TwoLayers>,
    C: Chain<DerIter = I>,
{
    /// Collapse an iterator.
    pub fn collapse(iter: I, chain: &'a mut C) -> Self {
        let stop = false;

        Self { iter, chain, stop }
    }
}

impl<'a, I, C> Iterator for Collapsed<'a, I, C>
where
    I: Iterator<Item = TwoLayers>,
    C: Chain<DerIter = I>,
{
    type Item = Result<(Roi, usize), <C as Chain>::Error>;

    fn next(&mut self) -> Option<Self::Item> {
        if self.stop {
            return None;
        }

        if let Some(two_layer) = self.iter.next() {
            match two_layer {
                TwoLayers::One(edge, to) => Some(Ok((edge, to))),
                TwoLayers::Two(label, forest_source, accepting, edges) => {
                    let new_index = match self.chain.extend(
                        edges
                            .into_iter()
                            .filter_map(|(roi, target)| roi.real_part().map(|edge| (edge, target))),
                    ) {
                        Ok(new) => new,
                        Err(error) => {
                            // Prevent further iterations.
                            self.stop = true;

                            return Some(Err(error.into()));
                        }
                    };

                    Some(Ok((
                        Edge::new(label, forest_source, accepting).into(),
                        new_index,
                    )))
                }
            }
        } else {
            None
        }
    }
}

/// The expected behaviours of a language which can take derivatives
/// by chain rule.
pub trait Chain: LabelExtGraph<Edge> {
    /// The implementations should choose a type to represent errors.
    type Error: std::error::Error + From<GError> + From<ForestError>;

    /// A type of atomic languages that is chosen for this chain rule
    /// machine.
    type Atom: atom::Atom;

    /// Represents the language that is present after we parse the
    /// empty string, that is the initial configuration of the
    /// language.
    fn unit(atom: Self::Atom) -> Result<Self, Self::Error>;

    /// Produce the underlying atom, so that we can produce an initial
    /// empty chain from the same atom again.
    fn atom(&self) -> &Self::Atom;

    /// Return true if and only if the language contains the empty
    /// string.
    fn epsilon(&self) -> Result<bool, Self::Error>;

    /// Update the history
    fn update_history(&mut self, new: usize);

    /// Update the acceptance of a node, when and only when that node
    /// turns out to have no children.
    fn update_epsilon(
        &mut self,
        node_id: usize,
        edges: impl Iterator<Item = (Roi, usize)>,
    ) -> Result<(), Self::Error>;

    /// An iterator that iterates all layers that need to be merged.
    type DerIter: Iterator<Item = TwoLayers>;

    /// Take the derivative by a terminal `t` at position `pos`.
    fn derive(&mut self, t: usize, pos: usize, no_item: bool)
        -> Result<Self::DerIter, Self::Error>;

    /// Take the union of all derivatives.
    fn union(&mut self, der_iter: Self::DerIter) -> Result<Vec<(Roi, usize)>, Self::Error> {
        // REVIEW: Think about the possibilities to avoid allocations.
        Collapsed::<_, Self>::collapse(der_iter, self)
            .collect::<Result<Vec<(Roi, usize)>, Self::Error>>()
            .map(|mut v| {
                v.retain(|(_, target)| {
                    *target == 0 || matches!(self.degree(*target), Ok(deg) if deg != 0)
                });
                v
            })
    }

    /// Use chain rule to compute the derivative with respect to a
    /// terminal.
    ///
    /// # Arguments
    ///
    /// The argument `t` is the terminal we computet the derivative
    /// with.
    ///
    /// The argument `pos` is the zero-based position within the
    /// input.
    ///
    /// The argument `no_item` determines whether we want the item
    /// derivation forest as well.
    fn chain(&mut self, t: usize, pos: usize, no_item: bool) -> Result<(), Self::Error> {
        let der_iter = self.derive(t, pos, no_item)?;

        let edges = self.union(der_iter)?;

        let new_index = self.extend(
            edges
                .iter()
                .filter_map(|(roi, target)| roi.real_part().map(|edge| (edge, *target))),
        )?;

        if self.is_empty_node(new_index)? {
            self.update_epsilon(new_index, edges.into_iter())?;
        }

        self.update_history(new_index);

        Ok(())
    }

    // FIXME: I shall probably not use the trait of forests for this
    // purpose, but I have not figured out yet wha the correct trait
    // should be.
    /// The type of output item that will be produced by this machine.
    type Item: item::Forest<grammar::GrammarLabel>;

    /// Signal to the parser that the end of the input is reached, so
    /// that the parser knows to generate suitable forests.
    ///
    /// This is not needed when recognizing instead of parsing.
    ///
    /// We also pass in the current position `pos` so that we have a
    /// little flexibility in the position of the end of input.  In
    /// addition, this makes it easier to produce the suitable
    /// forests.
    ///
    /// As a reminder, `pos` should be the position of the last
    /// terminal, so if there are three terminals in total, the `pos`
    /// should be `2` , instead of `3`.
    ///
    /// Similarly, we pass in the terminal at the position `pos` to
    /// aid the production of the forest.
    fn end_of_input(&mut self, pos: usize, ter: usize) -> Result<Self::Item, Self::Error>;
}

pub mod default;