From 18d7955b7d84c00467ede38baae53f4ce1fb6908 Mon Sep 17 00:00:00 2001
From: JSDurand <mmemmew@gmail.com>
Date: Fri, 20 Jan 2023 13:48:26 +0800
Subject: chain: a prototype is added.

I have an ostensibly working prototype now.

Further tests are needed to make sure that the algorithm meets the
time complexity requirement, though.
---
 grammar/src/label.rs               | 124 +++++++++++++++++++++++++++++++++++
 grammar/src/left_closure.rs        |   6 +-
 grammar/src/lib.rs                 |  83 ++++++++++++++++++++++--
 grammar/src/test_grammar_helper.rs | 128 ++++++++++++++++++++++++++++++++++++-
 4 files changed, 332 insertions(+), 9 deletions(-)
 create mode 100644 grammar/src/label.rs

(limited to 'grammar')

diff --git a/grammar/src/label.rs b/grammar/src/label.rs
new file mode 100644
index 0000000..58eaddc
--- /dev/null
+++ b/grammar/src/label.rs
@@ -0,0 +1,124 @@
+//! This file implements a type of labels that could be used as the
+//! labels of a parse forest.
+
+use super::*;
+
+/// The actual label of a grammar label.
+#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
+pub enum GrammarLabelType {
+    /// A terminal or a non-terminal.
+    TNT(TNT),
+    /// A rule position.
+    Rule(usize),
+}
+
+impl GrammarLabelType {
+    /// Return the name of this label with the help of the associated
+    /// grammar.
+    pub fn name(&self, grammar: &Grammar) -> Result<String, Error> {
+        match self {
+            Self::TNT(tnt) => grammar.name_of_tnt(*tnt),
+            Self::Rule(pos) => grammar.rule_pos_to_string(*pos),
+        }
+    }
+}
+
+/// The label to be used in a forest.
+#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
+pub struct GrammarLabel {
+    /// The actual label.
+    label: GrammarLabelType,
+    /// The start in the input that this label correponds to.
+    start: usize,
+    /// The end in the input that this label correponds to.
+    end: Option<usize>,
+    /// A node in the forest might be a packed node.
+    packed_p: bool,
+}
+
+impl core::fmt::Display for GrammarLabel {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        // Simply displaying this without the help of a grammar is not
+        // of much help, so we just use the debug method to cheat,
+        // haha.
+
+        write!(f, "{:?}", self)
+    }
+}
+
+impl GrammarLabel {
+    /// Construct a new label.
+    pub fn new(label: GrammarLabelType, start: usize) -> Self {
+        let end = None;
+        let packed_p = false;
+
+        Self {
+            label,
+            start,
+            end,
+            packed_p,
+        }
+    }
+
+    /// Return the end in the input.
+    pub fn end(&self) -> Option<usize> {
+        self.end
+    }
+
+    /// Return the start in the input.
+    pub fn start(&self) -> usize {
+        self.start
+    }
+
+    /// Return the actual label.
+    pub fn label(&self) -> GrammarLabelType {
+        self.label
+    }
+
+    /// Update the end.
+    pub fn set_end(&mut self, end: usize) {
+        self.end = Some(end);
+    }
+
+    /// Check whether the node is a packed node.
+    pub fn is_packed(&self) -> bool {
+        self.packed_p
+    }
+
+    /// Update the packed status.
+    pub fn set_packed_p(&mut self, packed_p: bool) {
+        self.packed_p = packed_p;
+    }
+
+    /// Return a string description with the help of the associated
+    /// grammar.
+    pub fn to_string(&self, grammar: &Grammar) -> Result<String, Error> {
+        // REVIEW: It needs at least 34 bytes, so we just double it.
+        // Of course we can also calculate the length exactly, but
+        // this can be postponed till later.
+        let mut s = String::with_capacity(68);
+        s.push_str("a ");
+
+        if self.is_packed() {
+            s.push_str("packed ");
+        } else {
+            s.push_str("normal ");
+        }
+
+        s.push_str("node labelled ");
+
+        s.push_str(&self.label().name(grammar)?);
+
+        s.push_str(" from ");
+
+        s.push_str(&format!("{} ", self.start()));
+
+        if let Some(end) = self.end() {
+            s.push_str(&format!("to {end}"));
+        } else {
+            s.push_str("onwards");
+        }
+
+        Ok(s)
+    }
+}
diff --git a/grammar/src/left_closure.rs b/grammar/src/left_closure.rs
index 1630881..39461f0 100644
--- a/grammar/src/left_closure.rs
+++ b/grammar/src/left_closure.rs
@@ -114,10 +114,10 @@ impl Grammar {
             local_result.push(Or);
             builder.add_vertex();
 
-            local_result.push(Lit(Ter(self.pack_tnt(Non(n)).unwrap())));
-            let non_lit_index = builder.add_vertex();
+            // local_result.push(Lit(Ter(self.pack_tnt(Non(n)).unwrap())));
+            // let non_lit_index = builder.add_vertex();
 
-            builder.add_edge(0, non_lit_index, ()).unwrap();
+            // builder.add_edge(0, non_lit_index, ()).unwrap();
 
             // If this non-terminal is nullable, add an empty variant.
             if self.is_nullable(n)? {
diff --git a/grammar/src/lib.rs b/grammar/src/lib.rs
index 297cb66..50a2b9b 100644
--- a/grammar/src/lib.rs
+++ b/grammar/src/lib.rs
@@ -320,11 +320,39 @@ impl Grammar {
         self.accumulators.last().copied().unwrap_or(0)
     }
 
+    /// Return an element of the accumulator.
+    #[inline]
+    pub fn nth_accumulator(&self, n: usize) -> Result<usize, Error> {
+        self.accumulators
+            .get(n)
+            .copied()
+            .ok_or_else(|| Error::IndexOutOfBounds(n, self.total()))
+    }
+
+    /// Return the index of the rules a rule position belongs to.
+    #[inline]
+    pub fn get_rule_num(&self, pos: usize) -> Result<usize, Error> {
+        let mut result = None;
+
+        for (index, accumulator) in self.accumulators.iter().copied().skip(1).enumerate() {
+            if accumulator > pos {
+                result = Some(index);
+                break;
+            }
+        }
+
+        if let Some(n) = result {
+            Ok(n)
+        } else {
+            Err(Error::IndexOutOfBounds(pos, self.total()))
+        }
+    }
+
     /// Query if a position is the starting position of a
     /// non-terminal.  If it is, return the non-terminal, else return
     /// `None` .
     #[inline]
-    pub fn is_nt_start_in_nfa_p(&self, pos: usize) -> Option<usize> {
+    pub fn get_nt_start_in_nfa(&self, pos: usize) -> Option<usize> {
         for (index, accumulator) in self.accumulators.iter().copied().enumerate() {
             let shifted_accumulator = accumulator << 1;
 
@@ -456,7 +484,7 @@ impl Grammar {
             }
         }
 
-        Ok(self.expansion_map.get(&(pos1, pos2)).map(|v| &v[..]))
+        Ok(self.expansion_map.get(&(pos1, pos2)).map(|v| v.as_ref()))
     }
 
     // REVIEW: Do we have a better way to record expansion information
@@ -483,7 +511,11 @@ impl Grammar {
         &mut self,
         two_edges: TwoEdges<LabelType<TNT>>,
     ) -> LabelType<TNT> {
+        #[cfg(debug_assertions)]
         let (first_source, first_target, first_label) = two_edges.first_edge();
+        #[cfg(not(debug_assertions))]
+        let (first_source, _, first_label) = two_edges.first_edge();
+
         let (second_source, second_target, second_label) = two_edges.second_edge();
 
         #[cfg(debug_assertions)]
@@ -501,11 +533,11 @@ impl Grammar {
         // that is to say, the source of the second edge is the
         // starting edge of a non-terminal.
 
-        let mut left_p = first_label.get_left_p() || second_label.get_left_p();
+        let mut left_p = first_label.is_left_p() || second_label.is_left_p();
 
         // Record left-linear expansion information.
 
-        if let Some(second_nt) = self.is_nt_start_in_nfa_p(second_source) {
+        if let Some(second_nt) = self.get_nt_start_in_nfa(second_source) {
             left_p = true;
 
             if !self
@@ -554,12 +586,55 @@ impl Grammar {
             .ok_or(Error::IndexOutOfBounds(non_terminal, self.non.len()))?
             .iter())
     }
+
+    /// Return a string describing a rule position.
+    pub fn rule_pos_to_string(&self, pos: usize) -> Result<String, Error> {
+        let rule_num = {
+            let mut result = None;
+
+            for (index, accumulator) in self.accumulators.iter().copied().skip(1).enumerate() {
+                if accumulator > pos {
+                    result = Some(index);
+                    break;
+                }
+            }
+
+            if let Some(n) = result {
+                n
+            } else {
+                return Err(Error::IndexOutOfBounds(pos, self.total()));
+            }
+        };
+
+        assert!(rule_num < self.rules.len());
+
+        let display_tnt = |tnt| self.name_of_tnt(tnt).unwrap_or_else(|e| format!("{e}"));
+
+        Ok(self
+            .rules
+            .get(rule_num)
+            .unwrap()
+            .regex
+            .to_string_with_dot(
+                display_tnt,
+                if rule_num == 0 {
+                    pos
+                } else {
+                    pos - self.accumulators.get(rule_num - 1).copied().unwrap()
+                },
+            )
+            .unwrap())
+    }
 }
 
 pub mod first_set;
 
 pub mod left_closure;
 
+pub mod label;
+
+pub use label::{GrammarLabel, GrammarLabelType};
+
 impl Display for Grammar {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         assert_eq!(self.non.len(), self.rules.len());
diff --git a/grammar/src/test_grammar_helper.rs b/grammar/src/test_grammar_helper.rs
index 89f9844..984eb50 100644
--- a/grammar/src/test_grammar_helper.rs
+++ b/grammar/src/test_grammar_helper.rs
@@ -45,7 +45,7 @@ fn scan_tnt(
             'T' => {
                 let mut name = String::new();
 
-                while let Some(c) = chars.next() {
+                for c in chars.by_ref() {
                     if ('a'..='z').contains(&c) || ('A'..='Z').contains(&c) {
                         len += 1;
                         write!(name, "{c}").map_err(|_| ParseError::InvalidCharacter(c))?;
@@ -63,7 +63,7 @@ fn scan_tnt(
             'N' => {
                 let mut name = String::new();
 
-                while let Some(c) = chars.next() {
+                for c in chars.by_ref() {
                     if ('a'..='z').contains(&c) || ('A'..='Z').contains(&c) {
                         len += 1;
                         write!(name, "{c}").map_err(|_| ParseError::InvalidCharacter(c))?;
@@ -124,6 +124,130 @@ pub fn new_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
     Ok(Grammar::new(ter, non, rules))
 }
 
+/// Return a grammar that might serve as the grammar for my notes,
+/// somehow, without regular expressions.
+#[allow(dead_code)]
+pub fn new_notes_grammar_no_regexp() -> Result<Grammar, Box<dyn std::error::Error>> {
+    let ter = vec![
+        Terminal::new("NL".to_owned()),
+        Terminal::new("SP".to_owned()),
+        Terminal::new("CON".to_owned()),
+        Terminal::new("STAR".to_owned()),
+        Terminal::new("NOTE".to_owned()),
+        Terminal::new("PRICE".to_owned()),
+        Terminal::new("DIGIT".to_owned()),
+    ];
+    let non = vec![
+        Nonterminal("document".to_owned()),
+        Nonterminal("item".to_owned()),
+        Nonterminal("header".to_owned()),
+        Nonterminal("title".to_owned()),
+        Nonterminal("note".to_owned()),
+        Nonterminal("note-content".to_owned()),
+        Nonterminal("price".to_owned()),
+        Nonterminal("ending".to_owned()),
+        Nonterminal("digits".to_owned()),
+    ];
+
+    let mut regex_parser: DefaultRegParser<TNT> = Default::default();
+
+    regex_parser.add_tnt("NL", true);
+    regex_parser.add_tnt("SP", true);
+    regex_parser.add_tnt("CON", true);
+    regex_parser.add_tnt("STAR", true);
+    regex_parser.add_tnt("note", true);
+    regex_parser.add_tnt("price", true);
+    regex_parser.add_tnt("DIGIT", true);
+    regex_parser.add_tnt("document", false);
+    regex_parser.add_tnt("item", false);
+    regex_parser.add_tnt("header", false);
+    regex_parser.add_tnt("title", false);
+    regex_parser.add_tnt("note", false);
+    regex_parser.add_tnt("notecontent", false);
+    regex_parser.add_tnt("price", false);
+    regex_parser.add_tnt("ending", false);
+    regex_parser.add_tnt("digits", false);
+
+    let regex_parser = regex_parser;
+
+    let rule1 = Rule {
+        regex: regex_parser
+            .parse("Nitem | Nitem Ndocument", Box::new(scan_tnt), true)?
+            .ok_or(ParseError::Invalid)?
+            .0,
+    };
+
+    let rule2 = Rule {
+        regex: regex_parser
+            .parse(
+                "Nheader | Nheader Nprice | Nheader Nnote | Nheader Nprice Nnote",
+                Box::new(scan_tnt),
+                true,
+            )?
+            .ok_or(ParseError::Invalid)?
+            .0,
+    };
+
+    let rule3 = Rule {
+        regex: regex_parser
+            .parse(
+                "TSP Ntitle TNL Nending | TSTAR TSP Ntitle TNL Nending",
+                Box::new(scan_tnt),
+                true,
+            )?
+            .ok_or(ParseError::Invalid)?
+            .0,
+    };
+
+    let rule4 = Rule {
+        regex: regex_parser
+            .parse("TCON | TCON Ntitle", Box::new(scan_tnt), true)?
+            .ok_or(ParseError::Invalid)?
+            .0,
+    };
+
+    let rule5 = Rule {
+        regex: regex_parser
+            .parse("Tnote Nnotecontent TNL Nending", Box::new(scan_tnt), true)?
+            .ok_or(ParseError::Invalid)?
+            .0,
+    };
+
+    let rule6 = Rule {
+        regex: regex_parser
+            .parse("TCON | TCON Nnotecontent", Box::new(scan_tnt), true)?
+            .ok_or(ParseError::Invalid)?
+            .0,
+    };
+
+    let rule7 = Rule {
+        regex: regex_parser
+            .parse("Tprice TSP Ndigits TNL Nending", Box::new(scan_tnt), true)?
+            .ok_or(ParseError::Invalid)?
+            .0,
+    };
+
+    let rule8 = Rule {
+        regex: regex_parser
+            .parse("TNL Nending | TSP Nending | ()", Box::new(scan_tnt), true)?
+            .ok_or(ParseError::Invalid)?
+            .0,
+    };
+
+    let rule9 = Rule {
+        regex: regex_parser
+            .parse("TDIGIT | TDIGIT Ndigits", Box::new(scan_tnt), true)?
+            .ok_or(ParseError::Invalid)?
+            .0,
+    };
+
+    let rules = vec![
+        rule1, rule2, rule3, rule4, rule5, rule6, rule7, rule8, rule9,
+    ];
+
+    Ok(Grammar::new(ter, non, rules))
+}
+
 /// Return a grammar that might serve as the grammar for my notes,
 /// somehow.
 #[allow(dead_code)]
-- 
cgit v1.2.3-18-g5258