summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2023-02-03 10:52:35 +0800
committerJSDurand <mmemmew@gmail.com>2023-02-03 10:52:35 +0800
commit265ff8f87dc7392fdf701f811eb2bf54d7bc6678 (patch)
tree35538c8ac7524e0d9f2acff8be21b72994728bc4
parentf28155105134b90fd86049c65478d307e0d8dbbc (diff)
Finally produced the first correct forest
Finally the prototype parser has produced the first correct forest. It is my first time to generate a correct forest, in fact, ever since the beginning of this project.
-rw-r--r--chain/src/atom/default.rs19
-rw-r--r--chain/src/default.rs82
-rw-r--r--chain/src/item/default.rs125
-rw-r--r--chain/src/item/genins.rs318
-rw-r--r--chain/src/item/mod.rs1
-rw-r--r--chain/src/plan.org6
-rw-r--r--grammar/src/left_closure.rs8
-rw-r--r--grammar/src/lib.rs160
-rw-r--r--grammar/src/test_grammar_helper.rs5
-rw-r--r--graph/src/labelled/binary.rs47
-rw-r--r--graph/src/lib.rs6
-rw-r--r--nfa/src/default/regex.rs1
12 files changed, 620 insertions, 158 deletions
diff --git a/chain/src/atom/default.rs b/chain/src/atom/default.rs
index e88cfc9..9c91296 100644
--- a/chain/src/atom/default.rs
+++ b/chain/src/atom/default.rs
@@ -48,18 +48,31 @@ pub struct DefaultAtom {
impl DefaultAtom {
/// Return the string description of a rule position.
pub fn rule_pos_string(&self, pos: usize) -> Result<String, Box<dyn std::error::Error>> {
+ if pos == self.grammar.total() {
+ return Ok("End of rules".to_owned());
+ }
+
let rule_num = self.grammar.get_rule_num(pos)?;
assert!(rule_num < self.grammar.non_num());
- let display_tnt = |tnt| self.name_of_tnt(tnt).unwrap_or_else(|e| format!("{e}"));
+ let display_tnt = |tnt| {
+ format!(
+ " {} ",
+ self.name_of_tnt(match tnt {
+ TNT::Non(_) => tnt,
+ TNT::Ter(t) => self.unpack_tnt(t).unwrap(),
+ })
+ .unwrap_or_else(|e| format!("{e}"))
+ )
+ };
Ok(self.regexp.get(rule_num).unwrap().to_string_with_dot(
display_tnt,
if rule_num == 0 {
pos
} else {
- pos - self.grammar.nth_accumulator(rule_num - 1)?
+ pos - self.grammar.nth_accumulator(rule_num)?
},
)?)
}
@@ -394,8 +407,6 @@ impl DefaultAtom {
}
}
- // dbg!(&accumulators);
-
for nt in 0..nt_num {
let children: std::collections::HashMap<_, _> = nfa
// This is safe because of the above assertion.
diff --git a/chain/src/default.rs b/chain/src/default.rs
index 77a64ca..5e94623 100644
--- a/chain/src/default.rs
+++ b/chain/src/default.rs
@@ -8,7 +8,8 @@
use super::*;
use crate::atom::{Atom, DefaultAtom};
use crate::item::{
- default::DefaultForest, generate_fragment, Forest, ForestLabel, ForestLabelError,
+ default::DefaultForest, generate_fragment, genins::virtual_generate_fragment, Forest,
+ ForestLabel, ForestLabelError,
};
use core::fmt::Display;
use grammar::{Error as GrammarError, GrammarLabel, GrammarLabelType, TNT};
@@ -35,6 +36,8 @@ pub enum Error {
CannotClone(ForestLabelError),
/// Cannot find a suitable node to plant the new forest fragment.
CannotPlant,
+ /// Trying to insert an empty item.
+ EmptyItem,
/// An invalid situation happens.
Invalid,
}
@@ -52,6 +55,7 @@ impl Display for Error {
Self::CannotReserve(e) => write!(f, "cannot reserve memory: {e}"),
Self::CannotClone(fe) => write!(f, "cannot clone due to {fe}"),
Self::CannotPlant => write!(f, "cannot plant a new forest fragment"),
+ Self::EmptyItem => write!(f, "trying to insert an empty item"),
Self::Invalid => write!(f, "invalid"),
}
}
@@ -393,7 +397,6 @@ impl Chain for DefaultChain {
.is_nullable(0)
.map_err(|_| Error::IndexOutOfBounds(0, atom.non_num()))?;
- // NOTE: Is it really correct to use `Parent::new(0, 0)` here?
builder.add_edge(
first,
root,
@@ -585,15 +588,13 @@ impl Chain for DefaultChain {
)?;
}
Some(Non(non)) => {
- let first_fragment =
- generate_fragment([atom_moved.into(), Non(non).into()], pos)?;
-
- let first_segment_pase = self.forest.insert_item(
- *label,
- first_fragment,
- atom_child_iter.clone(),
- &self.atom,
- )?;
+ if !self
+ .atom
+ .is_first_of(non, t)
+ .map_err(index_out_of_bounds_conversion)?
+ {
+ continue;
+ }
let virtual_node = self
.atom
@@ -601,59 +602,30 @@ impl Chain for DefaultChain {
.map_err(index_out_of_bounds_conversion)?;
if let Some(virtual_node) = virtual_node {
+ let first_fragment =
+ generate_fragment([atom_moved.into(), Non(non).into()], pos)?;
+
+ let first_segment_pase = self.forest.insert_item(
+ *label,
+ first_fragment,
+ atom_child_iter.clone(),
+ &self.atom,
+ )?;
+
let accepting = self
.atom
.is_accepting(virtual_node)
.map_err(index_out_of_bounds_conversion)?;
- let virtual_fragment = {
- // `non` is valid, as checked above
- let non_start = self.atom.nth_accumulator(non).unwrap() * 2;
-
- let mut result = DefaultForest::default();
-
- for (label, child_iter) in self.atom.labels_of(non_start)? {
- if matches!(*label.get_value(),
- Some(Ter(ter)) if ter == t)
- {
- for child in child_iter {
- let mut line: Vec<_> = self
- .atom
- .query_expansion(non_start, child)
- .map_err(index_out_of_bounds_conversion)?
- .iter()
- .copied()
- .flatten()
- .map(|(nt, rule)| [(*rule).into(), Non(*nt).into()])
- .collect();
-
- line.push([label.get_moved().into(), Ter(t).into()]);
-
- let line: Vec<GrammarLabelType> =
- line.into_iter().flatten().collect();
-
- if result.is_empty() {
- result = generate_fragment(line, pos)?;
- } else {
- result.plant(
- 0,
- generate_fragment(line, pos)?,
- false,
- )?;
- }
- }
- }
- }
-
- result
- };
+ let virtual_fragment =
+ virtual_generate_fragment(&self.atom, non, t, pos)?;
// NOTE: We only need the PaSe from the
// first segment, so we pass an empty
// iterator, in which case the passed
// label is only used for the PaSe.
let virtual_pase = self.forest.insert_item(
- Edge::new(0, first_segment_pase, true),
+ Edge::new(0, first_segment_pase, accepting),
virtual_fragment,
std::iter::empty(),
&self.atom,
@@ -810,8 +782,8 @@ mod test_chain {
let atom = DefaultAtom::from_grammar(grammar)?;
let mut chain = DefaultChain::unit(atom)?;
-
chain.chain(3, 00)?;
+ dbg!("hi");
chain.chain(1, 01)?;
chain.chain(2, 02)?;
chain.chain(2, 03)?;
@@ -831,6 +803,8 @@ mod test_chain {
{
chain.graph.print_viz("chain.gv")?;
chain.atom.print_nfa("nfa.gv")?;
+ chain.forest.print_viz("forest.gv")?;
+ item::default::print_labels(&chain.atom, &chain.forest)?;
}
Ok(())
diff --git a/chain/src/item/default.rs b/chain/src/item/default.rs
index b71f940..f9d26ec 100644
--- a/chain/src/item/default.rs
+++ b/chain/src/item/default.rs
@@ -2,6 +2,8 @@
//! forest.
use super::*;
+use crate::atom::default::DefaultAtom;
+use grammar::{GrammarLabel, GrammarLabelType};
use graph::{
builder::BuilderMut, labelled::binary::PLGBuilderMut, Graph, LabelGraph, PLGraph, RedirectGraph,
};
@@ -125,6 +127,11 @@ impl<T: GraphLabel> Graph for DefaultForest<T> {
fn replace_by_builder(&mut self, _builder: impl graph::Builder<Result = Self>) {
unimplemented!()
}
+
+ #[inline]
+ fn print_viz(&self, filename: &str) -> Result<(), std::io::Error> {
+ self.graph.print_viz(filename)
+ }
}
impl<T: GraphLabel> ParentsGraph for DefaultForest<T> {
@@ -360,9 +367,18 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> {
stack.push(root);
+ let mut seen_nodes = std::collections::HashSet::<usize>::new();
+
while let Some(top) = stack.pop() {
+ seen_nodes.insert(top);
+
for child in fragment.children_of(top)? {
builder.add_edge(conversion!(top), conversion!(child), root_label)?;
+
+ if !seen_nodes.contains(&child) {
+ seen_nodes.insert(child);
+ stack.push(child);
+ }
}
}
@@ -451,22 +467,82 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> {
}
}
+impl<T: GraphLabel> PartialEq for DefaultForest<ForestLabel<T>> {
+ fn eq(&self, other: &Self) -> bool {
+ let self_root = self.root();
+ let other_root = other.root();
+
+ if (self_root.is_some() && other_root.is_none())
+ || (self_root.is_none() && other_root.is_some())
+ {
+ return false;
+ } else if self_root.is_none() && other_root.is_none() {
+ return true;
+ }
+
+ // both roots are not empty
+
+ let self_root = self_root.unwrap();
+ let other_root = other_root.unwrap();
+
+ self.is_prefix(self_root, other).unwrap_or(false)
+ && other.is_prefix(other_root, self).unwrap_or(false)
+ }
+}
+
+impl<T: GraphLabel> Eq for DefaultForest<ForestLabel<T>> {}
+
+/// Print the labels found in the forest, so that we can easily
+/// understand what those labels mean.
+pub fn print_labels(
+ atom: impl Borrow<DefaultAtom>,
+ forest: impl Borrow<DefaultForest<ForestLabel<GrammarLabel>>>,
+) -> Result<(), Box<dyn std::error::Error>> {
+ let forest = forest.borrow();
+ let atom = atom.borrow();
+
+ for node in forest.nodes() {
+ let label = forest.vertex_label(node)?;
+
+ if let Some(label) = label {
+ let label = label.label.label();
+
+ match label {
+ GrammarLabelType::TNT(tnt) => {
+ println!("{tnt} = {}", atom.name_of_tnt(tnt)?);
+ }
+ GrammarLabelType::Rule(pos) => {
+ println!("pos {pos} = {}", atom.rule_pos_string(pos)?);
+ }
+ }
+ } else {
+ return Err(Error::NodeNoLabel(node).into());
+ }
+ }
+
+ Ok(())
+}
+
+#[allow(unused_macros)]
+macro_rules! leaf (
+ ($label:expr, $type:tt) =>{
+ DefaultForest::<ForestLabel<$type>>::new_leaf($label)
+ };
+ ($label:expr) => {
+ DefaultForest::<ForestLabel<usize>>::new_leaf($label)
+ }
+);
+
+#[allow(unused_imports)]
+pub(crate) use leaf;
+
#[cfg(test)]
mod item_test {
use super::*;
- macro_rules! leaf (
- ($label:expr, $type:tt) =>{
- DefaultForest::<ForestLabel<$type>>::new_leaf($label)
- };
- ($label:expr) => {
- DefaultForest::<ForestLabel<usize>>::new_leaf($label)
- }
- );
-
#[test]
fn test_forest_api() -> Result<(), Box<dyn std::error::Error>> {
- let forest: DefaultForest<usize> = Default::default();
+ let forest: DefaultForest<ForestLabel<usize>> = Default::default();
// empty forest
@@ -537,11 +613,38 @@ mod item_test {
forest.clone_node(5)?;
- assert_eq!(forest.nodes_len(), 7);
+ assert_eq!(forest.nodes_len(), 8);
#[cfg(feature = "test-print-viz")]
forest.print_viz("forest.gv")?;
Ok(())
}
+
+ #[test]
+ fn test_eq() -> Result<(), Box<dyn std::error::Error>> {
+ let mut forest = leaf!(0, usize);
+
+ forest.plant(0, leaf!(1), false)?;
+ forest.plant(0, leaf!(2), false)?;
+ forest.plant(0, leaf!(3), false)?;
+ forest.plant(0, leaf!(4), false)?;
+ forest.plant(2, leaf!(5), false)?;
+
+ let mut test_forest = leaf!(0);
+ test_forest.plant(0, leaf!(1), false)?;
+ test_forest.plant(0, leaf!(2), false)?;
+ test_forest.plant(0, leaf!(3), false)?;
+ test_forest.plant(2, leaf!(5), false)?;
+
+ assert_ne!(forest, test_forest);
+ assert_ne!(test_forest, forest);
+
+ test_forest.plant(0, leaf!(4), false)?;
+
+ assert_eq!(forest, test_forest);
+ assert_eq!(test_forest, forest);
+
+ Ok(())
+ }
}
diff --git a/chain/src/item/genins.rs b/chain/src/item/genins.rs
index 99d9202..9ed3b74 100644
--- a/chain/src/item/genins.rs
+++ b/chain/src/item/genins.rs
@@ -14,6 +14,17 @@ use graph::Graph;
use core::borrow::Borrow;
use std::collections::HashSet as Set;
+/// Convert an error telling us that an index is out of bounds.
+///
+/// # Panics
+///
+/// The function panics if the error is not of the expected kind.
+fn index_out_of_bounds_conversion(ge: GrammarError) -> Error {
+ match ge {
+ GrammarError::IndexOutOfBounds(index, bound) => Error::IndexOutOfBounds(index, bound),
+ _ => Error::Invalid,
+ }
+}
/// A helper function to generate a fragment of forest.
///
/// It simply constructs a root node and then appends
@@ -56,6 +67,53 @@ pub fn generate_fragment(
Ok(result)
}
+/// Generate a virtual fragment representing the left-linear null
+/// closure [nt]^t.
+pub fn virtual_generate_fragment(
+ atom: impl Borrow<DefaultAtom>,
+ nt: usize,
+ t: usize,
+ pos: usize,
+) -> Result<DefaultForest<ForestLabel<GrammarLabel>>, crate::default::Error> {
+ let atom = atom.borrow();
+
+ let non_start = atom.nth_accumulator(nt).unwrap() * 2;
+
+ let mut result = DefaultForest::default();
+
+ for (label, child_iter) in atom.labels_of(non_start)? {
+ if matches!(*label.get_value(),
+ Some(TNT::Ter(ter)) if ter == t)
+ {
+ for child in child_iter {
+ // #[allow(unused_must_use)]
+ // {
+ // dbg!(non_start, child, atom.query_expansion(non_start, child));
+ // }
+
+ let line: Vec<GrammarLabelType> = atom
+ .query_expansion(non_start, child)
+ .map_err(index_out_of_bounds_conversion)?
+ .iter()
+ .copied()
+ .flatten()
+ .flat_map(|(nt, rule)| [(*rule).into(), TNT::Non(*nt).into()])
+ .rev()
+ .chain(std::iter::once(TNT::Ter(t).into()))
+ .collect();
+
+ if result.is_empty() {
+ result = generate_fragment(line, pos)?;
+ } else {
+ result.plant(0, generate_fragment(line, pos)?, false)?;
+ }
+ }
+ }
+ }
+
+ Ok(result)
+}
+
impl DefaultForest<ForestLabel<GrammarLabel>> {
/// Insert an item derivation forest into a recording forest.
///
@@ -68,26 +126,39 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
atom_child_iter: impl Iterator<Item = usize>,
atom: &DefaultAtom,
) -> Result<PaSe, Error> {
- /// Convert an error telling us that an index is out of bounds.
- ///
- /// # Panics
- ///
- /// The function panics if the error is not of the expected
- /// kind.
- fn index_out_of_bounds_conversion(ge: GrammarError) -> Error {
- match ge {
- GrammarError::IndexOutOfBounds(index, bound) => {
- Error::IndexOutOfBounds(index, bound)
- }
- _ => Error::Invalid,
- }
- }
-
let fragment = fragment.borrow();
let pase = label.forest_source();
- let parents: Vec<Parent> = {
+ let _fragment_root = if let Some(root) = fragment.root() {
+ root
+ } else {
+ return Err(Error::EmptyItem);
+ };
+
+ // Ensure the last node in the PaSe is a terminal or a
+ // non-terminal node, as an extra safety guard during
+ // development.
+ #[cfg(debug_assertions)]
+ {
+ if let Some(parent) = pase.parent() {
+ assert!(matches!(
+ self.vertex_label(self.nth_child(parent.node(), parent.edge())?),
+ Ok(Some(label))
+ if label.label().label().tnt().is_some()));
+ } else if let Some((_, leaf)) = pase.segment() {
+ assert!(matches!(
+ self.vertex_label(leaf),
+ Ok(Some(label))
+ if label.label().label().tnt().is_some()));
+ } else {
+ unreachable!("a pase is neither a parent nor a segment");
+ }
+ }
+
+ let mut is_empty_segment = false;
+
+ let mut parents: Vec<Parent> = {
let mut result = Vec::new();
if let Some(parent) = pase.parent() {
@@ -96,7 +167,15 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
let (root, leaf) = pase.segment().unwrap();
let mut seen_nodes = Set::new();
- let mut stack = vec![root];
+ let mut stack = if root == leaf {
+ // an empty segment means the root
+ is_empty_segment = true;
+
+ result.push(Parent::new(root, 0));
+ vec![]
+ } else {
+ vec![root]
+ };
while let Some(top) = stack.pop() {
if seen_nodes.contains(&top) {
@@ -118,7 +197,35 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
result
};
+ for parent in parents.iter() {
+ if !self.has_node(parent.node()) {
+ return Err(Error::IndexOutOfBounds(parent.node(), self.nodes_len()));
+ }
+ }
+
+ if !is_empty_segment {
+ parents = parents
+ .into_iter()
+ .flat_map(|parent| {
+ self.parents_of(parent.node()).unwrap().filter(|n| {
+ matches!(
+ self.vertex_label(n.node())
+ .unwrap()
+ .unwrap()
+ .label()
+ .label()
+ .tnt(),
+ Some(TNT::Non(_))
+ )
+ })
+ })
+ .collect();
+ }
+
+ let mut non_empty = false;
+
for atom_child in atom_child_iter {
+ // dbg!(label.label(), atom_child);
// Find reduction information.
let reduction_info = atom
.query_reduction(label.label(), atom_child)
@@ -142,20 +249,20 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
) {
for parent in self.parents_of(node.node())? {
debug_assert!(matches!(
- self.vertex_label(parent.node()),
- Ok(Some(label)) if
- label
- .label()
- .label()
- .rule()
- .is_some()));
+ self.vertex_label(parent.node()),
+ Ok(Some(label)) if
+ label
+ .label()
+ .label()
+ .rule()
+ .is_some()));
second_stack.extend(self.parents_of(parent.node())?.filter(|n| {
matches!(self.vertex_label(n.node()),
- Ok(Some(label))
- if matches!(
- label.label().label().tnt(),
- Some(TNT::Non(_))))
+ Ok(Some(label))
+ if matches!(
+ label.label().label().tnt(),
+ Some(TNT::Non(_))))
}));
}
}
@@ -169,12 +276,26 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
}
if stack.is_empty() {
+ dbg!(
+ label,
+ atom_child,
+ parents,
+ reduction_info,
+ atom.query_reduction(label.label(), atom_child).unwrap()
+ );
+
+ self.print_viz("dbg forest.gv").unwrap();
+
+ #[cfg(test)]
+ genins_test::print_labels(atom, self).unwrap();
+
return Err(Error::CannotPlant);
}
for parent in stack.into_iter() {
if parent.edge() + 1 >= self.degree(parent.node())? {
- self.plant(parent.node(), fragment, false)?;
+ // dbg!(&fragment);
+ self.plant(parent.node(), fragment, non_empty)?;
} else {
let nth_child = self.nth_child(parent.node(), parent.edge() + 1)?;
@@ -182,11 +303,26 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
// do nothing
continue;
}
+ dbg!("clone?");
let cloned_node = self.clone_node(nth_child)?;
- self.plant(cloned_node, fragment, false)?;
+ self.plant(cloned_node, fragment, non_empty)?;
}
+
+ non_empty = true;
+ }
+ }
+
+ // If the iterator is empty, just plant at the specified
+ // child.
+ if !non_empty {
+ for parent in parents.into_iter() {
+ let nth_child = self.nth_child(parent.node(), parent.edge())?;
+
+ self.plant(nth_child, fragment, non_empty)?;
+
+ non_empty = true;
}
}
@@ -214,11 +350,12 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
unreachable!("the forest is wrongly planted");
}
};
-
+ // dbg!(node, edge, root_label, leaf_label);
PaSe::Parent(node, edge)
} else {
let root_label = fragment.vertex_label(0)?.unwrap();
let leaf_label = fragment.vertex_label(fragment.nodes_len() - 1)?.unwrap();
+ // dbg!(root_label, leaf_label);
PaSe::Segment(
self.query_label(root_label).unwrap(),
@@ -229,3 +366,122 @@ impl DefaultForest<ForestLabel<GrammarLabel>> {
Ok(result)
}
}
+
+#[cfg(test)]
+mod genins_test {
+ use super::*;
+ #[allow(unused_imports)]
+ use crate::{default::DefaultChain, item::default::leaf, Chain};
+
+ use grammar::test_grammar_helper::*;
+
+ pub fn print_labels(
+ atom: impl Borrow<DefaultAtom>,
+ forest: impl Borrow<DefaultForest<ForestLabel<GrammarLabel>>>,
+ ) -> Result<(), Box<dyn std::error::Error>> {
+ let forest = forest.borrow();
+ let atom = atom.borrow();
+
+ for node in forest.nodes() {
+ let label = forest.vertex_label(node)?;
+
+ if let Some(label) = label {
+ let label = label.label.label();
+
+ match label {
+ GrammarLabelType::TNT(tnt) => {
+ println!("{tnt} = {}", atom.name_of_tnt(tnt)?);
+ }
+ GrammarLabelType::Rule(pos) => {
+ println!("pos {pos} = {}", atom.rule_pos_string(pos)?);
+ }
+ }
+ } else {
+ return Err(Error::NodeNoLabel(node).into());
+ }
+ }
+
+ Ok(())
+ }
+
+ #[test]
+ fn test_generate_fragment() -> Result<(), Box<dyn std::error::Error>> {
+ let grammar = new_notes_grammar()?;
+
+ let atom = DefaultAtom::from_grammar(grammar)?;
+
+ #[cfg(feature = "test-print-viz")]
+ atom.print_nfa("genins nfa.gv")?;
+
+ let fragment = generate_fragment([72.into(), TNT::Non(0).into()], 0)?;
+
+ let mut test_fragment = leaf!(
+ GrammarLabel::new(GrammarLabelType::from(72), 0),
+ GrammarLabel
+ );
+
+ test_fragment.plant(
+ 0,
+ leaf!(
+ GrammarLabel::new(GrammarLabelType::from(TNT::Non(0)), 0),
+ GrammarLabel
+ ),
+ false,
+ )?;
+
+ assert_eq!(fragment, test_fragment);
+
+ // virtual fragments
+
+ println!("nt = 0, t = 3");
+
+ let virtual_fragment = virtual_generate_fragment(&atom, 0, 3, 0)?;
+
+ assert_eq!(virtual_fragment.nodes_len(), 7);
+
+ let virtual_node = virtual_fragment.vertex_label(5)?.unwrap().label();
+
+ let test_fragment = generate_fragment(
+ [
+ TNT::Non(0).into(),
+ 2.into(),
+ TNT::Non(1).into(),
+ 8.into(),
+ TNT::Non(2).into(),
+ virtual_node.label(),
+ TNT::Ter(3).into(),
+ ],
+ 0,
+ )?;
+
+ print_labels(&atom, &virtual_fragment)?;
+
+ assert_eq!(virtual_fragment, test_fragment);
+
+ #[cfg(feature = "test-print-viz")]
+ virtual_fragment.print_viz("virtual fragment (0, 3).gv")?;
+
+ println!("nt = 3, t = 2");
+
+ let virtual_fragment = virtual_generate_fragment(&atom, 3, 2, 1)?;
+
+ let test_fragment =
+ generate_fragment([TNT::Non(3).into(), 38.into(), TNT::Ter(2).into()], 1)?;
+
+ print_labels(&atom, &virtual_fragment)?;
+
+ assert_eq!(virtual_fragment, test_fragment);
+
+ #[cfg(feature = "test-print-viz")]
+ virtual_fragment.print_viz("virtual fragment (3, 2).gv")?;
+
+ // querying reductions
+
+ assert!(matches!(atom.query_reduction(17, 9), Ok(Some(&[1]))));
+
+ assert!(matches!(atom.query_reduction(35, 9), Ok(Some(&[1, 2]))));
+ assert!(matches!(atom.query_reduction(35, 25), Ok(Some(&[2]))));
+
+ Ok(())
+ }
+}
diff --git a/chain/src/item/mod.rs b/chain/src/item/mod.rs
index 7fafcc8..c614802 100644
--- a/chain/src/item/mod.rs
+++ b/chain/src/item/mod.rs
@@ -26,7 +26,6 @@ use core::borrow::Borrow;
///
/// A segment represents every edge from the root node to the single
/// terminating node.
-#[allow(unused)]
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum PaSe {
/// An edge from a node, as the n-th child.
diff --git a/chain/src/plan.org b/chain/src/plan.org
index 1da33cc..77d000c 100644
--- a/chain/src/plan.org
+++ b/chain/src/plan.org
@@ -79,9 +79,9 @@
lack of this step might be the root cause of the failure of the
previous version of this project.
+ [X] Test atoms
-- [ ] Implement semiring. [0/5]
- + [ ] Define the trait.
- + [ ] Define items and rules for the chain-rule parser, as an
+- [-] Implement semiring. [2/5]
+ + [X] Define the trait.
+ + [X] Define items and rules for the chain-rule parser, as an
item-based description.
+ [ ] Implement the boolean semiring.
+ [ ] Implement the natural number semiring.
diff --git a/grammar/src/left_closure.rs b/grammar/src/left_closure.rs
index 39461f0..ddee28d 100644
--- a/grammar/src/left_closure.rs
+++ b/grammar/src/left_closure.rs
@@ -25,9 +25,11 @@ impl Grammar {
GrammarState::AfterComputeFirst,
))
}
- GrammarState::AfterLeftClosure
- | GrammarState::AfterNFA
- | GrammarState::AfterComputeFirst => {}
+ GrammarState::AfterComputeFirst => {
+ self.state = GrammarState::AfterLeftClosure;
+ }
+
+ _ => {}
}
let non_len = self.non_num();
diff --git a/grammar/src/lib.rs b/grammar/src/lib.rs
index 2c17a5f..a8e0fd7 100644
--- a/grammar/src/lib.rs
+++ b/grammar/src/lib.rs
@@ -338,7 +338,7 @@ impl Grammar {
self.accumulators
.get(n)
.copied()
- .ok_or_else(|| Error::IndexOutOfBounds(n, self.total()))
+ .ok_or_else(|| Error::IndexOutOfBounds(n, self.non_num()))
}
/// Return the index of the rules a rule position belongs to.
@@ -476,17 +476,28 @@ impl Grammar {
}
}
+ /// Return true if and only if the terminal can appear as the
+ /// first terminal in a string expanded from the non-terminal.
+ #[inline]
+ pub fn is_first_of(&self, non_terminal: usize, terminal: usize) -> Result<bool, Error> {
+ Ok(self
+ .firsts
+ .get(non_terminal)
+ .ok_or(Error::IndexOutOfBounds(non_terminal, self.firsts.len()))?
+ .contains(&Some(terminal)))
+ }
+
/// Return true if and only if the non-terminal is nullable.
#[inline]
pub fn is_nullable(&self, non_terminal: usize) -> Result<bool, Error> {
Ok(self
.firsts
.get(non_terminal)
- .ok_or(Error::IndexOutOfBounds(non_terminal, self.non.len()))?
+ .ok_or(Error::IndexOutOfBounds(non_terminal, self.firsts.len()))?
.contains(&None))
}
- // FIXME: We shall use a label to query this information as well,
+ // REVIEW: We shall use a label to query this information as well,
// probably.
/// Query the expansion information from the position `pos1` to
@@ -497,14 +508,6 @@ impl Grammar {
pos1: usize,
pos2: usize,
) -> Result<Option<&[(usize, usize)]>, Error> {
- if pos1 >= self.total() {
- return Err(Error::IndexOutOfBounds(pos1, self.total()));
- }
-
- if pos2 >= self.total() {
- return Err(Error::IndexOutOfBounds(pos2, self.total()));
- }
-
match self.state {
GrammarState::AfterLeftClosure => {}
_ => {
@@ -522,14 +525,6 @@ impl Grammar {
/// the position `pos2` .
#[inline]
pub fn query_reduction(&self, pos1: usize, pos2: usize) -> Result<Option<&[usize]>, Error> {
- if pos1 >= self.total() {
- return Err(Error::IndexOutOfBounds(pos1, self.total()));
- }
-
- if pos2 >= self.total() {
- return Err(Error::IndexOutOfBounds(pos2, self.total()));
- }
-
match self.state {
GrammarState::AfterLeftClosure => {}
_ => {
@@ -591,43 +586,122 @@ impl Grammar {
let mut left_p = first_label.is_left_p() || second_label.is_left_p();
+ // if first_source == 0 && second_label.get_moved() == 15 {
+ // dbg!(second_source, second_target, first_label, second_label);
+ // dbg!(self.expansion_map.get(&(second_source, second_target)));
+ // dbg!(self.expansion_map.get(&(first_source, second_target)));
+ // }
+
// Record left-linear expansion information.
- if let Some(second_nt) = self.get_nt_start_in_nfa(second_source) {
+ let original_expansion = self
+ .expansion_map
+ .get(&(second_source, second_target))
+ .cloned();
+
+ let second_nt_start = self.get_nt_start_in_nfa(second_source).is_some();
+
+ if !second_nt_start
+ && !matches!(self.expansion_map.get(&(first_source, second_target)),
+ Some(expansion)
+ if expansion.len() >=
+ original_expansion
+ .as_ref()
+ .map(|vec| vec.len())
+ .unwrap_or(1))
+ {
+ if let Some(original_expansion) = &original_expansion {
+ self.expansion_map
+ .insert((first_source, second_target), original_expansion.clone());
+ } else {
+ let this_nt = self
+ .get_rule_num(second_source.div_euclid(2))
+ .unwrap_or_else(|_| self.non_num());
+
+ self.expansion_map.insert(
+ (first_source, second_target),
+ vec![(this_nt, second_label.get_moved())],
+ );
+ }
+ } else if second_nt_start {
left_p = true;
- if !self
- .expansion_map
- .contains_key(&(first_source, second_target))
+ let original_moved = match self.expansion_map.get(&(first_source, second_source)) {
+ Some(old_expansion) if !old_expansion.is_empty() => old_expansion.last().unwrap().1,
+ _ => first_label.get_moved(),
+ };
+
+ let original_nt = self
+ .get_rule_num(first_source.div_euclid(2))
+ .unwrap_or_else(|_| self.non_num());
+
+ if !matches!(self.expansion_map.get(&(first_source, second_target)),
+ Some(expansion)
+ if expansion.len() >=
+ original_expansion
+ .as_ref()
+ .map(|vec| vec.len() + 1)
+ .unwrap_or(1))
{
- let original_expansion = self.expansion_map.get(&(second_source, second_target));
-
self.expansion_map.insert(
(first_source, second_target),
if let Some(original_expansion) = original_expansion {
- let mut result = original_expansion.clone();
- result.push((second_nt, second_label.get_moved()));
+ let mut result = original_expansion;
+ result.push((original_nt, original_moved));
result
} else {
- vec![(second_nt, second_label.get_moved())]
+ vec![(original_nt, original_moved)]
},
);
}
- } else if let Some(second_nt) = self.get_nt_end_in_nfa(second_source) {
- let original_reduction = self.reduction_map.get(&(second_source, second_target));
+ }
- self.reduction_map.insert(
- (first_source, second_target),
- if let Some(original_reduction) = original_reduction {
- let mut result = original_reduction.clone();
- result.push(second_nt);
+ // Record reduction information.
- result
- } else {
- vec![second_nt]
- },
- );
+ let original_reduction = self
+ .reduction_map
+ .get(&(second_source, second_target))
+ .cloned();
+
+ let second_nt_end = self.get_nt_end_in_nfa(second_source);
+
+ if second_nt_end.is_none()
+ && !matches!(self.reduction_map.get(&(first_source, second_target)),
+ Some(reduction)
+ if reduction.len() >=
+ original_reduction
+ .as_ref()
+ .map(|vec| vec.len())
+ .unwrap_or(0))
+ {
+ if let Some(original_reduction) = &original_reduction {
+ self.reduction_map
+ .insert((first_source, second_target), original_reduction.clone());
+ }
+ }
+
+ if let Some(second_nt) = second_nt_end {
+ if !matches!(self.reduction_map.get(&(first_source, second_target)),
+ Some(reduction)
+ if reduction.len() >=
+ original_reduction
+ .as_ref()
+ .map(|vec| vec.len() + 1)
+ .unwrap_or(1))
+ {
+ self.reduction_map.insert(
+ (first_source, second_target),
+ if let Some(original_reduction) = original_reduction {
+ let mut result = original_reduction;
+ result.push(second_nt);
+
+ result
+ } else {
+ vec![second_nt]
+ },
+ );
+ }
}
NfaLabel::new(second_label.get_value(), second_label.get_moved(), left_p)
@@ -659,6 +733,10 @@ impl Grammar {
/// Return a string describing a rule position.
pub fn rule_pos_to_string(&self, pos: usize) -> Result<String, Error> {
+ if pos == self.total() {
+ return Ok("End of rules".to_owned());
+ }
+
let rule_num = {
let mut result = None;
@@ -690,7 +768,7 @@ impl Grammar {
if rule_num == 0 {
pos
} else {
- pos - self.accumulators.get(rule_num - 1).copied().unwrap()
+ pos - self.accumulators.get(rule_num).copied().unwrap()
},
)
.unwrap())
diff --git a/grammar/src/test_grammar_helper.rs b/grammar/src/test_grammar_helper.rs
index 984eb50..9d86865 100644
--- a/grammar/src/test_grammar_helper.rs
+++ b/grammar/src/test_grammar_helper.rs
@@ -88,7 +88,6 @@ fn scan_tnt(
}
/// Return a simple testing grammar.
-#[allow(dead_code)]
pub fn new_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
let ter = vec![Terminal::new("a".to_owned()), Terminal::new("b".to_owned())];
let non = vec![
@@ -126,7 +125,6 @@ pub fn new_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
/// Return a grammar that might serve as the grammar for my notes,
/// somehow, without regular expressions.
-#[allow(dead_code)]
pub fn new_notes_grammar_no_regexp() -> Result<Grammar, Box<dyn std::error::Error>> {
let ter = vec![
Terminal::new("NL".to_owned()),
@@ -250,7 +248,6 @@ pub fn new_notes_grammar_no_regexp() -> Result<Grammar, Box<dyn std::error::Erro
/// Return a grammar that might serve as the grammar for my notes,
/// somehow.
-#[allow(dead_code)]
pub fn new_notes_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
let ter = vec![
Terminal::new("NL".to_owned()),
@@ -353,7 +350,6 @@ pub fn new_notes_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
}
/// Return a grammar that can express parentheses.
-#[allow(dead_code)]
pub fn new_paren_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
let ter = vec![
Terminal::new("LEFT".to_owned()),
@@ -444,7 +440,6 @@ pub fn new_left_recursive_grammar() -> Result<Grammar, Box<dyn std::error::Error
}
/// Return a right recursive grammar.
-#[allow(dead_code)]
pub fn new_right_recursive_grammar() -> Result<Grammar, Box<dyn std::error::Error>> {
let ter = vec![Terminal::new("B".to_owned()), Terminal::new("C".to_owned())];
let non = vec![
diff --git a/graph/src/labelled/binary.rs b/graph/src/labelled/binary.rs
index 201dda2..4ec7378 100644
--- a/graph/src/labelled/binary.rs
+++ b/graph/src/labelled/binary.rs
@@ -203,8 +203,51 @@ impl<T: GraphLabel> Graph for PLGraph<T> {
unimplemented!("use a `PLGBuilderMut` instead")
}
- fn print_viz(&self, _filename: &str) -> Result<(), std::io::Error> {
- todo!()
+ fn print_viz(&self, filename: &str) -> Result<(), std::io::Error> {
+ let filename = format!("output/{filename}");
+
+ let preamble = "digraph nfa {
+ fontname=\"Helvetica,Arial,sans-serif\"
+ node [fontname=\"Helvetica,Arial,sans-serif\"]
+ edge [fontname=\"Helvetica,Arial,sans-serif\"]
+ rankdir=LR;\n";
+
+ let mut post = String::new();
+
+ for node in self.nodes() {
+ post.push_str(&format!(
+ " {node} [label = \"{}\"]\n",
+ match self.vertex_label(node) {
+ Ok(Some(label)) => {
+ format!("{label}")
+ }
+ _ => {
+ " ε ".to_owned()
+ }
+ }
+ ));
+ }
+
+ for (source, target) in self.edges() {
+ post.push_str(&format!(" {source} -> {target}\n"));
+ }
+
+ post.push_str("}\n");
+
+ let result = format!("{preamble}{post}");
+
+ if std::fs::metadata(&filename).is_ok() {
+ std::fs::remove_file(&filename)?;
+ }
+
+ let mut file = std::fs::File::options()
+ .write(true)
+ .create(true)
+ .open(&filename)?;
+
+ use std::io::Write;
+
+ file.write_all(result.as_bytes())
}
}
diff --git a/graph/src/lib.rs b/graph/src/lib.rs
index 2a0c50d..87f39c0 100644
--- a/graph/src/lib.rs
+++ b/graph/src/lib.rs
@@ -129,8 +129,10 @@ pub trait Graph: Default {
let result = format!("{preamble}{post}");
- if std::fs::metadata(filename).is_ok() {
- std::fs::remove_file(filename)?;
+ let filename = format!("output/{filename}");
+
+ if std::fs::metadata(&filename).is_ok() {
+ std::fs::remove_file(&filename)?;
}
let mut file = std::fs::File::options()
diff --git a/nfa/src/default/regex.rs b/nfa/src/default/regex.rs
index 9e1ed5c..5e0d53b 100644
--- a/nfa/src/default/regex.rs
+++ b/nfa/src/default/regex.rs
@@ -879,7 +879,6 @@ mod test_des_rec {
use crate::desrec::DesRec;
- #[allow(dead_code)]
fn test_scanner<'a, 'b, T>(
_parser: &'b DefaultRegParser<T>,
input: &'a str,