author     JSDurand <mmemmew@gmail.com>    2023-02-03 10:52:35 +0800
committer  JSDurand <mmemmew@gmail.com>    2023-02-03 10:52:35 +0800
commit     265ff8f87dc7392fdf701f811eb2bf54d7bc6678 (patch)
tree       35538c8ac7524e0d9f2acff8be21b72994728bc4
parent     f28155105134b90fd86049c65478d307e0d8dbbc (diff)
Finally produced the first correct forest
Finally the prototype parser has produced the first correct forest.
In fact, this is the first time a correct forest has been generated
since the beginning of this project.
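For context, the updated test in chain/src/default.rs drives the parser roughly as sketched below. The module paths and the chain.forest field access mirror the in-crate test code in this patch; the grammar helper is an assumption, since the test's grammar setup lies outside the changed hunks.

// Sketch (inside the `chain` crate) of how the test exercises the parser.
// Assumption: the grammar comes from one of the helpers in
// grammar/src/test_grammar_helper.rs, e.g. new_notes_grammar.
use grammar::test_grammar_helper::new_notes_grammar;

use crate::atom::DefaultAtom;
use crate::{default::DefaultChain, item, Chain};

fn produce_first_forest() -> Result<(), Box<dyn std::error::Error>> {
    // Build the atomic language from a test grammar.
    let grammar = new_notes_grammar()?;
    let atom = DefaultAtom::from_grammar(grammar)?;

    // Start a chain-rule parse and feed one terminal per input position.
    let mut chain = DefaultChain::unit(atom)?;
    chain.chain(3, 0)?;
    chain.chain(1, 1)?;
    chain.chain(2, 2)?;
    chain.chain(2, 3)?;

    // Dump the derivation forest and explain its labels, as the test does
    // under the "test-print-viz" feature.
    chain.forest.print_viz("forest.gv")?;
    item::default::print_labels(&chain.atom, &chain.forest)?;

    Ok(())
}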
-rw-r--r--  chain/src/atom/default.rs            19
-rw-r--r--  chain/src/default.rs                 82
-rw-r--r--  chain/src/item/default.rs           125
-rw-r--r--  chain/src/item/genins.rs            318
-rw-r--r--  chain/src/item/mod.rs                 1
-rw-r--r--  chain/src/plan.org                    6
-rw-r--r--  grammar/src/left_closure.rs           8
-rw-r--r--  grammar/src/lib.rs                  160
-rw-r--r--  grammar/src/test_grammar_helper.rs    5
-rw-r--r--  graph/src/labelled/binary.rs         47
-rw-r--r--  graph/src/lib.rs                      6
-rw-r--r--  nfa/src/default/regex.rs              1
12 files changed, 620 insertions, 158 deletions
diff --git a/chain/src/atom/default.rs b/chain/src/atom/default.rs index e88cfc9..9c91296 100644 --- a/chain/src/atom/default.rs +++ b/chain/src/atom/default.rs @@ -48,18 +48,31 @@ pub struct DefaultAtom { impl DefaultAtom { /// Return the string description of a rule position. pub fn rule_pos_string(&self, pos: usize) -> Result<String, Box<dyn std::error::Error>> { + if pos == self.grammar.total() { + return Ok("End of rules".to_owned()); + } + let rule_num = self.grammar.get_rule_num(pos)?; assert!(rule_num < self.grammar.non_num()); - let display_tnt = |tnt| self.name_of_tnt(tnt).unwrap_or_else(|e| format!("{e}")); + let display_tnt = |tnt| { + format!( + " {} ", + self.name_of_tnt(match tnt { + TNT::Non(_) => tnt, + TNT::Ter(t) => self.unpack_tnt(t).unwrap(), + }) + .unwrap_or_else(|e| format!("{e}")) + ) + }; Ok(self.regexp.get(rule_num).unwrap().to_string_with_dot( display_tnt, if rule_num == 0 { pos } else { - pos - self.grammar.nth_accumulator(rule_num - 1)? + pos - self.grammar.nth_accumulator(rule_num)? }, )?) } @@ -394,8 +407,6 @@ impl DefaultAtom { } } - // dbg!(&accumulators); - for nt in 0..nt_num { let children: std::collections::HashMap<_, _> = nfa // This is safe because of the above assertion. diff --git a/chain/src/default.rs b/chain/src/default.rs index 77a64ca..5e94623 100644 --- a/chain/src/default.rs +++ b/chain/src/default.rs @@ -8,7 +8,8 @@ use super::*; use crate::atom::{Atom, DefaultAtom}; use crate::item::{ - default::DefaultForest, generate_fragment, Forest, ForestLabel, ForestLabelError, + default::DefaultForest, generate_fragment, genins::virtual_generate_fragment, Forest, + ForestLabel, ForestLabelError, }; use core::fmt::Display; use grammar::{Error as GrammarError, GrammarLabel, GrammarLabelType, TNT}; @@ -35,6 +36,8 @@ pub enum Error { CannotClone(ForestLabelError), /// Cannot find a suitable node to plant the new forest fragment. CannotPlant, + /// Trying to insert an empty item. + EmptyItem, /// An invalid situation happens. Invalid, } @@ -52,6 +55,7 @@ impl Display for Error { Self::CannotReserve(e) => write!(f, "cannot reserve memory: {e}"), Self::CannotClone(fe) => write!(f, "cannot clone due to {fe}"), Self::CannotPlant => write!(f, "cannot plant a new forest fragment"), + Self::EmptyItem => write!(f, "trying to insert an empty item"), Self::Invalid => write!(f, "invalid"), } } @@ -393,7 +397,6 @@ impl Chain for DefaultChain { .is_nullable(0) .map_err(|_| Error::IndexOutOfBounds(0, atom.non_num()))?; - // NOTE: Is it really correct to use `Parent::new(0, 0)` here? builder.add_edge( first, root, @@ -585,15 +588,13 @@ impl Chain for DefaultChain { )?; } Some(Non(non)) => { - let first_fragment = - generate_fragment([atom_moved.into(), Non(non).into()], pos)?; - - let first_segment_pase = self.forest.insert_item( - *label, - first_fragment, - atom_child_iter.clone(), - &self.atom, - )?; + if !self + .atom + .is_first_of(non, t) + .map_err(index_out_of_bounds_conversion)? 
+ { + continue; + } let virtual_node = self .atom @@ -601,59 +602,30 @@ impl Chain for DefaultChain { .map_err(index_out_of_bounds_conversion)?; if let Some(virtual_node) = virtual_node { + let first_fragment = + generate_fragment([atom_moved.into(), Non(non).into()], pos)?; + + let first_segment_pase = self.forest.insert_item( + *label, + first_fragment, + atom_child_iter.clone(), + &self.atom, + )?; + let accepting = self .atom .is_accepting(virtual_node) .map_err(index_out_of_bounds_conversion)?; - let virtual_fragment = { - // `non` is valid, as checked above - let non_start = self.atom.nth_accumulator(non).unwrap() * 2; - - let mut result = DefaultForest::default(); - - for (label, child_iter) in self.atom.labels_of(non_start)? { - if matches!(*label.get_value(), - Some(Ter(ter)) if ter == t) - { - for child in child_iter { - let mut line: Vec<_> = self - .atom - .query_expansion(non_start, child) - .map_err(index_out_of_bounds_conversion)? - .iter() - .copied() - .flatten() - .map(|(nt, rule)| [(*rule).into(), Non(*nt).into()]) - .collect(); - - line.push([label.get_moved().into(), Ter(t).into()]); - - let line: Vec<GrammarLabelType> = - line.into_iter().flatten().collect(); - - if result.is_empty() { - result = generate_fragment(line, pos)?; - } else { - result.plant( - 0, - generate_fragment(line, pos)?, - false, - )?; - } - } - } - } - - result - }; + let virtual_fragment = + virtual_generate_fragment(&self.atom, non, t, pos)?; // NOTE: We only need the PaSe from the // first segment, so we pass an empty // iterator, in which case the passed // label is only used for the PaSe. let virtual_pase = self.forest.insert_item( - Edge::new(0, first_segment_pase, true), + Edge::new(0, first_segment_pase, accepting), virtual_fragment, std::iter::empty(), &self.atom, @@ -810,8 +782,8 @@ mod test_chain { let atom = DefaultAtom::from_grammar(grammar)?; let mut chain = DefaultChain::unit(atom)?; - chain.chain(3, 00)?; + dbg!("hi"); chain.chain(1, 01)?; chain.chain(2, 02)?; chain.chain(2, 03)?; @@ -831,6 +803,8 @@ mod test_chain { { chain.graph.print_viz("chain.gv")?; chain.atom.print_nfa("nfa.gv")?; + chain.forest.print_viz("forest.gv")?; + item::default::print_labels(&chain.atom, &chain.forest)?; } Ok(()) diff --git a/chain/src/item/default.rs b/chain/src/item/default.rs index b71f940..f9d26ec 100644 --- a/chain/src/item/default.rs +++ b/chain/src/item/default.rs @@ -2,6 +2,8 @@ //! forest. use super::*; +use crate::atom::default::DefaultAtom; +use grammar::{GrammarLabel, GrammarLabelType}; use graph::{ builder::BuilderMut, labelled::binary::PLGBuilderMut, Graph, LabelGraph, PLGraph, RedirectGraph, }; @@ -125,6 +127,11 @@ impl<T: GraphLabel> Graph for DefaultForest<T> { fn replace_by_builder(&mut self, _builder: impl graph::Builder<Result = Self>) { unimplemented!() } + + #[inline] + fn print_viz(&self, filename: &str) -> Result<(), std::io::Error> { + self.graph.print_viz(filename) + } } impl<T: GraphLabel> ParentsGraph for DefaultForest<T> { @@ -360,9 +367,18 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> { stack.push(root); + let mut seen_nodes = std::collections::HashSet::<usize>::new(); + while let Some(top) = stack.pop() { + seen_nodes.insert(top); + for child in fragment.children_of(top)? 
{ builder.add_edge(conversion!(top), conversion!(child), root_label)?; + + if !seen_nodes.contains(&child) { + seen_nodes.insert(child); + stack.push(child); + } } } @@ -451,22 +467,82 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> { } } +impl<T: GraphLabel> PartialEq for DefaultForest<ForestLabel<T>> { + fn eq(&self, other: &Self) -> bool { + let self_root = self.root(); + let other_root = other.root(); + + if (self_root.is_some() && other_root.is_none()) + || (self_root.is_none() && other_root.is_some()) + { + return false; + } else if self_root.is_none() && other_root.is_none() { + return true; + } + + // both roots are not empty + + let self_root = self_root.unwrap(); + let other_root = other_root.unwrap(); + + self.is_prefix(self_root, other).unwrap_or(false) + && other.is_prefix(other_root, self).unwrap_or(false) + } +} + +impl<T: GraphLabel> Eq for DefaultForest<ForestLabel<T>> {} + +/// Print the labels found in the forest, so that we can easily +/// understand what those labels mean. +pub fn print_labels( + atom: impl Borrow<DefaultAtom>, + forest: impl Borrow<DefaultForest<ForestLabel<GrammarLabel>>>, +) -> Result<(), Box<dyn std::error::Error>> { + let forest = forest.borrow(); + let atom = atom.borrow(); + + for node in forest.nodes() { + let label = forest.vertex_label(node)?; + + if let Some(label) = label { + let label = label.label.label(); + + match label { + GrammarLabelType::TNT(tnt) => { + println!("{tnt} = {}", atom.name_of_tnt(tnt)?); + } + GrammarLabelType::Rule(pos) => { + println!("pos {pos} = {}", atom.rule_pos_string(pos)?); + } + } + } else { + return Err(Error::NodeNoLabel(node).into()); + } + } + + Ok(()) +} + +#[allow(unused_macros)] +macro_rules! leaf ( + ($label:expr, $type:tt) =>{ + DefaultForest::<ForestLabel<$type>>::new_leaf($label) + }; + ($label:expr) => { + DefaultForest::<ForestLabel<usize>>::new_leaf($label) + } +); + +#[allow(unused_imports)] +pub(crate) use leaf; + #[cfg(test)] mod item_test { use super::*; - macro_rules! 
leaf ( - ($label:expr, $type:tt) =>{ - DefaultForest::<ForestLabel<$type>>::new_leaf($label) - }; - ($label:expr) => { - DefaultForest::<ForestLabel<usize>>::new_leaf($label) - } - ); - #[test] fn test_forest_api() -> Result<(), Box<dyn std::error::Error>> { - let forest: DefaultForest<usize> = Default::default(); + let forest: DefaultForest<ForestLabel<usize>> = Default::default(); // empty forest @@ -537,11 +613,38 @@ mod item_test { forest.clone_node(5)?; - assert_eq!(forest.nodes_len(), 7); + assert_eq!(forest.nodes_len(), 8); #[cfg(feature = "test-print-viz")] forest.print_viz("forest.gv")?; Ok(()) } + + #[test] + fn test_eq() -> Result<(), Box<dyn std::error::Error>> { + let mut forest = leaf!(0, usize); + + forest.plant(0, leaf!(1), false)?; + forest.plant(0, leaf!(2), false)?; + forest.plant(0, leaf!(3), false)?; + forest.plant(0, leaf!(4), false)?; + forest.plant(2, leaf!(5), false)?; + + let mut test_forest = leaf!(0); + test_forest.plant(0, leaf!(1), false)?; + test_forest.plant(0, leaf!(2), false)?; + test_forest.plant(0, leaf!(3), false)?; + test_forest.plant(2, leaf!(5), false)?; + + assert_ne!(forest, test_forest); + assert_ne!(test_forest, forest); + + test_forest.plant(0, leaf!(4), false)?; + + assert_eq!(forest, test_forest); + assert_eq!(test_forest, forest); + + Ok(()) + } } diff --git a/chain/src/item/genins.rs b/chain/src/item/genins.rs index 99d9202..9ed3b74 100644 --- a/chain/src/item/genins.rs +++ b/chain/src/item/genins.rs @@ -14,6 +14,17 @@ use graph::Graph; use core::borrow::Borrow; use std::collections::HashSet as Set; +/// Convert an error telling us that an index is out of bounds. +/// +/// # Panics +/// +/// The function panics if the error is not of the expected kind. +fn index_out_of_bounds_conversion(ge: GrammarError) -> Error { + match ge { + GrammarError::IndexOutOfBounds(index, bound) => Error::IndexOutOfBounds(index, bound), + _ => Error::Invalid, + } +} /// A helper function to generate a fragment of forest. /// /// It simply constructs a root node and then appends @@ -56,6 +67,53 @@ pub fn generate_fragment( Ok(result) } +/// Generate a virtual fragment representing the left-linear null +/// closure [nt]^t. +pub fn virtual_generate_fragment( + atom: impl Borrow<DefaultAtom>, + nt: usize, + t: usize, + pos: usize, +) -> Result<DefaultForest<ForestLabel<GrammarLabel>>, crate::default::Error> { + let atom = atom.borrow(); + + let non_start = atom.nth_accumulator(nt).unwrap() * 2; + + let mut result = DefaultForest::default(); + + for (label, child_iter) in atom.labels_of(non_start)? { + if matches!(*label.get_value(), + Some(TNT::Ter(ter)) if ter == t) + { + for child in child_iter { + // #[allow(unused_must_use)] + // { + // dbg!(non_start, child, atom.query_expansion(non_start, child)); + // } + + let line: Vec<GrammarLabelType> = atom + .query_expansion(non_start, child) + .map_err(index_out_of_bounds_conversion)? + .iter() + .copied() + .flatten() + .flat_map(|(nt, rule)| [(*rule).into(), TNT::Non(*nt).into()]) + .rev() + .chain(std::iter::once(TNT::Ter(t).into())) + .collect(); + + if result.is_empty() { + result = generate_fragment(line, pos)?; + } else { + result.plant(0, generate_fragment(line, pos)?, false)?; + } + } + } + } + + Ok(result) +} + impl DefaultForest<ForestLabel<GrammarLabel>> { /// Insert an item derivation forest into a recording forest. 
/// @@ -68,26 +126,39 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { atom_child_iter: impl Iterator<Item = usize>, atom: &DefaultAtom, ) -> Result<PaSe, Error> { - /// Convert an error telling us that an index is out of bounds. - /// - /// # Panics - /// - /// The function panics if the error is not of the expected - /// kind. - fn index_out_of_bounds_conversion(ge: GrammarError) -> Error { - match ge { - GrammarError::IndexOutOfBounds(index, bound) => { - Error::IndexOutOfBounds(index, bound) - } - _ => Error::Invalid, - } - } - let fragment = fragment.borrow(); let pase = label.forest_source(); - let parents: Vec<Parent> = { + let _fragment_root = if let Some(root) = fragment.root() { + root + } else { + return Err(Error::EmptyItem); + }; + + // Ensure the last node in the PaSe is a terminal or a + // non-terminal node, as an extra safety guard during + // development. + #[cfg(debug_assertions)] + { + if let Some(parent) = pase.parent() { + assert!(matches!( + self.vertex_label(self.nth_child(parent.node(), parent.edge())?), + Ok(Some(label)) + if label.label().label().tnt().is_some())); + } else if let Some((_, leaf)) = pase.segment() { + assert!(matches!( + self.vertex_label(leaf), + Ok(Some(label)) + if label.label().label().tnt().is_some())); + } else { + unreachable!("a pase is neither a parent nor a segment"); + } + } + + let mut is_empty_segment = false; + + let mut parents: Vec<Parent> = { let mut result = Vec::new(); if let Some(parent) = pase.parent() { @@ -96,7 +167,15 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { let (root, leaf) = pase.segment().unwrap(); let mut seen_nodes = Set::new(); - let mut stack = vec![root]; + let mut stack = if root == leaf { + // an empty segment means the root + is_empty_segment = true; + + result.push(Parent::new(root, 0)); + vec![] + } else { + vec![root] + }; while let Some(top) = stack.pop() { if seen_nodes.contains(&top) { @@ -118,7 +197,35 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { result }; + for parent in parents.iter() { + if !self.has_node(parent.node()) { + return Err(Error::IndexOutOfBounds(parent.node(), self.nodes_len())); + } + } + + if !is_empty_segment { + parents = parents + .into_iter() + .flat_map(|parent| { + self.parents_of(parent.node()).unwrap().filter(|n| { + matches!( + self.vertex_label(n.node()) + .unwrap() + .unwrap() + .label() + .label() + .tnt(), + Some(TNT::Non(_)) + ) + }) + }) + .collect(); + } + + let mut non_empty = false; + for atom_child in atom_child_iter { + // dbg!(label.label(), atom_child); // Find reduction information. let reduction_info = atom .query_reduction(label.label(), atom_child) @@ -142,20 +249,20 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { ) { for parent in self.parents_of(node.node())? 
{ debug_assert!(matches!( - self.vertex_label(parent.node()), - Ok(Some(label)) if - label - .label() - .label() - .rule() - .is_some())); + self.vertex_label(parent.node()), + Ok(Some(label)) if + label + .label() + .label() + .rule() + .is_some())); second_stack.extend(self.parents_of(parent.node())?.filter(|n| { matches!(self.vertex_label(n.node()), - Ok(Some(label)) - if matches!( - label.label().label().tnt(), - Some(TNT::Non(_)))) + Ok(Some(label)) + if matches!( + label.label().label().tnt(), + Some(TNT::Non(_)))) })); } } @@ -169,12 +276,26 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { } if stack.is_empty() { + dbg!( + label, + atom_child, + parents, + reduction_info, + atom.query_reduction(label.label(), atom_child).unwrap() + ); + + self.print_viz("dbg forest.gv").unwrap(); + + #[cfg(test)] + genins_test::print_labels(atom, self).unwrap(); + return Err(Error::CannotPlant); } for parent in stack.into_iter() { if parent.edge() + 1 >= self.degree(parent.node())? { - self.plant(parent.node(), fragment, false)?; + // dbg!(&fragment); + self.plant(parent.node(), fragment, non_empty)?; } else { let nth_child = self.nth_child(parent.node(), parent.edge() + 1)?; @@ -182,11 +303,26 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { // do nothing continue; } + dbg!("clone?"); let cloned_node = self.clone_node(nth_child)?; - self.plant(cloned_node, fragment, false)?; + self.plant(cloned_node, fragment, non_empty)?; } + + non_empty = true; + } + } + + // If the iterator is empty, just plant at the specified + // child. + if !non_empty { + for parent in parents.into_iter() { + let nth_child = self.nth_child(parent.node(), parent.edge())?; + + self.plant(nth_child, fragment, non_empty)?; + + non_empty = true; } } @@ -214,11 +350,12 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { unreachable!("the forest is wrongly planted"); } }; - + // dbg!(node, edge, root_label, leaf_label); PaSe::Parent(node, edge) } else { let root_label = fragment.vertex_label(0)?.unwrap(); let leaf_label = fragment.vertex_label(fragment.nodes_len() - 1)?.unwrap(); + // dbg!(root_label, leaf_label); PaSe::Segment( self.query_label(root_label).unwrap(), @@ -229,3 +366,122 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { Ok(result) } } + +#[cfg(test)] +mod genins_test { + use super::*; + #[allow(unused_imports)] + use crate::{default::DefaultChain, item::default::leaf, Chain}; + + use grammar::test_grammar_helper::*; + + pub fn print_labels( + atom: impl Borrow<DefaultAtom>, + forest: impl Borrow<DefaultForest<ForestLabel<GrammarLabel>>>, + ) -> Result<(), Box<dyn std::error::Error>> { + let forest = forest.borrow(); + let atom = atom.borrow(); + + for node in forest.nodes() { + let label = forest.vertex_label(node)?; + + if let Some(label) = label { + let label = label.label.label(); + + match label { + GrammarLabelType::TNT(tnt) => { + println!("{tnt} = {}", atom.name_of_tnt(tnt)?); + } + GrammarLabelType::Rule(pos) => { + println!("pos {pos} = {}", atom.rule_pos_string(pos)?); + } + } + } else { + return Err(Error::NodeNoLabel(node).into()); + } + } + + Ok(()) + } + + #[test] + fn test_generate_fragment() -> Result<(), Box<dyn std::error::Error>> { + let grammar = new_notes_grammar()?; + + let atom = DefaultAtom::from_grammar(grammar)?; + + #[cfg(feature = "test-print-viz")] + atom.print_nfa("genins nfa.gv")?; + + let fragment = generate_fragment([72.into(), TNT::Non(0).into()], 0)?; + + let mut test_fragment = leaf!( + GrammarLabel::new(GrammarLabelType::from(72), 0), + GrammarLabel + ); + + 
test_fragment.plant( + 0, + leaf!( + GrammarLabel::new(GrammarLabelType::from(TNT::Non(0)), 0), + GrammarLabel + ), + false, + )?; + + assert_eq!(fragment, test_fragment); + + // virtual fragments + + println!("nt = 0, t = 3"); + + let virtual_fragment = virtual_generate_fragment(&atom, 0, 3, 0)?; + + assert_eq!(virtual_fragment.nodes_len(), 7); + + let virtual_node = virtual_fragment.vertex_label(5)?.unwrap().label(); + + let test_fragment = generate_fragment( + [ + TNT::Non(0).into(), + 2.into(), + TNT::Non(1).into(), + 8.into(), + TNT::Non(2).into(), + virtual_node.label(), + TNT::Ter(3).into(), + ], + 0, + )?; + + print_labels(&atom, &virtual_fragment)?; + + assert_eq!(virtual_fragment, test_fragment); + + #[cfg(feature = "test-print-viz")] + virtual_fragment.print_viz("virtual fragment (0, 3).gv")?; + + println!("nt = 3, t = 2"); + + let virtual_fragment = virtual_generate_fragment(&atom, 3, 2, 1)?; + + let test_fragment = + generate_fragment([TNT::Non(3).into(), 38.into(), TNT::Ter(2).into()], 1)?; + + print_labels(&atom, &virtual_fragment)?; + + assert_eq!(virtual_fragment, test_fragment); + + #[cfg(feature = "test-print-viz")] + virtual_fragment.print_viz("virtual fragment (3, 2).gv")?; + + // querying reductions + + assert!(matches!(atom.query_reduction(17, 9), Ok(Some(&[1])))); + + assert!(matches!(atom.query_reduction(35, 9), Ok(Some(&[1, 2])))); + assert!(matches!(atom.query_reduction(35, 25), Ok(Some(&[2])))); + + Ok(()) + } +} diff --git a/chain/src/item/mod.rs b/chain/src/item/mod.rs index 7fafcc8..c614802 100644 --- a/chain/src/item/mod.rs +++ b/chain/src/item/mod.rs @@ -26,7 +26,6 @@ use core::borrow::Borrow; /// /// A segment represents every edge from the root node to the single /// terminating node. -#[allow(unused)] #[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] pub enum PaSe { /// An edge from a node, as the n-th child. diff --git a/chain/src/plan.org b/chain/src/plan.org index 1da33cc..77d000c 100644 --- a/chain/src/plan.org +++ b/chain/src/plan.org @@ -79,9 +79,9 @@ lack of this step might be the root cause of the failure of the previous version of this project. + [X] Test atoms -- [ ] Implement semiring. [0/5] - + [ ] Define the trait. - + [ ] Define items and rules for the chain-rule parser, as an +- [-] Implement semiring. [2/5] + + [X] Define the trait. + + [X] Define items and rules for the chain-rule parser, as an item-based description. + [ ] Implement the boolean semiring. + [ ] Implement the natural number semiring. diff --git a/grammar/src/left_closure.rs b/grammar/src/left_closure.rs index 39461f0..ddee28d 100644 --- a/grammar/src/left_closure.rs +++ b/grammar/src/left_closure.rs @@ -25,9 +25,11 @@ impl Grammar { GrammarState::AfterComputeFirst, )) } - GrammarState::AfterLeftClosure - | GrammarState::AfterNFA - | GrammarState::AfterComputeFirst => {} + GrammarState::AfterComputeFirst => { + self.state = GrammarState::AfterLeftClosure; + } + + _ => {} } let non_len = self.non_num(); diff --git a/grammar/src/lib.rs b/grammar/src/lib.rs index 2c17a5f..a8e0fd7 100644 --- a/grammar/src/lib.rs +++ b/grammar/src/lib.rs @@ -338,7 +338,7 @@ impl Grammar { self.accumulators .get(n) .copied() - .ok_or_else(|| Error::IndexOutOfBounds(n, self.total())) + .ok_or_else(|| Error::IndexOutOfBounds(n, self.non_num())) } /// Return the index of the rules a rule position belongs to. @@ -476,17 +476,28 @@ impl Grammar { } } + /// Return true if and only if the terminal can appear as the + /// first terminal in a string expanded from the non-terminal. 
+ #[inline] + pub fn is_first_of(&self, non_terminal: usize, terminal: usize) -> Result<bool, Error> { + Ok(self + .firsts + .get(non_terminal) + .ok_or(Error::IndexOutOfBounds(non_terminal, self.firsts.len()))? + .contains(&Some(terminal))) + } + /// Return true if and only if the non-terminal is nullable. #[inline] pub fn is_nullable(&self, non_terminal: usize) -> Result<bool, Error> { Ok(self .firsts .get(non_terminal) - .ok_or(Error::IndexOutOfBounds(non_terminal, self.non.len()))? + .ok_or(Error::IndexOutOfBounds(non_terminal, self.firsts.len()))? .contains(&None)) } - // FIXME: We shall use a label to query this information as well, + // REVIEW: We shall use a label to query this information as well, // probably. /// Query the expansion information from the position `pos1` to @@ -497,14 +508,6 @@ impl Grammar { pos1: usize, pos2: usize, ) -> Result<Option<&[(usize, usize)]>, Error> { - if pos1 >= self.total() { - return Err(Error::IndexOutOfBounds(pos1, self.total())); - } - - if pos2 >= self.total() { - return Err(Error::IndexOutOfBounds(pos2, self.total())); - } - match self.state { GrammarState::AfterLeftClosure => {} _ => { @@ -522,14 +525,6 @@ impl Grammar { /// the position `pos2` . #[inline] pub fn query_reduction(&self, pos1: usize, pos2: usize) -> Result<Option<&[usize]>, Error> { - if pos1 >= self.total() { - return Err(Error::IndexOutOfBounds(pos1, self.total())); - } - - if pos2 >= self.total() { - return Err(Error::IndexOutOfBounds(pos2, self.total())); - } - match self.state { GrammarState::AfterLeftClosure => {} _ => { @@ -591,43 +586,122 @@ impl Grammar { let mut left_p = first_label.is_left_p() || second_label.is_left_p(); + // if first_source == 0 && second_label.get_moved() == 15 { + // dbg!(second_source, second_target, first_label, second_label); + // dbg!(self.expansion_map.get(&(second_source, second_target))); + // dbg!(self.expansion_map.get(&(first_source, second_target))); + // } + // Record left-linear expansion information. 
- if let Some(second_nt) = self.get_nt_start_in_nfa(second_source) { + let original_expansion = self + .expansion_map + .get(&(second_source, second_target)) + .cloned(); + + let second_nt_start = self.get_nt_start_in_nfa(second_source).is_some(); + + if !second_nt_start + && !matches!(self.expansion_map.get(&(first_source, second_target)), + Some(expansion) + if expansion.len() >= + original_expansion + .as_ref() + .map(|vec| vec.len()) + .unwrap_or(1)) + { + if let Some(original_expansion) = &original_expansion { + self.expansion_map + .insert((first_source, second_target), original_expansion.clone()); + } else { + let this_nt = self + .get_rule_num(second_source.div_euclid(2)) + .unwrap_or_else(|_| self.non_num()); + + self.expansion_map.insert( + (first_source, second_target), + vec![(this_nt, second_label.get_moved())], + ); + } + } else if second_nt_start { left_p = true; - if !self - .expansion_map - .contains_key(&(first_source, second_target)) + let original_moved = match self.expansion_map.get(&(first_source, second_source)) { + Some(old_expansion) if !old_expansion.is_empty() => old_expansion.last().unwrap().1, + _ => first_label.get_moved(), + }; + + let original_nt = self + .get_rule_num(first_source.div_euclid(2)) + .unwrap_or_else(|_| self.non_num()); + + if !matches!(self.expansion_map.get(&(first_source, second_target)), + Some(expansion) + if expansion.len() >= + original_expansion + .as_ref() + .map(|vec| vec.len() + 1) + .unwrap_or(1)) { - let original_expansion = self.expansion_map.get(&(second_source, second_target)); - self.expansion_map.insert( (first_source, second_target), if let Some(original_expansion) = original_expansion { - let mut result = original_expansion.clone(); - result.push((second_nt, second_label.get_moved())); + let mut result = original_expansion; + result.push((original_nt, original_moved)); result } else { - vec![(second_nt, second_label.get_moved())] + vec![(original_nt, original_moved)] }, ); } - } else if let Some(second_nt) = self.get_nt_end_in_nfa(second_source) { - let original_reduction = self.reduction_map.get(&(second_source, second_target)); + } - self.reduction_map.insert( - (first_source, second_target), - if let Some(original_reduction) = original_reduction { - let mut result = original_reduction.clone(); - result.push(second_nt); + // Record reduction information. 
- result - } else { - vec![second_nt] - }, - ); + let original_reduction = self + .reduction_map + .get(&(second_source, second_target)) + .cloned(); + + let second_nt_end = self.get_nt_end_in_nfa(second_source); + + if second_nt_end.is_none() + && !matches!(self.reduction_map.get(&(first_source, second_target)), + Some(reduction) + if reduction.len() >= + original_reduction + .as_ref() + .map(|vec| vec.len()) + .unwrap_or(0)) + { + if let Some(original_reduction) = &original_reduction { + self.reduction_map + .insert((first_source, second_target), original_reduction.clone()); + } + } + + if let Some(second_nt) = second_nt_end { + if !matches!(self.reduction_map.get(&(first_source, second_target)), + Some(reduction) + if reduction.len() >= + original_reduction + .as_ref() + .map(|vec| vec.len() + 1) + .unwrap_or(1)) + { + self.reduction_map.insert( + (first_source, second_target), + if let Some(original_reduction) = original_reduction { + let mut result = original_reduction; + result.push(second_nt); + + result + } else { + vec![second_nt] + }, + ); + } } NfaLabel::new(second_label.get_value(), second_label.get_moved(), left_p) @@ -659,6 +733,10 @@ impl Grammar { /// Return a string describing a rule position. pub fn rule_pos_to_string(&self, pos: usize) -> Result<String, Error> { + if pos == self.total() { + return Ok("End of rules".to_owned()); + } + let rule_num = { let mut result = None; @@ -690,7 +768,7 @@ impl Grammar { if rule_num == 0 { pos } else { - pos - self.accumulators.get(rule_num - 1).copied().unwrap() + pos - self.accumulators.get(rule_num).copied().unwrap() }, ) .unwrap()) diff --git a/grammar/src/test_grammar_helper.rs b/grammar/src/test_grammar_helper.rs index 984eb50..9d86865 100644 --- a/grammar/src/test_grammar_helper.rs +++ b/grammar/src/test_grammar_helper.rs @@ -88,7 +88,6 @@ fn scan_tnt( } /// Return a simple testing grammar. -#[allow(dead_code)] pub fn new_grammar() -> Result<Grammar, Box<dyn std::error::Error>> { let ter = vec![Terminal::new("a".to_owned()), Terminal::new("b".to_owned())]; let non = vec![ @@ -126,7 +125,6 @@ pub fn new_grammar() -> Result<Grammar, Box<dyn std::error::Error>> { /// Return a grammar that might serve as the grammar for my notes, /// somehow, without regular expressions. -#[allow(dead_code)] pub fn new_notes_grammar_no_regexp() -> Result<Grammar, Box<dyn std::error::Error>> { let ter = vec![ Terminal::new("NL".to_owned()), @@ -250,7 +248,6 @@ pub fn new_notes_grammar_no_regexp() -> Result<Grammar, Box<dyn std::error::Erro /// Return a grammar that might serve as the grammar for my notes, /// somehow. -#[allow(dead_code)] pub fn new_notes_grammar() -> Result<Grammar, Box<dyn std::error::Error>> { let ter = vec![ Terminal::new("NL".to_owned()), @@ -353,7 +350,6 @@ pub fn new_notes_grammar() -> Result<Grammar, Box<dyn std::error::Error>> { } /// Return a grammar that can express parentheses. -#[allow(dead_code)] pub fn new_paren_grammar() -> Result<Grammar, Box<dyn std::error::Error>> { let ter = vec![ Terminal::new("LEFT".to_owned()), @@ -444,7 +440,6 @@ pub fn new_left_recursive_grammar() -> Result<Grammar, Box<dyn std::error::Error } /// Return a right recursive grammar. 
-#[allow(dead_code)] pub fn new_right_recursive_grammar() -> Result<Grammar, Box<dyn std::error::Error>> { let ter = vec![Terminal::new("B".to_owned()), Terminal::new("C".to_owned())]; let non = vec![ diff --git a/graph/src/labelled/binary.rs b/graph/src/labelled/binary.rs index 201dda2..4ec7378 100644 --- a/graph/src/labelled/binary.rs +++ b/graph/src/labelled/binary.rs @@ -203,8 +203,51 @@ impl<T: GraphLabel> Graph for PLGraph<T> { unimplemented!("use a `PLGBuilderMut` instead") } - fn print_viz(&self, _filename: &str) -> Result<(), std::io::Error> { - todo!() + fn print_viz(&self, filename: &str) -> Result<(), std::io::Error> { + let filename = format!("output/{filename}"); + + let preamble = "digraph nfa { + fontname=\"Helvetica,Arial,sans-serif\" + node [fontname=\"Helvetica,Arial,sans-serif\"] + edge [fontname=\"Helvetica,Arial,sans-serif\"] + rankdir=LR;\n"; + + let mut post = String::new(); + + for node in self.nodes() { + post.push_str(&format!( + " {node} [label = \"{}\"]\n", + match self.vertex_label(node) { + Ok(Some(label)) => { + format!("{label}") + } + _ => { + " ε ".to_owned() + } + } + )); + } + + for (source, target) in self.edges() { + post.push_str(&format!(" {source} -> {target}\n")); + } + + post.push_str("}\n"); + + let result = format!("{preamble}{post}"); + + if std::fs::metadata(&filename).is_ok() { + std::fs::remove_file(&filename)?; + } + + let mut file = std::fs::File::options() + .write(true) + .create(true) + .open(&filename)?; + + use std::io::Write; + + file.write_all(result.as_bytes()) } } diff --git a/graph/src/lib.rs b/graph/src/lib.rs index 2a0c50d..87f39c0 100644 --- a/graph/src/lib.rs +++ b/graph/src/lib.rs @@ -129,8 +129,10 @@ pub trait Graph: Default { let result = format!("{preamble}{post}"); - if std::fs::metadata(filename).is_ok() { - std::fs::remove_file(filename)?; + let filename = format!("output/{filename}"); + + if std::fs::metadata(&filename).is_ok() { + std::fs::remove_file(&filename)?; } let mut file = std::fs::File::options() diff --git a/nfa/src/default/regex.rs b/nfa/src/default/regex.rs index 9e1ed5c..5e0d53b 100644 --- a/nfa/src/default/regex.rs +++ b/nfa/src/default/regex.rs @@ -879,7 +879,6 @@ mod test_des_rec { use crate::desrec::DesRec; - #[allow(dead_code)] fn test_scanner<'a, 'b, T>( _parser: &'b DefaultRegParser<T>, input: &'a str, |