//! This file provides a default implementation of NFA. // TODO: The current focus of the project is to understand the growth // rate of the algorithm, to know whether I made a mistake in the // previous iteration of the implementation, or the algorithm is not // as fast as the author estimated, which is not quite likely, of // course. // // Thus I shall establish a friendly interface that allows me to view // and debug the atomic languages and the languages, transparently. use graph::{ builder::Builder, error::Error as GError, labelled::DLGBuilder, DLGraph, Graph, GraphLabel, LabelGraph, }; use crate::{default::regex::RegexType, error::Error, DOption, Nfa, Regex, SoC}; use core::fmt::{Debug, Display}; /// Default NFA implementation. #[derive(Debug, Clone)] pub struct DefaultNFA { graph: DLGraph, } impl Default for DefaultNFA { fn default() -> Self { Self { graph: Default::default(), } } } impl Graph for DefaultNFA { type Iter<'a> = as Graph>::Iter<'a> where T: 'a; #[inline] fn is_empty(&self) -> bool { self.graph.is_empty() } #[inline] fn nodes_len(&self) -> usize { self.graph.nodes_len() } #[inline] fn children_of(&self, node_id: usize) -> Result, GError> { self.graph.children_of(node_id) } #[inline] fn degree(&self, node_id: usize) -> Result { self.graph.degree(node_id) } #[inline] fn is_empty_node(&self, node_id: usize) -> Result { self.graph.is_empty_node(node_id) } #[inline] fn has_edge(&self, source: usize, target: usize) -> Result { self.graph.has_edge(source, target) } #[inline] fn print_viz(&self, filename: &str) -> Result<(), std::io::Error> { self.graph.print_viz(filename) } #[inline] fn replace_by_builder(&mut self, _builder: impl Builder) { unimplemented!() } } impl LabelGraph for DefaultNFA { type Iter<'a> = as LabelGraph>::Iter<'a> where T: 'a; type LabelIter<'a> = as LabelGraph>::LabelIter<'a> where T: 'a; #[inline] fn vertex_label(&self, node_id: usize) -> Result, GError> { if self.has_node(node_id) { unimplemented!() } else { Err(GError::IndexOutOfBounds(node_id, self.nodes_len())) } } #[inline] fn edge_label(&self, source: usize, target: usize) -> Result, GError> { self.graph.edge_label(source, target) } #[inline] fn find_children_with_label( &self, node_id: usize, label: &T, ) -> Result<>::Iter<'_>, GError> { self.graph.find_children_with_label(node_id, label) } #[inline] fn labels_of(&self, node_id: usize) -> Result, GError> { self.graph.labels_of(node_id) } #[inline] fn has_edge_label(&self, node_id: usize, label: &T, target: usize) -> Result { self.graph.has_edge_label(node_id, label, target) } } impl Nfa for DefaultNFA { type FromRegex = DefaultNFA; fn to_nfa( regexps: &[impl Regex>], sub_pred: impl Fn(T) -> Result, Error>, ) -> Result>, Error> { let total_regexps_len: usize = regexps.iter().map(Graph::nodes_len).sum(); if total_regexps_len == 0 { return Ok(Default::default()); } // We reserve two more rooms for later uses. let nfa_len = total_regexps_len * 2 + 2; let mut builder: DLGBuilder> = Builder::with_capacity(nfa_len); // Since DOption implements Copy when T does, we can use a // variable to hold the empty label to avoid repetitions. let empty_label: DOption = Default::default(); for _ in 0..nfa_len { builder.add_vertex(); } let accumulators: Vec = { let mut result = Vec::with_capacity(regexps.len() + 1); result.push(0); let mut accumulator = 0; for regexp in regexps.iter() { accumulator += regexp.nodes_len() * 2; result.push(accumulator); } result.pop(); result }; assert_eq!(accumulators.len(), regexps.len()); /// Get offset from `accumulators` safely. macro_rules! get_offset_safe ( ($num:expr) => { *accumulators.get($num).ok_or(Error::UnknownNode($num))? } ); /// Get offset from `accumulators` without checking bounds. macro_rules! get_offset_unsafe ( ($num:expr) => { { unsafe { *accumulators.get_unchecked($num) } } } ); for (index, regex) in regexps.iter().enumerate() { let root = if let Some(root) = regex.root() { root } else { // A regular expression without roots is empty, so we // skip it. continue; }; let regex_len = regex.nodes_len(); // It is safe here to call `get_offset_unsafe`, as `index` // is guaranteed to be strictly less than the length of // `accumulators` by an assertion above. let offset = get_offset_unsafe!(index); let mut stack: Vec = Vec::with_capacity(regex_len); stack.push(root); while let Some(top_index) = stack.pop() { let top_label = regex.vertex_label(top_index)?; let nfa_start = offset + 2 * top_index; let nfa_end = offset + 2 * top_index + 1; match top_label { RegexType::Kleene => { builder.add_edge(nfa_start, nfa_end, empty_label)?; let mut source = nfa_start; if !regex.is_empty_node(top_index)? { for child in regex.children_of(top_index)? { stack.push(child); let child_start = offset + 2 * child; let child_end = offset + 2 * child + 1; builder.add_edge(source, child_start, empty_label)?; source = child_end; } builder.add_edge(source, nfa_end, empty_label)?; builder.add_edge(nfa_end, nfa_start, empty_label)?; } } RegexType::Plus => { let mut source = nfa_start; if !regex.is_empty_node(top_index)? { for child in regex.children_of(top_index)? { stack.push(child); let child_start = offset + 2 * child; let child_end = offset + 2 * child + 1; builder.add_edge(source, child_start, empty_label)?; source = child_end; } builder.add_edge(source, nfa_end, empty_label)?; builder.add_edge(nfa_end, nfa_start, empty_label)?; } else { builder.add_edge(nfa_start, nfa_end, empty_label)?; } } RegexType::Optional => { builder.add_edge(nfa_start, nfa_end, empty_label)?; let mut source = nfa_start; if !regex.is_empty_node(top_index)? { for child in regex.children_of(top_index)? { stack.push(child); let child_start = offset + 2 * child; let child_end = offset + 2 * child + 1; builder.add_edge(source, child_start, empty_label)?; source = child_end; } builder.add_edge(source, nfa_end, empty_label)?; } } RegexType::Or => { if !regex.is_empty_node(top_index)? { for child in regex.children_of(top_index)? { stack.push(child); let child_start = offset + 2 * child; let child_end = offset + 2 * child + 1; builder.add_edge(nfa_start, child_start, empty_label)?; builder.add_edge(child_end, nfa_end, empty_label)?; } } else { builder.add_edge(nfa_start, nfa_end, empty_label)?; } } RegexType::Paren => { // Ignore Paren nodes since currently these // are used only for printing purposes. } RegexType::Empty => { let mut source = nfa_start; if !regex.is_empty_node(top_index)? { for child in regex.children_of(top_index)? { stack.push(child); let child_start = offset + 2 * child; let child_end = offset + 2 * child + 1; builder.add_edge(source, child_start, empty_label)?; source = child_end; } builder.add_edge(source, nfa_end, empty_label)?; } else { builder.add_edge(nfa_start, nfa_end, empty_label)?; } } RegexType::Lit(value) => { // The children would be ignored even if for // some reason this literal node had some // children. match sub_pred(value)? { SoC::Sub(sub_non) => { // a non-terminal let sub_offset = get_offset_safe!(sub_non); let sub_nfa_start = sub_offset + 2 * sub_non; let sub_nfa_end = sub_offset + 2 * sub_non + 1; builder.add_edge(nfa_start, sub_nfa_start, empty_label)?; builder.add_edge(sub_nfa_end, nfa_end, empty_label)?; } SoC::Carry(new_value) => { // a terminal builder.add_edge(nfa_start, nfa_end, DOption(Some(new_value)))?; } } } } } } let graph = builder.build(); Ok(DefaultNFA { graph }) } fn remove_epsilon(&mut self) -> Result<(), Error> { todo!() } fn remove_dead(&mut self) -> Result<(), Error> { todo!() } fn nulling(&mut self, f: impl Fn(T) -> bool) -> Result<(), Error> { let mut updated = true; let mut builder = self.graph.builder_ref(); while updated { updated = false; let mut nullable = false; let mut to_add = Vec::new(); for (source, target) in builder.edges() { for label in builder.edge_label(source, target)? { if f(label) { nullable = true; break; } } if nullable { for (label, child_iter) in builder.labels_of(target)? { for child in child_iter { if !builder.has_edge_label(source, label, child)? { updated = true; to_add.push((source, child, *label)); } } } } } for (source, child, label) in to_add { builder.add_edge(source, child, label)?; } } self.graph.replace_by_builder(builder); Ok(()) } } impl Display for DefaultNFA { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { Debug::fmt(self, f) } } #[cfg(test)] mod test_to_nfa { #![allow(unused_imports)] use super::*; use crate::SoC; use crate::{ default::regex::{DefaultRegParser, DefaultRegex, ParseDirection, ParseError, RegexType}, desrec::DesRec, }; fn new_regex() -> Result, ParseError> { let parser = DefaultRegParser::::default(); fn test_scanner( _parser: &DefaultRegParser, input: &str, ) -> Result, ParseDirection)>, ParseError> { use ParseDirection::*; use RegexType::*; if let Some(first) = input.chars().next() { match first { '*' => Ok(Some((1, Kleene, Right))), '+' => Ok(Some((1, Plus, Right))), '?' => Ok(Some((1, Optional, Right))), '|' => Ok(Some((1, Empty, Up))), '(' => Ok(Some((1, Or, Down))), ')' => Ok(Some((1, Paren, Up))), ' '..='~' => Ok(Some((1, Lit(first), Right))), _ => Err(ParseError::InvalidCharacter(first)), } } else { Ok(None) } } let input_string = "a*b?c+|(d*| +)?".to_owned(); Ok( DefaultRegParser::::parse(&parser, &input_string, Box::new(test_scanner), true)? .unwrap_or(Default::default()) .0, ) } #[test] fn test_to_nfa() -> Result<(), Box> { let regex = new_regex()?; println!("regex = {regex}"); let nfa: DefaultNFA> = DefaultNFA::to_nfa(&[regex], |label| Ok(SoC::Carry(label)))?; nfa.print_viz("nfa.gv")?; Ok(()) } }