From 18d7955b7d84c00467ede38baae53f4ce1fb6908 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Fri, 20 Jan 2023 13:48:26 +0800 Subject: chain: a prototype is added. I have an ostensibly working prototype now. Further tests are needed to make sure that the algorithm meets the time complexity requirement, though. --- nfa/src/default/nfa.rs | 2 +- nfa/src/default/regex.rs | 141 +++++++++++++++++++++++++++++++++++++++++++++++ nfa/src/lib.rs | 4 +- 3 files changed, 145 insertions(+), 2 deletions(-) (limited to 'nfa/src') diff --git a/nfa/src/default/nfa.rs b/nfa/src/default/nfa.rs index a23c056..8d657d5 100644 --- a/nfa/src/default/nfa.rs +++ b/nfa/src/default/nfa.rs @@ -212,7 +212,7 @@ impl Nfa for DefaultNFA { let top_label = regex.vertex_label(top_index)?; let nfa_start = offset + 2 * top_index; - let nfa_end = offset + 2 * top_index + 1; + let nfa_end = nfa_start + 1; match top_label { RegexType::Kleene => { diff --git a/nfa/src/default/regex.rs b/nfa/src/default/regex.rs index c8ad370..1e3e87b 100644 --- a/nfa/src/default/regex.rs +++ b/nfa/src/default/regex.rs @@ -359,6 +359,147 @@ impl DefaultRegex { Ok(result) } + + /// Use a function to format the labels of the regular expressions + /// and format the entire regular expression with this aid, with a + /// dot at a specified position. + pub fn to_string_with_dot(&self, mut f: F, dot_pos: usize) -> Result + where + F: FnMut(T) -> String, + { + #[derive(Copy, Clone)] + enum StackElement { + Seen(usize), + Unseen(usize), + } + + impl StackElement { + fn index(&self) -> usize { + match self { + Seen(index) => *index, + Unseen(index) => *index, + } + } + + fn is_seen(self) -> bool { + matches!(self, Seen(_)) + } + } + + use StackElement::{Seen, Unseen}; + + let mut result = String::new(); + + let mut stack: Vec = Vec::new(); + let mut types = self.types.clone(); + types.push(RegexType::Paren); + + stack.push(Unseen(0)); + + while let Some(top) = stack.pop() { + let node_type = types.get(top.index()).unwrap(); + + match node_type { + RegexType::Kleene => { + if !top.is_seen() { + stack.push(Seen(top.index())); + + if self.degree(top.index()).unwrap() > 1 { + write!(result, "(")?; + stack.push(Unseen(types.len() - 1)); + } + + stack.extend( + self.graph + .children_of(top.index()) + .unwrap() + .map(Unseen) + .rev(), + ); + } else { + write!(result, "*")?; + } + } + RegexType::Plus => { + if !top.is_seen() { + stack.push(Seen(top.index())); + stack.extend( + self.graph + .children_of(top.index()) + .unwrap() + .map(Unseen) + .rev(), + ); + } else { + write!(result, "+")?; + } + } + RegexType::Optional => { + if !top.is_seen() { + stack.push(Seen(top.index())); + stack.extend( + self.graph + .children_of(top.index()) + .unwrap() + .map(Unseen) + .rev(), + ); + } else { + write!(result, "?")?; + } + } + RegexType::Or => { + if !top.is_seen() { + write!(result, "(")?; + + let len = self.len(); + + stack.push(Unseen(types.len() - 1)); + + for (child_index, child) in self + .graph + .children_of(top.index()) + .unwrap() + .enumerate() + .rev() + { + if child_index != len - 1 && child_index != 0 { + stack.push(Unseen(child)); + stack.push(Seen(top.index())); + } else { + stack.push(Unseen(child)); + } + } + } else { + write!(result, "|")?; + } + } + RegexType::Paren => { + write!(result, ")")?; + } + RegexType::Empty => { + stack.extend( + self.graph + .children_of(top.index()) + .unwrap() + .map(Unseen) + .rev(), + ); + + if self.graph.is_empty_node(top.index()).unwrap() { + write!(result, "ε")?; + } + } + RegexType::Lit(label) => write!(result, "{}", f(*label))?, + } + + if top.index() == dot_pos { + write!(result, " · ")?; + } + } + + Ok(result) + } } impl Display for DefaultRegex { diff --git a/nfa/src/lib.rs b/nfa/src/lib.rs index c1906e1..4440ea6 100644 --- a/nfa/src/lib.rs +++ b/nfa/src/lib.rs @@ -150,15 +150,17 @@ impl NfaLabel { pub fn get_value(&self) -> T { self.value } + /// Retrieve the moved position from the label. #[inline] pub fn get_moved(&self) -> usize { self.moved } + /// Retrieve whether or not the label comes from left-linear /// expansions. #[inline] - pub fn get_left_p(&self) -> bool { + pub fn is_left_p(&self) -> bool { self.left_p } } -- cgit v1.2.3-18-g5258