From a80db17473ff09cc72acba2c1975101e6dbedf39 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Sun, 18 Jun 2023 15:03:34 +0800 Subject: fixed the bugs of node duplications and left-open nodes There were two main issues in the previous version. One is that there are lots of duplications of nodes when manipulating the forest. This does not mean that labels repeat: by the use of the data type this cannot happen. What happened is that there were cloned nodes whose children are exactly equal. In this case there is no need to clone that node in the first place. This is now fixed by checking carefully before cloning, so that we do not clone unnecessary nodes. The other issue, which is perhaps more important, is that there are nodes which are not closed. This means that when there should be a reuction of grammar rules, the forest does not mark the corresponding node as already reduced. The incorrect forests thus caused is hard to fix: I tried several different approaches to fix it afterwards, but all to no avail. I also tried to record enough information to fix these nodes during the manipulations. It turned out that recording nodes is a dead end, as I cannot properly syncronize the information in the forest and the information in the chain-rule machine. Any inconsistencies will result in incorrect operations later on. The approach I finally adapt is to perform every possible reduction at each step. This might lead to some more nodes than what we need. But those are technically expected to be there after all, and it is easy to filter them out, so it is fine, from my point of view at the moment. Therefore, what remains is to filter those nodes out and connect it to the holy Emacs. :D --- nfa/src/default/nfa.rs | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) (limited to 'nfa') diff --git a/nfa/src/default/nfa.rs b/nfa/src/default/nfa.rs index d2fe88e..e38a1a9 100644 --- a/nfa/src/default/nfa.rs +++ b/nfa/src/default/nfa.rs @@ -20,7 +20,7 @@ pub struct DefaultNFA { graph: DLGraph, } -impl Default for DefaultNFA { +impl Default for DefaultNFA { fn default() -> Self { Self { graph: Default::default(), @@ -85,10 +85,9 @@ impl LabelExtGraph for DefaultNFA { } impl Nfa for DefaultNFA { - type FromRegex = DefaultNFA; + type FromRegex = DefaultNFA; - // Reminder: This is an adopted version of Thompson's - // construction. + // NOTE: This is an adopted version of Thompson's construction. fn to_nfa( regexps: &[impl Regex>], sub_pred: impl Fn(T) -> Result, Error>, @@ -107,10 +106,6 @@ impl Nfa for DefaultNFA { builder.add_vertices(nfa_len); - // for _ in 0..nfa_len { - // builder.add_vertex(); - // } - let default = LabelType::new(DOption(default), total_regexps_len, false); // add a default edge @@ -122,7 +117,7 @@ impl Nfa for DefaultNFA { result.push(0); for regexp in regexps.iter().take(regexps.len() - 1) { - result.push(result.last().unwrap() + regexp.nodes_len() * 2); + result.push(*result.last().unwrap() + regexp.nodes_len() * 2); } result @@ -137,14 +132,14 @@ impl Nfa for DefaultNFA { } ); - /// Get offset from `accumulators` without checking bounds. - macro_rules! get_offset_unsafe ( - ($num:expr) => { - { unsafe { *accumulators.get_unchecked($num) } } - } - ); + // /// Get offset from `accumulators` without checking bounds. + // macro_rules! get_offset_unsafe ( + // ($num:expr) => { + // { unsafe { *accumulators.get_unchecked($num) } } + // } + // ); - for (index, regex) in regexps.iter().enumerate() { + for (regex, offset) in regexps.iter().zip(accumulators.iter().copied()) { let root = if let Some(root) = regex.root() { root } else { @@ -155,11 +150,6 @@ impl Nfa for DefaultNFA { let regex_len = regex.nodes_len(); - // It is safe here to call `get_offset_unsafe`, as `index` - // is guaranteed to be strictly less than the length of - // `accumulators` by an assertion above. - let offset = get_offset_unsafe!(index); - let mut stack: Vec = Vec::with_capacity(regex_len); stack.push(root); -- cgit v1.2.3-18-g5258