From 780f3cc80cadf87ecfdb702ef90fcb606f2783fd Mon Sep 17 00:00:00 2001 From: JSDurand Date: Sun, 16 Jul 2023 18:06:18 +0800 Subject: Fix the bug of forgetting to check cloned nodes. In the process of splitting, cloning, and planting the forest, I forgot to check whether some cloned node of the node inquestion satisfy the condition. This used to cause forests that violate some fundamental assumptions. Now this is supposed to be fixed, but more tests await us. --- chain/src/default.rs | 21 +++----- chain/src/item/default/mod.rs | 50 ++++++----------- chain/src/item/default/splone.rs | 113 ++++++++++++++++++++++++++++++++------- chain/src/item/genins.rs | 11 +++- src/helper.c | 11 ++++ src/helper.h | 11 ++++ src/lib.rs | 42 +++++++++++++++ src/test.c | 31 ++++++++--- 8 files changed, 213 insertions(+), 77 deletions(-) diff --git a/chain/src/default.rs b/chain/src/default.rs index 7de720f..23baa5f 100644 --- a/chain/src/default.rs +++ b/chain/src/default.rs @@ -20,10 +20,7 @@ use std::fmt::Display; use graph_macro::Graph; -use std::{ - borrow::Borrow, - collections::{HashMap as Map, HashSet, TryReserveError}, -}; +use std::collections::{HashMap as Map, HashSet, TryReserveError}; /// The errors related to taking derivatives by chain rule. #[non_exhaustive] @@ -404,7 +401,7 @@ impl Chain for DefaultChain { } fn atom(&self) -> &Self::Atom { - self.atom.borrow() + &self.atom } fn epsilon(&self) -> Result { @@ -805,6 +802,8 @@ impl Chain for DefaultChain { // The 1-th node is a dummy node that should be removed. assert_ne!(root, 1); + // let _ = self.forest.print_viz("help.gv"); + self.forest.remove_node(1)?; let root_degree = self.forest.degree(root)?; @@ -858,10 +857,7 @@ impl Chain for DefaultChain { } for clone in all_completed_clones { - nodes.push( - self.forest - .reduction(clone, pos, ter, self.atom.borrow(), true)?, - ); + nodes.push(self.forest.reduction(clone, pos, ter, &self.atom, true)?); } } else if root_label.clone_index().is_some() { panic!( @@ -887,10 +883,7 @@ impl Chain for DefaultChain { } } - nodes.push( - self.forest - .reduction(root, pos, ter, self.atom.borrow(), true)?, - ); + nodes.push(self.forest.reduction(root, pos, ter, &self.atom, true)?); } dbg!(&nodes); @@ -1080,7 +1073,7 @@ mod test_chain { for (pos, t) in input.iter().copied().enumerate().take(2) { chain.chain(t, pos, no_item)?; - chain.forest().print_viz(&format!("forest {pos}.gv"))?; + // chain.forest().print_viz(&format!("forest {pos}.gv"))?; dbg!(pos, t); } diff --git a/chain/src/item/default/mod.rs b/chain/src/item/default/mod.rs index 7b0a843..0781dd2 100644 --- a/chain/src/item/default/mod.rs +++ b/chain/src/item/default/mod.rs @@ -498,7 +498,7 @@ impl Forest for DefaultForest> { let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); - let f_node_label_packed = builder_query(builder.borrow(), node_label.label()); + let f_node_label_packed = builder_query(&builder, node_label.label()); if let Some((f_node, _f_label, f_packed)) = f_node_label_packed { match (node_label.is_packed(), f_packed) { @@ -512,8 +512,7 @@ impl Forest for DefaultForest> { .ok_or(Error::NodeNoLabel(child))? .label(); - if let Some((f_child, _, _)) = - builder_query(builder.borrow(), child_label) + if let Some((f_child, _, _)) = builder_query(&builder, child_label) { builder.add_edge(f_node, f_child, node_label)?; } else { @@ -524,12 +523,7 @@ impl Forest for DefaultForest> { builder.add_edge(f_node, f_child, node_label)?; } } - } else if !children_match_p( - builder.borrow(), - f_node, - fragment.borrow(), - node, - )? { + } else if !children_match_p(&builder, f_node, fragment.borrow(), node)? { let cloned = self.clone_node(f_node, 0, false)?; let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); @@ -540,8 +534,7 @@ impl Forest for DefaultForest> { .ok_or(Error::NodeNoLabel(child))? .label(); - if let Some((f_child, _, _)) = - builder_query(builder.borrow(), child_label) + if let Some((f_child, _, _)) = builder_query(&builder, child_label) { builder.add_edge(cloned, f_child, node_label)?; } else { @@ -568,8 +561,7 @@ impl Forest for DefaultForest> { .vertex_label(child)? .ok_or(Error::NodeNoLabel(child))?; - let f_child = - builder_query(builder.borrow(), child_label.label()); + let f_child = builder_query(&builder, child_label.label()); if let Some((f_child, _, _)) = f_child { builder.add_edge(cloned, f_child, child_label)?; @@ -590,12 +582,8 @@ impl Forest for DefaultForest> { for cloned_child in fragment.children_of(node)? { let builder = PLGBuilderMut::from_graph_mut(&mut self.graph); - if !children_match_p( - builder.borrow(), - f_node, - fragment.borrow(), - cloned_child, - )? { + if !children_match_p(&builder, f_node, fragment.borrow(), cloned_child)? + { let cloned = self.clone_node(f_node, 0, false)?; let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); @@ -605,8 +593,7 @@ impl Forest for DefaultForest> { .vertex_label(child)? .ok_or(Error::NodeNoLabel(child))?; - let f_child = - builder_query(builder.borrow(), child_label.label()); + let f_child = builder_query(&builder, child_label.label()); if let Some((f_child, _, _)) = f_child { builder.add_edge(cloned, f_child, child_label)?; @@ -636,8 +623,7 @@ impl Forest for DefaultForest> { .vertex_label(child)? .ok_or(Error::NodeNoLabel(child))?; - let f_child = - builder_query(builder.borrow(), child_label.label()); + let f_child = builder_query(&builder, child_label.label()); if let Some((f_child, _, _)) = f_child { builder.add_edge(f_node_clone, f_child, child_label)?; @@ -654,12 +640,7 @@ impl Forest for DefaultForest> { continue 'node_loop; } - if children_match_p( - builder.borrow(), - f_node_clone, - fragment.borrow(), - node, - )? { + if children_match_p(&builder, f_node_clone, fragment.borrow(), node)? { continue 'node_loop; } } @@ -679,7 +660,7 @@ impl Forest for DefaultForest> { .vertex_label(child)? .ok_or(Error::NodeNoLabel(child))?; - let f_child = builder_query(builder.borrow(), child_label.label()); + let f_child = builder_query(&builder, child_label.label()); if let Some((f_child, _, _)) = f_child { builder.add_edge(cloned, f_child, child_label)?; @@ -710,8 +691,7 @@ impl Forest for DefaultForest> { .vertex_label(child)? .ok_or(Error::NodeNoLabel(child))?; - let f_child = - builder_query(builder.borrow(), child_label.label()); + let f_child = builder_query(&builder, child_label.label()); if let Some((f_child, _, _)) = f_child { builder.add_edge(f_node_clone, f_child, child_label)?; @@ -729,7 +709,7 @@ impl Forest for DefaultForest> { } if children_match_p( - builder.borrow(), + &builder, f_node_clone, fragment.borrow(), cloned_child, @@ -753,7 +733,7 @@ impl Forest for DefaultForest> { .vertex_label(child)? .ok_or(Error::NodeNoLabel(child))?; - let f_child = builder_query(builder.borrow(), child_label.label()); + let f_child = builder_query(&builder, child_label.label()); if let Some((f_child, _, _)) = f_child { builder.add_edge(cloned, f_child, child_label)?; @@ -785,7 +765,7 @@ impl Forest for DefaultForest> { .ok_or(Error::NodeNoLabel(child))? .label(); - let f_child_label_packed = builder_query(builder.borrow(), child_label_label); + let f_child_label_packed = builder_query(&builder, child_label_label); if let Some((f_child, _, _)) = f_child_label_packed { builder.add_edge(f_node, f_child, node_label)?; diff --git a/chain/src/item/default/splone.rs b/chain/src/item/default/splone.rs index c9d9c5a..09fca0b 100644 --- a/chain/src/item/default/splone.rs +++ b/chain/src/item/default/splone.rs @@ -209,30 +209,17 @@ impl DefaultForest> { } let node_end = node_label.label().end(); - let node_degree = self.degree(node)?; // We can check the end to know whether the new label is equal // to the old label. if node_end.is_none() { - if node_degree == edge_index + 2 { - let last_child = self.nth_child(node, node_degree - 1)?; - - if self.is_prefix(last_child, fragment.borrow())? { - return Ok(node); - } - } - - if node_degree <= edge_index + 1 { - self.plant(node, fragment, planted)?; + if let Some(node_to_plant) = self.find_node_to_plant(fragment, node, edge_index)? { + self.plant(node_to_plant, fragment, planted)?; + return Ok(node_to_plant); + } else { return Ok(node); } - - let cloned = self.clone_node(node, edge_index + 1, false)?; - - self.plant(cloned, fragment, planted)?; - - return Ok(cloned); } let new_label = self.create_new_label(node, None, edge_index, Some((fragment, planted)))?; @@ -516,11 +503,11 @@ impl DefaultForest> { if let Some((fragment, _planted)) = fragment { let modified_degree = std::cmp::max(child_degree, 1) - 1; - dbg!(node, end, edge_index, modified_degree); + // dbg!(node, end, edge_index, modified_degree); - dbg!(child, child_degree); + // dbg!(child, child_degree); - dbg!(&fragment); + // dbg!(&fragment); if self.has_same_children(child, node, modified_degree, edge_index + 1)? && child_degree != 0 @@ -588,6 +575,92 @@ impl DefaultForest> { } } + /// Determine if the `fragment` should be planted anew. If so, return + /// a node to plant. + /// + /// Precisely speaking, if `node` has the fragment planted at the + /// position `edge_index` + 1, then there is no need to plant. If + /// not, but if `edge_index` + 1 is equal to the degree of `node`, + /// then we can directly plant under `node`. + /// + /// Moreover, if `node` is cloned, then do the same check for each of + /// its clones. + /// + /// # Assumption + /// + /// This function assumes the node is open-ended. + fn find_node_to_plant( + &mut self, + fragment: &DefaultForest>, + node: usize, + edge_index: usize, + ) -> Result, Error> { + let mut node = node; + + let node_label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?; + + if node_label.is_packed() { + dbg!("split pack: {node}"); + + return Err(Error::SplitPack(node)); + } + + if node_label.clone_index().is_some() { + let mut parents = self.parents_of(node)?; + + #[cfg(debug_assertions)] + assert_eq!(parents.len(), 1, "Assumption fails"); + + let packed = parents.next().unwrap().node(); + + for clone in self.children_of(packed)? { + let degree = self.degree(clone)?; + + if degree == 0 { + return Ok(Some(clone)); + } + + if degree >= edge_index + 2 { + let index_child = self.nth_child(clone, edge_index + 1)?; + + if self.is_prefix(index_child, fragment.borrow())? { + return Ok(None); + } + } + + if degree == edge_index + 1 { + return Ok(Some(clone)); + } + } + + node = self.clone_node(node, edge_index + 1, false)?; + + return Ok(Some(node)); + } + + let degree = self.degree(node)?; + + if degree == 0 { + return Ok(Some(node)); + } + + if degree >= edge_index + 2 { + let index_child = self.nth_child(node, edge_index + 1)?; + + if self.is_prefix(index_child, fragment.borrow())? { + return Ok(None); + } + } + + if degree == edge_index + 1 { + return Ok(Some(node)); + } + + node = self.clone_node(node, edge_index + 1, false)?; + + Ok(Some(node)) + } + /// Compare if two nodes have the same children in the same order. fn has_same_children( &self, diff --git a/chain/src/item/genins.rs b/chain/src/item/genins.rs index ce34df9..ac521cc 100644 --- a/chain/src/item/genins.rs +++ b/chain/src/item/genins.rs @@ -213,7 +213,7 @@ impl DefaultForest> { // operation for debugging purposes. let mut to_print = true; - if std::fs::metadata(format!("output/")).is_err() { + if std::fs::metadata("output/").is_err() { to_print = false; } @@ -578,8 +578,15 @@ impl DefaultForest> { let mut result = None; + // dbg!(leaf_label, &fragment); + + // crate::item::default::print_labels(atom, fragment).unwrap(); + + // dbg!(self.vertex_label(node)?); + for (index, child) in self.children_of(node)?.enumerate() { - if matches!(self.vertex_label(child)?, Some(child_label) if child_label == leaf_label) + // dbg!(self.vertex_label(child)?, child); + if matches!(self.vertex_label(child)?, Some(child_label) if child_label.label() == leaf_label.label()) { result = Some((index, child)); break; diff --git a/src/helper.c b/src/helper.c index 5d7e9f8..d52fa5f 100644 --- a/src/helper.c +++ b/src/helper.c @@ -1,4 +1,5 @@ #include "helper.h" +#include "big_endian.h" struct Label read_label(unsigned char *ptr) @@ -71,3 +72,13 @@ print_label(struct Label label) printf("%llu\n", label.content); } + +void +print_node(struct CForest *forest, uint64_t node) +{ + unsigned char node_ptr[8] = { 0 }; + + to_big_endian(node, node_ptr); + + print_forest_node(forest, node_ptr); +} diff --git a/src/helper.h b/src/helper.h index 18aa35f..37cd7fd 100644 --- a/src/helper.h +++ b/src/helper.h @@ -73,4 +73,15 @@ void print_label(struct Label label); // // And add functions for queryinf information from the forest. +// TODO: Declare opaque struct standing for forests, and connect to +// the function for printing labels of forests. +// +// This can be used in debuggers to help us diagnose the situation. + +struct CForest; + +void print_forest_node(struct CForest *forest, unsigned char *node); + +void print_node(struct CForest *forest, uint64_t node); + #endif diff --git a/src/lib.rs b/src/lib.rs index 685c66e..5412ed7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,6 +8,11 @@ extern crate grammar; use chain::{atom::DefaultAtom, default::DefaultChain, Chain}; use grammar::Grammar; +// For printing forests +use chain::item::{default::DefaultForest, ForestLabel}; +use grammar::GrammarLabel; +use graph::LabelGraph; + /// This struct is the representation of a parser. /// /// When the user constructs a parser, an instance of this struct will @@ -545,4 +550,41 @@ extern "C" fn parser_parse( } } +// TODO: Write a function to print the node label of a forest and +// expose it to C ABI. +// +// This can be used in LLDB. + +/// This struct is a wrapper around the forest. +/// +/// This is used so that we can call a C function receiving a pointer +/// to a forest struct. +#[derive(Debug, Clone)] +#[repr(C)] +pub struct CForest { + forest: DefaultForest>, +} + +/// Print the label of the node with id `node` in the forest `forest`. +/// +/// The parameter `node` should point to 8 bytes of unsigned +/// characters, which forms a number in 64 bits, in the *big endian* +/// format. +#[no_mangle] +extern "C" fn print_forest_node(forest: *mut CForest, node: *mut std::os::raw::c_uchar) { + let node = usize::from_be_bytes( + unsafe { std::slice::from_raw_parts(node, 8) } + .try_into() + .unwrap(), + ); + + let forest = unsafe { (*forest).forest.clone() }; + + let Ok(Some(label)) = forest.vertex_label(node) else { + return; + }; + + println!("node {node} has label {label}"); +} + pub mod bytes; diff --git a/src/test.c b/src/test.c index f3d5f3c..5ec2d58 100644 --- a/src/test.c +++ b/src/test.c @@ -19,7 +19,24 @@ main(int argc, char **argv) error_vec.len = error_vec_len; error_vec.capacity = error_vec_cap; - struct parser *parser = new_parser("start = \"a\"\"b\"\n", &error_vec); + char *grammar_string = "document = 1*( item )\n" +"\n" +"item = header [ price ] *1( note )\n" +"\n" +"header = *1star \"SP\" title %xA *( \"SP\" / %xA )\n" +"\n" +"title = 1*\"TEXT\"\n" +"\n" +"star = %x2A\n" +"\n" +"note = \"note:\" note-content %xA *( \"SP\" / %xA )\n" +"\n" +"note-content = 1*\"TEXT\"\n" +"\n" +"price = \"price:\" \"SP\" 1*\"DIGIT\" %xA *( \"SP\" / %xA )\n"; + + struct parser *parser = new_parser(grammar_string, &error_vec); + /* struct parser *parser = new_parser("start = \"a\"\"b\"\n", &error_vec); */ uint64_t error_length = from_big_endian(error_vec.len); @@ -39,7 +56,7 @@ main(int argc, char **argv) return 1; } - unsigned char *input = malloc (sizeof(unsigned char) * 16); + unsigned char *input = malloc (sizeof(unsigned char) * 10 * 8); if (input == NULL) { clean_parser(parser); @@ -49,14 +66,16 @@ main(int argc, char **argv) return EXIT_FAILURE; } - for (int i = 0; i < 16; i++) *(input+i) = 0; + int input_unenc[10] = { 3, 0, 2, 2, 2, 1, 1, 1, 0, 1 }; - *(input+15) = 1; + for (int i = 0; i < 10; i++) + to_big_endian(input_unenc[i], input + (8 * i)); unsigned char input_len[8] = { 0 }; - input_len[7] = 16; + input_len[7] = 8*2; - struct UnsignedVec input_vec = (struct UnsignedVec) { input_len, NULL, input }; + struct UnsignedVec input_vec = + (struct UnsignedVec) { input_len, NULL, input }; int result = parser_recognize (parser, -- cgit v1.2.3-18-g5258