From 780f3cc80cadf87ecfdb702ef90fcb606f2783fd Mon Sep 17 00:00:00 2001 From: JSDurand Date: Sun, 16 Jul 2023 18:06:18 +0800 Subject: Fix the bug of forgetting to check cloned nodes. In the process of splitting, cloning, and planting the forest, I forgot to check whether some cloned node of the node in question satisfies the condition. This used to cause forests that violate some fundamental assumptions. Now this is supposed to be fixed, but more tests await us. --- src/helper.c | 11 +++++++++++ src/helper.h | 11 +++++++++++ src/lib.rs | 42 ++++++++++++++++++++++++++++++++++++++++++ src/test.c | 31 +++++++++++++++++++++++++------ 4 files changed, 89 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/helper.c b/src/helper.c index 5d7e9f8..d52fa5f 100644 --- a/src/helper.c +++ b/src/helper.c @@ -1,4 +1,5 @@ #include "helper.h" +#include "big_endian.h" struct Label read_label(unsigned char *ptr) @@ -71,3 +72,13 @@ print_label(struct Label label) printf("%llu\n", label.content); } + +void +print_node(struct CForest *forest, uint64_t node) +{ + unsigned char node_ptr[8] = { 0 }; + + to_big_endian(node, node_ptr); + + print_forest_node(forest, node_ptr); +} diff --git a/src/helper.h b/src/helper.h index 18aa35f..37cd7fd 100644 --- a/src/helper.h +++ b/src/helper.h @@ -73,4 +73,15 @@ void print_label(struct Label label); // // And add functions for queryinf information from the forest. +// TODO: Declare opaque struct standing for forests, and connect to +// the function for printing labels of forests. +// +// This can be used in debuggers to help us diagnose the situation. 
+ +struct CForest; + +void print_forest_node(struct CForest *forest, unsigned char *node); + +void print_node(struct CForest *forest, uint64_t node); + #endif diff --git a/src/lib.rs b/src/lib.rs index 685c66e..5412ed7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,6 +8,11 @@ extern crate grammar; use chain::{atom::DefaultAtom, default::DefaultChain, Chain}; use grammar::Grammar; +// For printing forests +use chain::item::{default::DefaultForest, ForestLabel}; +use grammar::GrammarLabel; +use graph::LabelGraph; + /// This struct is the representation of a parser. /// /// When the user constructs a parser, an instance of this struct will @@ -545,4 +550,41 @@ extern "C" fn parser_parse( } } +// TODO: Write a function to print the node label of a forest and +// expose it to C ABI. +// +// This can be used in LLDB. + +/// This struct is a wrapper around the forest. +/// +/// This is used so that we can call a C function receiving a pointer +/// to a forest struct. +#[derive(Debug, Clone)] +#[repr(C)] +pub struct CForest { + forest: DefaultForest<ForestLabel<GrammarLabel>>, +} + +/// Print the label of the node with id `node` in the forest `forest`. +/// +/// The parameter `node` should point to 8 bytes of unsigned +/// characters, which forms a number in 64 bits, in the *big endian* +/// format. 
+#[no_mangle] +extern "C" fn print_forest_node(forest: *mut CForest, node: *mut std::os::raw::c_uchar) { + let node = usize::from_be_bytes( + unsafe { std::slice::from_raw_parts(node, 8) } + .try_into() + .unwrap(), + ); + + let forest = unsafe { (*forest).forest.clone() }; + + let Ok(Some(label)) = forest.vertex_label(node) else { + return; + }; + + println!("node {node} has label {label}"); +} + pub mod bytes; diff --git a/src/test.c b/src/test.c index f3d5f3c..5ec2d58 100644 --- a/src/test.c +++ b/src/test.c @@ -19,7 +19,24 @@ main(int argc, char **argv) error_vec.len = error_vec_len; error_vec.capacity = error_vec_cap; - struct parser *parser = new_parser("start = \"a\"\"b\"\n", &error_vec); + char *grammar_string = "document = 1*( item )\n" +"\n" +"item = header [ price ] *1( note )\n" +"\n" +"header = *1star \"SP\" title %xA *( \"SP\" / %xA )\n" +"\n" +"title = 1*\"TEXT\"\n" +"\n" +"star = %x2A\n" +"\n" +"note = \"note:\" note-content %xA *( \"SP\" / %xA )\n" +"\n" +"note-content = 1*\"TEXT\"\n" +"\n" +"price = \"price:\" \"SP\" 1*\"DIGIT\" %xA *( \"SP\" / %xA )\n"; + + struct parser *parser = new_parser(grammar_string, &error_vec); + /* struct parser *parser = new_parser("start = \"a\"\"b\"\n", &error_vec); */ uint64_t error_length = from_big_endian(error_vec.len); @@ -39,7 +56,7 @@ main(int argc, char **argv) return 1; } - unsigned char *input = malloc (sizeof(unsigned char) * 16); + unsigned char *input = malloc (sizeof(unsigned char) * 10 * 8); if (input == NULL) { clean_parser(parser); @@ -49,14 +66,16 @@ main(int argc, char **argv) return EXIT_FAILURE; } - for (int i = 0; i < 16; i++) *(input+i) = 0; + int input_unenc[10] = { 3, 0, 2, 2, 2, 1, 1, 1, 0, 1 }; - *(input+15) = 1; + for (int i = 0; i < 10; i++) + to_big_endian(input_unenc[i], input + (8 * i)); unsigned char input_len[8] = { 0 }; - input_len[7] = 16; + input_len[7] = 8*2; - struct UnsignedVec input_vec = (struct UnsignedVec) { input_len, NULL, input }; + struct UnsignedVec input_vec = + 
(struct UnsignedVec) { input_len, NULL, input }; int result = parser_recognize (parser, -- cgit v1.2.3-18-g5258