diff options
author | JSDurand <mmemmew@gmail.com> | 2023-07-16 18:06:18 +0800 |
---|---|---|
committer | JSDurand <mmemmew@gmail.com> | 2023-07-16 18:06:18 +0800 |
commit | 780f3cc80cadf87ecfdb702ef90fcb606f2783fd (patch) | |
tree | 7d978d43b1c6f58c358e6f8e8d9f30c0303a7a98 /src | |
parent | 6a24e0a805c597b8f835c5c72a0e4dcdd64ca39b (diff) |
Fix the bug of forgetting to check cloned nodes.
In the process of splitting, cloning, and planting the forest, I
forgot to check whether some cloned node of the node in question
satisfies the condition. This used to cause forests that violate some
fundamental assumptions. Now this is supposed to be fixed, but more
tests await us.
Diffstat (limited to 'src')
-rw-r--r-- | src/helper.c | 11 | ||||
-rw-r--r-- | src/helper.h | 11 | ||||
-rw-r--r-- | src/lib.rs | 42 | ||||
-rw-r--r-- | src/test.c | 31 |
4 files changed, 89 insertions, 6 deletions
diff --git a/src/helper.c b/src/helper.c index 5d7e9f8..d52fa5f 100644 --- a/src/helper.c +++ b/src/helper.c @@ -1,4 +1,5 @@ #include "helper.h" +#include "big_endian.h" struct Label read_label(unsigned char *ptr) @@ -71,3 +72,13 @@ print_label(struct Label label) printf("%llu\n", label.content); } + +void +print_node(struct CForest *forest, uint64_t node) +{ + unsigned char node_ptr[8] = { 0 }; + + to_big_endian(node, node_ptr); + + print_forest_node(forest, node_ptr); +} diff --git a/src/helper.h b/src/helper.h index 18aa35f..37cd7fd 100644 --- a/src/helper.h +++ b/src/helper.h @@ -73,4 +73,15 @@ void print_label(struct Label label); // // And add functions for queryinf information from the forest. +// TODO: Declare opaque struct standing for forests, and connect to +// the function for printing labels of forests. +// +// This can be used in debuggers to help us diagnose the situation. + +struct CForest; + +void print_forest_node(struct CForest *forest, unsigned char *node); + +void print_node(struct CForest *forest, uint64_t node); + #endif @@ -8,6 +8,11 @@ extern crate grammar; use chain::{atom::DefaultAtom, default::DefaultChain, Chain}; use grammar::Grammar; +// For printing forests +use chain::item::{default::DefaultForest, ForestLabel}; +use grammar::GrammarLabel; +use graph::LabelGraph; + /// This struct is the representation of a parser. /// /// When the user constructs a parser, an instance of this struct will @@ -545,4 +550,41 @@ extern "C" fn parser_parse( } } +// TODO: Write a function to print the node label of a forest and +// expose it to C ABI. +// +// This can be used in LLDB. + +/// This struct is a wrapper around the forest. +/// +/// This is used so that we can call a C function receiving a pointer +/// to a forest struct. +#[derive(Debug, Clone)] +#[repr(C)] +pub struct CForest { + forest: DefaultForest<ForestLabel<GrammarLabel>>, +} + +/// Print the label of the node with id `node` in the forest `forest`. 
+/// +/// The parameter `node` should point to 8 bytes of unsigned +/// characters, which forms a number in 64 bits, in the *big endian* +/// format. +#[no_mangle] +extern "C" fn print_forest_node(forest: *mut CForest, node: *mut std::os::raw::c_uchar) { + let node = usize::from_be_bytes( + unsafe { std::slice::from_raw_parts(node, 8) } + .try_into() + .unwrap(), + ); + + let forest = unsafe { (*forest).forest.clone() }; + + let Ok(Some(label)) = forest.vertex_label(node) else { + return; + }; + + println!("node {node} has label {label}"); +} + pub mod bytes; @@ -19,7 +19,24 @@ main(int argc, char **argv) error_vec.len = error_vec_len; error_vec.capacity = error_vec_cap; - struct parser *parser = new_parser("start = \"a\"\"b\"\n", &error_vec); + char *grammar_string = "document = 1*( item )\n" +"\n" +"item = header [ price ] *1( note )\n" +"\n" +"header = *1star \"SP\" title %xA *( \"SP\" / %xA )\n" +"\n" +"title = 1*\"TEXT\"\n" +"\n" +"star = %x2A\n" +"\n" +"note = \"note:\" note-content %xA *( \"SP\" / %xA )\n" +"\n" +"note-content = 1*\"TEXT\"\n" +"\n" +"price = \"price:\" \"SP\" 1*\"DIGIT\" %xA *( \"SP\" / %xA )\n"; + + struct parser *parser = new_parser(grammar_string, &error_vec); + /* struct parser *parser = new_parser("start = \"a\"\"b\"\n", &error_vec); */ uint64_t error_length = from_big_endian(error_vec.len); @@ -39,7 +56,7 @@ main(int argc, char **argv) return 1; } - unsigned char *input = malloc (sizeof(unsigned char) * 16); + unsigned char *input = malloc (sizeof(unsigned char) * 10 * 8); if (input == NULL) { clean_parser(parser); @@ -49,14 +66,16 @@ main(int argc, char **argv) return EXIT_FAILURE; } - for (int i = 0; i < 16; i++) *(input+i) = 0; + int input_unenc[10] = { 3, 0, 2, 2, 2, 1, 1, 1, 0, 1 }; - *(input+15) = 1; + for (int i = 0; i < 10; i++) + to_big_endian(input_unenc[i], input + (8 * i)); unsigned char input_len[8] = { 0 }; - input_len[7] = 16; + input_len[7] = 8*2; - struct UnsignedVec input_vec = (struct UnsignedVec) { input_len, NULL, 
input }; + struct UnsignedVec input_vec = + (struct UnsignedVec) { input_len, NULL, input }; int result = parser_recognize (parser, |