diff options
-rw-r--r-- | chain/src/default.rs | 21 | ||||
-rw-r--r-- | chain/src/item/default/mod.rs | 50 | ||||
-rw-r--r-- | chain/src/item/default/splone.rs | 113 | ||||
-rw-r--r-- | chain/src/item/genins.rs | 11 | ||||
-rw-r--r-- | src/helper.c | 11 | ||||
-rw-r--r-- | src/helper.h | 11 | ||||
-rw-r--r-- | src/lib.rs | 42 | ||||
-rw-r--r-- | src/test.c | 31 |
8 files changed, 213 insertions, 77 deletions
diff --git a/chain/src/default.rs b/chain/src/default.rs index 7de720f..23baa5f 100644 --- a/chain/src/default.rs +++ b/chain/src/default.rs @@ -20,10 +20,7 @@ use std::fmt::Display; use graph_macro::Graph; -use std::{ - borrow::Borrow, - collections::{HashMap as Map, HashSet, TryReserveError}, -}; +use std::collections::{HashMap as Map, HashSet, TryReserveError}; /// The errors related to taking derivatives by chain rule. #[non_exhaustive] @@ -404,7 +401,7 @@ impl Chain for DefaultChain { } fn atom(&self) -> &Self::Atom { - self.atom.borrow() + &self.atom } fn epsilon(&self) -> Result<bool, Self::Error> { @@ -805,6 +802,8 @@ impl Chain for DefaultChain { // The 1-th node is a dummy node that should be removed. assert_ne!(root, 1); + // let _ = self.forest.print_viz("help.gv"); + self.forest.remove_node(1)?; let root_degree = self.forest.degree(root)?; @@ -858,10 +857,7 @@ impl Chain for DefaultChain { } for clone in all_completed_clones { - nodes.push( - self.forest - .reduction(clone, pos, ter, self.atom.borrow(), true)?, - ); + nodes.push(self.forest.reduction(clone, pos, ter, &self.atom, true)?); } } else if root_label.clone_index().is_some() { panic!( @@ -887,10 +883,7 @@ impl Chain for DefaultChain { } } - nodes.push( - self.forest - .reduction(root, pos, ter, self.atom.borrow(), true)?, - ); + nodes.push(self.forest.reduction(root, pos, ter, &self.atom, true)?); } dbg!(&nodes); @@ -1080,7 +1073,7 @@ mod test_chain { for (pos, t) in input.iter().copied().enumerate().take(2) { chain.chain(t, pos, no_item)?; - chain.forest().print_viz(&format!("forest {pos}.gv"))?; + // chain.forest().print_viz(&format!("forest {pos}.gv"))?; dbg!(pos, t); } diff --git a/chain/src/item/default/mod.rs b/chain/src/item/default/mod.rs index 7b0a843..0781dd2 100644 --- a/chain/src/item/default/mod.rs +++ b/chain/src/item/default/mod.rs @@ -498,7 +498,7 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> { let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); - let f_node_label_packed = builder_query(builder.borrow(), node_label.label()); + let f_node_label_packed = builder_query(&builder, node_label.label()); if let Some((f_node, _f_label, f_packed)) = f_node_label_packed { match (node_label.is_packed(), f_packed) { @@ -512,8 +512,7 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> { .ok_or(Error::NodeNoLabel(child))? .label(); - if let Some((f_child, _, _)) = - builder_query(builder.borrow(), child_label) + if let Some((f_child, _, _)) = builder_query(&builder, child_label) { builder.add_edge(f_node, f_child, node_label)?; } else { @@ -524,12 +523,7 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> { builder.add_edge(f_node, f_child, node_label)?; } } - } else if !children_match_p( - builder.borrow(), - f_node, - fragment.borrow(), - node, - )? { + } else if !children_match_p(&builder, f_node, fragment.borrow(), node)? { let cloned = self.clone_node(f_node, 0, false)?; let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); @@ -540,8 +534,7 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> { .ok_or(Error::NodeNoLabel(child))? .label(); - if let Some((f_child, _, _)) = - builder_query(builder.borrow(), child_label) + if let Some((f_child, _, _)) = builder_query(&builder, child_label) { builder.add_edge(cloned, f_child, node_label)?; } else { @@ -568,8 +561,7 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> { .vertex_label(child)? .ok_or(Error::NodeNoLabel(child))?; - let f_child = - builder_query(builder.borrow(), child_label.label()); + let f_child = builder_query(&builder, child_label.label()); if let Some((f_child, _, _)) = f_child { builder.add_edge(cloned, f_child, child_label)?; @@ -590,12 +582,8 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> { for cloned_child in fragment.children_of(node)? { let builder = PLGBuilderMut::from_graph_mut(&mut self.graph); - if !children_match_p( - builder.borrow(), - f_node, - fragment.borrow(), - cloned_child, - )? { + if !children_match_p(&builder, f_node, fragment.borrow(), cloned_child)? + { let cloned = self.clone_node(f_node, 0, false)?; let mut builder = PLGBuilderMut::from_graph_mut(&mut self.graph); @@ -605,8 +593,7 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> { .vertex_label(child)? .ok_or(Error::NodeNoLabel(child))?; - let f_child = - builder_query(builder.borrow(), child_label.label()); + let f_child = builder_query(&builder, child_label.label()); if let Some((f_child, _, _)) = f_child { builder.add_edge(cloned, f_child, child_label)?; @@ -636,8 +623,7 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> { .vertex_label(child)? .ok_or(Error::NodeNoLabel(child))?; - let f_child = - builder_query(builder.borrow(), child_label.label()); + let f_child = builder_query(&builder, child_label.label()); if let Some((f_child, _, _)) = f_child { builder.add_edge(f_node_clone, f_child, child_label)?; @@ -654,12 +640,7 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> { continue 'node_loop; } - if children_match_p( - builder.borrow(), - f_node_clone, - fragment.borrow(), - node, - )? { + if children_match_p(&builder, f_node_clone, fragment.borrow(), node)? { continue 'node_loop; } } @@ -679,7 +660,7 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> { .vertex_label(child)? .ok_or(Error::NodeNoLabel(child))?; - let f_child = builder_query(builder.borrow(), child_label.label()); + let f_child = builder_query(&builder, child_label.label()); if let Some((f_child, _, _)) = f_child { builder.add_edge(cloned, f_child, child_label)?; @@ -710,8 +691,7 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> { .vertex_label(child)? .ok_or(Error::NodeNoLabel(child))?; - let f_child = - builder_query(builder.borrow(), child_label.label()); + let f_child = builder_query(&builder, child_label.label()); if let Some((f_child, _, _)) = f_child { builder.add_edge(f_node_clone, f_child, child_label)?; @@ -729,7 +709,7 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> { } if children_match_p( - builder.borrow(), + &builder, f_node_clone, fragment.borrow(), cloned_child, @@ -753,7 +733,7 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> { .vertex_label(child)? .ok_or(Error::NodeNoLabel(child))?; - let f_child = builder_query(builder.borrow(), child_label.label()); + let f_child = builder_query(&builder, child_label.label()); if let Some((f_child, _, _)) = f_child { builder.add_edge(cloned, f_child, child_label)?; @@ -785,7 +765,7 @@ impl<T: GraphLabel> Forest<T> for DefaultForest<ForestLabel<T>> { .ok_or(Error::NodeNoLabel(child))? .label(); - let f_child_label_packed = builder_query(builder.borrow(), child_label_label); + let f_child_label_packed = builder_query(&builder, child_label_label); if let Some((f_child, _, _)) = f_child_label_packed { builder.add_edge(f_node, f_child, node_label)?; diff --git a/chain/src/item/default/splone.rs b/chain/src/item/default/splone.rs index c9d9c5a..09fca0b 100644 --- a/chain/src/item/default/splone.rs +++ b/chain/src/item/default/splone.rs @@ -209,30 +209,17 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { } let node_end = node_label.label().end(); - let node_degree = self.degree(node)?; // We can check the end to know whether the new label is equal // to the old label. if node_end.is_none() { - if node_degree == edge_index + 2 { - let last_child = self.nth_child(node, node_degree - 1)?; - - if self.is_prefix(last_child, fragment.borrow())? { - return Ok(node); - } - } - - if node_degree <= edge_index + 1 { - self.plant(node, fragment, planted)?; + if let Some(node_to_plant) = self.find_node_to_plant(fragment, node, edge_index)? { + self.plant(node_to_plant, fragment, planted)?; + return Ok(node_to_plant); + } else { return Ok(node); } - - let cloned = self.clone_node(node, edge_index + 1, false)?; - - self.plant(cloned, fragment, planted)?; - - return Ok(cloned); } let new_label = self.create_new_label(node, None, edge_index, Some((fragment, planted)))?; @@ -516,11 +503,11 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { if let Some((fragment, _planted)) = fragment { let modified_degree = std::cmp::max(child_degree, 1) - 1; - dbg!(node, end, edge_index, modified_degree); + // dbg!(node, end, edge_index, modified_degree); - dbg!(child, child_degree); + // dbg!(child, child_degree); - dbg!(&fragment); + // dbg!(&fragment); if self.has_same_children(child, node, modified_degree, edge_index + 1)? && child_degree != 0 @@ -588,6 +575,92 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { } } + /// Determine if the `fragment` should be planted anew. If so, return + /// a node to plant. + /// + /// Precisely speaking, if `node` has the fragment planted at the + /// position `edge_index` + 1, then there is no need to plant. If + /// not, but if `edge_index` + 1 is equal to the degree of `node`, + /// then we can directly plant under `node`. + /// + /// Moreover, if `node` is cloned, then do the same check for each of + /// its clones. + /// + /// # Assumption + /// + /// This function assumes the node is open-ended. + fn find_node_to_plant( + &mut self, + fragment: &DefaultForest<ForestLabel<GrammarLabel>>, + node: usize, + edge_index: usize, + ) -> Result<Option<usize>, Error> { + let mut node = node; + + let node_label = self.vertex_label(node)?.ok_or(Error::NodeNoLabel(node))?; + + if node_label.is_packed() { + dbg!("split pack: {node}"); + + return Err(Error::SplitPack(node)); + } + + if node_label.clone_index().is_some() { + let mut parents = self.parents_of(node)?; + + #[cfg(debug_assertions)] + assert_eq!(parents.len(), 1, "Assumption fails"); + + let packed = parents.next().unwrap().node(); + + for clone in self.children_of(packed)? { + let degree = self.degree(clone)?; + + if degree == 0 { + return Ok(Some(clone)); + } + + if degree >= edge_index + 2 { + let index_child = self.nth_child(clone, edge_index + 1)?; + + if self.is_prefix(index_child, fragment.borrow())? { + return Ok(None); + } + } + + if degree == edge_index + 1 { + return Ok(Some(clone)); + } + } + + node = self.clone_node(node, edge_index + 1, false)?; + + return Ok(Some(node)); + } + + let degree = self.degree(node)?; + + if degree == 0 { + return Ok(Some(node)); + } + + if degree >= edge_index + 2 { + let index_child = self.nth_child(node, edge_index + 1)?; + + if self.is_prefix(index_child, fragment.borrow())? { + return Ok(None); + } + } + + if degree == edge_index + 1 { + return Ok(Some(node)); + } + + node = self.clone_node(node, edge_index + 1, false)?; + + Ok(Some(node)) + } + /// Compare if two nodes have the same children in the same order. fn has_same_children( &self, diff --git a/chain/src/item/genins.rs b/chain/src/item/genins.rs index ce34df9..ac521cc 100644 --- a/chain/src/item/genins.rs +++ b/chain/src/item/genins.rs @@ -213,7 +213,7 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { // operation for debugging purposes. let mut to_print = true; - if std::fs::metadata(format!("output/")).is_err() { + if std::fs::metadata("output/").is_err() { to_print = false; } @@ -578,8 +578,15 @@ impl DefaultForest<ForestLabel<GrammarLabel>> { let mut result = None; + // dbg!(leaf_label, &fragment); + + // crate::item::default::print_labels(atom, fragment).unwrap(); + + // dbg!(self.vertex_label(node)?); + for (index, child) in self.children_of(node)?.enumerate() { - if matches!(self.vertex_label(child)?, Some(child_label) if child_label == leaf_label) + // dbg!(self.vertex_label(child)?, child); + if matches!(self.vertex_label(child)?, Some(child_label) if child_label.label() == leaf_label.label()) { result = Some((index, child)); break; diff --git a/src/helper.c b/src/helper.c index 5d7e9f8..d52fa5f 100644 --- a/src/helper.c +++ b/src/helper.c @@ -1,4 +1,5 @@ #include "helper.h" +#include "big_endian.h" struct Label read_label(unsigned char *ptr) @@ -71,3 +72,13 @@ print_label(struct Label label) printf("%llu\n", label.content); } + +void +print_node(struct CForest *forest, uint64_t node) +{ + unsigned char node_ptr[8] = { 0 }; + + to_big_endian(node, node_ptr); + + print_forest_node(forest, node_ptr); +} diff --git a/src/helper.h b/src/helper.h index 18aa35f..37cd7fd 100644 --- a/src/helper.h +++ b/src/helper.h @@ -73,4 +73,15 @@ void print_label(struct Label label); // // And add functions for queryinf information from the forest. +// TODO: Declare opaque struct standing for forests, and connect to +// the function for printing labels of forests. +// +// This can be used in debuggers to help us diagnose the situation. + +struct CForest; + +void print_forest_node(struct CForest *forest, unsigned char *node); + +void print_node(struct CForest *forest, uint64_t node); + #endif @@ -8,6 +8,11 @@ extern crate grammar; use chain::{atom::DefaultAtom, default::DefaultChain, Chain}; use grammar::Grammar; +// For printing forests +use chain::item::{default::DefaultForest, ForestLabel}; +use grammar::GrammarLabel; +use graph::LabelGraph; + /// This struct is the representation of a parser. /// /// When the user constructs a parser, an instance of this struct will @@ -545,4 +550,41 @@ extern "C" fn parser_parse( } } +// TODO: Write a function to print the node label of a forest and +// expose it to C ABI. +// +// This can be used in LLDB. + +/// This struct is a wrapper around the forest. +/// +/// This is used so that we can call a C function receiving a pointer +/// to a forest struct. +#[derive(Debug, Clone)] +#[repr(C)] +pub struct CForest { + forest: DefaultForest<ForestLabel<GrammarLabel>>, +} + +/// Print the label of the node with id `node` in the forest `forest`. +/// +/// The parameter `node` should point to 8 bytes of unsigned +/// characters, which forms a number in 64 bits, in the *big endian* +/// format. +#[no_mangle] +extern "C" fn print_forest_node(forest: *mut CForest, node: *mut std::os::raw::c_uchar) { + let node = usize::from_be_bytes( + unsafe { std::slice::from_raw_parts(node, 8) } + .try_into() + .unwrap(), + ); + + let forest = unsafe { (*forest).forest.clone() }; + + let Ok(Some(label)) = forest.vertex_label(node) else { + return; + }; + + println!("node {node} has label {label}"); +} + pub mod bytes; @@ -19,7 +19,24 @@ main(int argc, char **argv) error_vec.len = error_vec_len; error_vec.capacity = error_vec_cap; - struct parser *parser = new_parser("start = \"a\"\"b\"\n", &error_vec); + char *grammar_string = "document = 1*( item )\n" +"\n" +"item = header [ price ] *1( note )\n" +"\n" +"header = *1star \"SP\" title %xA *( \"SP\" / %xA )\n" +"\n" +"title = 1*\"TEXT\"\n" +"\n" +"star = %x2A\n" +"\n" +"note = \"note:\" note-content %xA *( \"SP\" / %xA )\n" +"\n" +"note-content = 1*\"TEXT\"\n" +"\n" +"price = \"price:\" \"SP\" 1*\"DIGIT\" %xA *( \"SP\" / %xA )\n"; + + struct parser *parser = new_parser(grammar_string, &error_vec); + /* struct parser *parser = new_parser("start = \"a\"\"b\"\n", &error_vec); */ uint64_t error_length = from_big_endian(error_vec.len); @@ -39,7 +56,7 @@ main(int argc, char **argv) return 1; } - unsigned char *input = malloc (sizeof(unsigned char) * 16); + unsigned char *input = malloc (sizeof(unsigned char) * 10 * 8); if (input == NULL) { clean_parser(parser); @@ -49,14 +66,16 @@ main(int argc, char **argv) return EXIT_FAILURE; } - for (int i = 0; i < 16; i++) *(input+i) = 0; + int input_unenc[10] = { 3, 0, 2, 2, 2, 1, 1, 1, 0, 1 }; - *(input+15) = 1; + for (int i = 0; i < 10; i++) + to_big_endian(input_unenc[i], input + (8 * i)); unsigned char input_len[8] = { 0 }; - input_len[7] = 16; + input_len[7] = 8*2; - struct UnsignedVec input_vec = (struct UnsignedVec) { input_len, NULL, input }; + struct UnsignedVec input_vec = + (struct UnsignedVec) { input_len, NULL, input }; int result = parser_recognize (parser, |