From 7033187abaf42772097377c0a1ffc2cd4cefdada Mon Sep 17 00:00:00 2001
From: JSDurand
Date: Fri, 4 Aug 2023 10:12:04 +0800
Subject: minor adjustments

Not bug deals but adjustments of details.
---
 src/lib.rs | 300 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 295 insertions(+), 5 deletions(-)

(limited to 'src/lib.rs')

diff --git a/src/lib.rs b/src/lib.rs
index 7cc5223..aed8536 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -9,8 +9,8 @@ use chain::{atom::DefaultAtom, default::DefaultChain, Chain};
 use grammar::Grammar;
 
 // For printing forests
-use chain::item::{default::DefaultForest, ForestLabel};
-use grammar::GrammarLabel;
+use chain::item::{default::DefaultForest, ForestLabel, ForestLabelType};
+use grammar::{GrammarLabel, TNT};
 use graph::LabelGraph;
 
 /// This struct is the representation of a parser.
@@ -483,9 +483,6 @@ extern "C" fn parser_parse(
 
     match forest {
         Ok(forest) => {
-            use graph::Graph;
-            forest.print_viz("test forest.gv").unwrap();
-
             Box::leak(parser_box);
 
             let mut bytes = bytes::forest_to_bytes(&forest);
@@ -556,6 +553,299 @@ extern "C" fn parser_parse(
     }
 }
 
+/// Decode the serialized forest label stored at `label`.
+///
+/// Byte 0 is the status of the label: 0 means plain, 1 means packed,
+/// and any other value means cloned, in which case bytes 1..9 hold
+/// the clone index.  Bytes 9..17 and 17..25 hold the start and the
+/// end.  Byte 25 selects the kind of the inner label: 0 wraps the
+/// content in `TNT::Ter`, 1 wraps it in `TNT::Non`, and any other
+/// value passes the content on as it is.  Bytes 26..34 hold the
+/// content.  Every multi-byte number is big-endian.
+///
+/// The caller has to make sure that `label` points to at least 34
+/// readable bytes.
+fn read_label(label: *mut std::os::raw::c_uchar) -> ForestLabel {
+    // SAFETY: the caller guarantees 34 readable bytes behind `label`.
+    let bytes = unsafe { std::slice::from_raw_parts(label, 34) };
+
+    let read_usize = |offset: usize| -> usize {
+        let mut buffer = [0u8; 8];
+        buffer.copy_from_slice(&bytes[offset..offset + 8]);
+        usize::from_be_bytes(buffer)
+    };
+
+    let label_status = match bytes[0] {
+        0 => ForestLabelType::Plain,
+        1 => ForestLabelType::Packed,
+        _ => ForestLabelType::Cloned(read_usize(1)),
+    };
+
+    let start = read_usize(9);
+    let end = read_usize(17);
+    let content = read_usize(26);
+
+    let inner_label = match bytes[25] {
+        0 => GrammarLabel::new_closed(TNT::Ter(content), start, end),
+        1 => GrammarLabel::new_closed(TNT::Non(content), start, end),
+        _ => GrammarLabel::new_closed(content, start, end),
+    };
+
+    ForestLabel::new(inner_label, label_status)
+}
+
+/// Hand the error message `$err` over to the C caller and return from
+/// the enclosing function.
+///
+/// The length and the capacity of the message are written, as eight
+/// big-endian bytes each, behind `$elen` and `$ecap`, and the `data`
+/// field of `$evec` is pointed at the bytes of the message.  The
+/// message is forgotten instead of dropped, so that those bytes
+/// outlive this function.
+macro_rules! return_error {
+    ($err:expr, $elen:ident, $ecap:ident, $evec:ident) => {
+        let mut e_string = $err;
+
+        let e_string_len_slice = e_string.len().to_be_bytes();
+        let e_string_cap_slice = e_string.capacity().to_be_bytes();
+
+        // SAFETY: the caller of the enclosing function guarantees that
+        // the error vector and its length and capacity buffers, of
+        // eight bytes each, are valid for writing.
+        unsafe {
+            for i in 0..8 {
+                *($elen.add(i)) = e_string_len_slice.get(i).copied().unwrap();
+                *($ecap.add(i)) = e_string_cap_slice.get(i).copied().unwrap();
+            }
+
+            (*$evec).data = e_string.as_mut_ptr() as *mut std::os::raw::c_char;
+        }
+
+        std::mem::forget(e_string);
+
+        return;
+    };
+}
+
+/// Write the forest serialized in `forest_vec` to the file named
+/// `filename`, as a Graphviz digraph.
+///
+/// The bytes of the forest are read as follows, every number being
+/// eight big-endian bytes: bytes 8..11 are the special mark, bytes
+/// 11..19 are the number of nodes, bytes 19..27 are the offset of
+/// the labels, and from byte 27 on each node takes 16 bytes, its
+/// degree followed by the offset of its edges.  Each edge is the
+/// index of its target node, and each label takes 34 bytes, which
+/// `read_label` decodes.
+///
+/// On failure a message is handed over through `error_vec`, by means
+/// of `return_error!`, and the function returns early.
+///
+/// The caller has to make sure that both vectors are valid, that the
+/// data of `forest_vec` are as long as its length says, and that
+/// `filename`, unless it is null, is a NUL-terminated string.
+#[no_mangle]
+extern "C" fn print_forest(
+    forest_vec: *mut LenVec,
+    error_vec: *mut LenVec,
+    filename: *mut std::os::raw::c_char,
+) {
+    // SAFETY: the caller guarantees that both vectors are valid.
+    let (forest_len_ptr, forest_data) = unsafe { ((*forest_vec).len, (*forest_vec).data) };
+    let error_len = unsafe { (*error_vec).len };
+    let error_cap = unsafe { (*error_vec).capacity };
+
+    let forest_len = usize::from_be_bytes(
+        // SAFETY: the length of a vector is stored in eight bytes.
+        unsafe { std::slice::from_raw_parts(forest_len_ptr, 8) }
+            .try_into()
+            .unwrap(),
+    );
+
+    if forest_len < 27 {
+        return_error!(
+            format!("forest bytes length {forest_len} < 27"),
+            error_len,
+            error_cap,
+            error_vec
+        );
+    }
+
+    // Read the eight big-endian bytes at `offset` in the forest.  Each
+    // call below comes after a check that those bytes lie within the
+    // first `forest_len` bytes.
+    let read_usize = |offset: usize| -> usize {
+        usize::from_be_bytes(
+            // SAFETY: see the comment on the closure.
+            unsafe { std::slice::from_raw_parts(forest_data.add(offset), 8) }
+                .try_into()
+                .unwrap(),
+        )
+    };
+
+    let nodes_len = read_usize(11);
+
+    println!("the forest has {nodes_len} nodes");
+
+    // SAFETY: `forest_len >= 27` was checked above.
+    let special_marks = unsafe { std::slice::from_raw_parts(forest_data.add(8), 3) };
+
+    // The special mark is the three bytes of "rep".
+    if special_marks != b"rep" {
+        return_error!(
+            format!(
+                "the forest does not begin with the special mark\nThe first bytes are: \
+                 {:?}\n",
+                special_marks
+            ),
+            error_len,
+            error_cap,
+            error_vec
+        );
+    }
+
+    let labels_offset = read_usize(19);
+
+    println!("labels_offset = {labels_offset}");
+
+    // The numbers come from the bytes handed over by the caller, so the
+    // sums below saturate instead of overflowing: a saturated sum is
+    // `usize::MAX`, which no real buffer length reaches, so that the
+    // checks fail as they should.
+    let labels_end = labels_offset.saturating_add(nodes_len.saturating_mul(34));
+    let nodes_end = nodes_len.saturating_mul(16).saturating_add(27);
+
+    if forest_len < labels_end || forest_len < nodes_end {
+        return_error!(
+            format!(
+                "the forest length is too small: {forest_len}\n\
+                 labels offset + 34 * nodes_len = {}, all {nodes_len} \
+                 nodes take {}\n",
+                labels_end,
+                nodes_end
+            ),
+            error_len,
+            error_cap,
+            error_vec
+        );
+    }
+
+    let mut total_degree = 0usize;
+
+    let preamble = "digraph forest {
+    fontname=\"Helvetica,Arial,sans-serif\"
+    node [fontname=\"Helvetica,Arial,sans-serif\", ordering=out]
+    edge [fontname=\"Helvetica,Arial,sans-serif\"]
+    rankdir=LR;\n";
+
+    let mut post = String::new();
+
+    for node in 0..nodes_len {
+        // In bounds, since `nodes_end <= forest_len`.
+        let degree = read_usize(27 + 16 * node);
+
+        total_degree = total_degree.saturating_add(degree);
+
+        post.push_str(&format!(
+            "    {node} [label = \"{node}:{}\"]\n",
+            // In bounds, since `labels_end <= forest_len`.
+            read_label(unsafe { forest_data.add(labels_offset + 34 * node) })
+        ));
+    }
+
+    println!("total degree = {total_degree}");
+
+    let correct_len = nodes_len
+        .saturating_mul(50)
+        .saturating_add(total_degree.saturating_mul(8))
+        .saturating_add(27);
+
+    println!("correct length = {correct_len}");
+
+    if forest_len != correct_len {
+        return_error!(
+            format!("the forest length {forest_len} should be equal to: {correct_len}\n"),
+            error_len,
+            error_cap,
+            error_vec
+        );
+    }
+
+    for source in 0..nodes_len {
+        let degree = read_usize(27 + 16 * source);
+        let node_offset = read_usize(27 + 16 * source + 8);
+        let edges_end = node_offset.saturating_add(degree.saturating_mul(8));
+
+        // NOTE(review): edges that end exactly at `forest_len` are
+        // rejected as well; presumably the labels follow the edges,
+        // so that this never happens for a valid forest -- confirm.
+        if forest_len <= edges_end {
+            return_error!(
+                format!(
+                    "the forest length {forest_len} is <= {node_offset} + 8 * {degree} = {}\n",
+                    edges_end
+                ),
+                error_len,
+                error_cap,
+                error_vec
+            );
+        }
+
+        for i in 0..degree {
+            let target = read_usize(node_offset + 8 * i);
+
+            post.push_str(&format!("    {source} -> {target}\n"));
+        }
+    }
+
+    post.push_str("}\n");
+
+    let result = format!("{preamble}{post}");
+
+    if filename.is_null() {
+        return_error!(
+            String::from("error: the file name is a null pointer"),
+            error_len,
+            error_cap,
+            error_vec
+        );
+    }
+
+    // SAFETY: `filename` is not null, and the caller guarantees that
+    // it is NUL-terminated.
+    let parsed_filename = match unsafe { std::ffi::CStr::from_ptr(filename).to_str() } {
+        Ok(ccstr) => ccstr,
+        Err(e) => {
+            return_error!(format!("error: {e}"), error_len, error_cap, error_vec);
+        }
+    };
+
+    if std::fs::metadata(parsed_filename).is_ok() {
+        let _ = std::fs::remove_file(parsed_filename);
+    }
+
+    // Truncate, so that nothing of an old and longer file survives
+    // when the removal above failed.
+    let file = std::fs::File::options()
+        .write(true)
+        .create(true)
+        .truncate(true)
+        .open(parsed_filename);
+
+    use std::io::Write;
+
+    match file {
+        Ok(mut file) => {
+            if let Err(e) = file.write_all(result.as_bytes()) {
+                return_error!(format!("error: {e}"), error_len, error_cap, error_vec);
+            }
+        }
+        Err(e) => {
+            return_error!(format!("error: {e}"), error_len, error_cap, error_vec);
+        }
+    }
+}
+
 // TODO: Write a function to print the node label of a forest and
 // expose it to C ABI.
 //
-- 
cgit v1.2.3-18-g5258