summaryrefslogtreecommitdiff
path: root/src/lib.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib.rs')
-rw-r--r--src/lib.rs273
1 files changed, 268 insertions, 5 deletions
diff --git a/src/lib.rs b/src/lib.rs
index 7cc5223..aed8536 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -9,8 +9,8 @@ use chain::{atom::DefaultAtom, default::DefaultChain, Chain};
use grammar::Grammar;
// For printing forests
-use chain::item::{default::DefaultForest, ForestLabel};
-use grammar::GrammarLabel;
+use chain::item::{default::DefaultForest, ForestLabel, ForestLabelType};
+use grammar::{GrammarLabel, TNT};
use graph::LabelGraph;
/// This struct is the representation of a parser.
@@ -483,9 +483,6 @@ extern "C" fn parser_parse(
match forest {
Ok(forest) => {
- use graph::Graph;
- forest.print_viz("test forest.gv").unwrap();
-
Box::leak(parser_box);
let mut bytes = bytes::forest_to_bytes(&forest);
@@ -556,6 +553,272 @@ extern "C" fn parser_parse(
}
}
+/// Decodes one serialized forest label from the 34 bytes starting at `label`.
+///
+/// Byte layout as read by this function (every multi-byte field is a
+/// big-endian `usize` stored in 8 bytes):
+///
+/// * byte 0: label status, 0 for plain, 1 for packed, anything else for
+///   cloned;
+/// * bytes 1..9: the `usize` payload of `ForestLabelType::Cloned`, only
+///   read for cloned labels;
+/// * bytes 9..17: start;
+/// * bytes 17..25: end;
+/// * byte 25: content discriminant, 0 wraps the content in `TNT::Ter`,
+///   1 wraps it in `TNT::Non`, anything else passes the bare `usize`;
+/// * bytes 26..34: content.
+///
+/// Nothing is validated here: the caller must pass a pointer to at least
+/// 34 readable bytes.
+fn read_label(label: *mut std::os::raw::c_uchar) -> ForestLabel<GrammarLabel> {
+    // Reads the big-endian `usize` stored in the 8 bytes at `label + offset`.
+    let usize_at = |offset: usize| -> usize {
+        let bytes = unsafe { std::slice::from_raw_parts(label.add(offset), 8) };
+
+        usize::from_be_bytes(bytes.try_into().unwrap())
+    };
+
+    let label_status = match unsafe { *label } {
+        0 => ForestLabelType::Plain,
+        1 => ForestLabelType::Packed,
+        // The payload bytes are only touched for cloned labels.
+        _ => ForestLabelType::Cloned(usize_at(1)),
+    };
+
+    let start = usize_at(9);
+    let end = usize_at(17);
+    let content = usize_at(26);
+
+    let inner_label = match unsafe { *label.add(25) } {
+        0 => GrammarLabel::new_closed(TNT::Ter(content), start, end),
+        1 => GrammarLabel::new_closed(TNT::Non(content), start, end),
+        _ => GrammarLabel::new_closed(content, start, end),
+    };
+
+    ForestLabel::new(inner_label, label_status)
+}
+
+macro_rules! return_error {
+ ($err:expr, $elen:ident, $ecap:ident, $evec:ident) => {
+ let mut e_string = $err;
+
+ let e_string_len_slice = e_string.len().to_be_bytes();
+ let e_string_cap_slice = e_string.capacity().to_be_bytes();
+
+ unsafe {
+ for i in 0..8 {
+ *($elen.add(i)) = e_string_len_slice.get(i).copied().unwrap();
+ *($ecap.add(i)) = e_string_cap_slice.get(i).copied().unwrap();
+ }
+
+ (*$evec).data = e_string.as_mut_ptr() as *mut std::os::raw::c_char;
+ }
+
+ std::mem::forget(e_string);
+
+ return;
+ };
+}
+
+/// Writes the serialized forest held in `forest_vec` to the file named
+/// `filename` as a Graphviz `digraph`.
+///
+/// Layout of the forest bytes as read here (every integer is a big-endian
+/// `usize` stored in 8 bytes; bytes 0..8 are not read by this function):
+///
+/// * bytes 8..11: the special mark `rep`;
+/// * bytes 11..19: the number of nodes;
+/// * bytes 19..27: the offset of the label table;
+/// * from byte 27: 16 bytes per node, the degree of the node followed by
+///   the offset of its list of edge targets (8 bytes per target);
+/// * from the label offset: 34 bytes per node, decoded by `read_label`.
+///
+/// The total length is read from the 8 bytes behind `(*forest_vec).len`.
+///
+/// On failure nothing is returned; instead an error message is stored in
+/// `error_vec` by `return_error!`.  If `forest_vec` or `error_vec` is null
+/// the function returns without doing anything, since no error can be
+/// reported in that case.
+///
+/// All header fields come from the buffer itself, so every size
+/// computation saturates instead of overflowing: a saturated value can
+/// only fail the length checks, never slip past them.
+#[no_mangle]
+extern "C" fn print_forest(
+    forest_vec: *mut LenVec<std::os::raw::c_uchar>,
+    error_vec: *mut LenVec<std::os::raw::c_char>,
+    filename: *mut std::os::raw::c_char,
+) {
+    if forest_vec.is_null() || error_vec.is_null() {
+        return;
+    }
+
+    let forest_len = usize::from_be_bytes(
+        unsafe { std::slice::from_raw_parts((*forest_vec).len, 8) }
+            .try_into()
+            .unwrap(),
+    );
+
+    let error_len = unsafe { (*error_vec).len };
+    let error_cap = unsafe { (*error_vec).capacity };
+
+    if forest_len < 27 {
+        return_error!(
+            format!("forest bytes length {forest_len} < 27"),
+            error_len,
+            error_cap,
+            error_vec
+        );
+    }
+
+    let data = unsafe { (*forest_vec).data };
+
+    // Reads the big-endian `usize` stored at `data + offset`.  Every call
+    // below happens after `offset + 8` has been checked against
+    // `forest_len`.
+    let read_usize = |offset: usize| -> usize {
+        usize::from_be_bytes(
+            unsafe { std::slice::from_raw_parts(data.add(offset), 8) }
+                .try_into()
+                .unwrap(),
+        )
+    };
+
+    let nodes_len = read_usize(11);
+
+    println!("the forest has {nodes_len} nodes");
+
+    let special_marks = unsafe { std::slice::from_raw_parts(data.add(8), 3) };
+
+    // The mark is the ASCII string "rep", i.e. the bytes 114, 101, 112.
+    if special_marks != b"rep" {
+        return_error!(
+            format!(
+                "the forest does not begin with the special mark\nThe first bytes are: \
+                 {:?}\n",
+                special_marks
+            ),
+            error_len,
+            error_cap,
+            error_vec
+        );
+    }
+
+    let labels_offset = read_usize(19);
+
+    println!("labels_offset = {labels_offset}");
+
+    // End of the label table and end of the node table.
+    let labels_end = nodes_len.saturating_mul(34).saturating_add(labels_offset);
+    let nodes_end = nodes_len.saturating_mul(16).saturating_add(27);
+
+    if forest_len < labels_end || forest_len < nodes_end {
+        return_error!(
+            format!(
+                "the forest length is too small: {forest_len}\n\
+                 labels offset + 34 * nodes_len = {}, all {nodes_len} \
+                 nodes take {}\n",
+                labels_end, nodes_end
+            ),
+            error_len,
+            error_cap,
+            error_vec
+        );
+    }
+
+    let mut total_degree = 0usize;
+
+    let preamble = "digraph forest {
+ fontname=\"Helvetica,Arial,sans-serif\"
+ node [fontname=\"Helvetica,Arial,sans-serif\", ordering=out]
+ edge [fontname=\"Helvetica,Arial,sans-serif\"]
+ rankdir=LR;\n";
+
+    let mut post = String::new();
+
+    // First pass: one vertex statement per node, and the sum of all
+    // degrees, which determines the exact expected length.
+    for node in 0..nodes_len {
+        let degree = read_usize(27 + 16 * node);
+
+        total_degree = total_degree.saturating_add(degree);
+
+        post.push_str(&format!(
+            " {node} [label = \"{node}:{}\"]\n",
+            read_label(unsafe { data.add(labels_offset + 34 * node) })
+        ));
+    }
+
+    println!("total degree = {total_degree}");
+
+    // 27 header bytes, 16 + 34 bytes per node, 8 bytes per edge.
+    let correct_len: usize = nodes_len
+        .saturating_mul(50)
+        .saturating_add(27)
+        .saturating_add(total_degree.saturating_mul(8));
+
+    println!("correct length = {correct_len}");
+
+    if forest_len != correct_len {
+        return_error!(
+            format!("the forest length {forest_len} should be equal to: {correct_len}\n"),
+            error_len,
+            error_cap,
+            error_vec
+        );
+    }
+
+    // Second pass: one edge statement per target of every node.
+    for source in 0..nodes_len {
+        let degree = read_usize(27 + 16 * source);
+
+        let node_offset = read_usize(27 + 16 * source + 8);
+
+        let edges_end = degree.saturating_mul(8).saturating_add(node_offset);
+
+        // NOTE(review): `<=` also rejects an edge list ending exactly at
+        // the end of the buffer; this looks intended if the label table
+        // always follows the edge lists -- confirm against the serializer.
+        if forest_len <= edges_end {
+            return_error!(
+                format!(
+                    "the forest length {forest_len} is <= {node_offset} + 8 * {degree} = {}\n",
+                    edges_end
+                ),
+                error_len,
+                error_cap,
+                error_vec
+            );
+        }
+
+        for i in 0..degree {
+            let target = read_usize(node_offset + 8 * i);
+
+            post.push_str(&format!(" {source} -> {target}\n"));
+        }
+    }
+
+    post.push_str("}\n");
+
+    let result = format!("{preamble}{post}");
+
+    if filename.is_null() {
+        return_error!(
+            String::from("error: the file name is a null pointer"),
+            error_len,
+            error_cap,
+            error_vec
+        );
+    }
+
+    let parsed_filename;
+
+    match unsafe { std::ffi::CStr::from_ptr(filename).to_str() } {
+        Ok(ccstr) => {
+            parsed_filename = ccstr;
+        }
+        Err(e) => {
+            return_error!(format!("error: {e}"), error_len, error_cap, error_vec);
+        }
+    }
+
+    // Best effort only: a failure to remove an old file is tolerated
+    // because the file is truncated when opened below.
+    if std::fs::metadata(parsed_filename).is_ok() {
+        let _ = std::fs::remove_file(parsed_filename);
+    }
+
+    let file = std::fs::File::options()
+        .write(true)
+        .create(true)
+        // Without this, a file that could not be removed above would keep
+        // its old tail after the new, shorter content.
+        .truncate(true)
+        .open(parsed_filename);
+
+    use std::io::Write;
+
+    match file {
+        Ok(mut file) => {
+            if let Err(e) = file.write_all(result.as_bytes()) {
+                return_error!(format!("error: {e}"), error_len, error_cap, error_vec);
+            }
+        }
+        Err(e) => {
+            return_error!(format!("error: {e}"), error_len, error_cap, error_vec);
+        }
+    }
+}
+
// TODO: Write a function to print the node label of a forest and
// expose it to C ABI.
//