summary | refs | log | tree | commit | diff
path: root/src/lib.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib.rs')
-rw-r--r-- src/lib.rs 552
1 files changed, 542 insertions, 10 deletions
diff --git a/src/lib.rs b/src/lib.rs
index f5457c3..685c66e 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,16 +1,548 @@
-// TODO: Add Emacs bindings
+#![warn(missing_docs)]
+//! This top level package provides necessary functions for Emacs to
+//! call.
-pub fn add(left: usize, right: usize) -> usize {
- left + right
+extern crate chain;
+extern crate grammar;
+
+use chain::{atom::DefaultAtom, default::DefaultChain, Chain};
+use grammar::Grammar;
+
+/// This struct is the representation of a parser.
+///
+/// When the user constructs a parser through `new_parser`, an
+/// instance of this struct is allocated on the heap and the user
+/// receives an opaque pointer to it.
+#[derive(Debug, Clone)]
+#[repr(C)]
+pub struct Parser {
+ /// The chain that `parser_recognize` and `parser_parse` feed the
+ /// input tokens to.
+ chain: DefaultChain,
+}
+
+impl Parser {
+    /// Build a parser out of a grammar given as a string.
+    ///
+    /// The grammar is expected to be written in the Augmented
+    /// Backus-Naur Format, as specified by RFC 5234.
+    ///
+    /// # Errors
+    ///
+    /// A failure in parsing the grammar, in building the atom from
+    /// the grammar, or in building the chain from the atom is
+    /// returned as the `Display` text of the underlying error.
+    pub fn new(s: &str) -> Result<Self, String> {
+        // The turbofish pins the error type of `parse`, so that the
+        // closure below knows what it is converting.
+        let grammar = s.parse::<Grammar>().map_err(|e| e.to_string())?;
+        let atom = DefaultAtom::from_grammar(grammar).map_err(|e| e.to_string())?;
+        let chain = DefaultChain::unit(atom).map_err(|e| e.to_string())?;
+
+        Ok(Self { chain })
+    }
+}
+
+/// Construct a parser from a grammar; this is the function that is
+/// called through the C ABI.
+///
+/// `grammar_string` must point to a NUL-terminated string holding
+/// the grammar, see [`Parser::new`] for the expected format.
+///
+/// `error_vec` must point to a [`LenVec`] whose `len` and `capacity`
+/// fields each point to 8 writable bytes.  It is expected that its
+/// `data` field holds the value `NULL`.
+///
+/// On success, the 8 bytes behind `len` are set to zero, and a
+/// pointer to a heap-allocated [`Parser`] is returned.  The parser
+/// is to be released by `clean_parser`.
+///
+/// On failure, `NULL` is returned, the length and the capacity of
+/// the error message are written behind `len` and `capacity` as
+/// 64-bit integers in big-endian format, and `data` is set to point
+/// to the message.  The message is not NUL-terminated, and its
+/// ownership passes to the caller, who releases it by calling
+/// `clean_signed` with the bit 4 set in the flag.
+///
+/// If `grammar_string`, `error_vec`, or one of the `len` and
+/// `capacity` fields is `NULL`, then `NULL` is returned and nothing
+/// is written, as there is no place to report the error to.
+#[no_mangle]
+extern "C" fn new_parser(
+    grammar_string: *mut std::os::raw::c_char,
+    error_vec: *mut LenVec<std::os::raw::c_char>,
+) -> *mut Parser {
+    if grammar_string.is_null() || error_vec.is_null() {
+        return std::ptr::null_mut();
+    }
+
+    // SAFETY: `error_vec` is not NULL, and the caller guarantees
+    // that it points to a valid `LenVec`.
+    let (error_len, error_cap) = unsafe { ((*error_vec).len, (*error_vec).capacity) };
+
+    if error_len.is_null() || error_cap.is_null() {
+        return std::ptr::null_mut();
+    }
+
+    // Hand MESSAGE over to the C side through `error_vec`, and
+    // produce the NULL pointer that signals the failure.
+    let report = |message: String| -> *mut Parser {
+        // The C side frees the message later, so Rust must not.
+        let mut message = std::mem::ManuallyDrop::new(message);
+
+        // The C side always reads 8 bytes, whatever the width of
+        // `usize` is on this platform.
+        let len_bytes = (message.len() as u64).to_be_bytes();
+        let cap_bytes = (message.capacity() as u64).to_be_bytes();
+
+        // SAFETY: the pointers were checked against NULL above, and
+        // the caller guarantees 8 writable bytes behind `len` and
+        // `capacity`.
+        unsafe {
+            std::ptr::copy_nonoverlapping(len_bytes.as_ptr(), error_len, 8);
+            std::ptr::copy_nonoverlapping(cap_bytes.as_ptr(), error_cap, 8);
+
+            (*error_vec).data = message.as_mut_ptr() as *mut std::os::raw::c_char;
+        }
+
+        std::ptr::null_mut()
+    };
+
+    // SAFETY: `grammar_string` is not NULL, and the caller
+    // guarantees that it is NUL-terminated and stays alive during
+    // this call.
+    let grammar = match unsafe { std::ffi::CStr::from_ptr(grammar_string) }.to_str() {
+        Ok(grammar) => grammar,
+        Err(e) => return report(format!("error: {e}")),
+    };
+
+    match Parser::new(grammar) {
+        Ok(parser) => {
+            // A zero length tells the caller that no error occurred.
+            //
+            // SAFETY: `error_len` is not NULL and points to 8
+            // writable bytes.
+            unsafe { std::ptr::write_bytes(error_len, 0, 8) };
+
+            Box::into_raw(Box::new(parser))
+        }
+        Err(e) => report(format!("error: {e}")),
+    }
+}
+
+/// Destroy a parser that was created by `new_parser`.
+///
+/// A `NULL` pointer is ignored.  Any other pointer must have been
+/// returned by `new_parser` and must not have been passed to this
+/// function before.
+#[no_mangle]
+extern "C" fn clean_parser(parser: *const std::ffi::c_void) {
+    // Reconstructing a box from NULL is undefined behaviour.
+    if parser.is_null() {
+        return;
+    }
+
+    // SAFETY: the pointer is not NULL, and the caller guarantees
+    // that it comes from `Box::into_raw` in `new_parser` and has not
+    // been freed yet.
+    drop(unsafe { Box::from_raw(parser as *mut Parser) });
+}
+
+/// An array together with its length and its capacity, to make it
+/// easier to pass arrays to and from C.
+#[repr(C)]
+pub struct LenVec<T> {
+ /// This must point to an array of unsigned chars of length 8.
+ ///
+ /// A shorter array leads to accesses to invalid memory
+ /// locations, while the bytes beyond the eighth of a longer
+ /// array are ignored, possibly leading to a wrong length.
+ ///
+ /// The 8 bytes are interpreted as a 64-bit integer stored in
+ /// big-endian format.
+ pub len: *mut std::os::raw::c_uchar,
+ /// This must point to an array of unsigned chars of length 8,
+ /// which is interpreted in the same way as `len`.
+ ///
+ /// This is only used so that Rust knows how to reconstruct the
+ /// data from C, in order for Rust to deallocate the objects
+ /// exposed by this library.
+ pub capacity: *mut std::os::raw::c_uchar,
+ /// The actual pointer to the data.
+ ///
+ /// In case this should be set by the function on the Rust end,
+ /// this field should be `NULL`.
+ pub data: *mut T,
+}
+
+/// Free the parts of a [`LenVec`] of signed chars that were
+/// allocated by this library.
+///
+/// `flag` is a bit mask that selects what is freed:
+///
+/// - `1`: the 8 bytes behind `len`;
+/// - `2`: the 8 bytes behind `capacity`;
+/// - `4`: the buffer behind `data`, which is reconstructed as a
+///   `String` from the length and the capacity stored in the vector;
+/// - `8`: the `LenVec` itself.
+///
+/// Only the bits of the parts that were allocated on the Rust end
+/// may be set.  A `NULL` vector is ignored, and so is a selected
+/// field that is `NULL`.  Unless the vector itself is freed, every
+/// field that has been freed is reset to `NULL`.
+#[no_mangle]
+extern "C" fn clean_signed(vec: *mut LenVec<std::os::raw::c_char>, flag: std::os::raw::c_uchar) {
+    if vec.is_null() {
+        return;
+    }
+
+    // SAFETY: `vec` is not NULL, and the caller guarantees that it
+    // points to a valid `LenVec`.
+    let (len_ptr, cap_ptr, data_ptr) = unsafe { ((*vec).len, (*vec).capacity, (*vec).data) };
+
+    // The data come first: the length and the capacity have to be
+    // decoded before the buffers that store them are freed below.
+    // They are decoded only if the data are to be freed at all.
+    if (flag & 4) != 0 && !(data_ptr.is_null() || len_ptr.is_null() || cap_ptr.is_null()) {
+        // Decode the big-endian 64-bit integer stored behind PTR.
+        let decode = |ptr: *mut std::os::raw::c_uchar| -> usize {
+            let mut buffer = [0u8; 8];
+
+            // SAFETY: PTR is not NULL, and the caller guarantees 8
+            // readable bytes behind it.
+            unsafe { std::ptr::copy_nonoverlapping(ptr, buffer.as_mut_ptr(), 8) };
+
+            // This library only supports 64-bit targets, so nothing
+            // is truncated here.
+            u64::from_be_bytes(buffer) as usize
+        };
+
+        let len = decode(len_ptr);
+        let capacity = decode(cap_ptr);
+
+        // SAFETY: the caller guarantees that the data were leaked
+        // from a `String` of this length and this capacity.
+        unsafe {
+            drop(String::from_raw_parts(data_ptr as *mut u8, len, capacity));
+
+            (*vec).data = std::ptr::null_mut();
+        }
+    }
+
+    if (flag & 1) != 0 && !len_ptr.is_null() {
+        // SAFETY: the caller guarantees that these 8 bytes were
+        // allocated by Rust.
+        unsafe {
+            drop(Vec::from_raw_parts(len_ptr, 8, 8));
+
+            (*vec).len = std::ptr::null_mut();
+        }
+    }
+
+    if (flag & 2) != 0 && !cap_ptr.is_null() {
+        // SAFETY: the caller guarantees that these 8 bytes were
+        // allocated by Rust.
+        unsafe {
+            drop(Vec::from_raw_parts(cap_ptr, 8, 8));
+
+            (*vec).capacity = std::ptr::null_mut();
+        }
+    }
+
+    if (flag & 8) != 0 {
+        // SAFETY: the caller guarantees that the vector itself was
+        // leaked from a `Box`.
+        drop(unsafe { Box::from_raw(vec) });
+    }
+}
+
+/// Free the parts of a [`LenVec`] of unsigned chars that were
+/// allocated by this library.
+///
+/// `flag` is a bit mask that selects what is freed:
+///
+/// - `1`: the 8 bytes behind `len`;
+/// - `2`: the 8 bytes behind `capacity`;
+/// - `4`: the buffer behind `data`, which is reconstructed as a
+///   `Vec<u8>` from the length and the capacity stored in the
+///   vector;
+/// - `8`: the `LenVec` itself.
+///
+/// Only the bits of the parts that were allocated on the Rust end
+/// may be set.  A `NULL` vector is ignored, and so is a selected
+/// field that is `NULL`.  Unless the vector itself is freed, every
+/// field that has been freed is reset to `NULL`.
+#[no_mangle]
+extern "C" fn clean_unsigned(vec: *mut LenVec<std::os::raw::c_uchar>, flag: std::os::raw::c_uchar) {
+    if vec.is_null() {
+        return;
+    }
+
+    // SAFETY: `vec` is not NULL, and the caller guarantees that it
+    // points to a valid `LenVec`.
+    let (len_ptr, cap_ptr, data_ptr) = unsafe { ((*vec).len, (*vec).capacity, (*vec).data) };
+
+    // The data come first: the length and the capacity have to be
+    // decoded before the buffers that store them are freed below.
+    // They are decoded only if the data are to be freed at all.
+    if (flag & 4) != 0 && !(data_ptr.is_null() || len_ptr.is_null() || cap_ptr.is_null()) {
+        // Decode the big-endian 64-bit integer stored behind PTR.
+        let decode = |ptr: *mut std::os::raw::c_uchar| -> usize {
+            let mut buffer = [0u8; 8];
+
+            // SAFETY: PTR is not NULL, and the caller guarantees 8
+            // readable bytes behind it.
+            unsafe { std::ptr::copy_nonoverlapping(ptr, buffer.as_mut_ptr(), 8) };
+
+            // This library only supports 64-bit targets, so nothing
+            // is truncated here.
+            u64::from_be_bytes(buffer) as usize
+        };
+
+        let len = decode(len_ptr);
+        let capacity = decode(cap_ptr);
+
+        // SAFETY: the caller guarantees that the data were leaked
+        // from a `Vec<u8>` of this length and this capacity.
+        unsafe {
+            drop(Vec::<u8>::from_raw_parts(data_ptr, len, capacity));
+
+            (*vec).data = std::ptr::null_mut();
+        }
+    }
+
+    if (flag & 1) != 0 && !len_ptr.is_null() {
+        // SAFETY: the caller guarantees that these 8 bytes were
+        // allocated by Rust.
+        unsafe {
+            drop(Vec::from_raw_parts(len_ptr, 8, 8));
+
+            (*vec).len = std::ptr::null_mut();
+        }
+    }
+
+    if (flag & 2) != 0 && !cap_ptr.is_null() {
+        // SAFETY: the caller guarantees that these 8 bytes were
+        // allocated by Rust.
+        unsafe {
+            drop(Vec::from_raw_parts(cap_ptr, 8, 8));
+
+            (*vec).capacity = std::ptr::null_mut();
+        }
+    }
+
+    if (flag & 8) != 0 {
+        // SAFETY: the caller guarantees that the vector itself was
+        // leaked from a `Box`.
+        drop(unsafe { Box::from_raw(vec) });
+    }
+}
+
+/// Feed the input to the parser and report the result of `epsilon`
+/// on the chain afterwards.
+///
+/// The 8 bytes behind `input_vec.len` are read as a 64-bit integer
+/// in big-endian format, which gives the number of bytes behind
+/// `input_vec.data`.  This number must be divisible by 8: every 8
+/// bytes of the input are decoded as one big-endian token, and the
+/// tokens are fed to the chain in order.
+///
+/// If `reset_p` is not zero and the history of the chain is not
+/// empty, the chain is rebuilt from its atom before any token is
+/// consumed.
+///
+/// The return value is the result of `epsilon` on the chain,
+/// converted to a C integer.  On an error, `0` is returned, the
+/// length and the capacity of the error message are written behind
+/// `error_vec.len` and `error_vec.capacity` as 64-bit integers in
+/// big-endian format, and `error_vec.data` is set to point to the
+/// message, which is handed over to the caller without being freed.
+///
+/// NOTE(review): nothing is written to `error_vec` when no error
+/// occurs, so the caller presumably has to zero the error length
+/// beforehand in order to tell a plain `0` from an error -- confirm
+/// against the C side.
+#[no_mangle]
+extern "C" fn parser_recognize(
+ parser: *mut Parser,
+ input_vec: *mut LenVec<std::os::raw::c_uchar>,
+ error_vec: *mut LenVec<std::os::raw::c_char>,
+ reset_p: std::os::raw::c_uchar,
+) -> std::os::raw::c_int {
+ // The length of the input in bytes, decoded from big-endian.
+ let input_len = usize::from_be_bytes(
+ unsafe { std::slice::from_raw_parts((*input_vec).len, 8) }
+ .try_into()
+ .unwrap(),
+ );
+
+ let mut parser_box;
+ let input_array_len = input_len;
+ let input_array;
+
+ // The places where the length and the capacity of an error
+ // message are written to.
+ let error_len = unsafe { (*error_vec).len };
+ let error_cap = unsafe { (*error_vec).capacity };
+
+ unsafe {
+ // The box is leaked again on every return path below, so
+ // the parser is not dropped by this function.
+ //
+ // NOTE(review): confirm what happens to the parser if
+ // something panics between here and the leak.
+ parser_box = Box::from_raw(parser);
+
+ input_array = std::slice::from_raw_parts((*input_vec).data, input_array_len);
+ }
+
+ // If the parser has already been used before, reset it to the
+ // initial state.
+
+ if reset_p != 0 && !parser_box.chain.history().is_empty() {
+ match DefaultChain::unit(parser_box.chain.atom().clone()) {
+ Ok(chain) => {
+ parser_box.chain = chain;
+ }
+ Err(e) => {
+ let mut e_string = format!("error: {e}");
+
+ Box::leak(parser_box);
+
+ let e_string_len_slice = e_string.len().to_be_bytes();
+ let e_string_cap_slice = e_string.capacity().to_be_bytes();
+
+ unsafe {
+ for i in 0..8 {
+ *(error_len.add(i)) = e_string_len_slice.get(i).copied().unwrap();
+ *(error_cap.add(i)) = e_string_cap_slice.get(i).copied().unwrap();
+ }
+
+ (*error_vec).data = e_string.as_mut_ptr() as *mut std::os::raw::c_char;
+ }
+
+ // The message now belongs to the caller.
+ std::mem::forget(e_string);
+
+ return 0;
+ }
+ }
+ }
+
+ // Each token occupies 8 bytes, so any other length is malformed.
+ if input_array_len.rem_euclid(8) != 0 {
+ let mut e_string =
+ format!("error: input length should be divisible by 8, but got {input_array_len}");
+
+ let e_string_len_slice = e_string.len().to_be_bytes();
+ let e_string_cap_slice = e_string.capacity().to_be_bytes();
+
+ Box::leak(parser_box);
+
+ unsafe {
+ for i in 0..8 {
+ *(error_len.add(i)) = e_string_len_slice.get(i).copied().unwrap();
+ *(error_cap.add(i)) = e_string_cap_slice.get(i).copied().unwrap();
+ }
+
+ (*error_vec).data = e_string.as_mut_ptr() as *mut std::os::raw::c_char;
+ }
+
+ std::mem::forget(e_string);
+
+ return 0;
+ }
+
+ // Decode every 8 bytes as one token in big-endian format.
+ #[cfg(target_pointer_width = "64")]
+ let input_iter = input_array
+ .chunks_exact(8)
+ .map(|chunk| usize::from_be_bytes(<[u8; 8]>::try_from(chunk).unwrap()));
+
+ #[cfg(not(target_pointer_width = "64"))]
+ compile_error!("this program assumes to be run on 64-bits machines");
+
+ for (index, token) in input_iter.enumerate() {
+ // NOTE(review): the last argument is `true` here and `false`
+ // in `parser_parse`; presumably it tells the chain not to
+ // build a forest -- confirm against the `Chain` trait.
+ let chain_result = parser_box.chain.chain(token, index, true);
+
+ if let Err(e) = chain_result {
+ let mut e_string = format!("error: {e}");
+
+ let e_string_len_slice = e_string.len().to_be_bytes();
+ let e_string_cap_slice = e_string.capacity().to_be_bytes();
+
+ Box::leak(parser_box);
+
+ unsafe {
+ for i in 0..8 {
+ *(error_len.add(i)) = e_string_len_slice.get(i).copied().unwrap();
+ *(error_cap.add(i)) = e_string_cap_slice.get(i).copied().unwrap();
+ }
+
+ (*error_vec).data = e_string.as_mut_ptr() as *mut std::os::raw::c_char;
+ }
+
+ std::mem::forget(e_string);
+
+ return 0;
+ }
+ }
+
+ // All tokens have been consumed: ask the chain for the verdict.
+ match parser_box.chain.epsilon() {
+ Ok(result) => {
+ Box::leak(parser_box);
+
+ result as std::os::raw::c_int
+ }
+ Err(e) => {
+ let mut e_string = format!("error: {e}");
+
+ let e_string_len_slice = e_string.len().to_be_bytes();
+ let e_string_cap_slice = e_string.capacity().to_be_bytes();
+
+ Box::leak(parser_box);
+
+ unsafe {
+ for i in 0..8 {
+ *(error_len.add(i)) = e_string_len_slice.get(i).copied().unwrap();
+ *(error_cap.add(i)) = e_string_cap_slice.get(i).copied().unwrap();
+ }
+
+ (*error_vec).data = e_string.as_mut_ptr() as *mut std::os::raw::c_char;
+ }
+
+ std::mem::forget(e_string);
+
+ 0
+ }
+ }
}
-#[cfg(test)]
-mod tests {
- use super::*;
+/// Feed the input to the parser and, if `epsilon` on the chain
+/// reports `true` afterwards, return the resulting forest encoded
+/// as bytes.
+///
+/// The 8 bytes behind `input_vec.len` are read as a 64-bit integer
+/// in big-endian format, which gives the number of bytes behind
+/// `input_vec.data`.  This number must be divisible by 8: every 8
+/// bytes of the input are decoded as one big-endian token, and the
+/// tokens are fed to the chain in order.
+///
+/// If `reset_p` is not zero and the history of the chain is not
+/// empty, the chain is rebuilt from its atom before any token is
+/// consumed.
+///
+/// On success, a pointer to a heap-allocated `LenVec` is returned,
+/// whose `len` and `capacity` point to the big-endian bytes of the
+/// length and the capacity of the encoded forest, and whose `data`
+/// points to the bytes produced by `bytes::forest_to_bytes`.  All of
+/// these are leaked on purpose, and belong to the caller.
+///
+/// `NULL` is returned if the input is empty, if `epsilon` reports
+/// `false`, or if an error occurs.  Only in the last case is
+/// `error_vec` written to: the length and the capacity of the error
+/// message are written behind `error_vec.len` and
+/// `error_vec.capacity` as 64-bit integers in big-endian format, and
+/// `error_vec.data` is set to point to the message, which is handed
+/// over to the caller without being freed.
+///
+/// NOTE(review): it looks like the returned vector is meant to be
+/// released by `clean_unsigned` with all four bits set in the flag
+/// -- confirm against the C side.
+#[no_mangle]
+extern "C" fn parser_parse(
+ parser: *mut Parser,
+ input_vec: *mut LenVec<std::os::raw::c_uchar>,
+ error_vec: *mut LenVec<std::os::raw::c_char>,
+ reset_p: std::os::raw::c_uchar,
+) -> *mut LenVec<std::os::raw::c_uchar> {
+ // The length of the input in bytes, decoded from big-endian.
+ let input_len = usize::from_be_bytes(
+ unsafe { std::slice::from_raw_parts((*input_vec).len, 8) }
+ .try_into()
+ .unwrap(),
+ );
+
+ let mut parser_box;
+ let input_array;
- #[test]
- fn it_works() {
- let result = add(2, 2);
- assert_eq!(result, 4);
+ // The places where the length and the capacity of an error
+ // message are written to.
+ let error_len = unsafe { (*error_vec).len };
+ let error_cap = unsafe { (*error_vec).capacity };
+
+ unsafe {
+ // The box is leaked again on every return path below, so
+ // the parser is not dropped by this function.
+ //
+ // NOTE(review): confirm what happens to the parser if
+ // something panics between here and the leak.
+ parser_box = Box::from_raw(parser);
+
+ input_array = std::slice::from_raw_parts((*input_vec).data, input_len);
+ }
+
+ // If the parser has already been used before, reset it to the
+ // initial state.
+
+ if reset_p != 0 && !parser_box.chain.history().is_empty() {
+ match DefaultChain::unit(parser_box.chain.atom().clone()) {
+ Ok(chain) => {
+ parser_box.chain = chain;
+ }
+ Err(e) => {
+ let mut e_string = format!("error: {e}");
+
+ Box::leak(parser_box);
+
+ let e_string_len_slice = e_string.len().to_be_bytes();
+ let e_string_cap_slice = e_string.capacity().to_be_bytes();
+
+ unsafe {
+ for i in 0..8 {
+ *(error_len.add(i)) = e_string_len_slice.get(i).copied().unwrap();
+ *(error_cap.add(i)) = e_string_cap_slice.get(i).copied().unwrap();
+ }
+
+ (*error_vec).data = e_string.as_mut_ptr() as *mut std::os::raw::c_char;
+ }
+
+ // The message now belongs to the caller.
+ std::mem::forget(e_string);
+
+ return std::ptr::null_mut();
+ }
+ }
+ }
+
+ // Each token occupies 8 bytes, so any other length is malformed.
+ if input_len.rem_euclid(8) != 0 {
+ let mut e_string =
+ format!("error: input length should be divisible by 8, but got {input_len}");
+
+ let e_string_len_slice = e_string.len().to_be_bytes();
+ let e_string_cap_slice = e_string.capacity().to_be_bytes();
+
+ Box::leak(parser_box);
+
+ unsafe {
+ for i in 0..8 {
+ *(error_len.add(i)) = e_string_len_slice.get(i).copied().unwrap();
+ *(error_cap.add(i)) = e_string_cap_slice.get(i).copied().unwrap();
+ }
+
+ (*error_vec).data = e_string.as_mut_ptr() as *mut std::os::raw::c_char;
+ }
+
+ std::mem::forget(e_string);
+
+ return std::ptr::null_mut();
+ } else if input_len == 0 {
+ // An empty input yields NULL without reporting an error.
+ Box::leak(parser_box);
+
+ return std::ptr::null_mut();
+ }
+
+ // Decode every 8 bytes as one token in big-endian format.
+ #[cfg(target_pointer_width = "64")]
+ let input_iter = input_array
+ .chunks_exact(8)
+ .map(|chunk| usize::from_be_bytes(<[u8; 8]>::try_from(chunk).unwrap()));
+
+ #[cfg(not(target_pointer_width = "64"))]
+ compile_error!("this program assumes to be run on 64-bits machines");
+
+ // The position and the value of the last token that was fed to
+ // the chain; they are passed to `end_of_input` below.
+ let mut last_pos: usize = 0;
+ let mut last_token: usize = 0;
+
+ for (index, token) in input_iter.enumerate() {
+ last_pos = index;
+ last_token = token;
+
+ // NOTE(review): the last argument is `false` here and `true`
+ // in `parser_recognize`; presumably it tells the chain to
+ // build a forest -- confirm against the `Chain` trait.
+ let chain_result = parser_box.chain.chain(token, index, false);
+
+ if let Err(e) = chain_result {
+ let mut e_string = format!("error: {e}");
+
+ let e_string_len_slice = e_string.len().to_be_bytes();
+ let e_string_cap_slice = e_string.capacity().to_be_bytes();
+
+ Box::leak(parser_box);
+
+ unsafe {
+ for i in 0..8 {
+ *(error_len.add(i)) = e_string_len_slice.get(i).copied().unwrap();
+ *(error_cap.add(i)) = e_string_cap_slice.get(i).copied().unwrap();
+ }
+
+ (*error_vec).data = e_string.as_mut_ptr() as *mut std::os::raw::c_char;
+ }
+
+ std::mem::forget(e_string);
+
+ return std::ptr::null_mut();
+ }
+ }
+
+ // All tokens have been consumed: ask the chain for the verdict.
+ match parser_box.chain.epsilon() {
+ Ok(result) => {
+ if result {
+ let forest = parser_box.chain.end_of_input(last_pos + 1, last_token);
+
+ match forest {
+ Ok(forest) => {
+ Box::leak(parser_box);
+
+ let mut bytes = bytes::forest_to_bytes(&forest);
+
+ let bytes_len = bytes.len().to_be_bytes().to_vec();
+
+ let bytes_capacity = bytes.capacity().to_be_bytes().to_vec();
+
+ // The length and the capacity are leaked as boxed
+ // slices, so that they outlive this function.
+ let bytes_vec: LenVec<std::os::raw::c_uchar> = LenVec {
+ len: Box::leak(bytes_len.into_boxed_slice()).as_mut_ptr(),
+ capacity: Box::leak(bytes_capacity.into_boxed_slice()).as_mut_ptr(),
+ data: bytes.as_mut_ptr(),
+ };
+
+ // The encoded forest now belongs to the caller.
+ std::mem::forget(bytes);
+
+ Box::into_raw(Box::new(bytes_vec))
+ }
+ Err(e) => {
+ let mut e_string = format!("error: {e}");
+
+ let e_string_len_slice = e_string.len().to_be_bytes();
+ let e_string_cap_slice = e_string.capacity().to_be_bytes();
+
+ Box::leak(parser_box);
+
+ unsafe {
+ for i in 0..8 {
+ *(error_len.add(i)) = e_string_len_slice.get(i).copied().unwrap();
+ *(error_cap.add(i)) = e_string_cap_slice.get(i).copied().unwrap();
+ }
+
+ (*error_vec).data = e_string.as_mut_ptr() as *mut std::os::raw::c_char;
+ }
+
+ std::mem::forget(e_string);
+
+ std::ptr::null_mut()
+ }
+ }
+ } else {
+ // `epsilon` reported `false`: NULL without an error.
+ Box::leak(parser_box);
+
+ std::ptr::null_mut()
+ }
+ }
+ Err(e) => {
+ let mut e_string = format!("error: {e}");
+
+ let e_string_len_slice = e_string.len().to_be_bytes();
+ let e_string_cap_slice = e_string.capacity().to_be_bytes();
+
+ Box::leak(parser_box);
+
+ unsafe {
+ for i in 0..8 {
+ *(error_len.add(i)) = e_string_len_slice.get(i).copied().unwrap();
+ *(error_cap.add(i)) = e_string_cap_slice.get(i).copied().unwrap();
+ }
+
+ (*error_vec).data = e_string.as_mut_ptr() as *mut std::os::raw::c_char;
+ }
+
+ std::mem::forget(e_string);
+
+ std::ptr::null_mut()
+ }
 }
}
+
+pub mod bytes;