summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2023-07-08 12:30:21 +0800
committerJSDurand <mmemmew@gmail.com>2023-07-08 12:31:13 +0800
commit9a317e56f8a6126583f7d0c431bf878d9b1fe7b1 (patch)
tree7bb6004196b38446a5ab0cb3a0ab642d35f113e9 /src
parent691f969eb104fa3d4c2a1667693fd0382eb9d6b5 (diff)
Finished the Emacs binding.
Now the binding part is finished. What remains is a bug encountered when planting a fragment to the forest which intersects a packed node, which would lead to invalid forests. This will also cause problem when planting a packed fragment, but until now my testing grammars do not produce packed fragments, so this problem is not encountered yet. I am still figuring out efficient ways to solve this problem.
Diffstat (limited to 'src')
-rw-r--r--src/big_endian.c28
-rw-r--r--src/big_endian.h9
-rw-r--r--src/big_endian.obin0 -> 1112 bytes
-rw-r--r--src/binding.c286
-rw-r--r--src/bytes.rs175
-rw-r--r--src/helper.c73
-rw-r--r--src/helper.h75
-rw-r--r--src/helper.obin0 -> 1768 bytes
-rw-r--r--src/lib.rs552
-rw-r--r--src/old binding.txt218
-rwxr-xr-xsrc/rep.sobin0 -> 50480 bytes
-rwxr-xr-xsrc/testbin0 -> 33904 bytes
-rw-r--r--src/test.c224
-rw-r--r--src/test.el28
14 files changed, 1658 insertions, 10 deletions
diff --git a/src/big_endian.c b/src/big_endian.c
new file mode 100644
index 0000000..2db8624
--- /dev/null
+++ b/src/big_endian.c
@@ -0,0 +1,28 @@
+#include "big_endian.h"
+
+void to_big_endian(uint64_t num, unsigned char *result)
+{
+ *(result+0) = (num >> 56) & 0xff;
+ *(result+1) = (num >> 48) & 0xff;
+ *(result+2) = (num >> 40) & 0xff;
+ *(result+3) = (num >> 32) & 0xff;
+ *(result+4) = (num >> 24) & 0xff;
+ *(result+5) = (num >> 16) & 0xff;
+ *(result+6) = (num >> 8) & 0xff;
+ *(result+7) = (num >> 0) & 0xff;
+}
+
+uint64_t from_big_endian(unsigned char *num)
+{
+ uint64_t result = ((uint64_t) (*num) << 56);
+
+ result += ((uint64_t) (*(num+1)) << 48);
+ result += ((uint64_t) (*(num+2)) << 40);
+ result += ((uint64_t) (*(num+3)) << 32);
+ result += ((uint64_t) (*(num+4)) << 24);
+ result += ((uint64_t) (*(num+5)) << 16);
+ result += ((uint64_t) (*(num+6)) << 8);
+ result += ((uint64_t) (*(num+7)) << 0);
+
+ return result;
+}
diff --git a/src/big_endian.h b/src/big_endian.h
new file mode 100644
index 0000000..e6c587e
--- /dev/null
+++ b/src/big_endian.h
@@ -0,0 +1,9 @@
+#ifndef BIG_ENDIAN_H
+#define BIG_ENDIAN_H
+#include <stdint.h>
+
+void to_big_endian(uint64_t num, unsigned char *result);
+
+uint64_t from_big_endian(unsigned char *num);
+
+#endif
diff --git a/src/big_endian.o b/src/big_endian.o
new file mode 100644
index 0000000..6d77a13
--- /dev/null
+++ b/src/big_endian.o
Binary files differ
diff --git a/src/binding.c b/src/binding.c
new file mode 100644
index 0000000..43f73ba
--- /dev/null
+++ b/src/binding.c
@@ -0,0 +1,286 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+#include <emacs-module.h>
+#include "helper.h"
+
+int plugin_is_GPL_compatible = 3;
+
+emacs_value
+rep_new_parser(emacs_env *env, ptrdiff_t narg, emacs_value *args, void *data)
+{
+ char *buffer = NULL;
+ ptrdiff_t len = 0;
+ emacs_value error_data[1];
+
+ unsigned char error_vec_len[8] = { 0 };
+ unsigned char error_vec_cap[8] = { 0 };
+
+ struct SignedVec error_vec = { 0 };
+
+ error_vec.len = error_vec_len;
+ error_vec.capacity = error_vec_cap;
+
+ env->copy_string_contents(env, *args, NULL, &len);
+ len++;
+ buffer = malloc(sizeof(char) * len);
+
+ if (buffer == NULL) return env->intern(env, "nil");
+
+ env->copy_string_contents(env, *args, buffer, &len);
+
+ struct parser *new_parser_result = new_parser(buffer, &error_vec);
+
+ free(buffer);
+
+ uint64_t error_length = from_big_endian(error_vec.len);
+
+ if (error_length) {
+ error_data[0] = env->make_string(env, error_vec.data, (ptrdiff_t) error_length);
+
+ clean_signed(&error_vec, 4);
+
+ env->non_local_exit_signal(env, env->intern(env, "error"),
+ env->funcall(env, env->intern(env, "list"),
+ 1, error_data));
+
+ return env->intern(env, "nil");
+ }
+
+ return env->make_user_ptr(env, clean_parser, new_parser_result);
+}
+
+emacs_value
+rep_parser_recognize(emacs_env *env, ptrdiff_t narg, emacs_value *args, void *data)
+{
+ struct parser *parser = env->get_user_ptr(env, *args);
+
+ if (env->non_local_exit_check(env) != emacs_funcall_exit_return)
+ return env->intern(env, "nil");
+
+ unsigned char *buffer = NULL;
+ ptrdiff_t len = 0;
+
+ emacs_value error_data[1];
+
+ if (!env->eq(env,
+ env->type_of(env, *(args+1)),
+ env->intern(env, "vector"))) {
+ error_data[0] = env->make_string
+ (env, "INPUT should be a vector of integers", 36);
+
+ env->non_local_exit_signal(env, env->intern(env, "wrong-type-argument"),
+ env->funcall(env, env->intern(env, "list"),
+ 1, error_data));
+
+ return env->intern(env, "nil");
+ }
+
+ len = env->vec_size(env, *(args+1));
+
+ buffer = malloc(sizeof(unsigned char) * len * 8);
+
+ if (buffer == NULL) {
+ error_data[0] = env->make_string(env, "out of memory", 13);
+
+ env->non_local_exit_signal(env, env->intern(env, "error"),
+ env->funcall(env, env->intern(env, "list"),
+ 1, error_data));
+
+ return env->intern(env, "nil");
+ }
+
+ for (ptrdiff_t i = 0; i < len; i++) {
+ emacs_value element = env->vec_get(env, *(args+1), i);
+
+ to_big_endian((uint64_t) env->extract_integer(env, element),
+ buffer + 8 * i);
+ }
+
+ if (env->non_local_exit_check(env) != emacs_funcall_exit_return) {
+ free(buffer);
+ return env->intern(env, "nil");
+ }
+
+ unsigned char error_vec_len[8] = { 0 };
+ unsigned char error_vec_cap[8] = { 0 };
+
+ struct SignedVec error_vec = { 0 };
+
+ error_vec.len = error_vec_len;
+ error_vec.capacity = error_vec_cap;
+
+ unsigned char input_len[8] = { 0 };
+
+ to_big_endian((uint64_t) len * 8, input_len);
+
+ struct UnsignedVec input_vec = (struct UnsignedVec)
+ { input_len, NULL, buffer };
+
+ int result = parser_recognize(parser, &input_vec, &error_vec, (unsigned char) 1);
+
+ free(buffer);
+
+ uint64_t error_length = from_big_endian(error_vec.len);
+
+ if (error_length) {
+ error_data[0] = env->make_string(env, error_vec.data, (ptrdiff_t) error_length);
+
+ clean_signed(&error_vec, 4);
+
+ env->non_local_exit_signal(env, env->intern(env, "error"),
+ env->funcall(env, env->intern(env, "list"),
+ 1, error_data));
+
+ return env->intern(env, "nil");
+ }
+
+ return (result) ? env->intern(env, "t") : env->intern(env, "nil");
+}
+
+void
+clean_forest(void *ptr)
+{
+ clean_unsigned((struct UnsignedVec *) ptr, 15);
+}
+
+emacs_value
+rep_parser_parse(emacs_env *env, ptrdiff_t narg, emacs_value *args, void *data)
+{
+ struct parser *parser = env->get_user_ptr(env, *args);
+
+ if (env->non_local_exit_check(env) != emacs_funcall_exit_return)
+ return env->intern(env, "nil");
+
+ unsigned char *buffer = NULL;
+ ptrdiff_t len = 0;
+
+ emacs_value error_data[1];
+
+ if (!env->eq(env,
+ env->type_of(env, *(args+1)),
+ env->intern(env, "vector"))) {
+ error_data[0] = env->make_string
+ (env, "INPUT should be a vector of integers", 36);
+
+ env->non_local_exit_signal(env, env->intern(env, "wrong-type-argument"),
+ env->funcall(env, env->intern(env, "list"),
+ 1, error_data));
+
+ return env->intern(env, "nil");
+ }
+
+ len = env->vec_size(env, *(args+1));
+
+ buffer = malloc(sizeof(char) * len * 8);
+
+ if (buffer == NULL) {
+ error_data[0] = env->make_string(env, "out of memory", 13);
+
+ env->non_local_exit_signal(env, env->intern(env, "error"),
+ env->funcall(env, env->intern(env, "list"),
+ 1, error_data));
+
+ return env->intern(env, "nil");
+ }
+
+ for (ptrdiff_t i = 0; i < len; i++) {
+ emacs_value element = env->vec_get(env, *(args+1), i);
+
+ to_big_endian((uint64_t) env->extract_integer(env, element),
+ buffer + 8 * i);
+ }
+
+ if (env->non_local_exit_check(env) != emacs_funcall_exit_return) {
+ free(buffer);
+ return env->intern(env, "nil");
+ }
+
+ unsigned char error_vec_len[8] = { 0 };
+ unsigned char error_vec_cap[8] = { 0 };
+
+ struct SignedVec error_vec = { 0 };
+
+ error_vec.len = error_vec_len;
+ error_vec.capacity = error_vec_cap;
+
+ unsigned char input_len[8] = { 0 };
+
+ to_big_endian((uint64_t) len * 8, input_len);
+
+struct UnsignedVec input_vec = (struct UnsignedVec)
+ { input_len, NULL, buffer };
+
+ struct UnsignedVec *result = parser_parse(parser, &input_vec, &error_vec, (unsigned char) 1);
+
+ free(buffer);
+
+ uint64_t error_length = from_big_endian(error_vec.len);
+
+ if (error_length) {
+ error_data[0] = env->make_string(env, error_vec.data, (ptrdiff_t) error_length);
+
+ clean_signed(&error_vec, 4);
+
+ env->non_local_exit_signal(env, env->intern(env, "error"),
+ env->funcall(env, env->intern(env, "list"),
+ 1, error_data));
+
+ return env->intern(env, "nil");
+ }
+
+ if (result) {
+ return env->make_user_ptr(env, clean_forest, result);
+ }
+
+ return env->intern(env, "nil");
+}
+
+int
+emacs_module_init(struct emacs_runtime *ert)
+{
+ emacs_env *env = ert->get_environment(ert);
+
+ unsigned char emacs_version = 0;
+
+ if ((unsigned long) env->size >= sizeof(struct emacs_env_27))
+ emacs_version = 27;
+ else if ((unsigned long) env->size >= sizeof(struct emacs_env_26))
+ emacs_version = 26;
+ else if ((unsigned long) env->size >= sizeof(struct emacs_env_25))
+ emacs_version = 25;
+ else return 2;
+
+ emacs_value newfunc = env->make_function
+ (env, 1, 1, rep_new_parser,
+ "Create a new parser from the grammar string GRAMMAR_STRING."
+ "\n\n\(fn grammar_string)",
+ NULL);
+
+ env->funcall(env, env->intern(env, "defalias"),
+ 2, (emacs_value[]){ env->intern(env, "rep-new-parser"), newfunc });
+
+ emacs_value recognizefunc = env->make_function
+ (env, 2, 2, rep_parser_recognize,
+ "Recognize an INPUT using PARSER."
+ "\n\n\(fn parser INPUT)",
+ NULL);
+
+ env->funcall(env, env->intern(env, "defalias"),
+ 2, (emacs_value[]){ env->intern(env, "rep-recognize"), recognizefunc });
+
+ emacs_value parsefunc = env->make_function
+ (env, 2, 2, rep_parser_parse,
+ "Parse an INPUT using PARSER."
+ "\n\n\(fn parser input)",
+ NULL);
+
+ env->funcall(env, env->intern(env, "defalias"),
+ 2, (emacs_value[]){ env->intern(env, "rep-parse"), parsefunc });
+
+ env->funcall(env, env->intern(env, "provide"),
+ 1, (emacs_value[]){ env->intern(env, "rep") });
+
+ return 0;
+}
diff --git a/src/bytes.rs b/src/bytes.rs
new file mode 100644
index 0000000..f764077
--- /dev/null
+++ b/src/bytes.rs
@@ -0,0 +1,175 @@
+//! This module defines functions to turn a forest of forest labels
+//! into a sequence of bytes.
+//!
+//! To be more specific, a forest consists of two parts: a vector of
+//! node labels and a graph specifying the relations between nodes.
+//!
+//! # Number format (endianness)
+//!
+//! Every number mentionned in this format is an unsigned integer of
+//! 64 bits, or 8 bytes. Every such number is specified in the big
+//! endian format.
+//!
+//! # Parts of the sequence of bytes
+//!
+//! The sequence of bytes has three parts:
+//!
+//! ## Header
+//!
+//! The header specifies metadata of this forest.
+//!
+//! ### Special mark
+//!
+//! The first three bytes form the string "rep", as a special mark.
+//!
+//! ### Number of nodes
+//!
+//! The next 8 bytes specifies the number of nodes of this forest.
+//!
+//! ## Graph
+//!
+//! Next comes the underlying graph for this forest. This part
+//! consists of a vector of vectors of numbers. Each vector of
+//! numbers represents the list of children of a node. So the number
+//! of vectors of numbers is equal to the number of nodes.
+//!
+//! ### Vector of vectors of numbers
+//!
+//! The vectors are not simply concatenated one after another, as that
+//! way one cannot read a random node in a constant amount of time.
+//!
+//! Instead, we first specify the number of children of each node
+//! first, along with the offset for that node of the vector of
+//! children.
+//!
+//! As an example, if a graph has three nodes, represented as the
+//! adjacency list: `[[1, 2], [2], []]`, then its representation as a
+//! sequence of bytes is as follows:
+//! ```text
+//! 2, x, 1, y, 0, z, 1, 2, 2,
+//! ^ ^ ^
+//! x y z
+//! ```
+//!
+//! This has the advantage that we can read the children of the `n`-th
+//! node in constant time.
+//!
+//! ## Vector of labels
+//!
+//! Each label occupies a fixed number of bytes, so we simply put the
+//! labels one after another. The only thing to note here is the
+//! format of the labels.
+//!
+//! ### Labels
+//!
+//! Each label has 3 parts:
+//!
+//! 1. Status: either Packed, Cloned, or Plain. If the node is
+//! cloned, it has a clone index. So in total this part occupies 1
+//! byte for the status and 8 bytes for the clone index.
+//! 2. Start and end: the range in the input sentence. We just store
+//! two numbers here. Hence this part occupies 16 bytes.
+//! 3. Grammar label: either a terminal, a non-terminal, or a rule.
+//! Each variant needs a number to speify its index. So in total this
+//! part occupies 1 byte for the variant discriminant and 8 bytes for
+//! the number.
+//!
+//! To sum up, each label occupies 34 bytes.
+
+use chain::item::{default::DefaultForest, ForestLabel};
+use grammar::{GrammarLabel, GrammarLabelType, TNT};
+use graph::{Graph, LabelGraph};
+
+pub(super) fn forest_to_bytes(forest: &DefaultForest<ForestLabel<GrammarLabel>>) -> Vec<u8> {
+ // First calculate the total number of bytes.
+ let nodes_len = forest.nodes_len();
+
+ let degrees: Vec<_> = forest
+ .nodes()
+ .map(|node| forest.degree(node).unwrap_or(0))
+ .collect();
+
+ let total_degree: usize = degrees.iter().copied().sum();
+
+ let len: usize = 8 // total number of bytes at the start
+ + 3 // special mark
+ + 8 // number of nodes
+ + 8 // offset of labels
+ + 16 * nodes_len // degree & offset for each node
+ + 8 * total_degree // children of each node
+ + 34 * nodes_len // labels
+ ;
+
+ // Then fill in the bytes.
+
+ let mut bytes: Vec<u8> = Vec::with_capacity(len);
+
+ // First the headers
+
+ bytes.extend(len.to_be_bytes());
+ bytes.extend([114, 101, 112]); // rep
+ bytes.extend(nodes_len.to_be_bytes());
+ bytes.extend((len - 34 * nodes_len).to_be_bytes());
+
+ let mut accumulated: usize = 0;
+
+ // Then degrees and offsets
+
+ for degree in degrees.iter().copied() {
+ bytes.extend(degree.to_be_bytes());
+ bytes.extend((8 + 3 + 8 + 8 + 16 * nodes_len + 8 * accumulated).to_be_bytes());
+
+ accumulated += degree;
+ }
+
+ // Then the children
+
+ bytes.extend(forest.nodes().flat_map(|node| {
+ forest
+ .children_of(node)
+ .unwrap_or_default()
+ .flat_map(|child| child.to_be_bytes())
+ }));
+
+ // Finally the labels
+
+ 'nodes_loop: for node in forest.nodes() {
+ let label = match forest.vertex_label(node) {
+ Ok(Some(label)) => label,
+ _ => continue 'nodes_loop,
+ };
+
+ if label.is_plain() {
+ bytes.extend(std::iter::repeat(0).take(9));
+ } else if label.is_packed() {
+ bytes.extend(std::iter::once(1).chain(std::iter::repeat(0).take(8)));
+ } else if let Some(index) = label.clone_index() {
+ bytes.extend(std::iter::once(2).chain(index.to_be_bytes()));
+ }
+
+ let label = label.label();
+
+ bytes.extend(label.start().to_be_bytes());
+ bytes.extend(label.end().unwrap_or(0).to_be_bytes());
+
+ let label = label.label();
+
+ match label {
+ GrammarLabelType::TNT(TNT::Ter(t)) => {
+ bytes.extend(std::iter::once(0).chain(t.to_be_bytes()));
+ }
+ GrammarLabelType::TNT(TNT::Non(n)) => {
+ bytes.extend(std::iter::once(1).chain(n.to_be_bytes()));
+ }
+ GrammarLabelType::Rule(r) => {
+ bytes.extend(std::iter::once(2).chain(r.to_be_bytes()));
+ }
+ }
+ }
+
+ if bytes.len() != len {
+ dbg!();
+ }
+
+ bytes
+}
diff --git a/src/helper.c b/src/helper.c
new file mode 100644
index 0000000..5d7e9f8
--- /dev/null
+++ b/src/helper.c
@@ -0,0 +1,73 @@
+#include "helper.h"
+
+struct Label
+read_label(unsigned char *ptr)
+{
+ struct Label label = { 0 };
+
+ switch (*ptr) {
+ case Plain:
+ label.status = Plain;
+ break;
+ case Packed:
+ label.status = Packed;
+ break;
+ default:
+ label.status = Clone;
+ label.clone_index = from_big_endian(ptr+1);
+ break;
+ }
+
+ label.start = from_big_endian(ptr+9);
+ label.end = from_big_endian(ptr+17);
+
+ switch (*(ptr+25)) {
+ case Terminal:
+ label.variant = Terminal;
+ break;
+ case Nonterminal:
+ label.variant = Nonterminal;
+ break;
+ default:
+ label.variant = Rule;
+ break;
+ }
+
+ label.content = from_big_endian(ptr+26);
+
+ return label;
+}
+
+void
+print_label(struct Label label)
+{
+ switch (label.status) {
+ case Plain:
+ printf("a plain node ");
+ break;
+ case Packed:
+ printf("a packed node ");
+ break;
+ default:
+ printf("a cloned node with index %llu ", label.clone_index);
+ break;
+ }
+
+ printf("spanning (%llu, %llu) ", label.start, label.end);
+
+ printf("labelled as a ");
+
+ switch (label.variant) {
+ case Terminal:
+ printf("terminal ");
+ break;
+ case Nonterminal:
+ printf("non-terminal ");
+ break;
+ default:
+ printf("rule ");
+ break;
+ }
+
+ printf("%llu\n", label.content);
+}
diff --git a/src/helper.h b/src/helper.h
new file mode 100644
index 0000000..8be7497
--- /dev/null
+++ b/src/helper.h
@@ -0,0 +1,75 @@
+#ifndef HELPER_H
+#define HELPER_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <string.h>
+#include "big_endian.h"
+
+struct SignedVec {
+ unsigned char *len;
+ unsigned char *capacity;
+ char *data;
+};
+
+struct UnsignedVec {
+ unsigned char *len;
+ unsigned char *capacity;
+ unsigned char *data;
+};
+
+struct parser;
+
+struct parser *new_parser(char *grammar_string,
+ struct SignedVec *error_vec);
+
+void clean_parser(void *parser);
+
+int parser_recognize(struct parser *parser,
+ struct UnsignedVec *input_vec,
+ struct SignedVec *error_vec,
+ unsigned char reset_p);
+
+struct UnsignedVec *
+parser_parse
+(struct parser *parser,
+ struct UnsignedVec *input_vec,
+ struct SignedVec *error_vec,
+ unsigned char reset_p );
+
+struct UnsignedVec *
+parser_parse(struct parser *parser,
+ struct UnsignedVec *input_vec,
+ struct SignedVec *error_vec,
+ unsigned char reset_p);
+
+void clean_signed(struct SignedVec *vec, unsigned char flag);
+void clean_unsigned(struct UnsignedVec *vec, unsigned char flag);
+
+typedef enum Status {
+ Plain = 0,
+ Packed = 1,
+ Clone = 2,
+} Status;
+
+typedef enum Variant {
+ Terminal = 0,
+ Nonterminal = 1,
+ Rule = 2,
+} Variant;
+
+struct Label {
+ Status status;
+ uint64_t clone_index;
+ uint64_t start;
+ uint64_t end;
+ Variant variant;
+ uint64_t content;
+};
+
+struct Label read_label(unsigned char *ptr);
+
+void print_label(struct Label label);
+
+#endif
diff --git a/src/helper.o b/src/helper.o
new file mode 100644
index 0000000..e8801dd
--- /dev/null
+++ b/src/helper.o
Binary files differ
diff --git a/src/lib.rs b/src/lib.rs
index f5457c3..685c66e 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,16 +1,548 @@
-// TODO: Add Emacs bindings
+#![warn(missing_docs)]
+//! This top level package provides necessary functions for Emacs to
+//! call.
-pub fn add(left: usize, right: usize) -> usize {
- left + right
+extern crate chain;
+extern crate grammar;
+
+use chain::{atom::DefaultAtom, default::DefaultChain, Chain};
+use grammar::Grammar;
+
+/// This struct is the representation of a parser.
+///
+/// When the user constructs a parser, an instance of this struct will
+/// be constructed and the user will receive an opaque pointer to this
+/// struct.
+#[derive(Debug, Clone)]
+#[repr(C)]
+pub struct Parser {
+ chain: DefaultChain,
+}
+
+impl Parser {
+ /// Construct a parser from the grammar string.
+ ///
+ /// The grammar is supposed to conform to the Augmented
+ /// Backus-Naur Format. See RFC 5234 for exact details of this
+ /// format.
+ pub fn new(s: &str) -> Result<Self, String> {
+ let grammar: Grammar = s.parse().map_err(|err| format!("{err}"))?;
+ let atom: DefaultAtom =
+ DefaultAtom::from_grammar(grammar).map_err(|err| format!("{err}"))?;
+
+ DefaultChain::unit(atom)
+ .map_err(|err| format!("{err}"))
+ .map(|chain| Self { chain })
+ }
+}
+
+/// Actual function that is called through C ABI.
+///
+/// The parameter `ERROR_LEN` is supposed to point to an integer,
+/// which will be set to the length of the error message, if and only
+/// if an error occurs, in which case the `ERROR_STR` will be set to
+/// point to the actual error message.
+///
+/// It is expected that `*ERROR_STR` should hold the value `NULL` .
+#[no_mangle]
+extern "C" fn new_parser(
+ grammar_string: *mut std::os::raw::c_char,
+ error_vec: *mut LenVec<std::os::raw::c_char>,
+) -> *mut Parser {
+ let parsed_str;
+
+ let error_len = unsafe { (*error_vec).len };
+ let error_cap = unsafe { (*error_vec).capacity };
+
+ unsafe {
+ match std::ffi::CStr::from_ptr(grammar_string).to_str() {
+ Ok(ccstr) => {
+ parsed_str = ccstr.to_string();
+ }
+ Err(e) => {
+ let mut e_string = format!("error: {e}");
+
+ let e_string_len_slice = e_string.len().to_be_bytes();
+ let e_string_cap_slice = e_string.capacity().to_be_bytes();
+
+ for i in 0..8 {
+ *(error_len.add(i)) = e_string_len_slice.get(i).copied().unwrap();
+ *(error_cap.add(i)) = e_string_cap_slice.get(i).copied().unwrap();
+ }
+
+ (*error_vec).data = e_string.as_mut_ptr() as *mut std::os::raw::c_char;
+
+ std::mem::forget(e_string);
+
+ return std::ptr::null_mut();
+ }
+ }
+ }
+
+ match Parser::new(&parsed_str) {
+ Ok(result) => unsafe {
+ for i in 0..8 {
+ *(error_len.add(i)) = 0;
+ }
+
+ Box::into_raw(Box::new(result))
+ },
+ Err(e) => unsafe {
+ let mut e_string = format!("error: {e}");
+
+ let e_string_len_slice = e_string.len().to_be_bytes();
+ let e_string_cap_slice = e_string.capacity().to_be_bytes();
+
+ for i in 0..8 {
+ *(error_len.add(i)) = e_string_len_slice.get(i).copied().unwrap();
+ *(error_cap.add(i)) = e_string_cap_slice.get(i).copied().unwrap();
+ }
+
+ (*error_vec).data = e_string.as_mut_ptr() as *mut std::os::raw::c_char;
+
+ std::mem::forget(e_string);
+
+ std::ptr::null_mut()
+ },
+ }
+}
+
+#[no_mangle]
+extern "C" fn clean_parser(parser: *const std::ffi::c_void) {
+ unsafe {
+ drop(Box::from_raw(parser as *mut Parser));
+ }
+}
+
+/// To make it easier to pass arrays to and from C.
+#[repr(C)]
+pub struct LenVec<T> {
+ /// This must be an array of unsigned char of length 8.
+ ///
+ /// A less length leads to access to invalid memory location,
+ /// while a longer length will be ignored, possibly leading to
+ /// wrong length.
+ ///
+ /// The length will be interpreted as a 64-bits integer stored in
+ /// big endian format.
+ pub len: *mut std::os::raw::c_uchar,
+ /// This must be an array of unsigned chars of length 8.
+ ///
+ /// This is only used so that Rust knows how to reconstruct the
+ /// data from C, in order for Rust to deallocate the objects
+ /// exposed by this library.
+ pub capacity: *mut std::os::raw::c_uchar,
+ /// The actual pointer to the data.
+ ///
+ /// In case this should be set by the function on the Rust end,
+ /// this field should be `NULL`.
+ pub data: *mut T,
+}
+
+#[no_mangle]
+extern "C" fn clean_signed(vec: *mut LenVec<std::os::raw::c_char>, flag: std::os::raw::c_uchar) {
+ let len = usize::from_be_bytes(
+ unsafe { std::slice::from_raw_parts((*vec).len, 8) }
+ .try_into()
+ .unwrap(),
+ );
+ let capacity = usize::from_be_bytes(
+ unsafe { std::slice::from_raw_parts((*vec).capacity, 8) }
+ .try_into()
+ .unwrap(),
+ );
+
+ if (flag & 1) != 0 {
+ drop(unsafe { Vec::from_raw_parts((*vec).len, 8, 8) });
+ }
+
+ if (flag & 2) != 0 {
+ drop(unsafe { Vec::from_raw_parts((*vec).capacity, 8, 8) });
+ }
+
+ if (flag & 4) != 0 {
+ drop(unsafe { String::from_raw_parts((*vec).data as *mut u8, len, capacity) });
+ }
+
+ if (flag & 8) != 0 {
+ drop(unsafe { Box::from_raw(vec) });
+ }
+}
+
+#[no_mangle]
+extern "C" fn clean_unsigned(vec: *mut LenVec<std::os::raw::c_uchar>, flag: std::os::raw::c_uchar) {
+ let len = usize::from_be_bytes(
+ unsafe { std::slice::from_raw_parts((*vec).len, 8) }
+ .try_into()
+ .unwrap(),
+ );
+ let capacity = usize::from_be_bytes(
+ unsafe { std::slice::from_raw_parts((*vec).capacity, 8) }
+ .try_into()
+ .unwrap(),
+ );
+
+ if (flag & 1) != 0 {
+ drop(unsafe { Vec::from_raw_parts((*vec).len, 8, 8) });
+ }
+
+ if (flag & 2) != 0 {
+ drop(unsafe { Vec::from_raw_parts((*vec).capacity, 8, 8) });
+ }
+
+ if (flag & 4) != 0 {
+ drop(unsafe { Vec::<u8>::from_raw_parts((*vec).data, len, capacity) });
+ }
+
+ if (flag & 8) != 0 {
+ drop(unsafe { Box::from_raw(vec) });
+ }
+}
+
+#[no_mangle]
+extern "C" fn parser_recognize(
+ parser: *mut Parser,
+ input_vec: *mut LenVec<std::os::raw::c_uchar>,
+ error_vec: *mut LenVec<std::os::raw::c_char>,
+ reset_p: std::os::raw::c_uchar,
+) -> std::os::raw::c_int {
+ let input_len = usize::from_be_bytes(
+ unsafe { std::slice::from_raw_parts((*input_vec).len, 8) }
+ .try_into()
+ .unwrap(),
+ );
+
+ let mut parser_box;
+ let input_array_len = input_len;
+ let input_array;
+
+ let error_len = unsafe { (*error_vec).len };
+ let error_cap = unsafe { (*error_vec).capacity };
+
+ unsafe {
+ parser_box = Box::from_raw(parser);
+
+ input_array = std::slice::from_raw_parts((*input_vec).data, input_array_len);
+ }
+
+ // If the parser has already been used before, reset it to the
+ // initial state.
+
+ if reset_p != 0 && !parser_box.chain.history().is_empty() {
+ match DefaultChain::unit(parser_box.chain.atom().clone()) {
+ Ok(chain) => {
+ parser_box.chain = chain;
+ }
+ Err(e) => {
+ let mut e_string = format!("error: {e}");
+
+ Box::leak(parser_box);
+
+ let e_string_len_slice = e_string.len().to_be_bytes();
+ let e_string_cap_slice = e_string.capacity().to_be_bytes();
+
+ unsafe {
+ for i in 0..8 {
+ *(error_len.add(i)) = e_string_len_slice.get(i).copied().unwrap();
+ *(error_cap.add(i)) = e_string_cap_slice.get(i).copied().unwrap();
+ }
+
+ (*error_vec).data = e_string.as_mut_ptr() as *mut std::os::raw::c_char;
+ }
+
+ std::mem::forget(e_string);
+
+ return 0;
+ }
+ }
+ }
+
+ if input_array_len.rem_euclid(8) != 0 {
+ let mut e_string =
+ format!("error: input length should be divisible by 8, but got {input_array_len}");
+
+ let e_string_len_slice = e_string.len().to_be_bytes();
+ let e_string_cap_slice = e_string.capacity().to_be_bytes();
+
+ Box::leak(parser_box);
+
+ unsafe {
+ for i in 0..8 {
+ *(error_len.add(i)) = e_string_len_slice.get(i).copied().unwrap();
+ *(error_cap.add(i)) = e_string_cap_slice.get(i).copied().unwrap();
+ }
+
+ (*error_vec).data = e_string.as_mut_ptr() as *mut std::os::raw::c_char;
+ }
+
+ std::mem::forget(e_string);
+
+ return 0;
+ }
+
+ #[cfg(target_pointer_width = "64")]
+ let input_iter = input_array
+ .chunks_exact(8)
+ .map(|chunk| usize::from_be_bytes(<[u8; 8]>::try_from(chunk).unwrap()));
+
+ #[cfg(not(target_pointer_width = "64"))]
+ compile_error!("this program assumes to be run on 64-bits machines");
+
+ for (index, token) in input_iter.enumerate() {
+ let chain_result = parser_box.chain.chain(token, index, true);
+
+ if let Err(e) = chain_result {
+ let mut e_string = format!("error: {e}");
+
+ let e_string_len_slice = e_string.len().to_be_bytes();
+ let e_string_cap_slice = e_string.capacity().to_be_bytes();
+
+ Box::leak(parser_box);
+
+ unsafe {
+ for i in 0..8 {
+ *(error_len.add(i)) = e_string_len_slice.get(i).copied().unwrap();
+ *(error_cap.add(i)) = e_string_cap_slice.get(i).copied().unwrap();
+ }
+
+ (*error_vec).data = e_string.as_mut_ptr() as *mut std::os::raw::c_char;
+ }
+
+ std::mem::forget(e_string);
+
+ return 0;
+ }
+ }
+
+ match parser_box.chain.epsilon() {
+ Ok(result) => {
+ Box::leak(parser_box);
+
+ result as std::os::raw::c_int
+ }
+ Err(e) => {
+ let mut e_string = format!("error: {e}");
+
+ let e_string_len_slice = e_string.len().to_be_bytes();
+ let e_string_cap_slice = e_string.capacity().to_be_bytes();
+
+ Box::leak(parser_box);
+
+ unsafe {
+ for i in 0..8 {
+ *(error_len.add(i)) = e_string_len_slice.get(i).copied().unwrap();
+ *(error_cap.add(i)) = e_string_cap_slice.get(i).copied().unwrap();
+ }
+
+ (*error_vec).data = e_string.as_mut_ptr() as *mut std::os::raw::c_char;
+ }
+
+ std::mem::forget(e_string);
+
+ 0
+ }
+ }
}
-#[cfg(test)]
-mod tests {
- use super::*;
+#[no_mangle]
+extern "C" fn parser_parse(
+ parser: *mut Parser,
+ input_vec: *mut LenVec<std::os::raw::c_uchar>,
+ error_vec: *mut LenVec<std::os::raw::c_char>,
+ reset_p: std::os::raw::c_uchar,
+) -> *mut LenVec<std::os::raw::c_uchar> {
+ let input_len = usize::from_be_bytes(
+ unsafe { std::slice::from_raw_parts((*input_vec).len, 8) }
+ .try_into()
+ .unwrap(),
+ );
+
+ let mut parser_box;
+ let input_array;
- #[test]
- fn it_works() {
- let result = add(2, 2);
- assert_eq!(result, 4);
+ let error_len = unsafe { (*error_vec).len };
+ let error_cap = unsafe { (*error_vec).capacity };
+
+ unsafe {
+ parser_box = Box::from_raw(parser);
+
+ input_array = std::slice::from_raw_parts((*input_vec).data, input_len);
+ }
+
+ // If the parser has already been used before, reset it to the
+ // initial state.
+
+ if reset_p != 0 && !parser_box.chain.history().is_empty() {
+ match DefaultChain::unit(parser_box.chain.atom().clone()) {
+ Ok(chain) => {
+ parser_box.chain = chain;
+ }
+ Err(e) => {
+ let mut e_string = format!("error: {e}");
+
+ Box::leak(parser_box);
+
+ let e_string_len_slice = e_string.len().to_be_bytes();
+ let e_string_cap_slice = e_string.capacity().to_be_bytes();
+
+ unsafe {
+ for i in 0..8 {
+ *(error_len.add(i)) = e_string_len_slice.get(i).copied().unwrap();
+ *(error_cap.add(i)) = e_string_cap_slice.get(i).copied().unwrap();
+ }
+
+ (*error_vec).data = e_string.as_mut_ptr() as *mut std::os::raw::c_char;
+ }
+
+ std::mem::forget(e_string);
+
+ return std::ptr::null_mut();
+ }
+ }
+ }
+
+ if input_len.rem_euclid(8) != 0 {
+ let mut e_string =
+ format!("error: input length should be divisible by 8, but got {input_len}");
+
+ let e_string_len_slice = e_string.len().to_be_bytes();
+ let e_string_cap_slice = e_string.capacity().to_be_bytes();
+
+ Box::leak(parser_box);
+
+ unsafe {
+ for i in 0..8 {
+ *(error_len.add(i)) = e_string_len_slice.get(i).copied().unwrap();
+ *(error_cap.add(i)) = e_string_cap_slice.get(i).copied().unwrap();
+ }
+
+ (*error_vec).data = e_string.as_mut_ptr() as *mut std::os::raw::c_char;
+ }
+
+ std::mem::forget(e_string);
+
+ return std::ptr::null_mut();
+ } else if input_len == 0 {
+ Box::leak(parser_box);
+
+ return std::ptr::null_mut();
+ }
+
+ #[cfg(target_pointer_width = "64")]
+ let input_iter = input_array
+ .chunks_exact(8)
+ .map(|chunk| usize::from_be_bytes(<[u8; 8]>::try_from(chunk).unwrap()));
+
+ #[cfg(not(target_pointer_width = "64"))]
+ compile_error!("this program assumes to be run on 64-bits machines");
+
+ let mut last_pos: usize = 0;
+ let mut last_token: usize = 0;
+
+ for (index, token) in input_iter.enumerate() {
+ last_pos = index;
+ last_token = token;
+
+ let chain_result = parser_box.chain.chain(token, index, false);
+
+ if let Err(e) = chain_result {
+ let mut e_string = format!("error: {e}");
+
+ let e_string_len_slice = e_string.len().to_be_bytes();
+ let e_string_cap_slice = e_string.capacity().to_be_bytes();
+
+ Box::leak(parser_box);
+
+ unsafe {
+ for i in 0..8 {
+ *(error_len.add(i)) = e_string_len_slice.get(i).copied().unwrap();
+ *(error_cap.add(i)) = e_string_cap_slice.get(i).copied().unwrap();
+ }
+
+ (*error_vec).data = e_string.as_mut_ptr() as *mut std::os::raw::c_char;
+ }
+
+ std::mem::forget(e_string);
+
+ return std::ptr::null_mut();
+ }
+ }
+
+ match parser_box.chain.epsilon() {
+ Ok(result) => {
+ if result {
+ let forest = parser_box.chain.end_of_input(last_pos + 1, last_token);
+
+ match forest {
+ Ok(forest) => {
+ Box::leak(parser_box);
+
+ let mut bytes = bytes::forest_to_bytes(&forest);
+
+ let bytes_len = bytes.len().to_be_bytes().to_vec();
+
+ let bytes_capacity = bytes.capacity().to_be_bytes().to_vec();
+
+ let bytes_vec: LenVec<std::os::raw::c_uchar> = LenVec {
+ len: Box::leak(bytes_len.into_boxed_slice()).as_mut_ptr(),
+ capacity: Box::leak(bytes_capacity.into_boxed_slice()).as_mut_ptr(),
+ data: bytes.as_mut_ptr(),
+ };
+
+ std::mem::forget(bytes);
+
+ Box::into_raw(Box::new(bytes_vec))
+ }
+ Err(e) => {
+ let mut e_string = format!("error: {e}");
+
+ let e_string_len_slice = e_string.len().to_be_bytes();
+ let e_string_cap_slice = e_string.capacity().to_be_bytes();
+
+ Box::leak(parser_box);
+
+ unsafe {
+ for i in 0..8 {
+ *(error_len.add(i)) = e_string_len_slice.get(i).copied().unwrap();
+ *(error_cap.add(i)) = e_string_cap_slice.get(i).copied().unwrap();
+ }
+
+ (*error_vec).data = e_string.as_mut_ptr() as *mut std::os::raw::c_char;
+ }
+
+ std::mem::forget(e_string);
+
+ std::ptr::null_mut()
+ }
+ }
+ } else {
+ Box::leak(parser_box);
+
+ std::ptr::null_mut()
+ }
+ }
+ Err(e) => {
+ let mut e_string = format!("error: {e}");
+
+ let e_string_len_slice = e_string.len().to_be_bytes();
+ let e_string_cap_slice = e_string.capacity().to_be_bytes();
+
+ Box::leak(parser_box);
+
+ unsafe {
+ for i in 0..8 {
+ *(error_len.add(i)) = e_string_len_slice.get(i).copied().unwrap();
+ *(error_cap.add(i)) = e_string_cap_slice.get(i).copied().unwrap();
+ }
+
+ (*error_vec).data = e_string.as_mut_ptr() as *mut std::os::raw::c_char;
+ }
+
+ std::mem::forget(e_string);
+
+ std::ptr::null_mut()
+ }
}
}
+
+pub mod bytes;
diff --git a/src/old binding.txt b/src/old binding.txt
new file mode 100644
index 0000000..0311f49
--- /dev/null
+++ b/src/old binding.txt
@@ -0,0 +1,218 @@
+//! This file provides the Emacs binding for the core.
+
+#![allow(unused_imports)]
+
+extern crate repcore;
+
+use repcore::{
+ derive::stdag::{PFRecorder, STDAGParser},
+ grammar::{Grammar, GrammarInfo},
+ semiring::{IdentityRecorder, Nat, PFS},
+ tokenizer::to_tokenizer,
+ valuation::{Count, ParseForest},
+};
+
+use repcore::parser::Parser as PParser;
+use repcore::parser::ParserReportConfig as PParserConfig;
+
+// TODO: Write Emacs binding
+
+// Let's design the API for Emacs to use.
+//
+// Emacs offers the grammar file or grammar string, and then we
+// generate a parser.
+//
+// Thereafter when we want to parse some input, Emacs gives the input
+// file name or the input file string to this package, and then the
+// package gives back the parse result.
+//
+// Besides, we need a data type to deal with errors.
+//
+// Handling a string is not a problem for us, but to return a parser
+// to Emacs, and to return the parse result, are kind of troublesome.
+//
+// To achieve this, we need data types for a parser and for the parse
+// result. Since a parser is just an implementation detail and should
+// not be of concern to the user, we mught just represent a parser as
+// just a custom user-ptr in Emacs.
+//
+// As to the parse result, we return a set of tuples:
+//
+// (label, i, k, j),
+//
+// where LABEL is represented as a tuple
+//
+// (INDEX, STRING),
+//
+// where index is the index of the rule and the string is the name of
+// the non-terminal or the terminal that is spanned by this packed
+// node.
+//
+// This is not intended for the user to inspect and play with. The
+// package should provide another function to interpret this data, for
+// example to search something or to display it. The reason we don't
+// directly return something to display is that the plain forest might
+// contain cyclic data, and is inconvenient to represent in Emacs. So
+// we only try to display this information to the user if the need
+// arises.
+
+#[derive(Debug, Clone)]
+#[repr(C)]
+pub struct Parser {
+ g: Grammar,
+ gi: GrammarInfo,
+ stparser: STDAGParser<PFS, ParseForest, PFRecorder>,
+}
+
+impl Parser {
+ fn new(gs: &str) -> Result<Self, Box<dyn std::error::Error>> {
+ let g: Grammar = gs.parse()?;
+
+ let mut stparser = STDAGParser::new();
+
+ let mut gi = g.generate_info()?;
+
+ stparser.init(&g, &mut gi)?;
+
+ Ok(Self { g, gi, stparser })
+ }
+}
+
+#[no_mangle]
+extern "C" fn new_parser(
+ gs: *mut std::os::raw::c_char,
+ error: *mut std::os::raw::c_int,
+) -> *mut Parser {
+ let parsed_str;
+ let parser_result;
+
+ unsafe {
+ let cstr = std::ffi::CStr::from_ptr(gs).to_str();
+
+ if cstr.is_err() {
+ *error = 1;
+ return std::ptr::null_mut();
+ }
+
+ parsed_str = cstr.unwrap().to_owned();
+
+ parser_result = Parser::new(&parsed_str);
+
+ if parser_result.is_err() {
+ *error = 2;
+ eprintln!("failed: {parser_result:?}");
+ return std::ptr::null_mut();
+ }
+
+ *error = 0;
+ }
+
+ Box::into_raw(Box::new(parser_result.unwrap()))
+}
+
+#[no_mangle]
+extern "C" fn clean_parser(parser: *const std::ffi::c_void) {
+ unsafe {
+ Box::from_raw(parser as *mut Parser);
+ }
+}
+
+// #[no_mangle]
+// extern "C" fn reset_parser(parser: *mut Parser) {
+// let mut parser_box;
+// unsafe {
+// parser_box = Box::from_raw(parser);
+// }
+
+// parser_box
+// .stparser
+// .init(&parser_box.g, &parser_box.gi)
+// .unwrap();
+
+// std::mem::forget(parser_box);
+// }
+
+// FIXME: Use a pointer to int to indicate the type of errors, and use
+// a pointer to a custom error struct to convey the error messages.
+#[no_mangle]
+extern "C" fn parser_parse(
+ parser: *mut Parser,
+ file: *const std::os::raw::c_char,
+) -> std::os::raw::c_int {
+ let mut parser_box;
+ let cstr;
+
+ unsafe {
+ parser_box = Box::from_raw(parser);
+ cstr = std::ffi::CStr::from_ptr(file).to_str();
+
+ if cstr.is_err() {
+ eprintln!("error reading string: {:?}", cstr);
+ return 0;
+ }
+ }
+
+ parser_box.stparser.reinit();
+
+ let file_string = std::fs::read_to_string(cstr.unwrap());
+
+ if file_string.is_err() {
+ eprintln!("error reading file: {:?}", file_string);
+ return 0;
+ }
+
+ let file_string = file_string.unwrap();
+
+ let parse_result = parser_box
+ .stparser
+ .parse(&file_string.chars().collect::<Vec<_>>());
+
+ if parse_result.is_err() {
+ eprintln!("failed to parse: {:?}", parse_result);
+ return 0;
+ }
+
+ let (accepting, _) = parse_result.unwrap();
+
+ std::mem::forget(parser_box);
+
+ accepting as std::os::raw::c_int
+}
+
+#[no_mangle]
+extern "C" fn parser_parse_string(
+ parser: *mut Parser,
+ input: *const std::os::raw::c_char,
+) -> std::os::raw::c_int {
+ let mut parser_box;
+ let cstr;
+
+ unsafe {
+ parser_box = Box::from_raw(parser);
+ cstr = std::ffi::CStr::from_ptr(input).to_str();
+
+ if cstr.is_err() {
+ eprintln!("error reading string: {:?}", cstr);
+ return 0;
+ }
+ }
+
+ parser_box.stparser.reinit();
+
+ let file_string = cstr.unwrap().to_owned();
+
+ let parse_result = parser_box
+ .stparser
+ .parse(&file_string.chars().collect::<Vec<_>>());
+
+ if parse_result.is_err() {
+ eprintln!("failed to parse: {:?}", parse_result);
+ return 0;
+ }
+
+ let (accepting, _) = parse_result.unwrap();
+
+ std::mem::forget(parser_box);
+
+ accepting as std::os::raw::c_int
+}
diff --git a/src/rep.so b/src/rep.so
new file mode 100755
index 0000000..2bc412c
--- /dev/null
+++ b/src/rep.so
Binary files differ
diff --git a/src/test b/src/test
new file mode 100755
index 0000000..5806c2d
--- /dev/null
+++ b/src/test
Binary files differ
diff --git a/src/test.c b/src/test.c
new file mode 100644
index 0000000..d309fb7
--- /dev/null
+++ b/src/test.c
@@ -0,0 +1,224 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <limits.h>
+#include <string.h>
+#include <emacs-module.h>
+
+#include "big_endian.h"
+#include "helper.h"
+
+int
+main(int argc, char **argv)
+{
+ unsigned char error_vec_len[8] = { 0 };
+ unsigned char error_vec_cap[8] = { 0 };
+
+ struct SignedVec error_vec = { 0 };
+
+ error_vec.len = error_vec_len;
+ error_vec.capacity = error_vec_cap;
+
+ struct parser *parser = new_parser("start = \"a\"\"b\"\n", &error_vec);
+
+ uint64_t error_length = from_big_endian(error_vec.len);
+
+ if (error_length) {
+ printf("error: ");
+
+ char *error_str = error_vec.data;
+
+ for (int i = 0; i < error_length; i++) {
+ printf("%c", *(error_str+i));
+ }
+
+ printf("\n");
+
+ clean_signed(&error_vec, 4);
+
+ return 1;
+ }
+
+ unsigned char *input = malloc (sizeof(unsigned char) * 16);
+
+ for (int i = 0; i < 16; i++) *(input+i) = 0;
+
+ *(input+15) = 1;
+
+ unsigned char input_len[8] = { 0 };
+ input_len[7] = 16;
+
+ struct UnsignedVec input_vec = (struct UnsignedVec) { input_len, NULL, input };
+
+ int result = parser_recognize
+ (parser,
+ &input_vec,
+ &error_vec,
+ (unsigned char) 0);
+
+ error_length = from_big_endian(error_vec.len);
+
+ if (error_length) {
+ clean_parser((void *) parser);
+ free(input);
+
+ printf("error: ");
+
+ char *error_str = error_vec.data;
+
+ for (uint64_t i = 0; i < error_length; i++) {
+ printf("%c", *(error_str + (unsigned long) i));
+ }
+
+ printf("\n");
+
+ clean_signed(&error_vec, 4);
+
+ return 1;
+ }
+
+ if (result)
+ printf("result = true\n");
+ else
+ printf("result = false\n");
+
+ for (int i = 0; i < 8; i++) {
+ error_vec.len[i] = 0;
+ error_vec.capacity[i] = 0;
+ }
+
+ struct UnsignedVec *forest_ptr = parser_parse
+ (parser,
+ &input_vec,
+ &error_vec,
+ (unsigned char) 1);
+
+ error_length = from_big_endian(error_vec.len);
+
+ if (error_length) {
+ clean_parser((void *) parser);
+ free(input);
+
+ printf("error: ");
+
+ char *error_str = error_vec.data;
+
+ for (uint64_t i = 0; i < error_length; i++) {
+ printf("%c", *(error_str + (unsigned long) i));
+ }
+
+ printf("\n");
+
+ clean_signed(&error_vec, 4);
+
+ return 1;
+ }
+
+ free(input);
+ clean_parser((void *) parser);
+
+ if (forest_ptr) {
+ uint64_t forest_len = from_big_endian(forest_ptr->len);
+
+ /* forest_len++; */
+
+ printf("a non-empty forest of length %llu\n", forest_len);
+
+ unsigned char *forest = forest_ptr->data;
+
+ uint64_t forest_real_len = from_big_endian(forest);
+
+ if (forest_real_len != forest_len) {
+ fprintf(stderr, "wrong length: %llu\n", forest_real_len);
+
+ clean_unsigned(forest_ptr, 15);
+
+ return 1;
+ }
+
+ if (forest_len < 27) {
+ fprintf(stderr, "length too small: %llu\n", forest_len);
+
+ clean_unsigned(forest_ptr, 15);
+
+ return 1;
+ }
+
+ printf("the lengths match\n");
+
+ if (*(forest+8) != 114 || *(forest+9) != 101 || *(forest+10) != 112) {
+ fprintf(stderr, "the forest does not begin with the special mark\n");
+
+ fprintf(stderr, "the first bytes are: ");
+ fprintf(stderr, "%c", *(forest+8));
+ fprintf(stderr, "%c", *(forest+9));
+ fprintf(stderr, "%c\n", *(forest+10));
+
+ clean_unsigned(forest_ptr, 15);
+
+ return 1;
+ }
+
+ printf("the special mark is correct.\n");
+
+ uint64_t nodes_len = from_big_endian(forest+11);
+
+ printf("forest has %llu nodes\n", nodes_len);
+
+ uint64_t labels_offset = from_big_endian(forest+19);
+
+ printf("the offset of labels is %llu\n", labels_offset);
+
+ if (forest_len < labels_offset ||
+ forest_len < (27 + 16 * nodes_len)) {
+ fprintf(stderr, "length too small: %llu\n", forest_len);
+
+ clean_unsigned(forest_ptr, 15);
+
+ return 1;
+ }
+
+ uint64_t total_degree = 0;
+
+ for (uint64_t i = 0; i < nodes_len; i++) {
+ uint64_t offset = 27 + 16 * i;
+
+ uint64_t degree = from_big_endian(forest+offset);
+ uint64_t node_offset = from_big_endian(forest+offset+8);
+
+ total_degree += degree;
+
+ printf("the %llu-th node has degree %llu and offset %llu\n",
+ i, degree, node_offset);
+
+ printf("label: ");
+ print_label(read_label(forest + labels_offset + 34 * i));
+
+ for (uint64_t j = 0; j < degree; j++) {
+ uint64_t child = from_big_endian(forest+node_offset+8*j);
+
+ printf("the %llu-th child is %llu\n", j, child);
+ }
+
+ printf("\n");
+ }
+
+ uint64_t correct = 27 + 50 * nodes_len + 8 * total_degree;
+
+ if (forest_len != correct) {
+ fprintf(stderr, "length does not conform to the format: %llu\n", correct);
+
+ clean_unsigned(forest_ptr, 15);
+
+ return 1;
+ }
+
+ printf("the forest has the correct length according to the format\n");
+
+ clean_unsigned(forest_ptr, 15);
+ } else {
+ printf("no forest\n");
+ }
+
+ return 0;
+}
diff --git a/src/test.el b/src/test.el
new file mode 100644
index 0000000..d106a03
--- /dev/null
+++ b/src/test.el
@@ -0,0 +1,28 @@
+(load-file "rep.so")
+(setq rep-dir (vc-call-backend 'Git 'root default-directory))
+
+(setq test-parser
+ (rep-new-parser
+ (with-temp-buffer
+ (insert-file-contents
+ (expand-file-name
+ "test.abnf"
+ (expand-file-name
+ "abnf grammars"
+ (expand-file-name "grammar" rep-dir))))
+ (buffer-string))))
+
+(defvar input nil "A vector that represents a testing input.")
+
+(setq input (vector 3 0 2 2 2 1 1 1 0 1))
+
+(rep-recognize test-parser input)
+(rep-parse test-parser input)
+
+;; (rep-parse-string
+;; test-parser
+;; "* title\nprice: 512\nnote: this is a test\n")
+
+;; (rep-parse-string
+;; test-parser
+;; "183t3ru")