Move the trait `Forest` to the crate "forest".

The purpose of this change is to share this trait with other crates, such as the forth-coming "semiring" crate that will be responsible for handling some simple semiring operations as well as the querying, in my plans.
author: JSDurand <mmemmew@gmail.com> 2023-08-10 11:14:04 +0800
committer: JSDurand <mmemmew@gmail.com> 2023-08-10 11:14:04 +0800
commit: b8a2d05a3c0d835556d5ddbd44e4a1e201302af5 (patch)
tree: 72e833f41b9a8ffd9d3e7216d2f158504343a420 /src
parent: f14c8a2aeab18a9bfa380df28f94736580e08f48 (diff)
2 files changed, 4 insertions, 179 deletions
diff --git a/src/bytes.rs b/src/bytes.rs
deleted file mode 100644
index f764077..0000000
--- a/src/bytes.rs
+++ /dev/null
@@ -1,175 +0,0 @@
-//! This module defines functions to turn a forest of forest labels
-//! into a sequence of bytes.
-//!
-//! To be more specific, a forest consists of two parts: a vector of
-//! node labels and a graph specifying the relations between nodes.
-//!
-//! # Number format (endianness)
-//!
-//! Every number mentionned in this format is an unsigned integer of
-//! 64 bits, or 8 bytes.  Every such number is specified in the big
-//! endian format.
-//!
-//! # Parts of the sequence of bytes
-//!
-//! The sequence of bytes has three parts:
-//!
-//! ## Header
-//!
-//! The header specifies metadata of this forest.
-//!
-//! ### Special mark
-//!
-//! The first three bytes form the string "rep", as a special mark.
-//!
-//! ### Number of nodes
-//!
-//! The next 8 bytes specifies the number of nodes of this forest.
-//!
-//! ## Graph
-//!
-//! Next comes the underlying graph for this forest.  This part
-//! consists of a vector of vectors of numbers.  Each vector of
-//! numbers represents the list of children of a node.  So the number
-//! of vectors of numbers is equal to the number of nodes.
-//!
-//! ### Vector of vectors of numbers
-//!
-//! The vectors are not simply concatenated one after another, as that
-//! way one cannot read a random node in a constant amount of time.
-//!
-//! Instead, we first specify the number of children of each node
-//! first, along with the offset for that node of the vector of
-//! children.
-//!
-//! As an example, if a graph has three nodes, represented as the
-//! adjacency list: `[[1, 2], [2], []]`, then its representation as a
-//! sequence of bytes is as follows:
-//! ```text
-//! 2, x, 1, y, 0, z, 1, 2, 2,
-//!                   ^     ^  ^
-//!                   x     y  z
-//! ```
-//!
-//! This has the advantage that we can read the children of the `n`-th
-//! node in constant time.
-//!
-//! ## Vector of labels
-//!
-//! Each label occupies a fixed number of bytes, so we simply put the
-//! labels one after another.  The only thing to note here is the
-//! format of the labels.
-//!
-//! ### Labels
-//!
-//! Each label has 3 parts:
-//!
-//! 1. Status: either Packed, Cloned, or Plain.  If the node is
-//! cloned, it has a clone index.  So in total this part occupies 1
-//! byte for the status and 8 bytes for the clone index.
-//! 2. Start and end: the range in the input sentence.  We just store
-//! two numbers here.  Hence this part occupies 16 bytes.
-//! 3. Grammar label: either a terminal, a non-terminal, or a rule.
-//! Each variant needs a number to speify its index.  So in total this
-//! part occupies 1 byte for the variant discriminant and 8 bytes for
-//! the number.
-//!
-//! To sum up, each label occupies 34 bytes.
-
-use chain::item::{default::DefaultForest, ForestLabel};
-use grammar::{GrammarLabel, GrammarLabelType, TNT};
-use graph::{Graph, LabelGraph};
-
-pub(super) fn forest_to_bytes(forest: &DefaultForest<ForestLabel<GrammarLabel>>) -> Vec<u8> {
-    // First calculate the total number of bytes.
-    let nodes_len = forest.nodes_len();
-
-    let degrees: Vec<_> = forest
-        .nodes()
-        .map(|node| forest.degree(node).unwrap_or(0))
-        .collect();
-
-    let total_degree: usize = degrees.iter().copied().sum();
-
-    let len: usize = 8          // total number of bytes at the start
-        + 3                     // special mark
-        + 8                     // number of nodes
-        + 8                     // offset of labels
-        + 16 * nodes_len        // degree & offset for each node
-        + 8 * total_degree      // children of each node
-        + 34 * nodes_len        // labels
-        ;
-
-    // Then fill in the bytes.
-
-    let mut bytes: Vec<u8> = Vec::with_capacity(len);
-
-    // First the headers
-
-    bytes.extend(len.to_be_bytes());
-    bytes.extend([114, 101, 112]); // rep
-    bytes.extend(nodes_len.to_be_bytes());
-    bytes.extend((len - 34 * nodes_len).to_be_bytes());
-
-    let mut accumulated: usize = 0;
-
-    // Then degrees and offsets
-
-    for degree in degrees.iter().copied() {
-        bytes.extend(degree.to_be_bytes());
-        bytes.extend((8 + 3 + 8 + 8 + 16 * nodes_len + 8 * accumulated).to_be_bytes());
-
-        accumulated += degree;
-    }
-
-    // Then the children
-
-    bytes.extend(forest.nodes().flat_map(|node| {
-        forest
-            .children_of(node)
-            .unwrap_or_default()
-            .flat_map(|child| child.to_be_bytes())
-    }));
-
-    // Finally the labels
-
-    'nodes_loop: for node in forest.nodes() {
-        let label = match forest.vertex_label(node) {
-            Ok(Some(label)) => label,
-            _ => continue 'nodes_loop,
-        };
-
-        if label.is_plain() {
-            bytes.extend(std::iter::repeat(0).take(9));
-        } else if label.is_packed() {
-            bytes.extend(std::iter::once(1).chain(std::iter::repeat(0).take(8)));
-        } else if let Some(index) = label.clone_index() {
-            bytes.extend(std::iter::once(2).chain(index.to_be_bytes()));
-        }
-
-        let label = label.label();
-
-        bytes.extend(label.start().to_be_bytes());
-        bytes.extend(label.end().unwrap_or(0).to_be_bytes());
-
-        let label = label.label();
-
-        match label {
-            GrammarLabelType::TNT(TNT::Ter(t)) => {
-                bytes.extend(std::iter::once(0).chain(t.to_be_bytes()));
-            }
-            GrammarLabelType::TNT(TNT::Non(n)) => {
-                bytes.extend(std::iter::once(1).chain(n.to_be_bytes()));
-            }
-            GrammarLabelType::Rule(r) => {
-                bytes.extend(std::iter::once(2).chain(r.to_be_bytes()));
-            }
-        }
-    }
-
-    if bytes.len() != len {
-        dbg!();
-    }
-
-    bytes
-}
diff --git a/src/lib.rs b/src/lib.rs
index 3483847..69be41f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -9,10 +9,12 @@ use chain::{atom::DefaultAtom, default::DefaultChain, Chain};
 use grammar::Grammar;
 
 // For printing forests
-use chain::item::{default::DefaultForest, ForestLabel, ForestLabelType};
+use chain::item::default::DefaultForest;
 use grammar::{GrammarLabel, TNT};
 use graph::LabelGraph;
 
+use forest::{ForestLabel, ForestLabelType};
+
 /// This struct is the representation of a parser.
 ///
 /// When the user constructs a parser, an instance of this struct will
@@ -485,7 +487,7 @@ extern "C" fn parser_parse(
                     Ok(forest) => {
                         Box::leak(parser_box);
 
-                        let mut bytes = bytes::forest_to_bytes(&forest);
+                        let mut bytes = forest::forest_to_bytes(&forest);
 
                         let bytes_len = bytes.len().to_be_bytes().to_vec();
 
@@ -835,5 +837,3 @@ extern "C" fn print_forest_node(forest: *mut CForest, node: *mut std::os::raw::c
 
     println!("node {node} has label {label}");
 }
-
-pub mod bytes;
author	JSDurand <mmemmew@gmail.com>	2023-08-10 11:14:04 +0800
committer	JSDurand <mmemmew@gmail.com>	2023-08-10 11:14:04 +0800
commit	b8a2d05a3c0d835556d5ddbd44e4a1e201302af5 (patch)
tree	72e833f41b9a8ffd9d3e7216d2f158504343a420 /src
parent	f14c8a2aeab18a9bfa380df28f94736580e08f48 (diff)