//! This module defines functions to turn a forest of forest labels
//! into a sequence of bytes.
//!
//! To be more specific, a forest consists of two parts: a vector of
//! node labels and a graph specifying the relations between nodes.
//!
//! # Number format (endianness)
//!
//! Every number mentionned in this format is an unsigned integer of
//! 64 bits, or 8 bytes.  Every such number is specified in the big
//! endian format.
//!
//! # Parts of the sequence of bytes
//!
//! The sequence of bytes has three parts:
//!
//! ## Header
//!
//! The header specifies metadata of this forest.
//!
//! ### Special mark
//!
//! The first three bytes form the string "rep", as a special mark.
//!
//! ### Number of nodes
//!
//! The next 8 bytes specifies the number of nodes of this forest.
//!
//! ## Graph
//!
//! Next comes the underlying graph for this forest.  This part
//! consists of a vector of vectors of numbers.  Each vector of
//! numbers represents the list of children of a node.  So the number
//! of vectors of numbers is equal to the number of nodes.
//!
//! ### Vector of vectors of numbers
//!
//! The vectors are not simply concatenated one after another, as that
//! way one cannot read a random node in a constant amount of time.
//!
//! Instead, we first specify the number of children of each node
//! first, along with the offset for that node of the vector of
//! children.
//!
//! As an example, if a graph has three nodes, represented as the
//! adjacency list: `[[1, 2], [2], []]`, then its representation as a
//! sequence of bytes is as follows:
//! ```text
//! 2, x, 1, y, 0, z, 1, 2, 2,
//!                   ^     ^  ^
//!                   x     y  z
//! ```
//!
//! This has the advantage that we can read the children of the `n`-th
//! node in constant time.
//!
//! ## Vector of labels
//!
//! Each label occupies a fixed number of bytes, so we simply put the
//! labels one after another.  The only thing to note here is the
//! format of the labels.
//!
//! ### Labels
//!
//! Each label has 3 parts:
//!
//! 1. Status: either Packed, Cloned, or Plain.  If the node is
//! cloned, it has a clone index.  So in total this part occupies 1
//! byte for the status and 8 bytes for the clone index.
//! 2. Start and end: the range in the input sentence.  We just store
//! two numbers here.  Hence this part occupies 16 bytes.
//! 3. Grammar label: either a terminal, a non-terminal, or a rule.
//! Each variant needs a number to speify its index.  So in total this
//! part occupies 1 byte for the variant discriminant and 8 bytes for
//! the number.
//!
//! To sum up, each label occupies 34 bytes.

use chain::item::{default::DefaultForest, ForestLabel};
use grammar::{GrammarLabel, GrammarLabelType, TNT};
use graph::{Graph, LabelGraph};

pub(super) fn forest_to_bytes(forest: &DefaultForest<ForestLabel<GrammarLabel>>) -> Vec<u8> {
    // First calculate the total number of bytes.
    let nodes_len = forest.nodes_len();

    let degrees: Vec<_> = forest
        .nodes()
        .map(|node| forest.degree(node).unwrap_or(0))
        .collect();

    let total_degree: usize = degrees.iter().copied().sum();

    let len: usize = 8          // total number of bytes at the start
        + 3                     // special mark
        + 8                     // number of nodes
        + 8                     // offset of labels
        + 16 * nodes_len        // degree & offset for each node
        + 8 * total_degree      // children of each node
        + 34 * nodes_len        // labels
        ;

    // Then fill in the bytes.

    let mut bytes: Vec<u8> = Vec::with_capacity(len);

    // First the headers

    bytes.extend(len.to_be_bytes());
    bytes.extend([114, 101, 112]); // rep
    bytes.extend(nodes_len.to_be_bytes());
    bytes.extend((len - 34 * nodes_len).to_be_bytes());

    let mut accumulated: usize = 0;

    // Then degrees and offsets

    for degree in degrees.iter().copied() {
        bytes.extend(degree.to_be_bytes());
        bytes.extend((8 + 3 + 8 + 8 + 16 * nodes_len + 8 * accumulated).to_be_bytes());

        accumulated += degree;
    }

    // Then the children

    bytes.extend(forest.nodes().flat_map(|node| {
        forest
            .children_of(node)
            .unwrap_or_default()
            .flat_map(|child| child.to_be_bytes())
    }));

    // Finally the labels

    'nodes_loop: for node in forest.nodes() {
        let label = match forest.vertex_label(node) {
            Ok(Some(label)) => label,
            _ => continue 'nodes_loop,
        };

        if label.is_plain() {
            bytes.extend(std::iter::repeat(0).take(9));
        } else if label.is_packed() {
            bytes.extend(std::iter::once(1).chain(std::iter::repeat(0).take(8)));
        } else if let Some(index) = label.clone_index() {
            bytes.extend(std::iter::once(2).chain(index.to_be_bytes()));
        }

        let label = label.label();

        bytes.extend(label.start().to_be_bytes());
        bytes.extend(label.end().unwrap_or(0).to_be_bytes());

        let label = label.label();

        match label {
            GrammarLabelType::TNT(TNT::Ter(t)) => {
                bytes.extend(std::iter::once(0).chain(t.to_be_bytes()));
            }
            GrammarLabelType::TNT(TNT::Non(n)) => {
                bytes.extend(std::iter::once(1).chain(n.to_be_bytes()));
            }
            GrammarLabelType::Rule(r) => {
                bytes.extend(std::iter::once(2).chain(r.to_be_bytes()));
            }
        }
    }

    if bytes.len() != len {
        dbg!();
    }

    bytes
}