#![warn(missing_docs)]
//! This crate implements non-deterministic finite automata.
//!
//! By default this uses the graph from the crate [`graph`].  To use
//! another external graph, add a module in which the external graph
//! implements the Graph trait from the [`graph`] crate, and then use
//! that external graph type as [`Graph`][graph::Graph] here.

pub mod error;

extern crate graph;

use core::fmt::Display;

use std::ops::{Deref, DerefMut};

use graph::{Graph, GraphLabel, LabelExtGraph};

use error::Error;

pub use desrec::DesRec;

use default::regex::RegexType;

/// The expected behaviour of a regular language.
///
/// Nondeterministic finite automata are equivalent to regular
/// languages.  Since regular languages are easier to understand for a
/// human being, nondeterministic finite automata include the data for
/// the equivalent regular languages.
pub trait Regex<T: GraphLabel>: Graph + Display + Clone {
    /// Return the label of a vertex, or an error if the node is
    /// invalid.
    fn vertex_label(&self, node_id: usize) -> Result<T, Error>;

    #[inline]
    /// Return the root node of the regular language.
    ///
    /// Implementations can follow different conventions for the root
    /// node, and hence this function.
    ///
    /// If the regular language is empty, the implementation should
    /// return None.
    ///
    /// The default implementation uses the convention that the root
    /// node is always the first node.
    fn root(&self) -> Option<usize> {
        if self.is_empty() {
            None
        } else {
            Some(0)
        }
    }

    // TODO: Add functions that determine if certain "positions" in a
    // regular language satisfy some special properties, like at the
    // end of a Kleene star, or at the end of a regular language, et
    // cetera.  These might be needed later.
}

/// Since `Option<T>` does not inherit the `Display` from `T`, we wrap
/// it to provide an automatic implementation of `Display`.
///
/// # Convert to `Option`
///
/// One can dereference a `DOption` to obtain an `Option`.
#[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Hash)]
pub struct DOption<T>(pub Option<T>);

impl<T> Default for DOption<T> {
    fn default() -> Self {
        Self(None)
    }
}

impl<T> Deref for DOption<T> {
    type Target = Option<T>;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl<T> DerefMut for DOption<T> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}

impl<T: Display> Display for DOption<T> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self.deref() {
            Some(value) => Display::fmt(value, f),
            None => write!(f, "ε"),
        }
    }
}

/// Substitute or Carry
///
/// This enumeration indicates whether a label from a regular
/// expression should be substituted by another regular expression, or
/// to be carried around in the resulting nondeterministic finite
/// automaton, in the process of the [`to_nfa`][Nfa::to_nfa] function.
///
/// # Transform labels
///
/// The label that is returned to be carried can be different from the
/// original label, as a way to transform the labels.
///
/// # Remark on the abbreviation
///
/// It happens "by chance" that this abbreviation coincides with the
/// abbreviation of "system on chip".  Since I know nothing about this
/// topic, this is just a meaningless pun.
#[derive(Debug, Copy, Clone)]
pub enum SoC<T> {
    /// To be substituted by another regular expression.
    Sub(usize),
    /// To carry around this label.
    Carry(T),
}

/// This type records some information that is obtained from the
/// process of converting a regular expression to a nondeterministic
/// finite automaton.
#[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Hash, Default)]
pub struct NfaLabel<T: GraphLabel> {
    /// A terminal or a non-terminal.
    value: T,
    /// The corresponding position in the rules.
    moved: usize,
    /// Whether this comes from left-linear expansion.
    left_p: bool,
}

impl<T: GraphLabel> NfaLabel<T> {
    /// Construct a new label.
    #[inline]
    pub fn new(value: T, moved: usize, left_p: bool) -> Self {
        Self {
            value,
            moved,
            left_p,
        }
    }

    /// Retrieve the value from the label.
    #[inline]
    pub fn get_value(&self) -> T {
        self.value
    }
    /// Retrieve the moved position from the label.
    #[inline]
    pub fn get_moved(&self) -> usize {
        self.moved
    }
    /// Retrieve whether or not the label comes from left-linear
    /// expansions.
    #[inline]
    pub fn get_left_p(&self) -> bool {
        self.left_p
    }
}

impl<T: GraphLabel> Display for NfaLabel<T> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "edge {} at {}{}", self.value, self.moved, {
            if self.left_p {
                ", by left"
            } else {
                ""
            }
        })
    }
}

/// A convenient alias of the information of two edges.
///
/// If the tuple is (a, b, c, la, lb), then the first edge goes from a
/// to b, is labelled la, and the second edge goes from b to c, and is
/// labelled by lb.
#[derive(Debug, Clone, Copy)]
pub struct TwoEdges<T: Copy>(usize, usize, usize, T, T);

impl<T: Copy> TwoEdges<T> {
    /// Extract the first edge.
    pub fn first_edge(&self) -> (usize, usize, T) {
        (self.0, self.1, self.3)
    }

    /// Extract the second edge.
    pub fn second_edge(&self) -> (usize, usize, T) {
        (self.1, self.2, self.4)
    }
}

/// The type of nondeterministic finite automata that is obtained from
/// a regular expression, via the method [`to_nfa`][Nfa::to_nfa].
pub type LabelType<T> = NfaLabel<DOption<T>>;

/// The expected behvaiour of a nondeterministic finite automaton.
///
/// Every NFA is a special labelled graph.
pub trait Nfa<T: GraphLabel>: LabelExtGraph<T> {
    /// When we convert a regular expression to a nondeterministic
    /// finite automaton, the label should be optional, so we use a
    /// different type for the result type.
    type FromRegex<S: GraphLabel + Display + Default>;

    // FIXME: This should not be needed.
    /// Remove all empty transitions from the nondeterministic finite
    /// automaton.
    #[inline]
    fn remove_epsilon<F>(&mut self, _f: F) -> Result<(), Error>
    where
        F: FnMut(T) -> bool,
    {
        // self.closure(f, true, |two_edges| two_edges.second_edge().2)
        unimplemented!("deprecated")
    }

    /// Return a state-minimal NFA equivalent with the original one.
    ///
    /// This is not required.  It is just to allow me to experiment
    /// with NFA optimization algorithms.
    fn minimize(&self) -> Result<Box<Self>, Error> {
        Err(Error::UnsupportedOperation)
    }

    /// Check every node or edge by a given predicate.
    ///
    /// This should also verify that every node and edge has correct
    /// indices, so that we can safely use `unwrap` later.  A
    /// consequence is that, if one only wants to check the validity
    /// of nodes and edges, one can pass a function that always
    /// returns `true`.
    #[inline]
    fn labels_satisfy(&self, f: impl Fn(T) -> bool) -> Result<bool, Error> {
        let nodes_len = self.nodes_len();
        let mut result = true;

        for node in self.nodes() {
            for (label, children_iter) in self.labels_of(node)? {
                for child in children_iter {
                    if child >= nodes_len {
                        dbg!(node, label);
                        return Err(graph::error::Error::IndexOutOfBounds(child, nodes_len).into());
                    }
                }

                // NOTE: Avoid executing `f` if `result` is already
                // false.  But still proceed in verifying that nodes
                // and edges are correct: the correctness of nodes and
                // edges is more important than the function execution
                // results, as the former directly affects the safety
                // of many algorithms.
                if result && !f(*label) {
                    dbg!(node, label);
                    result = false;
                }
            }
        }

        Ok(result)
    }

    /// Build a nondeterministic finite automaton out of a set
    /// `regexps` of regular expressions.
    ///
    /// The `sub_pred` is a predicate function that returns an
    /// indication whether to carry the value around or to substitute
    /// by another regular language in the given set.
    ///
    /// The `default` parameter specifies the label of a default edge,
    /// that goes from a node to another, both of which are not
    /// associated with the given regular languages.
    ///
    /// This function should perform Thompson's construction,
    /// basically.
    fn to_nfa(
        regexps: &[impl Regex<RegexType<T>>],
        sub_pred: impl Fn(T) -> Result<SoC<T>, Error>,
        default: Option<T>,
    ) -> Result<Self::FromRegex<LabelType<T>>, Error>;

    /// Remove all dead states from the nondeterministic finite
    /// automaton.
    ///
    /// A state is dead if there are no edges going to the state, and
    /// if it is not reserved.
    ///
    /// # Note
    ///
    /// Actually an implementation is allowed to just remove all edges
    /// out of the dead nodes.  Then these nodes are considered gone
    /// from the graph, and we don't need to re-index the existing
    /// edges.  We can call this "a poor man's removal of nodes".
    fn remove_dead(&mut self, reserve: impl FnMut(usize) -> bool) -> Result<(), Error>;

    // FIXME: This should not be needed.
    /// For each edge from A to B whose edge is considered nullable by
    /// a function `f`, and for every edge from B to C, add an edge
    /// from A to C.
    ///
    /// This is needed specifically by the algorithm to produce a set
    /// of atomic languages that represent "null-closed" derived
    /// languages.
    #[inline]
    fn nulling(&mut self, _f: impl FnMut(T) -> bool) -> Result<(), Error> {
        // self.closure(f, false, |two_edges| two_edges.second_edge().2)
        unimplemented!("deprecated")
    }

    /// Return the *closure* of the nondeterministic finite automaton.
    ///
    /// # Definition
    ///
    /// The closure of a nondeterministic finite automaton N is
    /// defined as the unique *minimal* nondeterministic finite
    /// automaton N+ that can be obtained by adjoining some edges to N
    /// such that if there are edges a -> b and b -> c, and if the
    /// edge a -> b is deemed as *nullable* by some function, then
    /// there is an edge a -> c, where the minimality is the
    /// minimality of the set of edges: if there is another such
    /// nondeterministic finite automaton M satisfying the above
    /// property, then the set of edges of N+ is a subset of the set
    /// of edges of M.
    ///
    /// # Remove edges afterwards
    ///
    /// If `remove_after_p` is true, remove all those nullable edges.
    ///
    /// # Transformation of labels
    ///
    /// We can apply a transformer to labels, to be able to combine
    /// labels in non-trivial ways.  If one just wants the *new* label
    /// that is the label of the edge from b to c in the above
    /// definition, one can use the function that sends `two_edges` to
    /// `two_edges.second_edge().2`.
    ///
    /// # Error
    ///
    /// The function should emit errors if the edges of the
    /// nondeterministic finite automaton point to invalid nodes.
    fn closure(
        &mut self,
        predicate: impl FnMut(T) -> bool,
        remove_after_p: bool,
        transform: impl FnMut(TwoEdges<T>) -> T,
        remove_predicate: impl FnMut(T) -> bool,
    ) -> Result<(), Error>;
}

pub mod default;
pub mod desrec;