#![warn(missing_docs)]
//! This crate implements non-deterministic finite automata.
//!
//! By default this uses the graph from the crate [`graph`]. To use
//! another external graph, add a module in which the external graph
//! implements the Graph trait from the [`graph`] crate, and then use
//! that external graph type as [`Graph`][graph::Graph] here.

pub mod error;

extern crate graph;

use core::fmt::Display;

use std::ops::{Deref, DerefMut};

use graph::{Graph, GraphLabel, LabelExtGraph};

use error::Error;

pub use desrec::DesRec;

use default::regex::RegexType;

/// The expected behaviour of a regular language.
///
/// Nondeterministic finite automata are equivalent to regular
/// languages. Since regular languages are easier to understand for a
/// human being, nondeterministic finite automata include the data for
/// the equivalent regular languages.
pub trait Regex<T: GraphLabel>: Graph + Display + Clone {
    /// Return the label of a vertex, or an error if the node is
    /// invalid.
    fn vertex_label(&self, node_id: usize) -> Result<T, Error>;

    #[inline]
    /// Return the root node of the regular language.
    ///
    /// Implementations can follow different conventions for the root
    /// node, and hence this function.
    ///
    /// If the regular language is empty, the implementation should
    /// return `None`.
    ///
    /// The default implementation uses the convention that the root
    /// node is always the first node.
    fn root(&self) -> Option<usize> {
        if self.is_empty() {
            None
        } else {
            Some(0)
        }
    }

    // TODO: Add functions that determine if certain "positions" in a
    // regular language satisfy some special properties, like at the
    // end of a Kleene star, or at the end of a regular language, et
    // cetera. These might be needed later.
}

/// Since `Option<T>` does not inherit the `Display` implementation
/// from `T`, we wrap it to provide an automatic implementation of
/// `Display`.
#[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Hash)]
pub struct DOption<T>(Option<T>);

impl<T> Default for DOption<T> {
    fn default() -> Self {
        Self(None)
    }
}

impl<T> Deref for DOption<T> {
    type Target = Option<T>;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl<T> DerefMut for DOption<T> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}

impl<T: Display> Display for DOption<T> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self.deref() {
            Some(value) => Display::fmt(value, f),
            None => write!(f, "ε"),
        }
    }
}

/// Substitute or Carry
///
/// This enumeration indicates whether a label from a regular
/// expression should be substituted by another regular expression or
/// carried around in the resulting nondeterministic finite automaton,
/// in the process of the [`to_nfa`][Nfa::to_nfa] function.
///
/// # Transform labels
///
/// The label that is returned to be carried can be different from the
/// original label, as a way to transform the labels.
///
/// # Remark on the abbreviation
///
/// It happens "by chance" that this abbreviation coincides with the
/// abbreviation of "system on chip". Since I know nothing about this
/// topic, this is just a meaningless pun.
#[derive(Debug, Copy, Clone)]
pub enum SoC<T> {
    /// To be substituted by another regular expression.
    Sub(usize),
    /// To carry around this label.
    Carry(T),
}
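// The two helper types above are easiest to see in a small example.
// The following test module is an illustrative sketch, not part of
// the crate's API: it only assumes the `DOption` and `SoC`
// definitions above, and the module and function names are made up
// for the example.
#[cfg(test)]
mod helper_examples {
    use super::*;

    // `DOption` delegates to the inner value's `Display` when the
    // value is present, and prints "ε" when it is absent.
    #[test]
    fn doption_displays_epsilon_for_none() {
        let empty: DOption<usize> = Default::default();
        assert_eq!(empty.to_string(), "ε");
        assert_eq!(DOption(Some(42usize)).to_string(), "42");
    }

    // A toy closure in the shape expected by the `sub_pred` parameter
    // of `to_nfa` below: substitute the label 0 by the regular
    // expression with index 0, and carry every other label unchanged.
    #[test]
    fn sub_pred_carries_or_substitutes() {
        let sub_pred = |label: usize| -> Result<SoC<usize>, Error> {
            if label == 0 {
                Ok(SoC::Sub(0))
            } else {
                Ok(SoC::Carry(label))
            }
        };

        assert!(matches!(sub_pred(0), Ok(SoC::Sub(0))));
        assert!(matches!(sub_pred(7), Ok(SoC::Carry(7))));
    }
}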
/// The expected behaviour of a nondeterministic finite automaton.
///
/// Every NFA is a special labelled graph.
pub trait Nfa<T: GraphLabel>: LabelExtGraph<T> {
    // TODO: This trait should have a type for the labels.

    /// Remove all empty transitions from the nondeterministic finite
    /// automaton.
    fn remove_epsilon<F>(&mut self, f: F) -> Result<(), Error>
    where
        F: Fn(T) -> bool;

    /// Return a state-minimal NFA equivalent to the original one.
    ///
    /// This is not required. It is just to allow me to experiment
    /// with NFA optimization algorithms.
    fn minimize(&self) -> Result<Box<Self>, Error> {
        Err(Error::UnsupportedOperation)
    }

    /// Check every node and edge with a given predicate.
    ///
    /// This should also verify that every node and edge has correct
    /// indices, so that we can safely use `unwrap` later. A
    /// consequence is that, if one only wants to check the validity
    /// of nodes and edges, one can pass a function that always
    /// returns `true`.
    #[inline]
    fn labels_satisfy(&self, f: impl Fn(T) -> bool) -> Result<bool, Error> {
        let nodes_len = self.nodes_len();
        let mut result = true;

        for node in self.nodes() {
            for (label, children_iter) in self.labels_of(node)? {
                for child in children_iter {
                    if child >= nodes_len {
                        dbg!(node, label);
                        return Err(graph::error::Error::IndexOutOfBounds(child, nodes_len).into());
                    }
                }

                // NOTE: Avoid executing `f` if `result` is already
                // false. But still proceed in verifying that nodes
                // and edges are correct: the correctness of nodes and
                // edges is more important than the function execution
                // results, as the former directly affects the safety
                // of many algorithms.
                if result && !f(*label) {
                    dbg!(node, label);
                    result = false;
                }
            }
        }

        Ok(result)
    }

    /// When we convert a regular expression to a nondeterministic
    /// finite automaton, the label should be optional, so we use a
    /// different type for the result type.
    type FromRegex<S: GraphLabel + Default>;

    /// Build a nondeterministic finite automaton out of a set
    /// `regexps` of regular expressions.
    ///
    /// The `sub_pred` function indicates, for each label, whether to
    /// carry it around or to substitute it by another regular
    /// language in the given set.
    ///
    /// The `default` parameter specifies the label of a default edge
    /// that goes from one node to another, neither of which is
    /// associated with the given regular languages.
    ///
    /// This function should perform Thompson's construction,
    /// basically.
    fn to_nfa(
        regexps: &[impl Regex<RegexType<T>>],
        // TODO: The transformation should produce more information.
        sub_pred: impl Fn(T) -> Result<SoC<T>, Error>,
        default: Option<T>,
    ) -> Result<Self::FromRegex<DOption<T>>, Error>;

    /// Remove all dead states from the nondeterministic finite
    /// automaton.
    ///
    /// A state is dead if there are no edges going to the state, and
    /// if it is not reserved.
    ///
    /// # Note
    ///
    /// Actually an implementation is allowed to just remove all edges
    /// out of the dead nodes. Then these nodes are considered gone
    /// from the graph, and we don't need to re-index the existing
    /// edges. We can call this "a poor man's removal of nodes".
    fn remove_dead(&mut self, reserve: impl Fn(usize) -> bool) -> Result<(), Error>;

    /// For each edge from A to B whose label is considered nullable
    /// by a function `f`, and for every edge from B to C, add an edge
    /// from A to C.
    ///
    /// This is needed specifically by the algorithm to produce a set
    /// of atomic languages that represent "null-closed" derived
    /// languages.
    fn nulling(&mut self, f: impl Fn(T) -> bool) -> Result<(), Error>;
}

pub mod default;

pub mod desrec;
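// A minimal sketch of how a caller might chain the clean-up methods
// of the `Nfa` trait once a concrete implementation (for instance one
// from the `default` module) is available. The module and function
// names and the order of the calls are illustrative assumptions, not
// an API or algorithm defined by this crate.
#[cfg(test)]
mod usage_sketch {
    #![allow(dead_code)]

    use super::*;

    // One plausible order: first add the edges implied by nullable
    // labels, then remove the empty transitions themselves, and
    // finally drop the dead states (those with no incoming edges),
    // reserving only the root node 0.
    fn cleanup<T, N>(
        nfa: &mut N,
        is_nullable: impl Fn(T) -> bool,
        is_empty: impl Fn(T) -> bool,
    ) -> Result<(), Error>
    where
        T: GraphLabel,
        N: Nfa<T>,
    {
        nfa.nulling(is_nullable)?;
        nfa.remove_epsilon(is_empty)?;
        nfa.remove_dead(|node| node == 0)
    }
}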