summary | refs | log | tree | commit | diff
path: root/grammar/src/lib.rs
diff options
context:
space:
mode:
Diffstat (limited to 'grammar/src/lib.rs')
-rw-r--r-- grammar/src/lib.rs | 85
1 files changed, 83 insertions, 2 deletions
diff --git a/grammar/src/lib.rs b/grammar/src/lib.rs
index 11cb161..ea1299f 100644
--- a/grammar/src/lib.rs
+++ b/grammar/src/lib.rs
@@ -93,7 +93,7 @@ impl Display for TNT {
}
/// Errors related to grammar operations.
-#[derive(Debug, Copy, Clone)]
+#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum Error {
/// The operation requires the grammar to be after a certain
@@ -101,6 +101,8 @@ pub enum Error {
WrongState(GrammarState, GrammarState),
/// The first component is the index, and the second the bound.
IndexOutOfBounds(usize, usize),
+ /// The given name of a terminal or a non-terminal is unknown.
+ UnknownTNTName(String),
/// Fail to build the N-th regular expression, due to the
/// ParseError.
BuildFail(usize, ParseError),
@@ -123,6 +125,11 @@ impl Display for Error {
"Failed to build the {n}-th regular expression due to error: {pe}"
),
Error::NFAFail(nfae) => write!(f, "failed to build NFA because of {nfae}"),
+ Error::UnknownTNTName(name) => write!(
+ f,
+ "the name {name} is unknown \
+ for a terminal or a non-terminal."
+ ),
Error::WrongState(current, threshold) => {
write!(f, "require state {threshold}, but in state {current}")
}
@@ -158,6 +165,12 @@ impl Rule {
pub fn len(&self) -> usize {
self.regex.len()
}
+
+ /// Wrap a regular expression into a rule.
+ #[inline]
+ pub fn new(regex: DefaultRegex<TNT>) -> Self {
+ Self { regex }
+ }
}
/// The state of Grammar.
@@ -190,6 +203,37 @@ impl Display for GrammarState {
}
}
+/// This enum represents the name of either a terminal or a
+/// non-terminal.
+#[derive(Debug, Clone, Eq, PartialEq, Hash)]
+pub enum TNTName {
+ /// The name of a terminal.
+ Ter(String),
+ /// The name of a non-terminal.
+ Non(String),
+}
+
+impl TNTName {
+ /// Construct a terminal name if `terminal_p` is true, else a non-terminal name.
+ #[inline]
+ pub fn new(name: String, terminal_p: bool) -> Self {
+ if terminal_p {
+ Self::Ter(name)
+ } else {
+ Self::Non(name)
+ }
+ }
+
+ /// Return the underlying name.
+ #[inline]
+ pub fn name(&self) -> &str {
+ match self {
+ TNTName::Ter(name) => name,
+ TNTName::Non(name) => name,
+ }
+ }
+}
+
/// The type of a grammar.
#[derive(Debug, Clone, Default)]
pub struct Grammar {
@@ -197,6 +241,8 @@ pub struct Grammar {
ter: Vec<Terminal>,
/// A list of non-terminals.
non: Vec<Nonterminal>,
+ /// A map from the names of terminals or non-terminals to TNT.
+ tnt_map: HashMap<TNTName, TNT>,
/// A list of rules.
///
/// The length of the list must match that of the list of
@@ -286,6 +332,22 @@ impl Grammar {
let expansion_map = Default::default();
let reduction_map = Default::default();
+ let mut tnt_map: HashMap<TNTName, TNT> = Default::default();
+
+ for (index, ter_element) in ter.iter().enumerate() {
+ tnt_map.insert(
+ TNTName::new(ter_element.name().to_string(), true),
+ TNT::Ter(index),
+ );
+ }
+
+ for (index, non_element) in non.iter().enumerate() {
+ tnt_map.insert(
+ TNTName::new(non_element.name().to_string(), false),
+ TNT::Non(index),
+ );
+ }
+
// NOTE: We cannot calculate accumulators here, as we want the
// accumulators of the regular expression of the left-closure,
// not of the original one.
@@ -294,6 +356,7 @@ impl Grammar {
Self {
ter,
non,
+ tnt_map,
rules,
firsts,
first_nodes,
@@ -304,6 +367,16 @@ impl Grammar {
}
}
+ /// Convert the name of a terminal or a non-terminal to its TNT;
+ /// fail with `Error::UnknownTNTName` if the name is not known.
+ #[inline]
+ pub fn name_to_tnt(&self, name: &TNTName) -> Result<TNT, Error> {
+ self.tnt_map
+ .get(name)
+ .copied()
+ .ok_or_else(|| Error::UnknownTNTName(name.name().to_string()))
+ }
+
/// Return the name of a terminal or a non-terminal.
pub fn name_of_tnt(&self, tnt: TNT) -> Result<String, Error> {
match tnt {
@@ -313,6 +386,14 @@ impl Grammar {
.get(t)
.ok_or(Error::IndexOutOfBounds(t, self.ter.len()))?
.name()
+ .chars()
+ .map(|c| if crate::abnf::is_v_char(c) {
+ c.to_string()
+ } else {
+ format!("{:#x}", c as usize)
+ })
+ .collect::<Vec<_>>()
+ .join("")
)),
TNT::Non(n) => Ok(format!(
"N{}",
@@ -822,7 +903,7 @@ impl Display for Grammar {
f,
"{}",
rule.regex.to_string_with(|tnt| format!(
- "({})",
+ " {} ",
self.name_of_tnt(tnt)
.unwrap_or_else(|_| format!("Unknown {tnt:?}"))
))?