diff options
Diffstat (limited to 'grammar/src/lib.rs')
-rw-r--r-- | grammar/src/lib.rs | 85 |
1 files changed, 83 insertions, 2 deletions
diff --git a/grammar/src/lib.rs b/grammar/src/lib.rs index 11cb161..ea1299f 100644 --- a/grammar/src/lib.rs +++ b/grammar/src/lib.rs @@ -93,7 +93,7 @@ impl Display for TNT { } /// Errors related to grammar operations. -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Clone)] #[non_exhaustive] pub enum Error { /// The operation requires the grammar to be after a certain @@ -101,6 +101,8 @@ pub enum Error { WrongState(GrammarState, GrammarState), /// The first component is the index, and the second the bound. IndexOutOfBounds(usize, usize), + /// The given name of a terminal or a non-terminal is unknown. + UnknownTNTName(String), /// Fail to build the N-th regular expression, due to the /// ParseError. BuildFail(usize, ParseError), @@ -123,6 +125,11 @@ impl Display for Error { "Failed to build the {n}-th regular expression due to error: {pe}" ), Error::NFAFail(nfae) => write!(f, "failed to build NFA because of {nfae}"), + Error::UnknownTNTName(name) => write!( + f, + "the name {name} is unknown \ + for a terminal or a non-terminal." + ), Error::WrongState(current, threshold) => { write!(f, "require state {threshold}, but in state {current}") } @@ -158,6 +165,12 @@ impl Rule { pub fn len(&self) -> usize { self.regex.len() } + + /// Wrap a regular expression into a rule. + #[inline] + pub fn new(regex: DefaultRegex<TNT>) -> Self { + Self { regex } + } } /// The state of Grammar. @@ -190,6 +203,37 @@ impl Display for GrammarState { } } +/// This enum represents the name of either a terminal or a +/// non-terminal. +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +pub enum TNTName { + /// The name of a terminal. + Ter(String), + /// The name of a non-terminal. + Non(String), +} + +impl TNTName { + /// Construct the name of a terminal or a non-terminal. + #[inline] + pub fn new(name: String, terminal_p: bool) -> Self { + if terminal_p { + Self::Ter(name) + } else { + Self::Non(name) + } + } + + /// Return the underlying name. + #[inline] + pub fn name(&self) -> &str { + match self { + TNTName::Ter(name) => name, + TNTName::Non(name) => name, + } + } +} + /// The type of a grammar. #[derive(Debug, Clone, Default)] pub struct Grammar { @@ -197,6 +241,8 @@ pub struct Grammar { ter: Vec<Terminal>, /// A list of non-terminals. non: Vec<Nonterminal>, + /// A map from the names of terminals or non-terminals to TNT. + tnt_map: HashMap<TNTName, TNT>, /// A list of rules. /// /// The length of the list must match that of the list of @@ -286,6 +332,22 @@ impl Grammar { let expansion_map = Default::default(); let reduction_map = Default::default(); + let mut tnt_map: HashMap<TNTName, TNT> = Default::default(); + + for (index, ter_element) in ter.iter().enumerate() { + tnt_map.insert( + TNTName::new(ter_element.name().to_string(), true), + TNT::Ter(index), + ); + } + + for (index, non_element) in non.iter().enumerate() { + tnt_map.insert( + TNTName::new(non_element.name().to_string(), false), + TNT::Non(index), + ); + } + // NOTE: We cannot calculate accumulators here, as we want the // accumulators of the regular expression of the left-closure, // not of the original one. @@ -294,6 +356,7 @@ impl Grammar { Self { ter, non, + tnt_map, rules, firsts, first_nodes, @@ -304,6 +367,16 @@ impl Grammar { } } + /// Convert from the name of a terminal or a non-terminal to a + /// struct TNT. + #[inline] + pub fn name_to_tnt(&self, name: &TNTName) -> Result<TNT, Error> { + self.tnt_map + .get(name) + .copied() + .ok_or_else(|| Error::UnknownTNTName(name.name().to_string())) + } + /// Return the name of a terminal or a non-terminal. pub fn name_of_tnt(&self, tnt: TNT) -> Result<String, Error> { match tnt { @@ -313,6 +386,14 @@ impl Grammar { .get(t) .ok_or(Error::IndexOutOfBounds(t, self.ter.len()))? .name() + .chars() + .map(|c| if crate::abnf::is_v_char(c) { + c.to_string() + } else { + format!("{:#x}", c as usize) + }) + .collect::<Vec<_>>() + .join("") )), TNT::Non(n) => Ok(format!( "N{}", @@ -822,7 +903,7 @@ impl Display for Grammar { f, "{}", rule.regex.to_string_with(|tnt| format!( - "({})", + " {} ", self.name_of_tnt(tnt) .unwrap_or_else(|_| format!("Unknown {tnt:?}")) ))? |