src/grammar.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108

#ifndef GRAMMAR_H
#define GRAMMAR_H

#include <stdarg.h>
#include <stdio.h>
#include "list.h"
#include "utf8.h"

/* The class file for grammars */

/* A grammar is a list of rules.  A rule replaces a non-terminal with
   a finite string of terminals and non-terminals.

   For us, a terminal is a positive integer, and a non-terminal is a
   negative integer.  Since the signs of these two types is fixed, we
   represent them as unsigned integers.  Since we want to deal with
   unicode points, the terminal is stored as unsigned long, so that we
   can include all unicode poitns. */

typedef unsigned long T;
typedef unsigned NT;

/* T or NT

   Don't worry, it is not explosive. */

struct TNT_s {
  int type;                     /* 0 => T and NT otherwise */
  union { T t; NT nt; } data;
};

typedef struct TNT_s TNT;
typedef struct Rule_s Rule;

/* A grammar is more than a list of rules: it should include a
   correspondance to associate the names of non-terminals with
   unsigned integers.  Basically, this means it should hold a list of
   strings as well. */
typedef struct Grammar_s Grammar;

/* G is expected to be allocated before this function.

   NAMES is a list of CPA pointers. */
unsigned char build_grammar(Grammar *g, List *rules, List *names);

/* This accepts one and only one optional argument, which is of type
   either T or NT, depending on TYPE being 0 or not. */
TNT *new_tnt(int type, ...);

/* An array of code-points. */

struct cpa_s {
  NUM *array;
  UNUM size;
};

typedef struct cpa_s cpa;

NUM find_in_cpa_list(NUM *string, NUM size, List *list);

/* For debugging purposes, we need to print out rules to examine their
   values. */

/* assume element is a cpa pointer */
void print_name(void *element);

void print_tnt(void *element);

void print_rule(void *r);

/* This will generate errors if G is not a list of rules. */
void print_grammar(Grammar *g);

/* constructors */

/* FORMAT specifies the types of TNTs.  It is a string of t and n's.
   FORMAT_LEN is the length of FORMAT, excluding the terminating null
   byte.

   Upon failure NULL is returned. */
List *new_tnt_string(char *format, int format_len, ...);

TNT *new_tnt_pointer(size_t size);

/* RIGHT should be created by new_tnt_string or something alike.

   If RIGHT is NULL, NULL is returned. */
Rule *new_rule(NT left, List *right);

Grammar *new_grammar();

/* destructors */
void destroy_rule(void *rule, int flag);
void destroy_rule_and_free_all(void *rule);
void destroy_rule_and_free_first(void *rule);
void destroy_rule_no_free(void *rule);
void destroy_cpa_and_free_all(void *element);
void destroy_grammar(void *grammar, int flag);

/* convenient macro */

#define NEW_CPA(X, ARRAY, SIZE) do {            \
    X = MYALLOC(cpa, 1);                        \
    X->array = ARRAY;                           \
    X->size = SIZE;                             \
  } while (0)

#endif