summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2022-01-22 02:36:54 +0800
committerJSDurand <mmemmew@gmail.com>2022-01-22 02:36:54 +0800
commitbad2b1934da66021cbc7f0d715686706bd444449 (patch)
tree78c340d5f834bbc5864a97dcb23ff4ec41e23072
parent53865aad225ffbe5cf3c42736e5a2095092f9fff (diff)
Implemented a hash table with any type of keys
-rw-r--r--src/bsr.h38
-rw-r--r--src/grammar.c137
-rw-r--r--src/ht.c242
-rw-r--r--src/ht.h72
-rw-r--r--src/test/check_grammar.c48
-rw-r--r--src/test/check_ht.c21
-rw-r--r--src/util.h1
7 files changed, 406 insertions, 153 deletions
diff --git a/src/bsr.h b/src/bsr.h
index 8578ab2..d6b96e5 100644
--- a/src/bsr.h
+++ b/src/bsr.h
@@ -20,7 +20,7 @@
BSR has already been added to a set. I guess the reason is that
this is not the bottleneck of performance in its applications. But
I am not satisfied with that solution. I want to take advantage of
- hash-table's almost constant time look-up feature to implement
+ hash-table's almost constant-time look-up feature to implement
this. */
/* A BSR set has two types.
@@ -53,8 +53,13 @@
each type. Each set is an array of arrays, where the root array's
indices correspond to non-terminals, and the leaf arrays correspond
to rules of that non-terminal. Then the rest is implemented
- through hash-tables, where a hash table's values are yet another
- hash table, and so on.
+ through hash-tables, whose keys are tuples of integers. For the
+ first type, the keys are the (i, k) pairs and the values are those
+ j. For the second, the keys are the (b, i, k) tupels and the
+ values are those j. The reason to use i and k as keys is that they
+ represent the left and the right extent of the BSR respectively, so
+ we index them in order to find them easily. In addition, this will
+ be useful if we want to extract SPPF out of a BSR set.
I don't know if there are better implementations. If I think of
better ways in the future, I will re-implement the BSR sets. */
@@ -70,7 +75,32 @@
(X, a, i, j, k),
i.e. as a first-type BSR set. But the meaning of the "i" is
- different. */
+ different.
+
+ However, we shall notice that there is a natural "grading" on the
+ process descriptors. The k-th grading consists of the descriptors
+ with the last element in the tuple equal to k. In the clustered
+ nonterminal parsing algorithm, the process descriptors that are
+ added as a consequence of dealing with a process descriptor of
+ grading k must be of grade greater than or equal to k. This means
+ that if all process descriptors of grade k have been processed and
+ the descriptors awaiting to be processed all have grades > k, then
+ we won't see any process descriptors of grade <= k again. So we
+ can actually keep a list of process descriptors that serve the role
+ of the set U_k in the afore-mentionned paper. This list
+ corresponds to the grading of the descriptors. This way we don't
+ need to store the k in the descriptor. Therefore a process
+ descriptor is represented as
+
+ (X, a, i, j).
+
+ Again we store an array of arrays corresponding to X and a. Thus
+ the hash table simply has keys i and values j.
+
+ The main benefit of this is that after we find that all process
+ descriptors in R_k have been processed, we can free those
+ descriptors in U_k. I think this is a good way to lower the space
+ requirements of this algorithm. */
int place(int x);
diff --git a/src/grammar.c b/src/grammar.c
index a24f9cb..bbc6934 100644
--- a/src/grammar.c
+++ b/src/grammar.c
@@ -558,6 +558,10 @@ nt_first(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
ht *terminal_ht = NULL, *pred_ht = NULL;
+ NUM *temp = NULL;
+ T *tempT = NULL;
+ PT *tempPT = NULL;
+
/* lazy macro */
#define BREAKOUT k = string_len
@@ -568,30 +572,42 @@ nt_first(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
for (NUM i = 0; i < left_len; i++) {
Rule_group *rg = grammar_rule(g, (NT) i);
NUM rg_length = rg_len(rg);
-
- for (NUM j = 0; j < rg_length;) {
- List *string = rg_nth(rg, j++);
+ /* fleprintf("rg_length = %ld\n", rg_length); */
+ for (NUM j = 0; j < rg_length; j++) {
+ List *string = rg_nth(rg, j);
NUM string_len = list_length(string);
+ /* fleprintf("j = %ld, len = %ld\n", j, string_len); */
TNT *top = NULL;
- for (NUM k = 0; k < string_len;) {
- top = (TNT *) list_nth(string, k++);
+ for (NUM k = 0; k < string_len; k++) {
+ top = (TNT *) list_nth(string, k);
switch (top->type) {
case TERMINAL:
+ SAFE_MALLOC(NUM, tempT, 1, return 1;);
+ *tempT = top->data.t;
if (first &&
- ht_find(terminal_hts+i, top->data.t) == NULL) {
+ ht_find(terminal_hts+i, tempT) == NULL) {
changed = 1;
- ht_insert(terminal_hts+i, top->data.t, (void*)1);
+ ht_insert(terminal_hts+i, tempT, (void*)1);
+ /* fleprintf("After insertion we find %p for %ld\n",
+ * ht_find(terminal_hts+i, tempT),
+ * *tempT); */
BREAKOUT;
+ } else {
+ free(tempT);
}
break;
case PREDICATE:
+ SAFE_MALLOC(NUM, tempPT, 1, return 1;);
+ *tempPT = top->data.pt;
if (first &&
- ht_find(predicate_hts+i, top->data.pt) == NULL) {
+ ht_find(predicate_hts+i, tempPT) == NULL) {
changed = 1;
- ht_insert(predicate_hts+i, top->data.pt, (void*)1);
+ ht_insert(predicate_hts+i, tempPT, (void*)1);
BREAKOUT;
+ } else {
+ free(tempPT);
}
break;
default:
@@ -602,13 +618,20 @@ nt_first(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
found. */
NUM len = ht_size(terminal_ht);
- NUM *keys = ht_keys(terminal_ht);
+ void **keys = ht_keys(terminal_ht);
for (NUM ell = 0; ell < len;) {
- NUM key = *(keys+ell++);
- if (ht_find(terminal_hts+i, key) == NULL) {
+ NUM *key = *(keys+ell++);
+ SAFE_MALLOC(NUM, temp, 1, return 1;);
+ *temp = *key;
+ /* fleprintf("i = %ld, j = %ld, ", i, j); */
+ if (ht_find(terminal_hts+i, temp) == NULL) {
+ /* eprintf("new item\n"); */
changed = 1;
- ht_insert(terminal_hts+i, key, (void*)1);
+ ht_insert(terminal_hts+i, temp, (void*)1);
+ } else {
+ /* eprintf("already found for key = %ld\n", *temp); */
+ free(temp);
}
}
@@ -616,10 +639,17 @@ nt_first(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
keys = ht_keys(pred_ht);
for (NUM ell = 0; ell < len;) {
- NUM key = *(keys+ell++);
- if (ht_find(predicate_hts+i, key) == NULL) {
+ NUM *key = *(keys+ell++);
+ SAFE_MALLOC(NUM, temp, 1, return 1;);
+ *temp = *key;
+ /* fleprintf("i = %ld, j = %ld, ", i, j); */
+ if (ht_find(predicate_hts+i, temp) == NULL) {
+ /* eprintf("new item\n"); */
changed = 1;
- ht_insert(predicate_hts+i, key, (void*)1);
+ ht_insert(predicate_hts+i, temp, (void*)1);
+ } else {
+ free(temp);
+ /* eprintf("already found for key = %ld\n", *temp); */
}
}
@@ -649,22 +679,34 @@ tnt_first(CC_MOD(ht *) terminal_hts, CC_MOD(ht *) predicate_hts,
if (!tnt_len) return 0;
- NUM temp_len = 0, *keys = NULL;
+ NUM temp_len = 0, **keys = NULL;
TNT *top = NULL;
NT current = 0;
+ NUM *temp = NULL;
+ T *tempT = NULL;
+ PT *tempPT = NULL;
+
for (NUM i = 0; i < tnt_len; i++) {
top = (TNT *) list_nth(tnts, i);
switch (top->type) {
case TERMINAL:
- ht_insert(result_terminals, top->data.t, (void *)1);
+ if (ht_find(result_terminals, &(top->data.t)) == NULL) {
+ SAFE_MALLOC(T, tempT, 1, return 1;);
+ *tempT = top->data.t;
+ ht_insert(result_terminals, tempT, (void *)1);
+ }
return 0;
break;
case PREDICATE:
- ht_insert(result_predicates, top->data.pt, (void *)1);
+ if (ht_find(result_predicates, &(top->data.pt)) == NULL) {
+ SAFE_MALLOC(PT, tempPT, 1, return 1;);
+ *tempPT = top->data.pt;
+ ht_insert(result_predicates, tempPT, (void *)1);
+ }
return 0;
break;
default:
@@ -676,16 +718,26 @@ tnt_first(CC_MOD(ht *) terminal_hts, CC_MOD(ht *) predicate_hts,
}
temp_len = ht_size(terminal_hts+current);
- keys = ht_keys(terminal_hts+current);
+ keys = (NUM **) ht_keys(terminal_hts+current);
- for (NUM j = 0; j < temp_len;)
- ht_insert(result_terminals, *(keys+j++), (void *)1);
+ for (NUM j = 0; j < temp_len; j++) {
+ if (ht_find(result_terminals, *(keys+j)) == NULL) {
+ SAFE_MALLOC(NUM, temp, 1, return 1;);
+ *temp = **(keys+j);
+ ht_insert(result_terminals, temp, (void *)1);
+ }
+ }
temp_len = ht_size(predicate_hts+current);
- keys = ht_keys(predicate_hts+current);
+ keys = (NUM **) ht_keys(predicate_hts+current);
- for (NUM j = 0; j < temp_len;)
- ht_insert(result_predicates, *(keys+j++), (void *)1);
+ for (NUM j = 0; j < temp_len; j++) {
+ if (ht_find(result_predicates, *(keys+j)) == NULL) {
+ SAFE_MALLOC(NUM, temp, 1, return 1;);
+ *temp = **(keys+j);
+ ht_insert(result_predicates, temp, (void *)1);
+ }
+ }
if (!(*(nts+current))) i = tnt_len;
@@ -702,13 +754,22 @@ nt_follow(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
ht * const restrict result_terminals,
ht * const restrict result_predicates)
{
- ht_insert(result_terminals, END_OF_INPUT, (void*)1);
+ NUM *tempN = NULL;
+
+ SAFE_MALLOC(NUM, tempN, 1, return 1;);
+ *tempN = END_OF_INPUT;
+
+ if (ht_find(result_terminals, tempN) == NULL) {
+ ht_insert(result_terminals, tempN, (void*)1);
+ } else {
+ free(tempN);
+ }
NUM left_len = grammar_left_len(g);
BOOL changed = 0, first = 1;
- NUM ht_len = 0, *keys = NULL;
+ NUM ht_len = 0, **keys = NULL;
List *temp = NULL;
@@ -782,28 +843,34 @@ nt_follow(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
add_to_follow:
ht_len = ht_size(result_terminals+i);
- keys = ht_keys(result_terminals+i);
+ keys = (NUM **) ht_keys(result_terminals+i);
for (NUM ell = 0; ell < ht_len;) {
- NUM key = *(keys+ell++);
+ SAFE_MALLOC(NUM, tempN, 1, return 1;);
+ *tempN = **(keys+ell++);
if (ht_find(result_terminals+top->data.nt,
- key) == NULL) {
+ tempN) == NULL) {
changed = 1;
ht_insert(result_terminals+top->data.nt,
- key, (void*)1);
+ tempN, (void*)1);
+ } else {
+ free(tempN);
}
}
ht_len = ht_size(result_predicates+i);
- keys = ht_keys(result_predicates+i);
+ keys = (NUM **) ht_keys(result_predicates+i);
for (NUM ell = 0; ell < ht_len;) {
- NUM key = *(keys+ell++);
+ SAFE_MALLOC(NUM, tempN, 1, return 1;);
+ *tempN = **(keys+ell++);
if (ht_find(result_predicates+top->data.nt,
- key) == NULL) {
+ tempN) == NULL) {
changed = 1;
ht_insert(result_predicates+top->data.nt,
- key, (void*)1);
+ tempN, (void*)1);
+ } else {
+ free(tempN);
}
}
diff --git a/src/ht.c b/src/ht.c
index c44a57e..2fc1206 100644
--- a/src/ht.c
+++ b/src/ht.c
@@ -1,20 +1,73 @@
+#include <stdarg.h>
#include <string.h>
#include <stdio.h>
#include "ht.h"
#define HT_REHASH_THRESHOLD 0.5
+P_ATTR
+static NUM
+num_hft(CCR_MOD(void *) key)
+{
+ if (key == NULL) {
+ fleprintf0("Got NULL key!\n");
+ return 0;
+ }
+
+ return *((NUM *) key);
+}
+
+P_ATTR
+static BOOL
+num_cft(CCR_MOD(void *) keya, CCR_MOD(void *) keyb)
+{
+ if ((keya == NULL && keyb != NULL) ||
+ (keyb == NULL && keya != NULL))
+ return 0;
+
+ if (keya == NULL && keyb == NULL) return 1;
+
+ return *((NUM *) keya) == *((NUM *) keyb);
+}
+
/* SIZE is a hint to the number of elements that might be put in the
table. In fact we will allocate more memory than this number, as a
- hash table works better if it is not fully used. */
+ hash table works better if it is not fully used.
+
+ NARGS is the number of arguments to follow. It should be between 0
+ and 2 (inclusive). If there are more than one argument, the first
+ argument is used as the hashing function; if there are two
+ arguments, the second argument is used as the comparison
+ function. */
ht *
-new_ht(UNUM size)
+new_ht(UNUM size, int nargs, ...)
{
+
+ if (nargs < 0 || nargs > 2) {
+ fleprintf("NARGS should be between zero and two, but got %d\n",
+ nargs);
+ return NULL;
+ }
+
+ hash_func_t hft = num_hft;
+ compare_func_t cft = num_cft;
+
+ va_list args;
+
+ if (nargs) {
+ va_start(args, nargs);
+ if (nargs >= 1) hft = va_arg(args, hash_func_t);
+ if (nargs == 2) cft = va_arg(args, compare_func_t);
+ va_end(args);
+ }
+
ht *htp = NULL;
SAFE_MALLOC(ht, htp, 1, return NULL;);
htp->capability = (size<<1)+(size>>1);
htp->size = 0;
+ htp->hf = hft;
+ htp->cf = cft;
/* For safer clean up */
htp->keys = NULL;
@@ -47,13 +100,43 @@ void
destroy_ht(ht * restrict htp, HT_DESTROY_FLAG flag)
{
switch (flag) {
+ case DESTROY_KEY_SELF:
+ case DESTROY_KEY_NO_SELF:
+ case DESTROY_EVERY_SELF:
case DESTROY_EVERY_NO_SELF:
+ case DESTROY_KEY_VALUE_FIRST_SELF:
+ case DESTROY_KEY_VALUE_FIRST_NO_SELF:
+ for (NUM i = 0; i < htp->size;)
+ free(*(htp->keys+i++));
+ break;
+ case DESTROY_KEY_FIRST_SELF:
+ case DESTROY_KEY_FIRST_NO_SELF:
+ case DESTROY_EVERY_FIRST_SELF:
+ case DESTROY_EVERY_FIRST_NO_SELF:
+ case DESTROY_KEY_FIRST_VALUE_SELF:
+ case DESTROY_KEY_FIRST_VALUE_NO_SELF:
+ free(*(htp->keys));
+ break;
+ default:
+ break;
+ }
+
+ switch (flag) {
+ case DESTROY_VALUE_SELF:
+ case DESTROY_VALUE_NO_SELF:
case DESTROY_EVERY_SELF:
+ case DESTROY_EVERY_NO_SELF:
+ case DESTROY_KEY_FIRST_VALUE_SELF:
+ case DESTROY_KEY_FIRST_VALUE_NO_SELF:
for (NUM i = 0; i < htp->size;)
free(*(htp->values+i++));
break;
- case DESTROY_FIRST_NO_SELF:
- case DESTROY_FIRST_SELF:
+ case DESTROY_VALUE_FIRST_SELF:
+ case DESTROY_VALUE_FIRST_NO_SELF:
+ case DESTROY_EVERY_FIRST_SELF:
+ case DESTROY_EVERY_FIRST_NO_SELF:
+ case DESTROY_KEY_VALUE_FIRST_SELF:
+ case DESTROY_KEY_VALUE_FIRST_NO_SELF:
free(*(htp->values));
break;
default:
@@ -68,8 +151,14 @@ destroy_ht(ht * restrict htp, HT_DESTROY_FLAG flag)
switch (flag) {
case DESTROY_NONE_SELF:
+ case DESTROY_KEY_SELF:
+ case DESTROY_KEY_FIRST_SELF:
+ case DESTROY_VALUE_SELF:
+ case DESTROY_VALUE_FIRST_SELF:
case DESTROY_EVERY_SELF:
- case DESTROY_FIRST_SELF:
+ case DESTROY_EVERY_FIRST_SELF:
+ case DESTROY_KEY_FIRST_VALUE_SELF:
+ case DESTROY_KEY_VALUE_FIRST_SELF:
free(htp);
break;
default:
@@ -81,22 +170,22 @@ static BOOL
ht_expand(ht *htp)
{
htp->capability <<= 1;
- htp->capability++;
- NUM *keys = NULL, size = htp->size;
+ void **keys = NULL;
+ NUM size = htp->size;
void **values = NULL;
if (size) {
- SAFE_MALLOC(NUM, keys, size, goto cleanup;);
- SAFE_MALLOC(NUM, values, size, goto cleanup;);
+ SAFE_MALLOC(void*, keys, size, goto cleanup;);
+ SAFE_MALLOC(void*, values, size, goto cleanup;);
- memcpy(keys, htp->keys, sizeof(NUM) * size);
+ memcpy(keys, htp->keys, sizeof(void*) * size);
memcpy(values, htp->values, sizeof(void*) * size);
}
- SAFE_REALLOC(NUM, htp->keys, htp->capability, goto cleanup;);
+ SAFE_REALLOC(void*, htp->keys, htp->capability, goto cleanup;);
- memset(htp->keys, 0xff, sizeof (NUM) * htp->capability);
+ memset(htp->keys, 0, sizeof (void*) * htp->capability);
SAFE_REALLOC(void*, htp->values, htp->capability, goto cleanup;);
@@ -122,8 +211,46 @@ ht_expand(ht *htp)
return 1;
}
+#define PERTURBATION_SHIFT 5
+
+/* On error return non-zero. */
+static BOOL
+ht_probe(CC_MOD(ht *) htp, CC_MOD(void *) key, NUM *result)
+{
+ NUM hashed = htp->hf(key);
+ NUM ikey = hashed % htp->capability;
+ unsigned long perturbation = hashed - ikey;
+
+ for (NUM count = 0;
+ count < htp->capability * HT_REHASH_THRESHOLD &&
+ *(htp->indices+ikey) >= 0 &&
+ /* continue if keys don't match */
+ !(htp->cf
+ (*(htp->keys+*(htp->indices+ikey)), key));
+ count++) {
+ ikey = 5 * ikey + 1 + perturbation;
+ perturbation >>= PERTURBATION_SHIFT;
+ ikey %= htp->capability;
+ }
+
+ if (*(htp->indices+ikey) >= 0 &&
+ !(htp->cf(*(htp->keys+*(htp->indices+ikey)), key))) {
+ /* failed for some reason */
+ fleprintf("Fail to probe a location for the key, ikey = %ld, "
+ "last index = %ld\n", ikey, *(htp->indices+ikey));
+ return 1;
+ }
+
+ *result = ikey;
+
+ /* if (*((NUM*) key) == 3) {
+ * fleprintf("ikey = %ld, result = %ld\n", ikey, *result);
+ * } */
+ return 0;
+}
+
BOOL
-ht_insert(ht *htp, NUM key, void *value)
+ht_insert(ht * const restrict htp, void *key, void *value)
{
if (htp->size+1 >= HT_REHASH_THRESHOLD * htp->capability) {
if (ht_expand(htp)) {
@@ -132,15 +259,10 @@ ht_insert(ht *htp, NUM key, void *value)
}
}
- NUM ikey = key % htp->capability;
+ NUM ikey = 0;
- /* linear probing */
- for (NUM count = 0;
- count < htp->capability * HT_REHASH_THRESHOLD &&
- *(htp->indices+ikey) >= 0 &&
- *(htp->keys+*(htp->indices+ikey)) != key;
- count++)
- ikey = (ikey + 1) % htp->capability;
+ /* check errors */
+ if (ht_probe(htp, key, &ikey)) return 1;
if (*(htp->indices+ikey) < 0) {
/* not inserted before */
@@ -151,15 +273,9 @@ ht_insert(ht *htp, NUM key, void *value)
*(htp->values+htp->size) = value;
(htp->size)++;
- } else if (*(htp->keys+*(htp->indices+ikey)) == key) {
- *(htp->values+*(htp->indices+ikey)) = value;
} else {
- fleprintf("Fail to probe an appropriate slot for %ld, "
- "the result ikey = %ld, last index = %ld, "
- "last key = %ld\n",
- key, ikey, *(htp->keys+*(htp->indices+ikey)),
- *(htp->indices+ikey));
- return 1;
+ /* error check is performed in ht_probe */
+ *(htp->values+*(htp->indices+ikey)) = value;
}
/* fleprintf("ikey = %ld, size = %d, capability = %llu\n",
@@ -169,33 +285,39 @@ ht_insert(ht *htp, NUM key, void *value)
}
BOOL
-ht_delete(ht * const restrict htp, NUM key, int flag)
+ht_delete(ht * const restrict htp, void *key, HT_DELETE_FLAG flag)
{
- NUM ikey = key % htp->capability;
+ NUM ikey = 0;
- /* linear probing */
- for (NUM count = 0;
- count < htp->capability * HT_REHASH_THRESHOLD &&
- *(htp->indices+ikey) > 0 &&
- *(htp->keys+*(htp->indices+ikey)) != key;
- count++)
- ikey = (ikey + 1) % htp->capability;
+ /* check errors */
+ if (ht_probe(htp, key, &ikey)) return 1;
if (*(htp->indices+ikey) < 0) {
/* not inserted before */
- } else if (*(htp->keys+*(htp->indices+ikey)) == key) {
+ } else {
(htp->size)--;
- *(htp->keys+*(htp->indices+ikey)) = -1;
- if (flag) {
+
+ switch (flag) {
+ case DELETE_KEY:
+ case DELETE_EVERY:
+ free(*(htp->keys+*(htp->indices+ikey)));
+ break;
+ default:
+ break;
+ }
+
+ switch (flag) {
+ case DELETE_VALUE:
+ case DELETE_EVERY:
free(*(htp->values+*(htp->indices+ikey)));
- *(htp->values+*(htp->indices+ikey)) = NULL;
+ break;
+ default:
+ break;
}
+
+ *(htp->values+*(htp->indices+ikey)) = NULL;
+ *(htp->keys+*(htp->indices+ikey)) = NULL;
*(htp->indices+ikey) = -1;
- } else {
- fleprintf("Fail to probe an appropriate slot for %ld, "
- "the result ikey = %ld\n",
- key, ikey);
- return 1;
}
return 0;
@@ -203,28 +325,16 @@ ht_delete(ht * const restrict htp, NUM key, int flag)
P_ATTR
void *
-ht_find(CCR_MOD(ht *) htp, NUM key)
+ht_find(CCR_MOD(ht *) htp, void *key)
{
- NUM ikey = key % htp->capability;
+ NUM ikey = 0;
- /* linear probing */
- for (NUM count = 0;
- count < htp->capability * HT_REHASH_THRESHOLD &&
- *(htp->indices+ikey) >= 0 &&
- *(htp->keys+*(htp->indices+ikey)) != key;
- count++)
- ikey = (ikey + 1) % htp->capability;
+ /* check errors */
+ if (ht_probe(htp, key, &ikey)) return NULL;
- if (*(htp->indices+ikey) >= 0 &&
- *(htp->keys+*(htp->indices+ikey)) == key)
- return *(htp->values+*(htp->indices+ikey));
- else if (*(htp->indices+ikey) >= 0 &&
- *(htp->keys+*(htp->indices+ikey)) >= 0)
- fleprintf("Fail to probe an appropriate slot for %ld, "
- "with the result ikey = %ld\n",
- key, ikey);
+ if (*(htp->indices+ikey) < 0) return NULL;
- return NULL;
+ return *(htp->values+*(htp->indices+ikey));
}
P_ATTR
@@ -242,7 +352,7 @@ ht_values(CCR_MOD(ht *) htp)
}
P_ATTR
-NUM *
+void **
ht_keys(CCR_MOD(ht *) htp)
{
return htp->keys;
diff --git a/src/ht.h b/src/ht.h
index e5d16f7..617fd94 100644
--- a/src/ht.h
+++ b/src/ht.h
@@ -2,7 +2,7 @@
#define HT_H
#include "util.h"
-enum { HT_INIT_CAP = 257 };
+enum { HT_INIT_CAP = 256 };
/* TODO: Modify the probing algorithm. Imitate the algorithm used by
python's hash tables. The source code of the python dict object
@@ -21,51 +21,91 @@ enum { HT_INIT_CAP = 257 };
know if two keys are "equal". */
/* type for a hash function */
-typedef NUM (*hash_func_t) (void *key);
+typedef NUM (*hash_func_t) (CCR_MOD(void *)key);
/* comparison function type */
-typedef BOOL (*compare_func_t) (void *keya, void *keyb);
+typedef BOOL (*compare_func_t)
+(CCR_MOD(void *)keya, CCR_MOD(void *)keyb);
struct ht_s {
- NUM *keys;
+ void **keys;
void **values;
NUM *indices;
UNUM capability;
/* REVIEW: What is the purpose of SIZE? */
/* ANSWER: For looping over keys / values. */
int size;
- /* hash_func_t hf;
- * compare_func_t cf; */
+ hash_func_t hf;
+ compare_func_t cf;
};
typedef struct ht_s ht;
/* On error return NULL */
-ht *new_ht(UNUM size);
-
+ht *new_ht(UNUM size, int nargs, ...);
+
+/* This enum empowers the user the ability to control how to free the
+ hash table. The destroy function always frees keys, values, and
+ indices pointers.
+
+ If the option contains the word "SELF", it means to also free the
+ hash table pointer itself. Otherwise, it contains NO_SELF, and it
+ means not to free the hash table pointer.
+
+ If it contains the word "KEY" (respectively "VALUE"), then it means
+ to also free the pointers stored in the keys (respectively values)
+ array. How to free the pointers? If the option contains the word
+ "FIRST", then it means to free the first pointer stored in the
+ array corresponding to the word before "FIRST". And it frees each
+ and every of the other word's pointers. To free both keys and
+ values pointers in the same manner, i.e. free both of the first
+ pointers or free each and every pointers, use the option containing
+ "EVERY" or "EVERY_FIRST" respectively. */
enum HT_DESTROY_FLAG_e {
DESTROY_NONE_NO_SELF,
DESTROY_NONE_SELF,
- DESTROY_EVERY_NO_SELF,
+ DESTROY_KEY_SELF,
+ DESTROY_KEY_FIRST_SELF,
+ DESTROY_KEY_NO_SELF,
+ DESTROY_KEY_FIRST_NO_SELF,
+ DESTROY_VALUE_SELF,
+ DESTROY_VALUE_FIRST_SELF,
+ DESTROY_VALUE_NO_SELF,
+ DESTROY_VALUE_FIRST_NO_SELF,
DESTROY_EVERY_SELF,
- DESTROY_FIRST_NO_SELF,
- DESTROY_FIRST_SELF
+ DESTROY_EVERY_FIRST_SELF,
+ DESTROY_EVERY_NO_SELF,
+ DESTROY_EVERY_FIRST_NO_SELF,
+ DESTROY_KEY_FIRST_VALUE_SELF,
+ DESTROY_KEY_FIRST_VALUE_NO_SELF,
+ DESTROY_KEY_VALUE_FIRST_SELF,
+ DESTROY_KEY_VALUE_FIRST_NO_SELF
};
typedef enum HT_DESTROY_FLAG_e HT_DESTROY_FLAG;
+
void destroy_ht(ht * restrict htp, HT_DESTROY_FLAG flag);
-BOOL ht_insert(ht *htp, NUM key, void *value);
+BOOL ht_insert(ht * const restrict htp, void *key, void *value);
P_ATTR NUM ht_size(CCR_MOD(ht *) htp);
P_ATTR void **ht_values(CCR_MOD(ht *) htp);
-P_ATTR NUM *ht_keys(CCR_MOD(ht *) htp);
+P_ATTR void **ht_keys(CCR_MOD(ht *) htp);
+
+enum HT_DELETE_FLAG_e {
+ DELETE_NONE,
+ DELETE_KEY,
+ DELETE_VALUE,
+ DELETE_EVERY
+};
+
+typedef enum HT_DELETE_FLAG_e HT_DELETE_FLAG;
-/* If FLAG is non-zero, also free the value pointer. */
-BOOL ht_delete(ht * const restrict htp, NUM key, int flag);
+BOOL ht_delete(ht * const restrict htp, void *key,
+ HT_DELETE_FLAG flag);
-P_ATTR void *ht_find(CCR_MOD(ht *) htp, NUM key);
+P_ATTR void *ht_find(CCR_MOD(ht *) htp, void *key);
#endif
diff --git a/src/test/check_grammar.c b/src/test/check_grammar.c
index ca70fa9..2b6c8f2 100644
--- a/src/test/check_grammar.c
+++ b/src/test/check_grammar.c
@@ -3,6 +3,8 @@
#include <stdlib.h>
#include "../grammar.h"
+#define GET_KEY(T, K, N) *((T*)*(K+N))
+
#define READ_INTO_CPA(N, U, I, VA, VI, CP) do { \
U = new_utf8(N, 1); \
I = get_info((str *)U, 0); \
@@ -174,11 +176,11 @@ main(UNUSED int argc, UNUSED char **argv)
for (NUM i = 0; i < left_len; i++) {
ht *temp = NULL;
- temp = new_ht(left_len << 1);
+ temp = new_ht(left_len << 1, 0);
*(terminal_hts+i) = *temp;
free(temp);
- temp = new_ht(left_len << 1);
+ temp = new_ht(left_len << 1, 0);
*(predicate_hts+i) = *temp;
free(temp);
}
@@ -188,15 +190,15 @@ main(UNUSED int argc, UNUSED char **argv)
goto cleanup;
}
- NUM ht_len = 0, *keys = NULL;
+ NUM ht_len = 0, **keys = NULL;
for (NUM i = 0; i < left_len;) {
printf("Nonterminal %ld contains the following terminals in the "
"FIRST set:\n", i);
ht_len = ht_size(terminal_hts+i);
- keys = ht_keys(terminal_hts+i++);
+ keys = (NUM **) ht_keys(terminal_hts+i++);
for (NUM j = 0; j < ht_len;) {
- printf("T %ld", *(keys+j));
+ printf("T %ld", GET_KEY(NUM, keys, j));
if (++j<ht_len) printf(", ");
else printf("\n");
}
@@ -210,11 +212,11 @@ main(UNUSED int argc, UNUSED char **argv)
for (NUM i = 0; i < left_len; i++) {
ht *temp = NULL;
- temp = new_ht(left_len << 1);
+ temp = new_ht(left_len << 1, 0);
*(test_terminal_hts+i) = *temp;
free(temp);
- temp = new_ht(left_len << 1);
+ temp = new_ht(left_len << 1, 0);
*(test_predicate_hts+i) = *temp;
free(temp);
}
@@ -238,17 +240,17 @@ main(UNUSED int argc, UNUSED char **argv)
printf(" has the following FIRST set:\n");
ht_len = ht_size(test_terminal_hts);
- keys = ht_keys(test_terminal_hts);
+ keys = (NUM **) ht_keys(test_terminal_hts);
for (NUM i = 0; i < ht_len;) {
- printf("T %ld", *(keys+i));
+ printf("T %ld", GET_KEY(NUM, keys, i));
if (++i<ht_len) printf(", ");
else printf("\n");
}
ht_len = ht_size(test_predicate_hts);
- keys = ht_keys(test_predicate_hts);
+ keys = (NUM **) ht_keys(test_predicate_hts);
for (NUM i = 0; i < ht_len;) {
- printf("PT %ld", *(keys+i));
+ printf("PT %ld", GET_KEY(NUM, keys, i));
if (++i<ht_len) printf(", ");
else printf("\n");
}
@@ -265,11 +267,11 @@ main(UNUSED int argc, UNUSED char **argv)
for (NUM i = 0; i < left_len; i++) {
ht *temp = NULL;
- temp = new_ht(left_len << 1);
+ temp = new_ht(left_len << 1, 0);
*(result_terminal_hts+i) = *temp;
free(temp);
- temp = new_ht(left_len << 1);
+ temp = new_ht(left_len << 1, 0);
*(result_predicate_hts+i) = *temp;
free(temp);
}
@@ -284,10 +286,10 @@ main(UNUSED int argc, UNUSED char **argv)
printf("Nonterminal %ld contains the following terminals in the "
"FOLLOW set:\n", i);
ht_len = ht_size(result_terminal_hts+i);
- keys = ht_keys(result_terminal_hts+i++);
+ keys = (NUM **) ht_keys(result_terminal_hts+i++);
for (NUM j = 0; j < ht_len;) {
- if (*(keys+j) == END_OF_INPUT) printf("T $");
- else printf("T %ld", *(keys+j));
+ if (GET_KEY(NUM, keys, j) == END_OF_INPUT) printf("T $");
+ else printf("T %ld", GET_KEY(NUM, keys, j));
if (++j<ht_len) printf(", ");
else printf("\n");
}
@@ -297,42 +299,42 @@ main(UNUSED int argc, UNUSED char **argv)
if (epsilon_array) free(epsilon_array);
if (terminal_hts) {
for (NUM i = 0; i < left_len; i++)
- destroy_ht(terminal_hts+i, DESTROY_NONE_NO_SELF);
+ destroy_ht(terminal_hts+i, DESTROY_KEY_NO_SELF);
free(terminal_hts);
}
if (predicate_hts) {
for (NUM i = 0; i < left_len; i++)
- destroy_ht(predicate_hts+i, DESTROY_NONE_NO_SELF);
+ destroy_ht(predicate_hts+i, DESTROY_KEY_NO_SELF);
free(predicate_hts);
}
if (test_terminal_hts) {
for (NUM i = 0; i < left_len; i++)
- destroy_ht(test_terminal_hts+i, DESTROY_NONE_NO_SELF);
+ destroy_ht(test_terminal_hts+i, DESTROY_KEY_NO_SELF);
free(test_terminal_hts);
}
if (test_predicate_hts) {
for (NUM i = 0; i < left_len; i++)
- destroy_ht(test_predicate_hts+i, DESTROY_NONE_NO_SELF);
+ destroy_ht(test_predicate_hts+i, DESTROY_KEY_NO_SELF);
free(test_predicate_hts);
}
if (result_terminal_hts) {
for (NUM i = 0; i < left_len; i++)
- destroy_ht(result_terminal_hts+i, DESTROY_NONE_NO_SELF);
+ destroy_ht(result_terminal_hts+i, DESTROY_KEY_NO_SELF);
free(result_terminal_hts);
}
if (result_predicate_hts) {
for (NUM i = 0; i < left_len; i++)
- destroy_ht(result_predicate_hts+i, DESTROY_NONE_NO_SELF);
+ destroy_ht(result_predicate_hts+i, DESTROY_KEY_NO_SELF);
free(result_predicate_hts);
}
if (test_tnt_string) destroy_list(test_tnt_string, 1);
- if (tnt_ht) destroy_ht(tnt_ht, DESTROY_NONE_SELF);
+ if (tnt_ht) destroy_ht(tnt_ht, DESTROY_KEY_NO_SELF);
destroy_grammar(g, 1);
diff --git a/src/test/check_ht.c b/src/test/check_ht.c
index 00cd2f1..0298087 100644
--- a/src/test/check_ht.c
+++ b/src/test/check_ht.c
@@ -3,19 +3,20 @@
int main(int UNUSED argc, char ** UNUSED argv)
{
- ht *htp = new_ht(HT_INIT_CAP);
+
+ ht *htp = new_ht(HT_INIT_CAP, 0);
NUM *temp = MYALLOC(NUM, 1), key = 1023;
*temp = 12345;
- if (ht_insert(htp, key, temp)) {
+ if (ht_insert(htp, &key, temp)) {
fleprintf0("Fail to insert\n");
free(temp);
destroy_ht(htp, DESTROY_EVERY_SELF);
return 1;
}
- if ((temp = ht_find(htp, key))) {
+ if ((temp = ht_find(htp, &key))) {
fleprintf("We found value %ld for key %ld\n",
*temp, key);
} else
@@ -27,25 +28,27 @@ int main(int UNUSED argc, char ** UNUSED argv)
for (NUM i = 0; i < size; i++)
fleprintf("The %ld-th element has key %ld and value %ld\n",
- i, *(ht_keys(htp)+i), *((NUM *)*(ht_values(htp)+i)));
+ i,
+ *((NUM *)*(ht_keys(htp)+i)),
+ *((NUM *)*(ht_values(htp)+i)));
- if (ht_delete(htp, key, 1)) {
+ if (ht_delete(htp, &key, DELETE_VALUE)) {
fleprintf("Fail to delete key %ld\n", key);
- destroy_ht(htp, DESTROY_EVERY_SELF);
+ destroy_ht(htp, DESTROY_VALUE_SELF);
return 1;
}
fleprintf0("After the deletion, ");
- if ((temp = ht_find(htp, key))) {
+ if ((temp = ht_find(htp, &key))) {
eprintf("We found value %ld for key %ld\n",
*temp, key);
- destroy_ht(htp, DESTROY_EVERY_SELF);
+ destroy_ht(htp, DESTROY_VALUE_SELF);
return 1;
} else {
eprintf("We found no value for key %ld\n", key);
}
- destroy_ht(htp, DESTROY_EVERY_SELF);
+ destroy_ht(htp, DESTROY_VALUE_SELF);
return 0;
}
diff --git a/src/util.h b/src/util.h
index 3fd533c..fae8517 100644
--- a/src/util.h
+++ b/src/util.h
@@ -16,6 +16,7 @@ typedef unsigned char BOOL;
#define HC_ATTR __attribute__((__hot__, __const__))
#define H_ATTR __attribute__((__hot__))
#define P_ATTR __attribute__((__pure__))
+#define C_ATTR __attribute__((__const__))
#define UNUSED __attribute__((__unused__))
#define U_ATTR UNUSED