Implemented a hash table with any type of keys

author: JSDurand <mmemmew@gmail.com> 2022-01-22 02:36:54 +0800
committer: JSDurand <mmemmew@gmail.com> 2022-01-22 02:36:54 +0800
commit: bad2b1934da66021cbc7f0d715686706bd444449 (patch)
tree: 78c340d5f834bbc5864a97dcb23ff4ec41e23072
parent: 53865aad225ffbe5cf3c42736e5a2095092f9fff (diff)
7 files changed, 406 insertions, 153 deletions
diff --git a/src/bsr.h b/src/bsr.h
index 8578ab2..d6b96e5 100644
--- a/src/bsr.h
+++ b/src/bsr.h
@@ -20,7 +20,7 @@
    BSR has already been added to a set.  I guess the reason is that
    this is not the bottleneck of performance in its applications.  But
    I am not satisfied with that solution.  I want to take advantage of
-   hash-table's almost constant time look-up feature to implement
+   hash-table's almost constant-time look-up feature to implement
    this. */
 
 /* A BSR set has two types.
@@ -53,8 +53,13 @@
    each type.  Each set is an array of arrays, where the root array's
    indices correspond to non-terminals, and the leaf arrays correspond
    to rules of that non-terminal.  Then the rest is implemented
-   through hash-tables, where a hash table's values are yet another
-   hash table, and so on.
+   through hash-tables, whose keys are tuples of integers.  For the
+   first type, the keys are the (i, k) pairs and the values are those
+   j.  For the second, the keys are the (b, i, k) tupels and the
+   values are those j.  The reason to use i and k as keys is that they
+   represent the left and the right extent of the BSR respectively, so
+   we index them in order to find them easily.  In addition, this will
+   be useful if we want to extract SPPF out of a BSR set.
 
    I don't know if there are better implementations.  If I think of
    better ways in the future, I will re-implement the BSR sets. */
@@ -70,7 +75,32 @@
    (X, a, i, j, k),
 
    i.e. as a first-type BSR set.  But the meaning of the "i" is
-   different. */
+   different.
+
+   However, we shall notice that there is a natural "grading" on the
+   process descriptors.  The k-th grading consists of the descriptors
+   with the last element in the tuple equal to k.  In the clustered
+   nonterminal parsing algorithm, the process descriptors that are
+   added as a consequence of dealing with a process descriptor of
+   grading k must be of grade greater than or equal to k.  This means
+   that if all process descriptors of grade k have been processed and
+   the descriptors awaiting to be processed all have grades > k, then
+   we won't see any process descriptors of grade <= k again.  So we
+   can actually keep a list of process descriptors that serve the role
+   of the set U_k in the afore-mentionned paper.  This list
+   corresponds to the grading of the descriptors.  This way we don't
+   need to store the k in the descriptor.  Therefore a process
+   descriptor is represented as
+
+   (X, a, i, j).
+
+   Again we store an array of arrays corresponding to X and a.  Thus
+   the hash table simply has keys i and values j.
+
+   The main benefit of this is that after we find that all process
+   descriptors in R_k have been processed, we can free those
+   descriptors in U_k.  I think this is a good way to lower the space
+   requirements of this algorithm. */
 
 int place(int x);
 
diff --git a/src/grammar.c b/src/grammar.c
index a24f9cb..bbc6934 100644
--- a/src/grammar.c
+++ b/src/grammar.c
@@ -558,6 +558,10 @@ nt_first(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
 
   ht *terminal_ht = NULL, *pred_ht = NULL;
 
+  NUM *temp = NULL;
+  T *tempT = NULL;
+  PT *tempPT = NULL;
+
   /* lazy macro */
 
 #define BREAKOUT k = string_len
@@ -568,30 +572,42 @@ nt_first(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
     for (NUM i = 0; i < left_len; i++) {
       Rule_group *rg = grammar_rule(g, (NT) i);
       NUM rg_length = rg_len(rg);
-
-      for (NUM j = 0; j < rg_length;) {
-        List *string = rg_nth(rg, j++);
+      /* fleprintf("rg_length = %ld\n", rg_length); */
+      for (NUM j = 0; j < rg_length; j++) {
+        List *string = rg_nth(rg, j);
         NUM string_len = list_length(string);
+        /* fleprintf("j = %ld, len = %ld\n", j, string_len); */
         TNT *top = NULL;
 
-        for (NUM k = 0; k < string_len;) {
-          top = (TNT *) list_nth(string, k++);
+        for (NUM k = 0; k < string_len; k++) {
+          top = (TNT *) list_nth(string, k);
 
           switch (top->type) {
           case TERMINAL:
+            SAFE_MALLOC(NUM, tempT, 1, return 1;);
+            *tempT = top->data.t;
             if (first &&
-                ht_find(terminal_hts+i, top->data.t) == NULL) {
+                ht_find(terminal_hts+i, tempT) == NULL) {
               changed = 1;
-              ht_insert(terminal_hts+i, top->data.t, (void*)1);
+              ht_insert(terminal_hts+i, tempT, (void*)1);
+              /* fleprintf("After insertion we find %p for %ld\n",
+               *           ht_find(terminal_hts+i, tempT),
+               *           *tempT); */
               BREAKOUT;
+            } else {
+              free(tempT);
             }
             break;
           case PREDICATE:
+            SAFE_MALLOC(NUM, tempPT, 1, return 1;);
+            *tempPT = top->data.pt;
             if (first &&
-                ht_find(predicate_hts+i, top->data.pt) == NULL) {
+                ht_find(predicate_hts+i, tempPT) == NULL) {
               changed = 1;
-              ht_insert(predicate_hts+i, top->data.pt, (void*)1);
+              ht_insert(predicate_hts+i, tempPT, (void*)1);
               BREAKOUT;
+            } else {
+              free(tempPT);
             }
             break;
           default:
@@ -602,13 +618,20 @@ nt_first(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
                found. */
 
             NUM len = ht_size(terminal_ht);
-            NUM *keys = ht_keys(terminal_ht);
+            void **keys = ht_keys(terminal_ht);
 
             for (NUM ell = 0; ell < len;) {
-              NUM key = *(keys+ell++);
-              if (ht_find(terminal_hts+i, key) == NULL) {
+              NUM *key = *(keys+ell++);
+              SAFE_MALLOC(NUM, temp, 1, return 1;);
+              *temp = *key;
+              /* fleprintf("i = %ld, j = %ld, ", i, j); */
+              if (ht_find(terminal_hts+i, temp) == NULL) {
+                /* eprintf("new item\n"); */
                 changed = 1;
-                ht_insert(terminal_hts+i, key, (void*)1);
+                ht_insert(terminal_hts+i, temp, (void*)1);
+              } else {
+                /* eprintf("already found for key = %ld\n", *temp); */
+                free(temp);
               }
             }
 
@@ -616,10 +639,17 @@ nt_first(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
             keys = ht_keys(pred_ht);
 
             for (NUM ell = 0; ell < len;) {
-              NUM key = *(keys+ell++);
-              if (ht_find(predicate_hts+i, key) == NULL) {
+              NUM *key = *(keys+ell++);
+              SAFE_MALLOC(NUM, temp, 1, return 1;);
+              *temp = *key;
+              /* fleprintf("i = %ld, j = %ld, ", i, j); */
+              if (ht_find(predicate_hts+i, temp) == NULL) {
+                /* eprintf("new item\n"); */
                 changed = 1;
-                ht_insert(predicate_hts+i, key, (void*)1);
+                ht_insert(predicate_hts+i, temp, (void*)1);
+              } else {
+                free(temp);
+                /* eprintf("already found for key = %ld\n", *temp); */
               }
             }
 
@@ -649,22 +679,34 @@ tnt_first(CC_MOD(ht *) terminal_hts, CC_MOD(ht *) predicate_hts,
 
   if (!tnt_len) return 0;
 
-  NUM temp_len = 0, *keys = NULL;
+  NUM temp_len = 0, **keys = NULL;
 
   TNT *top = NULL;
 
   NT current = 0;
 
+  NUM *temp = NULL;
+  T *tempT = NULL;
+  PT *tempPT = NULL;
+
   for (NUM i = 0; i < tnt_len; i++) {
     top = (TNT *) list_nth(tnts, i);
 
     switch (top->type) {
     case TERMINAL:
-      ht_insert(result_terminals, top->data.t, (void *)1);
+      if (ht_find(result_terminals, &(top->data.t)) == NULL) {
+        SAFE_MALLOC(T, tempT, 1, return 1;);
+        *tempT = top->data.t;
+        ht_insert(result_terminals, tempT, (void *)1);
+      }
       return 0;
       break;
     case PREDICATE:
-      ht_insert(result_predicates, top->data.pt, (void *)1);
+      if (ht_find(result_predicates, &(top->data.pt)) == NULL) {
+        SAFE_MALLOC(PT, tempPT, 1, return 1;);
+        *tempPT = top->data.pt;
+        ht_insert(result_predicates, tempPT, (void *)1);
+      }
       return 0;
       break;
     default:
@@ -676,16 +718,26 @@ tnt_first(CC_MOD(ht *) terminal_hts, CC_MOD(ht *) predicate_hts,
       }
 
       temp_len = ht_size(terminal_hts+current);
-      keys = ht_keys(terminal_hts+current);
+      keys = (NUM **) ht_keys(terminal_hts+current);
 
-      for (NUM j = 0; j < temp_len;)
-        ht_insert(result_terminals, *(keys+j++), (void *)1);
+      for (NUM j = 0; j < temp_len; j++) {
+        if (ht_find(result_terminals, *(keys+j)) == NULL) {
+          SAFE_MALLOC(NUM, temp, 1, return 1;);
+          *temp = **(keys+j);
+          ht_insert(result_terminals, temp, (void *)1);
+        }
+      }
 
       temp_len = ht_size(predicate_hts+current);
-      keys = ht_keys(predicate_hts+current);
+      keys = (NUM **) ht_keys(predicate_hts+current);
 
-      for (NUM j = 0; j < temp_len;)
-        ht_insert(result_predicates, *(keys+j++), (void *)1);
+      for (NUM j = 0; j < temp_len; j++) {
+        if (ht_find(result_predicates, *(keys+j)) == NULL) {
+          SAFE_MALLOC(NUM, temp, 1, return 1;);
+          *temp = **(keys+j);
+          ht_insert(result_predicates, temp, (void *)1);
+        }
+      }
 
       if (!(*(nts+current))) i = tnt_len;
 
@@ -702,13 +754,22 @@ nt_follow(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
           ht * const restrict result_terminals,
           ht * const restrict result_predicates)
 {
-  ht_insert(result_terminals, END_OF_INPUT, (void*)1);
+  NUM *tempN = NULL;
+
+  SAFE_MALLOC(NUM, tempN, 1, return 1;);
+  *tempN = END_OF_INPUT;
+
+  if (ht_find(result_terminals, tempN) == NULL) {
+    ht_insert(result_terminals, tempN, (void*)1);
+  } else {
+    free(tempN);
+  }
 
   NUM left_len = grammar_left_len(g);
 
   BOOL changed = 0, first = 1;
 
-  NUM ht_len = 0, *keys = NULL;
+  NUM ht_len = 0, **keys = NULL;
 
   List *temp = NULL;
 
@@ -782,28 +843,34 @@ nt_follow(CC_MOD(Grammar *) g, CCR_MOD(BOOL *) nts,
           add_to_follow:
 
             ht_len = ht_size(result_terminals+i);
-            keys = ht_keys(result_terminals+i);
+            keys = (NUM **) ht_keys(result_terminals+i);
 
             for (NUM ell = 0; ell < ht_len;) {
-              NUM key = *(keys+ell++);
+              SAFE_MALLOC(NUM, tempN, 1, return 1;);
+              *tempN = **(keys+ell++);
               if (ht_find(result_terminals+top->data.nt,
-                          key) == NULL) {
+                          tempN) == NULL) {
                 changed = 1;
                 ht_insert(result_terminals+top->data.nt,
-                          key, (void*)1);
+                          tempN, (void*)1);
+              } else {
+                free(tempN);
               }
             }
 
             ht_len = ht_size(result_predicates+i);
-            keys = ht_keys(result_predicates+i);
+            keys = (NUM **) ht_keys(result_predicates+i);
 
             for (NUM ell = 0; ell < ht_len;) {
-              NUM key = *(keys+ell++);
+              SAFE_MALLOC(NUM, tempN, 1, return 1;);
+              *tempN = **(keys+ell++);
               if (ht_find(result_predicates+top->data.nt,
-                          key) == NULL) {
+                          tempN) == NULL) {
                 changed = 1;
                 ht_insert(result_predicates+top->data.nt,
-                          key, (void*)1);
+                          tempN, (void*)1);
+              } else {
+                free(tempN);
               }
             }
 
diff --git a/src/ht.c b/src/ht.c
index c44a57e..2fc1206 100644
--- a/src/ht.c
+++ b/src/ht.c
@@ -1,20 +1,73 @@
+#include <stdarg.h>
 #include <string.h>
 #include <stdio.h>
 #include "ht.h"
 
 #define HT_REHASH_THRESHOLD 0.5
 
+P_ATTR
+static NUM
+num_hft(CCR_MOD(void *) key)
+{
+  if (key == NULL) {
+    fleprintf0("Got NULL key!\n");
+    return 0;
+  }
+
+  return *((NUM *) key);
+}
+
+P_ATTR
+static BOOL
+num_cft(CCR_MOD(void *) keya, CCR_MOD(void *) keyb)
+{
+  if ((keya == NULL && keyb != NULL) ||
+      (keyb == NULL && keya != NULL))
+    return 0;
+
+  if (keya == NULL && keyb == NULL) return 1;
+
+  return *((NUM *) keya) == *((NUM *) keyb);
+}
+
 /* SIZE is a hint to the number of elements that might be put in the
    table.  In fact we will allocate more memory than this number, as a
-   hash table works better if it is not fully used. */
+   hash table works better if it is not fully used.
+
+   NARGS is the number of arguments to follow.  It should be between 0
+   and 2 (inclusive).  If there are more than one argument, the first
+   argument is used as the hashing function; if there are two
+   arguments, the second argument is used as the comparison
+   function. */
 ht *
-new_ht(UNUM size)
+new_ht(UNUM size, int nargs, ...)
 {
+
+  if (nargs < 0 || nargs > 2) {
+    fleprintf("NARGS should be between zero and two, but got %d\n",
+              nargs);
+    return NULL;
+  }
+
+  hash_func_t hft = num_hft;
+  compare_func_t cft = num_cft;
+
+  va_list args;
+
+  if (nargs) {
+    va_start(args, nargs);
+    if (nargs >= 1) hft = va_arg(args, hash_func_t);
+    if (nargs == 2) cft = va_arg(args, compare_func_t);
+    va_end(args);
+  }
+
   ht *htp = NULL;
   SAFE_MALLOC(ht, htp, 1, return NULL;);
 
   htp->capability = (size<<1)+(size>>1);
   htp->size = 0;
+  htp->hf = hft;
+  htp->cf = cft;
 
   /* For safer clean up */
   htp->keys = NULL;
@@ -47,13 +100,43 @@ void
 destroy_ht(ht * restrict htp, HT_DESTROY_FLAG flag)
 {
   switch (flag) {
+  case DESTROY_KEY_SELF:
+  case DESTROY_KEY_NO_SELF:
+  case DESTROY_EVERY_SELF:
   case DESTROY_EVERY_NO_SELF:
+  case DESTROY_KEY_VALUE_FIRST_SELF:
+  case DESTROY_KEY_VALUE_FIRST_NO_SELF:
+    for (NUM i = 0; i < htp->size;)
+      free(*(htp->keys+i++));
+    break;
+  case DESTROY_KEY_FIRST_SELF:
+  case DESTROY_KEY_FIRST_NO_SELF:
+  case DESTROY_EVERY_FIRST_SELF:
+  case DESTROY_EVERY_FIRST_NO_SELF:
+  case DESTROY_KEY_FIRST_VALUE_SELF:
+  case DESTROY_KEY_FIRST_VALUE_NO_SELF:
+    free(*(htp->keys));
+    break;
+  default:
+    break;
+  }
+
+  switch (flag) {
+  case DESTROY_VALUE_SELF:
+  case DESTROY_VALUE_NO_SELF:
   case DESTROY_EVERY_SELF:
+  case DESTROY_EVERY_NO_SELF:
+  case DESTROY_KEY_FIRST_VALUE_SELF:
+  case DESTROY_KEY_FIRST_VALUE_NO_SELF:
     for (NUM i = 0; i < htp->size;)
       free(*(htp->values+i++));
     break;
-  case DESTROY_FIRST_NO_SELF:
-  case DESTROY_FIRST_SELF:
+  case DESTROY_VALUE_FIRST_SELF:
+  case DESTROY_VALUE_FIRST_NO_SELF:
+  case DESTROY_EVERY_FIRST_SELF:
+  case DESTROY_EVERY_FIRST_NO_SELF:
+  case DESTROY_KEY_VALUE_FIRST_SELF:
+  case DESTROY_KEY_VALUE_FIRST_NO_SELF:
     free(*(htp->values));
     break;
   default:
@@ -68,8 +151,14 @@ destroy_ht(ht * restrict htp, HT_DESTROY_FLAG flag)
 
   switch (flag) {
   case DESTROY_NONE_SELF:
+  case DESTROY_KEY_SELF:
+  case DESTROY_KEY_FIRST_SELF:
+  case DESTROY_VALUE_SELF:
+  case DESTROY_VALUE_FIRST_SELF:
   case DESTROY_EVERY_SELF:
-  case DESTROY_FIRST_SELF:
+  case DESTROY_EVERY_FIRST_SELF:
+  case DESTROY_KEY_FIRST_VALUE_SELF:
+  case DESTROY_KEY_VALUE_FIRST_SELF:
     free(htp);
     break;
   default:
@@ -81,22 +170,22 @@ static BOOL
 ht_expand(ht *htp)
 {
   htp->capability <<= 1;
-  htp->capability++;
 
-  NUM *keys = NULL, size = htp->size;
+  void **keys = NULL;
+  NUM size = htp->size;
   void **values = NULL;
 
   if (size) {
-    SAFE_MALLOC(NUM, keys, size, goto cleanup;);
-    SAFE_MALLOC(NUM, values, size, goto cleanup;);
+    SAFE_MALLOC(void*, keys, size, goto cleanup;);
+    SAFE_MALLOC(void*, values, size, goto cleanup;);
 
-    memcpy(keys, htp->keys, sizeof(NUM) * size);
+    memcpy(keys, htp->keys, sizeof(void*) * size);
     memcpy(values, htp->values, sizeof(void*) * size);
   }
 
-  SAFE_REALLOC(NUM, htp->keys, htp->capability, goto cleanup;);
+  SAFE_REALLOC(void*, htp->keys, htp->capability, goto cleanup;);
 
-  memset(htp->keys, 0xff, sizeof (NUM) * htp->capability);
+  memset(htp->keys, 0, sizeof (void*) * htp->capability);
 
   SAFE_REALLOC(void*, htp->values, htp->capability, goto cleanup;);
 
@@ -122,8 +211,46 @@ ht_expand(ht *htp)
   return 1;
 }
 
+#define PERTURBATION_SHIFT 5
+
+/* On error return non-zero. */
+static BOOL
+ht_probe(CC_MOD(ht *) htp, CC_MOD(void *) key, NUM *result)
+{
+  NUM hashed = htp->hf(key);
+  NUM ikey = hashed % htp->capability;
+  unsigned long perturbation = hashed - ikey;
+
+  for (NUM count = 0;
+       count < htp->capability * HT_REHASH_THRESHOLD &&
+       *(htp->indices+ikey) >= 0 &&
+         /* continue if keys don't match */
+         !(htp->cf
+           (*(htp->keys+*(htp->indices+ikey)), key));
+       count++) {
+    ikey = 5 * ikey + 1 + perturbation;
+    perturbation >>= PERTURBATION_SHIFT;
+    ikey %= htp->capability;
+  }
+
+  if (*(htp->indices+ikey) >= 0 &&
+      !(htp->cf(*(htp->keys+*(htp->indices+ikey)), key))) {
+    /* failed for some reason */
+    fleprintf("Fail to probe a location for the key, ikey = %ld, "
+              "last index = %ld\n", ikey, *(htp->indices+ikey));
+    return 1;
+  }
+
+  *result = ikey;
+
+  /* if (*((NUM*) key) == 3) {
+   *   fleprintf("ikey = %ld, result = %ld\n", ikey, *result);
+   * } */
+  return 0;
+}
+
 BOOL
-ht_insert(ht *htp, NUM key, void *value)
+ht_insert(ht * const restrict htp, void *key, void *value)
 {
   if (htp->size+1 >= HT_REHASH_THRESHOLD * htp->capability) {
     if (ht_expand(htp)) {
@@ -132,15 +259,10 @@ ht_insert(ht *htp, NUM key, void *value)
     }
   }
 
-  NUM ikey = key % htp->capability;
+  NUM ikey = 0;
 
-  /* linear probing */
-  for (NUM count = 0;
-       count < htp->capability * HT_REHASH_THRESHOLD &&
-         *(htp->indices+ikey) >= 0 &&
-         *(htp->keys+*(htp->indices+ikey)) != key;
-       count++)
-    ikey = (ikey + 1) % htp->capability;
+  /* check errors */
+  if (ht_probe(htp, key, &ikey)) return 1;
 
   if (*(htp->indices+ikey) < 0) {
     /* not inserted before */
@@ -151,15 +273,9 @@ ht_insert(ht *htp, NUM key, void *value)
     *(htp->values+htp->size) = value;
 
     (htp->size)++;
-  } else if (*(htp->keys+*(htp->indices+ikey)) == key) {
-    *(htp->values+*(htp->indices+ikey)) = value;
   } else {
-    fleprintf("Fail to probe an appropriate slot for %ld, "
-              "the result ikey = %ld, last index = %ld, "
-              "last key = %ld\n",
-              key, ikey, *(htp->keys+*(htp->indices+ikey)),
-              *(htp->indices+ikey));
-    return 1;
+    /* error check is performed in ht_probe */
+    *(htp->values+*(htp->indices+ikey)) = value;
   }
 
   /* fleprintf("ikey = %ld, size = %d, capability = %llu\n",
@@ -169,33 +285,39 @@ ht_insert(ht *htp, NUM key, void *value)
 }
 
 BOOL
-ht_delete(ht * const restrict htp, NUM key, int flag)
+ht_delete(ht * const restrict htp, void *key, HT_DELETE_FLAG flag)
 {
-  NUM ikey = key % htp->capability;
+  NUM ikey = 0;
 
-  /* linear probing */
-  for (NUM count = 0;
-       count < htp->capability * HT_REHASH_THRESHOLD &&
-         *(htp->indices+ikey) > 0 &&
-         *(htp->keys+*(htp->indices+ikey)) != key;
-       count++)
-    ikey = (ikey + 1) % htp->capability;
+  /* check errors */
+  if (ht_probe(htp, key, &ikey)) return 1;
 
   if (*(htp->indices+ikey) < 0) {
     /* not inserted before */
-  } else if (*(htp->keys+*(htp->indices+ikey)) == key) {
+  } else {
     (htp->size)--;
-    *(htp->keys+*(htp->indices+ikey)) = -1;
-    if (flag) {
+
+    switch (flag) {
+    case DELETE_KEY:
+    case DELETE_EVERY:
+      free(*(htp->keys+*(htp->indices+ikey)));
+      break;
+    default:
+      break;
+    }
+
+    switch (flag) {
+    case DELETE_VALUE:
+    case DELETE_EVERY:
       free(*(htp->values+*(htp->indices+ikey)));
-      *(htp->values+*(htp->indices+ikey)) = NULL;
+      break;
+    default:
+      break;
     }
+
+    *(htp->values+*(htp->indices+ikey)) = NULL;
+    *(htp->keys+*(htp->indices+ikey)) = NULL;
     *(htp->indices+ikey) = -1;
-  } else {
-    fleprintf("Fail to probe an appropriate slot for %ld, "
-              "the result ikey = %ld\n",
-              key, ikey);
-    return 1;
   }
 
   return 0;
@@ -203,28 +325,16 @@ ht_delete(ht * const restrict htp, NUM key, int flag)
 
 P_ATTR
 void *
-ht_find(CCR_MOD(ht *) htp, NUM key)
+ht_find(CCR_MOD(ht *) htp, void *key)
 {
-  NUM ikey = key % htp->capability;
+  NUM ikey = 0;
 
-  /* linear probing */
-  for (NUM count = 0;
-       count < htp->capability * HT_REHASH_THRESHOLD &&
-         *(htp->indices+ikey) >= 0 &&
-         *(htp->keys+*(htp->indices+ikey)) != key;
-       count++)
-    ikey = (ikey + 1) % htp->capability;
+  /* check errors */
+  if (ht_probe(htp, key, &ikey)) return NULL;
 
-  if (*(htp->indices+ikey) >= 0 &&
-      *(htp->keys+*(htp->indices+ikey)) == key)
-    return *(htp->values+*(htp->indices+ikey));
-  else if (*(htp->indices+ikey) >= 0 &&
-           *(htp->keys+*(htp->indices+ikey)) >= 0)
-    fleprintf("Fail to probe an appropriate slot for %ld, "
-              "with the result ikey = %ld\n",
-              key, ikey);
+  if (*(htp->indices+ikey) < 0) return NULL;
 
-  return NULL;
+  return *(htp->values+*(htp->indices+ikey));
 }
 
 P_ATTR
@@ -242,7 +352,7 @@ ht_values(CCR_MOD(ht *) htp)
 }
 
 P_ATTR
-NUM *
+void **
 ht_keys(CCR_MOD(ht *) htp)
 {
   return htp->keys;
diff --git a/src/ht.h b/src/ht.h
index e5d16f7..617fd94 100644
--- a/src/ht.h
+++ b/src/ht.h
@@ -2,7 +2,7 @@
 #define HT_H
 #include "util.h"
 
-enum { HT_INIT_CAP = 257 };
+enum { HT_INIT_CAP = 256 };
 
 /* TODO: Modify the probing algorithm.  Imitate the algorithm used by
    python's hash tables.  The source code of the python dict object
@@ -21,51 +21,91 @@ enum { HT_INIT_CAP = 257 };
      know if two keys are "equal". */
 
 /* type for a hash function */
-typedef NUM (*hash_func_t) (void *key);
+typedef NUM (*hash_func_t) (CCR_MOD(void *)key);
 
 /* comparison function type */
-typedef BOOL (*compare_func_t) (void *keya, void *keyb);
+typedef BOOL (*compare_func_t)
+(CCR_MOD(void *)keya, CCR_MOD(void *)keyb);
 
 struct ht_s {
-  NUM *keys;
+  void **keys;
   void **values;
   NUM *indices;
   UNUM capability;
   /* REVIEW: What is the purpose of SIZE? */
   /* ANSWER: For looping over keys / values. */
   int size;
-  /* hash_func_t hf;
-   * compare_func_t cf; */
+  hash_func_t hf;
+  compare_func_t cf;
 };
 
 typedef struct ht_s ht;
 
 /* On error return NULL */
-ht *new_ht(UNUM size);
-
+ht *new_ht(UNUM size, int nargs, ...);
+
+/* This enum empowers the user the ability to control how to free the
+   hash table.  The destroy function always frees keys, values, and
+   indices pointers.
+
+   If the option contains the word "SELF", it means to also free the
+   hash table pointer itself.  Otherwise, it contains NO_SELF, and it
+   means not to free the hash table pointer.
+
+   If it contains the word "KEY" (respectively "VALUE"), then it means
+   to also free the pointers stored in the keys (respectively values)
+   array.  How to free the pointers?  If the option contains the word
+   "FIRST", then it means to free the first pointer stored in the
+   array corresponding to the word before "FIRST".  And it frees each
+   and every of the other word's pointers.  To free both keys and
+   values pointers in the same manner, i.e. free both of the first
+   pointers or free each and every pointers, use the option containing
+   "EVERY" or "EVERY_FIRST" respectively. */
 enum HT_DESTROY_FLAG_e {
   DESTROY_NONE_NO_SELF,
   DESTROY_NONE_SELF,
-  DESTROY_EVERY_NO_SELF,
+  DESTROY_KEY_SELF,
+  DESTROY_KEY_FIRST_SELF,
+  DESTROY_KEY_NO_SELF,
+  DESTROY_KEY_FIRST_NO_SELF,
+  DESTROY_VALUE_SELF,
+  DESTROY_VALUE_FIRST_SELF,
+  DESTROY_VALUE_NO_SELF,
+  DESTROY_VALUE_FIRST_NO_SELF,
   DESTROY_EVERY_SELF,
-  DESTROY_FIRST_NO_SELF,
-  DESTROY_FIRST_SELF
+  DESTROY_EVERY_FIRST_SELF,
+  DESTROY_EVERY_NO_SELF,
+  DESTROY_EVERY_FIRST_NO_SELF,
+  DESTROY_KEY_FIRST_VALUE_SELF,
+  DESTROY_KEY_FIRST_VALUE_NO_SELF,
+  DESTROY_KEY_VALUE_FIRST_SELF,
+  DESTROY_KEY_VALUE_FIRST_NO_SELF
 };
 
 typedef enum HT_DESTROY_FLAG_e HT_DESTROY_FLAG;
+
 void destroy_ht(ht * restrict htp, HT_DESTROY_FLAG flag);
 
-BOOL ht_insert(ht *htp, NUM key, void *value);
+BOOL ht_insert(ht * const restrict htp, void *key, void *value);
 
 P_ATTR NUM ht_size(CCR_MOD(ht *) htp);
 
 P_ATTR void **ht_values(CCR_MOD(ht *) htp);
 
-P_ATTR NUM *ht_keys(CCR_MOD(ht *) htp);
+P_ATTR void **ht_keys(CCR_MOD(ht *) htp);
+
+enum HT_DELETE_FLAG_e {
+  DELETE_NONE,
+  DELETE_KEY,
+  DELETE_VALUE,
+  DELETE_EVERY
+};
+
+typedef enum HT_DELETE_FLAG_e HT_DELETE_FLAG;
 
-/* If FLAG is non-zero, also free the value pointer. */
-BOOL ht_delete(ht * const restrict htp, NUM key, int flag);
+BOOL ht_delete(ht * const restrict htp, void *key,
+               HT_DELETE_FLAG flag);
 
-P_ATTR void *ht_find(CCR_MOD(ht *) htp, NUM key);
+P_ATTR void *ht_find(CCR_MOD(ht *) htp, void *key);
 
 #endif
diff --git a/src/test/check_grammar.c b/src/test/check_grammar.c
index ca70fa9..2b6c8f2 100644
--- a/src/test/check_grammar.c
+++ b/src/test/check_grammar.c
@@ -3,6 +3,8 @@
 #include <stdlib.h>
 #include "../grammar.h"
 
+#define GET_KEY(T, K, N) *((T*)*(K+N))
+
 #define READ_INTO_CPA(N, U, I, VA, VI, CP) do {         \
     U = new_utf8(N, 1);                                 \
     I = get_info((str *)U, 0);                          \
@@ -174,11 +176,11 @@ main(UNUSED int argc, UNUSED char **argv)
 
   for (NUM i = 0; i < left_len; i++) {
     ht *temp = NULL;
-    temp = new_ht(left_len << 1);
+    temp = new_ht(left_len << 1, 0);
     *(terminal_hts+i) = *temp;
     free(temp);
 
-    temp = new_ht(left_len << 1);
+    temp = new_ht(left_len << 1, 0);
     *(predicate_hts+i) = *temp;
     free(temp);
   }
@@ -188,15 +190,15 @@ main(UNUSED int argc, UNUSED char **argv)
     goto cleanup;
   }
 
-  NUM ht_len = 0, *keys = NULL;
+  NUM ht_len = 0, **keys = NULL;
 
   for (NUM i = 0; i < left_len;) {
     printf("Nonterminal %ld contains the following terminals in the "
            "FIRST set:\n", i);
     ht_len = ht_size(terminal_hts+i);
-    keys = ht_keys(terminal_hts+i++);
+    keys = (NUM **) ht_keys(terminal_hts+i++);
     for (NUM j = 0; j < ht_len;) {
-      printf("T %ld", *(keys+j));
+      printf("T %ld", GET_KEY(NUM, keys, j));
       if (++j<ht_len) printf(", ");
       else printf("\n");
     }
@@ -210,11 +212,11 @@ main(UNUSED int argc, UNUSED char **argv)
 
   for (NUM i = 0; i < left_len; i++) {
     ht *temp = NULL;
-    temp = new_ht(left_len << 1);
+    temp = new_ht(left_len << 1, 0);
     *(test_terminal_hts+i) = *temp;
     free(temp);
 
-    temp = new_ht(left_len << 1);
+    temp = new_ht(left_len << 1, 0);
     *(test_predicate_hts+i) = *temp;
     free(temp);
   }
@@ -238,17 +240,17 @@ main(UNUSED int argc, UNUSED char **argv)
   printf(" has the following FIRST set:\n");
 
   ht_len = ht_size(test_terminal_hts);
-  keys = ht_keys(test_terminal_hts);
+  keys = (NUM **) ht_keys(test_terminal_hts);
   for (NUM i = 0; i < ht_len;) {
-    printf("T %ld", *(keys+i));
+    printf("T %ld", GET_KEY(NUM, keys, i));
     if (++i<ht_len) printf(", ");
     else printf("\n");
   }
 
   ht_len = ht_size(test_predicate_hts);
-  keys = ht_keys(test_predicate_hts);
+  keys = (NUM **) ht_keys(test_predicate_hts);
   for (NUM i = 0; i < ht_len;) {
-    printf("PT %ld", *(keys+i));
+    printf("PT %ld", GET_KEY(NUM, keys, i));
     if (++i<ht_len) printf(", ");
     else printf("\n");
   }
@@ -265,11 +267,11 @@ main(UNUSED int argc, UNUSED char **argv)
 
   for (NUM i = 0; i < left_len; i++) {
     ht *temp = NULL;
-    temp = new_ht(left_len << 1);
+    temp = new_ht(left_len << 1, 0);
     *(result_terminal_hts+i) = *temp;
     free(temp);
 
-    temp = new_ht(left_len << 1);
+    temp = new_ht(left_len << 1, 0);
     *(result_predicate_hts+i) = *temp;
     free(temp);
   }
@@ -284,10 +286,10 @@ main(UNUSED int argc, UNUSED char **argv)
     printf("Nonterminal %ld contains the following terminals in the "
            "FOLLOW set:\n", i);
     ht_len = ht_size(result_terminal_hts+i);
-    keys = ht_keys(result_terminal_hts+i++);
+    keys = (NUM **) ht_keys(result_terminal_hts+i++);
     for (NUM j = 0; j < ht_len;) {
-      if (*(keys+j) == END_OF_INPUT) printf("T $");
-      else printf("T %ld", *(keys+j));
+      if (GET_KEY(NUM, keys, j) == END_OF_INPUT) printf("T $");
+      else printf("T %ld", GET_KEY(NUM, keys, j));
       if (++j<ht_len) printf(", ");
       else printf("\n");
     }
@@ -297,42 +299,42 @@ main(UNUSED int argc, UNUSED char **argv)
   if (epsilon_array) free(epsilon_array);
   if (terminal_hts) {
     for (NUM i = 0; i < left_len; i++)
-      destroy_ht(terminal_hts+i, DESTROY_NONE_NO_SELF);
+      destroy_ht(terminal_hts+i, DESTROY_KEY_NO_SELF);
     free(terminal_hts);
   }
 
   if (predicate_hts) {
     for (NUM i = 0; i < left_len; i++)
-      destroy_ht(predicate_hts+i, DESTROY_NONE_NO_SELF);
+      destroy_ht(predicate_hts+i, DESTROY_KEY_NO_SELF);
     free(predicate_hts);
   }
 
   if (test_terminal_hts) {
     for (NUM i = 0; i < left_len; i++)
-      destroy_ht(test_terminal_hts+i, DESTROY_NONE_NO_SELF);
+      destroy_ht(test_terminal_hts+i, DESTROY_KEY_NO_SELF);
     free(test_terminal_hts);
   }
 
   if (test_predicate_hts) {
     for (NUM i = 0; i < left_len; i++)
-      destroy_ht(test_predicate_hts+i, DESTROY_NONE_NO_SELF);
+      destroy_ht(test_predicate_hts+i, DESTROY_KEY_NO_SELF);
     free(test_predicate_hts);
   }
 
   if (result_terminal_hts) {
     for (NUM i = 0; i < left_len; i++)
-      destroy_ht(result_terminal_hts+i, DESTROY_NONE_NO_SELF);
+      destroy_ht(result_terminal_hts+i, DESTROY_KEY_NO_SELF);
     free(result_terminal_hts);
   }
 
   if (result_predicate_hts) {
     for (NUM i = 0; i < left_len; i++)
-      destroy_ht(result_predicate_hts+i, DESTROY_NONE_NO_SELF);
+      destroy_ht(result_predicate_hts+i, DESTROY_KEY_NO_SELF);
     free(result_predicate_hts);
   }
 
   if (test_tnt_string) destroy_list(test_tnt_string, 1);
-  if (tnt_ht) destroy_ht(tnt_ht, DESTROY_NONE_SELF);
+  if (tnt_ht) destroy_ht(tnt_ht, DESTROY_KEY_NO_SELF);
 
   destroy_grammar(g, 1);
 
diff --git a/src/test/check_ht.c b/src/test/check_ht.c
index 00cd2f1..0298087 100644
--- a/src/test/check_ht.c
+++ b/src/test/check_ht.c
@@ -3,19 +3,20 @@
 
 int main(int UNUSED argc, char ** UNUSED argv)
 {
-  ht *htp = new_ht(HT_INIT_CAP);
+
+  ht *htp = new_ht(HT_INIT_CAP, 0);
 
   NUM *temp = MYALLOC(NUM, 1), key = 1023;
   *temp = 12345;
 
-  if (ht_insert(htp, key, temp)) {
+  if (ht_insert(htp, &key, temp)) {
     fleprintf0("Fail to insert\n");
     free(temp);
     destroy_ht(htp, DESTROY_EVERY_SELF);
     return 1;
   }
 
-  if ((temp = ht_find(htp, key))) {
+  if ((temp = ht_find(htp, &key))) {
     fleprintf("We found value %ld for key %ld\n",
               *temp, key);
   } else
@@ -27,25 +28,27 @@ int main(int UNUSED argc, char ** UNUSED argv)
   
   for (NUM i = 0; i < size; i++)
     fleprintf("The %ld-th element has key %ld and value %ld\n",
-              i, *(ht_keys(htp)+i), *((NUM *)*(ht_values(htp)+i)));
+              i,
+              *((NUM *)*(ht_keys(htp)+i)),
+              *((NUM *)*(ht_values(htp)+i)));
 
-  if (ht_delete(htp, key, 1)) {
+  if (ht_delete(htp, &key, DELETE_VALUE)) {
     fleprintf("Fail to delete key %ld\n", key);
-    destroy_ht(htp, DESTROY_EVERY_SELF);
+    destroy_ht(htp, DESTROY_VALUE_SELF);
     return 1;
   }
 
   fleprintf0("After the deletion, ");
 
-  if ((temp = ht_find(htp, key))) {
+  if ((temp = ht_find(htp, &key))) {
     eprintf("We found value %ld for key %ld\n",
             *temp, key);
-    destroy_ht(htp, DESTROY_EVERY_SELF);
+    destroy_ht(htp, DESTROY_VALUE_SELF);
     return 1;
   } else {
     eprintf("We found no value for key %ld\n", key);
   }
 
-  destroy_ht(htp, DESTROY_EVERY_SELF);
+  destroy_ht(htp, DESTROY_VALUE_SELF);
   return 0;
 }
diff --git a/src/util.h b/src/util.h
index 3fd533c..fae8517 100644
--- a/src/util.h
+++ b/src/util.h
@@ -16,6 +16,7 @@ typedef unsigned char BOOL;
 #define HC_ATTR __attribute__((__hot__, __const__))
 #define H_ATTR __attribute__((__hot__))
 #define P_ATTR __attribute__((__pure__))
+#define C_ATTR __attribute__((__const__))
 #define UNUSED __attribute__((__unused__))
 #define U_ATTR UNUSED
author	JSDurand <mmemmew@gmail.com>	2022-01-22 02:36:54 +0800
committer	JSDurand <mmemmew@gmail.com>	2022-01-22 02:36:54 +0800
commit	bad2b1934da66021cbc7f0d715686706bd444449 (patch)
tree	78c340d5f834bbc5864a97dcb23ff4ec41e23072
parent	53865aad225ffbe5cf3c42736e5a2095092f9fff (diff)