summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJSDurand <mmemmew@gmail.com>2022-01-12 20:26:08 +0800
committerJSDurand <mmemmew@gmail.com>2022-01-12 20:26:08 +0800
commit5730d6c04258e192195bfbbbed76d68fd78ed458 (patch)
tree92e64bd9576155000cf462ed1db3624e3bed3f8d
parent91016bd3855375796fd1eabd501ebc12491f2655 (diff)
Implement a simple hash table.
It is a very basic and simple hash table. It is so simple that I hesitate to call it a hash table. Anyways, I think it suffices for my purposes here. * Makefile.am: Add necessary files. * grammar.c (new_tnt_string): Formatting. * ht.c (new_ht): Constructor (destroy_ht): Destructor (ht_expand): Rehash (ht_insert, ht_delete, ht_find): Main functions. * list.c (add_to_list, list_assure_size): Modify the use of realloc. * test/check_ht.c: Ensure this is working correctly. * util.c (read_entire_file): Modify the use of realloc.
-rw-r--r--src/Makefile.am9
-rw-r--r--src/grammar.c2
-rw-r--r--src/ht.c201
-rw-r--r--src/ht.h23
-rw-r--r--src/list.c43
-rw-r--r--src/test/check_ht.c43
-rw-r--r--src/util.c12
7 files changed, 292 insertions, 41 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index f43fbee..a708ba4 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -2,9 +2,9 @@ AM_CFLAGS = -Wall -Wextra
noinst_LIBRARIES = libeps.a
libeps_a_SOURCES = grammar.c list.c util.c reader.c \
-str.c utf8.c dfa.c \
+str.c utf8.c dfa.c ht.c \
grammar.h list.h util.h reader.h \
-str_partial.h str.h utf8.h dfa.h
+str_partial.h str.h utf8.h dfa.h ht.h
libeps_a_CFLAGS = $(AM_CFLAGS) --pedantic
@@ -21,7 +21,8 @@ CLEANFILES = TAGS
# tests
-check_PROGRAMS = check_list check_grammar check_reader check_dfa
+check_PROGRAMS = check_list check_grammar check_reader check_dfa \
+check_ht
check_list_SOURCES = test/check_list.c list.c
@@ -33,6 +34,8 @@ str.c utf8.c util.c
check_dfa_SOURCES = test/check_dfa.c dfa.c list.c str.c utf8.c
+check_ht_SOURCES = test/check_ht.c ht.c
+
TESTS = $(check_PROGRAMS)
AM_COLOR_TESTS = always
diff --git a/src/grammar.c b/src/grammar.c
index 508c8d8..692e858 100644
--- a/src/grammar.c
+++ b/src/grammar.c
@@ -322,7 +322,7 @@ print_grammar(const Grammar * const g)
List *
new_tnt_string(char *format, int format_len, ...)
{
- /* FORMAT_LEN does not include the terminating null byte, so it
+ /* FORMAT_LEN does not include the terminating null byte, and it
should be > 0. */
if (format_len <= 0) return NULL;
diff --git a/src/ht.c b/src/ht.c
new file mode 100644
index 0000000..923eca7
--- /dev/null
+++ b/src/ht.c
@@ -0,0 +1,201 @@
+#include <string.h>
+#include <stdio.h>
+#include "ht.h"
+
+#define HT_REHASH_THRESHOLD 0.5
+
+struct ht_s {
+ NUM *keys;
+ void **values;
+ UNUM capability;
+ /* REVIEW: What is the purpose of SIZE? */
+ int size;
+};
+
+/* SIZE is a hint to the number of elements that might be put in the
+ table. In fact we will allocate more memory than this number, as a
+ hash table works better if it is not fully used. */
+ht *
+new_ht(UNUM size)
+{
+ ht *htp = MYALLOC(ht, 1);
+
+ if (htp == NULL) {
+ fleprintf0("Fail to malloc\n");
+ return NULL;
+ }
+
+ htp->capability = (size<<1)+(size>>1);
+ htp->size = 0;
+ htp->keys = MYALLOC(NUM, htp->capability);
+
+ if (htp->keys == NULL) {
+ fleprintf0("Fail to malloc\n");
+ free(htp);
+ return NULL;
+ }
+
+ htp->values = MYALLOC(void*, htp->capability);
+
+ if (htp->values == NULL) {
+ fleprintf0("Fail to malloc\n");
+ free(htp->keys);
+ free(htp);
+ return NULL;
+ }
+
+ /* Initialize the keys to be -1 and the values to be NULL */
+ memset(htp->keys, 0xff, sizeof (NUM) * htp->capability);
+ memset(htp->values, 0, sizeof (NUM) * htp->capability);
+
+ return htp;
+}
+
+void
+destroy_ht(ht * restrict htp, int flag)
+{
+ switch (flag) {
+ case 1:
+ for (UNUM i = 0; i < htp->capability;)
+ free(*(htp->values+i++));
+ break;
+ case 2:
+ free(*(htp->values));
+ break;
+ default:
+ break;
+ }
+
+ free(htp->values);
+
+ free(htp->keys);
+
+ free(htp);
+}
+
+static BOOL
+ht_expand(ht *htp)
+{
+ NUM oldcap = htp->capability;
+ htp->capability <<= 1;
+ htp->capability++;
+
+ NUM *newkeys = realloc(htp->keys, htp->capability);
+
+ if (newkeys == NULL) {
+ fleprintf0("Fail to realloc\n");
+ return 1;
+ }
+
+ htp->keys = newkeys;
+
+ memset(htp->keys+oldcap, 0xff,
+ sizeof (NUM) * (htp->capability - oldcap));
+
+ void **newvalues = realloc(htp->values, htp->capability);
+
+ if (newvalues == NULL) {
+ fleprintf0("Fail to realloc\n");
+ return 1;
+ }
+
+ htp->values = newvalues;
+
+ memset(htp->values+oldcap, 0,
+ sizeof(NUM) * (htp->capability - oldcap));
+
+ return 0;
+}
+
+BOOL
+ht_insert(ht *htp, NUM key, void *value)
+{
+ if (htp->size+1 >= HT_REHASH_THRESHOLD * htp->capability)
+ ht_expand(htp);
+
+ NUM ikey = key % htp->capability;
+
+ /* linear probing */
+ for (NUM count = 0;
+ count < htp->capability * HT_REHASH_THRESHOLD &&
+ *(htp->keys+ikey) > 0 &&
+ *(htp->keys+ikey) != key;
+ count++)
+ ikey = (ikey + 1) % htp->capability;
+
+ if (*(htp->keys+ikey) < 0) {
+ /* not inserted before */
+ *(htp->keys+ikey) = key;
+ *(htp->values+ikey) = value;
+ (htp->size)++;
+ } else if (*(htp->keys+ikey) == key) {
+ *(htp->values+ikey) = value;
+ } else {
+ fleprintf("Fail to probe an appropriate slot for %ld, "
+ "the result ikey = %ld\n",
+ key, ikey);
+ return 1;
+ }
+
+ /* fleprintf("ikey = %ld, size = %d, capability = %llu\n",
+ * ikey, htp->size, htp->capability); */
+
+ return 0;
+}
+
+BOOL
+ht_delete(ht * const restrict htp, NUM key, int flag)
+{
+ NUM ikey = key % htp->capability;
+
+ /* linear probing */
+ for (NUM count = 0;
+ count < htp->capability * HT_REHASH_THRESHOLD &&
+ *(htp->keys+ikey) > 0 &&
+ *(htp->keys+ikey) != key;
+ count++)
+ ikey = (ikey + 1) % htp->capability;
+
+ if (*(htp->keys+ikey) < 0) {
+ /* not inserted before */
+ return 0;
+ } else if (*(htp->keys+ikey) == key) {
+ (htp->size)--;
+ *(htp->keys+ikey) = -1;
+ if (flag) {
+ free(*(htp->values+ikey));
+ *(htp->values+ikey) = NULL;
+ }
+ return 0;
+ } else {
+ fleprintf("Fail to probe an appropriate slot for %ld, "
+ "the result ikey = %ld\n",
+ key, ikey);
+ return 1;
+ }
+
+ return 0;
+}
+
+void *
+ht_find(ht * const restrict htp, NUM key)
+{
+ NUM ikey = key % htp->capability;
+
+ /* linear probing */
+ for (NUM count = 0;
+ count < htp->capability * HT_REHASH_THRESHOLD &&
+ *(htp->keys+ikey) > 0 &&
+ *(htp->keys+ikey) != key;
+ count++)
+ ikey = (ikey + 1) % htp->capability;
+
+ if (*(htp->keys+ikey) == key)
+ return *(htp->values+ikey);
+ else if (*(htp->keys+ikey) > 0)
+ fleprintf("Fail to probe an appropriate slot for %ld, "
+ "with the result ikey = %ld\n",
+ key, ikey);
+
+ return NULL;
+}
diff --git a/src/ht.h b/src/ht.h
new file mode 100644
index 0000000..54c0497
--- /dev/null
+++ b/src/ht.h
@@ -0,0 +1,23 @@
+#ifndef HT_H
+#define HT_H
+#include "util.h"
+
+enum { HT_INIT_CAP = 257 };
+
+typedef struct ht_s ht;
+
+ht *new_ht(UNUM size);
+
+void destroy_ht(ht * restrict htp, int flag);
+
+BOOL ht_insert(ht *htp, NUM key, void *value);
+
+/* This is just for the completeness. In theory I do not need to
+ delete keys.
+
+ If FLAG is non-zero, also free the value pointer. */
+BOOL ht_delete(ht * const restrict htp, NUM key, int flag);
+
+void *ht_find(ht * const restrict htp, NUM key);
+
+#endif
diff --git a/src/list.c b/src/list.c
index 3c430f6..db8bc44 100644
--- a/src/list.c
+++ b/src/list.c
@@ -39,8 +39,7 @@ add_to_list(List *ls, void *element)
/* The capacity can be zero only when the list has been destroyed,
in which case adding an element to that list is definitely an
error. */
- eprintf("%s:%d, Adding an element to a destroyed list.\n",
- __FILE__, __LINE__);
+ fleprintf0("Adding an element to a destroyed list.\n");
}
(ls->len)++;
@@ -55,30 +54,17 @@ add_to_list(List *ls, void *element)
if (new_capacity >= ls->capacity + max_diff)
new_capacity = ls->capacity + max_diff;
- void **array = MYALLOC(void *, ls->capacity);
-
- if (array == NULL) {
- ls->len -= 1;
- return 1;
- }
-
- for (NUM i = 0; i < ls->capacity; i++)
- *(array+i) = *(ls->array+i);
-
/* The new_capacity will not be zero, so upon failure it returns
NULL. */
- ls->array = realloc(ls->array, sizeof(void*) * new_capacity);
+ void **newarr = realloc(ls->array, sizeof(void*) * new_capacity);
- if (ls->array == NULL) {
+ if (newarr == NULL) {
+ fleprintf0("Fail to realloc\n");
ls->len -= 1;
- free(array);
return 1;
}
- for (NUM i = 0; i < ls->capacity; i++)
- *(ls->array+i) = *(array+i);
-
- free(array);
+ ls->array = newarr;
for (NUM i = ls->capacity; i < new_capacity; i++)
*(ls->array+i) = NULL;
@@ -179,21 +165,14 @@ list_assure_size(List *ls, NUM size)
{
if (ls->capacity >= size) return 0; /* we are good */
- void **array = MYALLOC(void *, ls->capacity);
+ void **newarray = realloc(ls->array, sizeof(void*) * size);
- if (array == NULL) return 1;
-
- for (NUM i = 0; i < ls->len; i++)
- *(array+i) = *(ls->array+i);
-
- ls->array = realloc(ls->array, sizeof(void*) * size);
-
- if (ls->array == NULL) return 1;
-
- for (NUM i = 0; i < ls->len; i++)
- *(ls->array+i) = *(array+i);
+ if (newarray == NULL) {
+ fleprintf0("Fail to realloc\n");
+ return 1;
+ }
- free(array);
+ ls->array = newarray;
for (NUM i = ls->capacity; i < size; i++)
*(ls->array+i) = NULL;
diff --git a/src/test/check_ht.c b/src/test/check_ht.c
new file mode 100644
index 0000000..2419ce5
--- /dev/null
+++ b/src/test/check_ht.c
@@ -0,0 +1,43 @@
+#include <stdio.h>
+#include "../ht.h"
+
+int main(int UNUSED argc, char ** UNUSED argv)
+{
+ ht *htp = new_ht(HT_INIT_CAP);
+
+ NUM *temp = MYALLOC(NUM, 1), key = 1023;
+ *temp = 12345;
+
+ if (ht_insert(htp, key, temp)) {
+ fleprintf0("Fail to insert\n");
+ free(temp);
+ destroy_ht(htp, 1);
+ return 1;
+ }
+
+ if ((temp = ht_find(htp, key))) {
+ fleprintf("We found value %ld for key %ld\n",
+ *temp, key);
+ } else
+ fleprintf("We found no value for key %ld\n", key);
+
+ if (ht_delete(htp, key, 1)) {
+ fleprintf("Fail to delete key %ld\n", key);
+ destroy_ht(htp, 1);
+ return 1;
+ }
+
+ fleprintf0("After the deletion, ");
+
+ if ((temp = ht_find(htp, key))) {
+ eprintf("We found value %ld for key %ld\n",
+ *temp, key);
+ destroy_ht(htp, 1);
+ return 1;
+ } else {
+ eprintf("We found no value for key %ld\n", key);
+ }
+
+ destroy_ht(htp, 1);
+ return 0;
+}
diff --git a/src/util.c b/src/util.c
index 1af8e5e..fb18e23 100644
--- a/src/util.c
+++ b/src/util.c
@@ -7,7 +7,7 @@
If errors occur, return non-zero; otherwise the return value is
zero.
-
+
Note that *str should be already allocated.
Note that the length of the string str is actually 1+length, and
@@ -29,16 +29,18 @@ read_entire_file(const char *file_name, char **str, NUM *len)
fseek(file, 0, SEEK_END);
file_size = ftell(file);
fseek(file, 0, SEEK_SET);
-
+
*len = 1+file_size;
- *str = realloc(*str, 1+file_size);
-
- if (*str == NULL) {
+ char *newstr = realloc(*str, 1+file_size);
+
+ if (newstr == NULL) {
fleprintf0("Cannot realloc\n");
return 1;
}
+ *str = newstr;
+
fread(*str, sizeof(char), file_size, file);
fclose(file);