From 5730d6c04258e192195bfbbbed76d68fd78ed458 Mon Sep 17 00:00:00 2001 From: JSDurand Date: Wed, 12 Jan 2022 20:26:08 +0800 Subject: Implement a simple hash table. It is a very basic and simple hash table. It is so simple that I hesitate to call it a hash table. Anyways, I think it suffices for my purposes here. * Makefile.am: Add necessary files. * grammar.c (new_tnt_string): Formatting. * ht.c (new_ht): Constructor (destroy_ht): Destructor (ht_expand): Rehash (ht_insert, ht_delete, ht_find): Main functions. * list.c (add_to_list, list_assure_size): Modify the use of realloc. * test/check_ht.c: Ensure this is working correctly. * util.c (read_entire_file): Modify the use of realloc. --- src/Makefile.am | 9 ++- src/grammar.c | 2 +- src/ht.c | 201 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/ht.h | 23 ++++++ src/list.c | 43 +++-------- src/test/check_ht.c | 43 +++++++++++ src/util.c | 12 ++-- 7 files changed, 292 insertions(+), 41 deletions(-) create mode 100644 src/ht.c create mode 100644 src/ht.h create mode 100644 src/test/check_ht.c diff --git a/src/Makefile.am b/src/Makefile.am index f43fbee..a708ba4 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -2,9 +2,9 @@ AM_CFLAGS = -Wall -Wextra noinst_LIBRARIES = libeps.a libeps_a_SOURCES = grammar.c list.c util.c reader.c \ -str.c utf8.c dfa.c \ +str.c utf8.c dfa.c ht.c \ grammar.h list.h util.h reader.h \ -str_partial.h str.h utf8.h dfa.h +str_partial.h str.h utf8.h dfa.h ht.h libeps_a_CFLAGS = $(AM_CFLAGS) --pedantic @@ -21,7 +21,8 @@ CLEANFILES = TAGS # tests -check_PROGRAMS = check_list check_grammar check_reader check_dfa +check_PROGRAMS = check_list check_grammar check_reader check_dfa \ +check_ht check_list_SOURCES = test/check_list.c list.c @@ -33,6 +34,8 @@ str.c utf8.c util.c check_dfa_SOURCES = test/check_dfa.c dfa.c list.c str.c utf8.c +check_ht_SOURCES = test/check_ht.c ht.c + TESTS = $(check_PROGRAMS) AM_COLOR_TESTS = always diff --git a/src/grammar.c b/src/grammar.c index 508c8d8..692e858 100644 --- a/src/grammar.c +++ b/src/grammar.c @@ -322,7 +322,7 @@ print_grammar(const Grammar * const g) List * new_tnt_string(char *format, int format_len, ...) { - /* FORMAT_LEN does not include the terminating null byte, so it + /* FORMAT_LEN does not include the terminating null byte, and it should be > 0. */ if (format_len <= 0) return NULL; diff --git a/src/ht.c b/src/ht.c new file mode 100644 index 0000000..923eca7 --- /dev/null +++ b/src/ht.c @@ -0,0 +1,201 @@ +#include +#include +#include "ht.h" + +#define HT_REHASH_THRESHOLD 0.5 + +struct ht_s { + NUM *keys; + void **values; + UNUM capability; + /* REVIEW: What is the purpose of SIZE? */ + int size; +}; + +/* SIZE is a hint to the number of elements that might be put in the + table. In fact we will allocate more memory than this number, as a + hash table works better if it is not fully used. */ +ht * +new_ht(UNUM size) +{ + ht *htp = MYALLOC(ht, 1); + + if (htp == NULL) { + fleprintf0("Fail to malloc\n"); + return NULL; + } + + htp->capability = (size<<1)+(size>>1); + htp->size = 0; + htp->keys = MYALLOC(NUM, htp->capability); + + if (htp->keys == NULL) { + fleprintf0("Fail to malloc\n"); + free(htp); + return NULL; + } + + htp->values = MYALLOC(void*, htp->capability); + + if (htp->values == NULL) { + fleprintf0("Fail to malloc\n"); + free(htp->keys); + free(htp); + return NULL; + } + + /* Initialize the keys to be -1 and the values to be NULL */ + memset(htp->keys, 0xff, sizeof (NUM) * htp->capability); + memset(htp->values, 0, sizeof (NUM) * htp->capability); + + return htp; +} + +void +destroy_ht(ht * restrict htp, int flag) +{ + switch (flag) { + case 1: + for (UNUM i = 0; i < htp->capability;) + free(*(htp->values+i++)); + break; + case 2: + free(*(htp->values)); + break; + default: + break; + } + + free(htp->values); + + free(htp->keys); + + free(htp); +} + +static BOOL +ht_expand(ht *htp) +{ + NUM oldcap = htp->capability; + htp->capability <<= 1; + htp->capability++; + + NUM *newkeys = realloc(htp->keys, htp->capability); + + if (newkeys == NULL) { + fleprintf0("Fail to realloc\n"); + return 1; + } + + htp->keys = newkeys; + + memset(htp->keys+oldcap, 0xff, + sizeof (NUM) * (htp->capability - oldcap)); + + void **newvalues = realloc(htp->values, htp->capability); + + if (newvalues == NULL) { + fleprintf0("Fail to realloc\n"); + return 1; + } + + htp->values = newvalues; + + memset(htp->values+oldcap, 0, + sizeof(NUM) * (htp->capability - oldcap)); + + return 0; +} + +BOOL +ht_insert(ht *htp, NUM key, void *value) +{ + if (htp->size+1 >= HT_REHASH_THRESHOLD * htp->capability) + ht_expand(htp); + + NUM ikey = key % htp->capability; + + /* linear probing */ + for (NUM count = 0; + count < htp->capability * HT_REHASH_THRESHOLD && + *(htp->keys+ikey) > 0 && + *(htp->keys+ikey) != key; + count++) + ikey = (ikey + 1) % htp->capability; + + if (*(htp->keys+ikey) < 0) { + /* not inserted before */ + *(htp->keys+ikey) = key; + *(htp->values+ikey) = value; + (htp->size)++; + } else if (*(htp->keys+ikey) == key) { + *(htp->values+ikey) = value; + } else { + fleprintf("Fail to probe an appropriate slot for %ld, " + "the result ikey = %ld\n", + key, ikey); + return 1; + } + + /* fleprintf("ikey = %ld, size = %d, capability = %llu\n", + * ikey, htp->size, htp->capability); */ + + return 0; +} + +BOOL +ht_delete(ht * const restrict htp, NUM key, int flag) +{ + NUM ikey = key % htp->capability; + + /* linear probing */ + for (NUM count = 0; + count < htp->capability * HT_REHASH_THRESHOLD && + *(htp->keys+ikey) > 0 && + *(htp->keys+ikey) != key; + count++) + ikey = (ikey + 1) % htp->capability; + + if (*(htp->keys+ikey) < 0) { + /* not inserted before */ + return 0; + } else if (*(htp->keys+ikey) == key) { + (htp->size)--; + *(htp->keys+ikey) = -1; + if (flag) { + free(*(htp->values+ikey)); + *(htp->values+ikey) = NULL; + } + return 0; + } else { + fleprintf("Fail to probe an appropriate slot for %ld, " + "the result ikey = %ld\n", + key, ikey); + return 1; + } + + return 0; +} + +void * +ht_find(ht * const restrict htp, NUM key) +{ + NUM ikey = key % htp->capability; + + /* linear probing */ + for (NUM count = 0; + count < htp->capability * HT_REHASH_THRESHOLD && + *(htp->keys+ikey) > 0 && + *(htp->keys+ikey) != key; + count++) + ikey = (ikey + 1) % htp->capability; + + if (*(htp->keys+ikey) == key) + return *(htp->values+ikey); + else if (*(htp->keys+ikey) > 0) + fleprintf("Fail to probe an appropriate slot for %ld, " + "with the result ikey = %ld\n", + key, ikey); + + return NULL; +} diff --git a/src/ht.h b/src/ht.h new file mode 100644 index 0000000..54c0497 --- /dev/null +++ b/src/ht.h @@ -0,0 +1,23 @@ +#ifndef HT_H +#define HT_H +#include "util.h" + +enum { HT_INIT_CAP = 257 }; + +typedef struct ht_s ht; + +ht *new_ht(UNUM size); + +void destroy_ht(ht * restrict htp, int flag); + +BOOL ht_insert(ht *htp, NUM key, void *value); + +/* This is just for the completeness. In theory I do not need to + delete keys. + + If FLAG is non-zero, also free the value pointer. */ +BOOL ht_delete(ht * const restrict htp, NUM key, int flag); + +void *ht_find(ht * const restrict htp, NUM key); + +#endif diff --git a/src/list.c b/src/list.c index 3c430f6..db8bc44 100644 --- a/src/list.c +++ b/src/list.c @@ -39,8 +39,7 @@ add_to_list(List *ls, void *element) /* The capacity can be zero only when the list has been destroyed, in which case adding an element to that list is definitely an error. */ - eprintf("%s:%d, Adding an element to a destroyed list.\n", - __FILE__, __LINE__); + fleprintf0("Adding an element to a destroyed list.\n"); } (ls->len)++; @@ -55,30 +54,17 @@ add_to_list(List *ls, void *element) if (new_capacity >= ls->capacity + max_diff) new_capacity = ls->capacity + max_diff; - void **array = MYALLOC(void *, ls->capacity); - - if (array == NULL) { - ls->len -= 1; - return 1; - } - - for (NUM i = 0; i < ls->capacity; i++) - *(array+i) = *(ls->array+i); - /* The new_capacity will not be zero, so upon failure it returns NULL. */ - ls->array = realloc(ls->array, sizeof(void*) * new_capacity); + void **newarr = realloc(ls->array, sizeof(void*) * new_capacity); - if (ls->array == NULL) { + if (newarr == NULL) { + fleprintf0("Fail to realloc\n"); ls->len -= 1; - free(array); return 1; } - for (NUM i = 0; i < ls->capacity; i++) - *(ls->array+i) = *(array+i); - - free(array); + ls->array = newarr; for (NUM i = ls->capacity; i < new_capacity; i++) *(ls->array+i) = NULL; @@ -179,21 +165,14 @@ list_assure_size(List *ls, NUM size) { if (ls->capacity >= size) return 0; /* we are good */ - void **array = MYALLOC(void *, ls->capacity); + void **newarray = realloc(ls->array, sizeof(void*) * size); - if (array == NULL) return 1; - - for (NUM i = 0; i < ls->len; i++) - *(array+i) = *(ls->array+i); - - ls->array = realloc(ls->array, sizeof(void*) * size); - - if (ls->array == NULL) return 1; - - for (NUM i = 0; i < ls->len; i++) - *(ls->array+i) = *(array+i); + if (newarray == NULL) { + fleprintf0("Fail to realloc\n"); + return 1; + } - free(array); + ls->array = newarray; for (NUM i = ls->capacity; i < size; i++) *(ls->array+i) = NULL; diff --git a/src/test/check_ht.c b/src/test/check_ht.c new file mode 100644 index 0000000..2419ce5 --- /dev/null +++ b/src/test/check_ht.c @@ -0,0 +1,43 @@ +#include +#include "../ht.h" + +int main(int UNUSED argc, char ** UNUSED argv) +{ + ht *htp = new_ht(HT_INIT_CAP); + + NUM *temp = MYALLOC(NUM, 1), key = 1023; + *temp = 12345; + + if (ht_insert(htp, key, temp)) { + fleprintf0("Fail to insert\n"); + free(temp); + destroy_ht(htp, 1); + return 1; + } + + if ((temp = ht_find(htp, key))) { + fleprintf("We found value %ld for key %ld\n", + *temp, key); + } else + fleprintf("We found no value for key %ld\n", key); + + if (ht_delete(htp, key, 1)) { + fleprintf("Fail to delete key %ld\n", key); + destroy_ht(htp, 1); + return 1; + } + + fleprintf0("After the deletion, "); + + if ((temp = ht_find(htp, key))) { + eprintf("We found value %ld for key %ld\n", + *temp, key); + destroy_ht(htp, 1); + return 1; + } else { + eprintf("We found no value for key %ld\n", key); + } + + destroy_ht(htp, 1); + return 0; +} diff --git a/src/util.c b/src/util.c index 1af8e5e..fb18e23 100644 --- a/src/util.c +++ b/src/util.c @@ -7,7 +7,7 @@ If errors occur, return non-zero; otherwise the return value is zero. - + Note that *str should be already allocated. Note that the length of the string str is actually 1+length, and @@ -29,16 +29,18 @@ read_entire_file(const char *file_name, char **str, NUM *len) fseek(file, 0, SEEK_END); file_size = ftell(file); fseek(file, 0, SEEK_SET); - + *len = 1+file_size; - *str = realloc(*str, 1+file_size); - - if (*str == NULL) { + char *newstr = realloc(*str, 1+file_size); + + if (newstr == NULL) { fleprintf0("Cannot realloc\n"); return 1; } + *str = newstr; + fread(*str, sizeof(char), file_size, file); fclose(file); -- cgit v1.2.3-18-g5258