#include #include #include #include "ht.h" #define HT_REHASH_THRESHOLD 0.5 P_ATTR static NUM num_hft(CCR_MOD(void *) key) { if (key == NULL) { fleprintf0("Got NULL key!\n"); return 0; } return *((NUM *) key); } P_ATTR static BOOL num_cft(CCR_MOD(void *) keya, CCR_MOD(void *) keyb) { if ((keya == NULL && keyb != NULL) || (keyb == NULL && keya != NULL)) return 0; if (keya == NULL && keyb == NULL) return 1; return *((NUM *) keya) == *((NUM *) keyb); } /* SIZE is a hint to the number of elements that might be put in the table. In fact we will allocate more memory than this number, as a hash table works better if it is not fully used. NARGS is the number of arguments to follow. It should be between 0 and 2 (inclusive). If there are more than one argument, the first argument is used as the hashing function; if there are two arguments, the second argument is used as the comparison function. */ ht * new_ht(UNUM size, int nargs, ...) { if (nargs < 0 || nargs > 2) { fleprintf("NARGS should be between zero and two, but got %d\n", nargs); return NULL; } hash_func_t hft = num_hft; compare_func_t cft = num_cft; va_list args; if (nargs) { va_start(args, nargs); if (nargs >= 1) hft = va_arg(args, hash_func_t); if (nargs == 2) cft = va_arg(args, compare_func_t); va_end(args); } ht *htp = NULL; SAFE_MALLOC(ht, htp, 1, return NULL;); htp->capability = (size<<1)+(size>>1); htp->size = 0; htp->hf = hft; htp->cf = cft; /* For safer clean up */ htp->keys = NULL; htp->values = NULL; htp->indices = NULL; SAFE_MALLOC(NUM, htp->keys, htp->capability, goto cleanup;); SAFE_MALLOC(void*, htp->values, htp->capability, goto cleanup;); SAFE_MALLOC(NUM, htp->indices, htp->capability, goto cleanup;); /* Initialize the keys to be -1 and the values to be NULL */ memset(htp->keys, 0xff, sizeof (NUM) * htp->capability); memset(htp->values, 0, sizeof (void*) * htp->capability); /* Initialize indices to be -1 */ memset(htp->indices, 0xff, sizeof (NUM) * htp->capability); return htp; cleanup: if (htp->keys) free(htp->keys); if (htp->values) free(htp->values); if (htp->indices) free(htp->indices); free(htp); return NULL; } void destroy_ht(ht * restrict htp, HT_DESTROY_FLAG flag) { if (htp == NULL) return; switch (flag) { case DESTROY_KEY_SELF: case DESTROY_KEY_NO_SELF: case DESTROY_EVERY_SELF: case DESTROY_EVERY_NO_SELF: case DESTROY_KEY_VALUE_FIRST_SELF: case DESTROY_KEY_VALUE_FIRST_NO_SELF: for (NUM i = 0; i < htp->size;) free(*(htp->keys+i++)); break; case DESTROY_KEY_FIRST_SELF: case DESTROY_KEY_FIRST_NO_SELF: case DESTROY_EVERY_FIRST_SELF: case DESTROY_EVERY_FIRST_NO_SELF: case DESTROY_KEY_FIRST_VALUE_SELF: case DESTROY_KEY_FIRST_VALUE_NO_SELF: free(*(htp->keys)); break; default: break; } switch (flag) { case DESTROY_VALUE_SELF: case DESTROY_VALUE_NO_SELF: case DESTROY_EVERY_SELF: case DESTROY_EVERY_NO_SELF: case DESTROY_KEY_FIRST_VALUE_SELF: case DESTROY_KEY_FIRST_VALUE_NO_SELF: for (NUM i = 0; i < htp->size;) free(*(htp->values+i++)); break; case DESTROY_VALUE_FIRST_SELF: case DESTROY_VALUE_FIRST_NO_SELF: case DESTROY_EVERY_FIRST_SELF: case DESTROY_EVERY_FIRST_NO_SELF: case DESTROY_KEY_VALUE_FIRST_SELF: case DESTROY_KEY_VALUE_FIRST_NO_SELF: free(*(htp->values)); break; default: break; } free(htp->values); free(htp->keys); free(htp->indices); switch (flag) { case DESTROY_NONE_SELF: case DESTROY_KEY_SELF: case DESTROY_KEY_FIRST_SELF: case DESTROY_VALUE_SELF: case DESTROY_VALUE_FIRST_SELF: case DESTROY_EVERY_SELF: case DESTROY_EVERY_FIRST_SELF: case DESTROY_KEY_FIRST_VALUE_SELF: case DESTROY_KEY_VALUE_FIRST_SELF: free(htp); break; default: break; } } static BOOL ht_expand(ht *htp) { htp->capability <<= 1; void **keys = NULL; NUM size = htp->size; void **values = NULL; if (size) { SAFE_MALLOC(void*, keys, size, goto cleanup;); SAFE_MALLOC(void*, values, size, goto cleanup;); memcpy(keys, htp->keys, sizeof(void*) * size); memcpy(values, htp->values, sizeof(void*) * size); } SAFE_REALLOC(void*, htp->keys, htp->capability, goto cleanup;); memset(htp->keys, 0, sizeof (void*) * htp->capability); SAFE_REALLOC(void*, htp->values, htp->capability, goto cleanup;); memset(htp->values, 0, sizeof(void*) * htp->capability); SAFE_REALLOC(NUM, htp->indices, htp->capability, goto cleanup;); memset(htp->indices, 0xff, sizeof (NUM) * htp->capability); htp->size = 0; for (NUM i = 0; i < size; i++) ht_insert(htp, *(keys+i), *(values+i)); if (keys) free(keys); if (values) free(values); return 0; cleanup: if (keys) free(keys); if (values) free(values); return 1; } #define PERTURBATION_SHIFT 5 /* On error return non-zero. */ __attribute__((__hot__)) static BOOL ht_probe(CC_MOD(ht *) htp, CC_MOD(void *) key, NUM *result) { NUM hashed = htp->hf(key); NUM ikey = hashed % htp->capability; unsigned long perturbation = hashed - ikey; for (NUM count = 0; count < htp->capability * HT_REHASH_THRESHOLD && *(htp->indices+ikey) >= 0 && /* continue if keys don't match */ !(htp->cf (*(htp->keys+*(htp->indices+ikey)), key)); count++) { ikey = 5 * ikey + 1 + perturbation; perturbation >>= PERTURBATION_SHIFT; ikey %= htp->capability; } if (*(htp->indices+ikey) >= 0 && !(htp->cf(*(htp->keys+*(htp->indices+ikey)), key))) { /* failed for some reason */ fleprintf("Fail to probe a location for the key, ikey = %ld, " "last index = %ld\n", ikey, *(htp->indices+ikey)); return 1; } *result = ikey; return 0; } BOOL ht_insert(ht * const restrict htp, void *key, void *value) { if (htp->size+1 >= HT_REHASH_THRESHOLD * htp->capability) { if (ht_expand(htp)) { fleprintf0("Fail to expand\n"); return 1; } } NUM ikey = 0; /* check errors */ if (ht_probe(htp, key, &ikey)) return 1; if (*(htp->indices+ikey) < 0) { /* not inserted before */ *(htp->indices+ikey) = htp->size; /* Store the key and the value in the array. */ *(htp->keys+htp->size) = key; *(htp->values+htp->size) = value; (htp->size)++; } else { /* error check is performed in ht_probe */ *(htp->values+*(htp->indices+ikey)) = value; } /* fleprintf("ikey = %ld, size = %d, capability = %llu\n", * ikey, htp->size, htp->capability); */ return 0; } BOOL ht_delete(ht * const restrict htp, void *key, HT_DELETE_FLAG flag) { NUM ikey = 0; /* check errors */ if (ht_probe(htp, key, &ikey)) return 1; if (*(htp->indices+ikey) < 0) { /* not inserted before */ } else { (htp->size)--; switch (flag) { case DELETE_KEY: case DELETE_EVERY: free(*(htp->keys+*(htp->indices+ikey))); break; default: break; } switch (flag) { case DELETE_VALUE: case DELETE_EVERY: free(*(htp->values+*(htp->indices+ikey))); break; default: break; } *(htp->values+*(htp->indices+ikey)) = NULL; *(htp->keys+*(htp->indices+ikey)) = NULL; *(htp->indices+ikey) = -1; } return 0; } void ht_reset(ht * const restrict htp, HT_DELETE_FLAG flag) { for (int i = 0; i < htp->size; i++) { switch (flag) { case DELETE_KEY: case DELETE_EVERY: free(*(htp->keys+i)); break; default: break; } switch (flag) { case DELETE_VALUE: case DELETE_EVERY: free(*(htp->values+i)); break; default: break; } } htp->size = 0; memset(htp->values, 0, sizeof(void*) * htp->capability); memset(htp->keys, 0, sizeof(void*) * htp->capability); memset(htp->indices, 0xff, sizeof (NUM) * htp->capability); } P_ATTR void * ht_find(CCR_MOD(ht *) htp, void *key) { NUM ikey = 0; /* check errors */ if (ht_probe(htp, key, &ikey)) return NULL; if (*(htp->indices+ikey) < 0) return NULL; return *(htp->values+*(htp->indices+ikey)); } P_ATTR NUM ht_size(CCR_MOD(ht *) htp) { return (NUM) htp->size; } P_ATTR void ** ht_values(CCR_MOD(ht *) htp) { return htp->values; } P_ATTR void ** ht_keys(CCR_MOD(ht *) htp) { return htp->keys; } /* Calculate binomial coefficients efficiently. The algorithm is adapted from the following website. */ UHC_ATTR static NUM binom(NUM n, int k) { if (k < 1) { fleprintf("Invalid k: %d\n", k); return 0; } NUM result = 1; for (int d = 1; d <= k;) { result *= n--; result /= d++; } return result; } /* REVIEW: I might want a faster hashing function in the future. Consider using bit manipulations as shown in the following file. */ /* A generalization of Cantor's pairing functions. See Wikipedia for more information. The integer n determins the number of arguments, which are all of type NUM. */ /* I later changed my mind and decided to use another pairing function, the elegant pairing function of Szudzik. See the following page for details. */ UHC_ATTR static NUM pair(int n, ...) { if (n < 1) { fleprintf("Invalid n: %d\n", n); return 0; } va_list args; va_start(args, n); NUM result = va_arg(args, NUM), temp = 0; /* a simple fold */ for (int i = 1; i < n; i++) { temp = va_arg(args, NUM); result = (result >= temp) ? result * result + result + temp : result + temp * temp; } va_end(args); return result; } HP_ATTR static NUM pair2_hf(CCR_MOD(void *) key) { if (key == NULL) { fleprintf0("Got NULL key!\n"); return 0; } NUM result = pair(2, ((pair2 *) key)->x, ((pair2 *) key)->y); /* fleprintf("Hash of (%ld, %ld) is %ld\n", * ((pair2 *) key)->x, * ((pair2 *) key)->y, * result); */ return result; } HP_ATTR static BOOL pair2_cf(CCR_MOD(void *) keya, CCR_MOD(void *) keyb) { if ((keya == NULL && keyb != NULL) || (keyb == NULL && keya != NULL)) return 0; if (keya == NULL && keyb == NULL) return 1; return ((pair2 *) keya)->x == ((pair2 *) keyb)->x && ((pair2 *) keya)->y == ((pair2 *) keyb)->y; } HP_ATTR static NUM pair3_hf(CCR_MOD(void *) key) { if (key == NULL) { fleprintf0("Got NULL key!\n"); return 0; } NUM result = pair(3, ((pair3 *) key)->x, ((pair3 *) key)->y, ((pair3 *) key)->z); /* fleprintf("Hash of (%ld, %ld, %ld) is %ld\n", * ((pair3 *) key)->x, * ((pair3 *) key)->y, * ((pair3 *) key)->z, * result); */ return result; } HP_ATTR static BOOL pair3_cf(CCR_MOD(void *) keya, CCR_MOD(void *) keyb) { if ((keya == NULL && keyb != NULL) || (keyb == NULL && keya != NULL)) return 0; if (keya == NULL && keyb == NULL) return 1; return ((pair3 *) keya)->x == ((pair3 *) keyb)->x && ((pair3 *) keya)->y == ((pair3 *) keyb)->y && ((pair3 *) keya)->z == ((pair3 *) keyb)->z; } HP_ATTR static NUM pair4_hf(CCR_MOD(void *) key) { if (key == NULL) { fleprintf0("Got NULL key!\n"); return 0; } NUM result = pair(4, ((pair4 *) key)->x, ((pair4 *) key)->y, ((pair4 *) key)->z, ((pair4 *) key)->w); /* fleprintf("Hash of (%ld, %ld, %ld) is %ld\n", * ((pair3 *) key)->x, * ((pair3 *) key)->y, * ((pair3 *) key)->z, * result); */ return result; } HP_ATTR static BOOL pair4_cf(CCR_MOD(void *) keya, CCR_MOD(void *) keyb) { if ((keya == NULL && keyb != NULL) || (keyb == NULL && keya != NULL)) return 0; if (keya == NULL && keyb == NULL) return 1; return ((pair4 *) keya)->x == ((pair4 *) keyb)->x && ((pair4 *) keya)->y == ((pair4 *) keyb)->y && ((pair4 *) keya)->z == ((pair4 *) keyb)->z && ((pair4 *) keya)->w == ((pair4 *) keyb)->w; } ht * new_ht2(UNUM size) { return new_ht(size, 2, pair2_hf, pair2_cf); } ht * new_ht3(UNUM size) { return new_ht(size, 2, pair3_hf, pair3_cf); } ht * new_ht4(UNUM size) { return new_ht(size, 2, pair4_hf, pair4_cf); }