hash.c
上传用户:liugui
上传日期:2007-01-04
资源大小:822k
文件大小:10k
- /*
- * $Id: hash.c,v 1.5 1998/07/22 20:38:08 wessels Exp $
- *
- * DEBUG: section 0 Hash Tables
- * AUTHOR: Harvest Derived
- *
- * SQUID Internet Object Cache http://squid.nlanr.net/Squid/
- * --------------------------------------------------------
- *
- * Squid is the result of efforts by numerous individuals from the
- * Internet community. Development is led by Duane Wessels of the
- * National Laboratory for Applied Network Research and funded by
- * the National Science Foundation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
- *
- */
- #include <unistd.h>
- #include <stdlib.h>
- #include <stdio.h>
- #include <sys/types.h>
- #include <ctype.h>
- #include <sys/time.h>
- #include <strings.h>
- #include <assert.h>
- #include "hash.h"
- #undef free
- extern void my_free(char *, int , void *);
- #define free(a) my_free(__FILE__, __LINE__, a)
- extern void print_stats();
- /*
- * hash_url() - Returns a well-distributed hash function for URLs.
- * The best way is to sum up the last half of the string.
- * Adapted from code written by Mic Bowman. -Darren
- * Generates a standard deviation = 15.73
- */
- unsigned int
- hash_url(const void *data, unsigned int size)
- {
- const char *s = data;
- unsigned int i, j, n;
- j = strlen(s);
- for (i = j / 2, n = 0; i < j; i++)
- n ^= 271 * (unsigned) s[i];
- i = n ^ (j * 271);
- return i % size;
- }
- unsigned int
- hash_string(const void *data, unsigned int size)
- {
- const char *s = data;
- unsigned int n = 0;
- unsigned int j = 0;
- unsigned int i = 0;
- while (*s) {
- j++;
- n ^= 271 * (unsigned) *s++;
- }
- i = n ^ (j * 271);
- return i % size;
- }
- /* the following 4 functions were adapted from
- * usr/src/lib/libc/db/hash_func.c, 4.4 BSD lite */
- /*
- * HASH FUNCTIONS
- *
- * Assume that we've already split the bucket to which this key hashes,
- * calculate that bucket, and check that in fact we did already split it.
- *
- * This came from ejb's hsearch.
- */
- #define PRIME1 37
- #define PRIME2 1048583
- /* Hash function from Chris Torek. */
- unsigned int
- hash4(const void *data, unsigned int size)
- {
- const char *key = data;
- size_t loop;
- unsigned int h;
- size_t len;
- #define HASH4a h = (h << 5) - h + *key++;
- #define HASH4b h = (h << 5) + h + *key++;
- #define HASH4 HASH4b
- h = 0;
- len = strlen(key);
- loop = (len + 8 - 1) >> 3;
- switch (len & (8 - 1)) {
- case 0:
- do {
- HASH4;
- /* FALLTHROUGH */
- case 7:
- HASH4;
- /* FALLTHROUGH */
- case 6:
- HASH4;
- /* FALLTHROUGH */
- case 5:
- HASH4;
- /* FALLTHROUGH */
- case 4:
- HASH4;
- /* FALLTHROUGH */
- case 3:
- HASH4;
- /* FALLTHROUGH */
- case 2:
- HASH4;
- /* FALLTHROUGH */
- case 1:
- HASH4;
- } while (--loop);
- }
- return h % size;
- }
- /*
- * hash_create - creates a new hash table, uses the cmp_func
- * to compare keys. Returns the identification for the hash table;
- * otherwise returns a negative number on error.
- */
- hash_table *
- hash_create(HASHCMP * cmp_func, int hash_sz, HASHHASH * hash_func)
- {
- hash_table *hid = calloc(1, sizeof(hash_table));
- if (!hash_sz)
- hid->size = (unsigned int) DEFAULT_HASH_SIZE;
- else
- hid->size = (unsigned int) hash_sz;
- /* allocate and null the buckets */
- hid->buckets = calloc(hid->size, sizeof(hash_link *));
- hid->cmp = cmp_func;
- hid->hash = hash_func;
- hid->current_ptr = NULL;
- hid->current_slot = 0;
- return hid;
- }
- /*
- * hash_insert - inserts the given item 'item' under the given key 'k'
- * into the hash table 'hid'. Returns non-zero on error; otherwise,
- * returns 0 and inserts the item.
- *
- * It does not copy any data into the hash table, only pointers.
- */
- void
- hash_insert(hash_table * hid, const char *k, void *item)
- {
- int i;
- hash_link *new;
- assert(k != NULL);
- /* Add to the given hash table 'hid' */
- new = calloc(1, sizeof(hash_link));
- if (!new) {
- fprintf(stderr,"calloc failed!n");
- print_stats();
- exit(1);
- }
- new->item = item;
- new->key = (char *) k;
- i = hid->hash(k, hid->size);
- new->next = hid->buckets[i];
- hid->buckets[i] = new;
- }
- /*
- * hash_join - joins a hash_link under its key lnk->key
- * into the hash table 'hid'.
- *
- * It does not copy any data into the hash table, only links pointers.
- */
- void
- hash_join(hash_table * hid, hash_link * lnk)
- {
- int i;
- i = hid->hash(lnk->key, hid->size);
- lnk->next = hid->buckets[i];
- hid->buckets[i] = lnk;
- }
- /*
- * hash_lookup - locates the item under the key 'k' in the hash table
- * 'hid'. Returns a pointer to the hash bucket on success; otherwise
- * returns NULL.
- */
- hash_link *
- hash_lookup(hash_table * hid, const void *k)
- {
- hash_link *walker;
- int b;
- assert(k != NULL);
- b = hid->hash(k, hid->size);
- for (walker = hid->buckets[b]; walker != NULL; walker = walker->next) {
- if ((hid->cmp) (k, walker->key) == 0)
- return (walker);
- assert(walker != walker->next);
- }
- return NULL;
- }
- /*
- * hash_first - returns the first item in the hash table 'hid'.
- * Otherwise, returns NULL on error.
- */
- hash_link *
- hash_first(hash_table * hid)
- {
- int i;
- for (i = 0; i < hid->size; i++) {
- hid->current_slot = i;
- if (hid->buckets[i] != NULL)
- return (hid->current_ptr = hid->buckets[i]);
- }
- return NULL;
- }
- /*
- * hash_next - returns the next item in the hash table 'hid'.
- * Otherwise, returns NULL on error or end of list.
- *
- * MUST call hash_first() before hash_next().
- */
- hash_link *
- hash_next(hash_table * hid)
- {
- int i;
- if (hid->current_ptr != NULL) {
- hid->current_ptr = hid->current_ptr->next;
- if (hid->current_ptr != NULL)
- return (hid->current_ptr); /* next item */
- }
- /* find next bucket */
- for (i = hid->current_slot + 1; i < hid->size; i++) {
- hid->current_slot = i;
- if (hid->buckets[i] != NULL)
- return (hid->current_ptr = hid->buckets[i]);
- }
- return NULL; /* end of list */
- }
- int
- hash_delete(hash_table * hid, const char *key)
- {
- return hash_delete_link(hid, hash_lookup(hid, key));
- }
- /*
- * hash_delete_link - deletes the given hash_link node from the
- * hash table 'hid'. If FreeLink then free the given hash_link.
- *
- * On success, it returns 0 and deletes the link; otherwise,
- * returns non-zero on error.
- */
- int
- hash_unlink(hash_table * hid, hash_link * hl, int FreeLink)
- {
- hash_link *walker, *prev;
- int i;
- if (hl == NULL)
- return -1;
- i = hid->hash(hl->key, hid->size);
- for (prev = NULL, walker = hid->buckets[i];
- walker != NULL; prev = walker, walker = walker->next) {
- if (walker == hl) {
- if (prev == NULL) { /* it's the head */
- hid->buckets[i] = walker->next;
- } else {
- prev->next = walker->next; /* skip it */
- }
- /* fix walker state if needed */
- if (walker == hid->current_ptr)
- hid->current_ptr = walker->next;
- if (FreeLink) {
- if (walker) {
- free(walker);
- }
- }
- return 0;
- }
- }
- return 1;
- }
- /* take link off and free link node */
- int
- hash_delete_link(hash_table * hid, hash_link * hl)
- {
- return (hash_unlink(hid, hl, 1));
- }
- /* take link off only */
- int
- hash_remove_link(hash_table * hid, hash_link * hl)
- {
- return (hash_unlink(hid, hl, 0));
- }
- /*
- * hash_get_bucket - returns the head item of the bucket
- * in the hash table 'hid'. Otherwise, returns NULL on error.
- */
- hash_link *
- hash_get_bucket(hash_table * hid, unsigned int bucket)
- {
- if (bucket >= hid->size)
- return NULL;
- return (hid->buckets[bucket]);
- }
- void
- hashFreeMemory(hash_table * hid)
- {
- if (hid->buckets);
- free(hid->buckets);
- if (hid)
- free(hid);
- }
- #ifdef USE_HASH_DRIVER
- /*
- * hash-driver - Run with a big file as stdin to insert each line into the
- * hash table, then prints the whole hash table, then deletes a random item,
- * and prints the table again...
- */
- int
- main(void)
- {
- hash_table *hid;
- int i;
- LOCAL_ARRAY(char, buf, BUFSIZ);
- LOCAL_ARRAY(char, todelete, BUFSIZ);
- hash_link *walker = NULL;
- todelete[0] = ' ';
- printf("initn");
- printf("creating hash tablen");
- if ((hid = hash_create(strcmp, 229, hash_string)) < 0) {
- printf("hash_create error.n");
- exit(1);
- }
- printf("done creating hash table: %dn", hid);
- while (fgets(buf, BUFSIZ, stdin)) {
- buf[strlen(buf) - 1] = ' ';
- printf("Inserting '%s' for item %p to hash table: %dn",
- buf, buf, hid);
- hash_insert(hid, xstrdup(buf), (void *) 0x12345678);
- if (random() % 17 == 0)
- strcpy(todelete, buf);
- }
- printf("walking hash table...n");
- for (i = 0, walker = hash_first(hid); walker; walker = hash_next(hid)) {
- printf("item %5d: key: '%s' item: %pn", i++, walker->key,
- walker->item);
- }
- printf("done walking hash table...n");
- if (todelete[0]) {
- printf("deleting %s from %dn", todelete, hid);
- if (hash_delete(hid, todelete))
- printf("hash_delete errorn");
- }
- printf("walking hash table...n");
- for (i = 0, walker = hash_first(hid); walker; walker = hash_next(hid)) {
- printf("item %5d: key: '%s' item: %pn", i++, walker->key,
- walker->item);
- }
- printf("done walking hash table...n");
- printf("driver finished.n");
- exit(0);
- }
- #endif