链式hashmap实现笔记

bachmozart

浏览: 110012 次
性别:
来自: 北京

最近访客更多访客>>

nat2010

airs

daleion

bneliao

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

Linux C

C C++C#算法 memcached

Memcached里的Hashmap实现是一个简单的链式存储方式，简单记录一下
链式hashmap的结构比较简单，基本上可以理解为一个一维线性空间，每个空间称作一个bucket,每个bucket又是一个链表结构，这个链表结构上的所有元素的key的hash值是相同的，所以被分配在一个bucket里。

hashmap存储时首先根据key计算hash得到对应bucket，然后加入该bucket中的链表首部
hashmap查询时也是通过key计算hash得到对应bucket，然后遍历链表查找对应元素

所以hashmap的关键是hash的算法是否可以尽可能的减小不同key值计算出相同hash的概率（减小冲突），如果所有的key的hash均相同，那么hashmap也就变成了List结构了

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef  unsigned long  int  ub4;   /* unsigned 4-byte quantities */
typedef  unsigned       char ub1;   /* unsigned 1-byte quantities */

/* hard-code one million buckets, for now (2**20 == 4MB hash) */
#define HASHPOWER  20 // 1<<20 = buckets 的总数

#define hashsize(n) ((ub4)1<<(n))   
#define hashmask(n) (hashsize(n)-1) // 防止hash值超过buckets总长度

#define mix(a,b,c) \
{ \
  a -= b; a -= c; a ^= (c>>13); \
  b -= c; b -= a; b ^= (a<<8); \
  c -= a; c -= b; c ^= (b>>13); \
  a -= b; a -= c; a ^= (c>>12);  \
  b -= c; b -= a; b ^= (a<<16); \
  c -= a; c -= b; c ^= (b>>5); \
  a -= b; a -= c; a ^= (c>>3);  \
  b -= c; b -= a; b ^= (a<<10); \
  c -= a; c -= b; c ^= (b>>15); \
}

//测试用的一个item
typedef struct _item{
	struct _item *next;
	char *key;
	char *value;
}item;

//最重要的算法部分
ub4 hash( k, length, initval)
     register ub1 *k;        /* the key */
     register ub4  length;   /* the length of the key */
     register ub4  initval;  /* the previous hash, or an arbitrary value */
{
    register ub4 a,b,c,len;

    /* Set up the internal state */
    len = length;
    a = b = 0x9e3779b9;  /* the golden ratio; an arbitrary value */
    c = initval;         /* the previous hash value */ 

    /*------------------------------------ handle most of the key*/
    while (len >= 12)
        {
            a += (k[0] +((ub4)k[1]<<8) +((ub4)k[2]<<16) +((ub4)k[3]<<24));
            b += (k[4] +((ub4)k[5]<<8) +((ub4)k[6]<<16) +((ub4)k[7]<<24));
            c += (k[8] +((ub4)k[9]<<8) +((ub4)k[10]<<16)+((ub4)k[11]<<24));
            mix(a,b,c);
            k += 12; len -= 12;
        }

    /*------------------------------------- handle the last 11 bytes*/
    c += length;
    switch(len)              /* all the case statements fall through*/
        {
        case 11: c+=((ub4)k[10]<<24);
        case 10: c+=((ub4)k[9]<<16);
        case 9 : c+=((ub4)k[8]<<8);
            /* the first byte of c is reserved for the length */
        case 8 : b+=((ub4)k[7]<<24);
        case 7 : b+=((ub4)k[6]<<16);
        case 6 : b+=((ub4)k[5]<<8);
        case 5 : b+=k[4];
        case 4 : a+=((ub4)k[3]<<24);
        case 3 : a+=((ub4)k[2]<<16);
        case 2 : a+=((ub4)k[1]<<8);
        case 1 : a+=k[0];
            /* case 0: nothing left to add */
        }
    mix(a,b,c);
    /*-------------------------------------------- report the result*/
    return c;
}

static item **hashtable=0;

//初始化hash buckets空间
void assoc_init(void) {
    unsigned int hash_size = hashsize(HASHPOWER) * sizeof(void*);
    hashtable = malloc(hash_size);
    if (! hashtable) {
        fprintf(stderr, "Failed to init hashtable.\n");
        exit(1);
    }
    memset(hashtable, 0, hash_size);
}

item *assoc_find(char *key){
    ub4 hv = hash(key, strlen(key), 0) & hashmask(HASHPOWER);
    item *it = hashtable[hv];
    while(it){
    	if(strcmp(it->key,key)==0)
		return it;
	it=it->next;
    }
    return 0;
}
int assoc_insert(char *key,item *it){
    ub4 hv = hash(key, strlen(key), 0) & hashmask(HASHPOWER);
    it->next = hashtable[hv];
    hashtable[hv] = it;
    return 1;
}
int main(int argc,char **argv){
  assoc_init();
  
  item *it=malloc(sizeof(item));
  it->key=malloc(200);
  it->value=malloc(200);
  strcpy(it->key,"k1");
  strcpy(it->value,"value1");
  assoc_insert("k1",it);

  item *head=assoc_find("k1");

  printf("%s->%s",head->key,head->value);


}

分享到：

Linux 时间函数使用备忘 | .....

2009-02-23 22:10
浏览 2207
评论(2)
论坛回复 / 浏览 (2 / 2982)
分类:编程语言
查看更多

2 楼 mathgl 2009-02-25

bucket那种形式貌似好实现一些

1 楼 kimmking 2009-02-24

java里的hashmap是用的链式
.net里的hashtable(等价于java的hashmap)用的是二次hash

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论