PostGIS 3.7.0dev-r@@SVN_REVISION@@
Loading...
Searching...
No Matches
lookup3.c
Go to the documentation of this file.
1/*
2-------------------------------------------------------------------------------
3lookup3.c, by Bob Jenkins, May 2006, Public Domain.
4
5These are functions for producing 32-bit hashes for hash table lookup.
6hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
7are externally useful functions. Routines to test the hash are included
8if SELF_TEST is defined. You can use this free for any purpose. It's in
9the public domain. It has no warranty.
10
11You probably want to use hashlittle(). hashlittle() and hashbig()
12hash byte arrays. hashlittle() is is faster than hashbig() on
13little-endian machines. Intel and AMD are little-endian machines.
14On second thought, you probably want hashlittle2(), which is identical to
15hashlittle() except it returns two 32-bit hashes for the price of one.
16You could implement hashbig2() if you wanted but I haven't bothered here.
17
18If you want to find a hash of, say, exactly 7 integers, do
19 a = i1; b = i2; c = i3;
20 mix(a,b,c);
21 a += i4; b += i5; c += i6;
22 mix(a,b,c);
23 a += i7;
24 final(a,b,c);
25then use c as the hash value. If you have a variable length array of
264-byte integers to hash, use hashword(). If you have a byte array (like
27a character string), use hashlittle(). If you have several byte arrays, or
28a mix of things, see the comments above hashlittle().
29
30Why is this so big? I read 12 bytes at a time into 3 4-byte integers,
31then mix those integers. This is fast (you can do a lot more thorough
32mixing with 12*3 instructions on 3 integers than you can with 3 instructions
33on 1 byte), but shoehorning those bytes into integers efficiently is messy.
34-------------------------------------------------------------------------------
35*/
36#define SELF_TEST 1
37
38#include <stdio.h> /* defines printf for tests */
39#include <time.h> /* defines time_t for timings in the test */
40#include <stdint.h> /* defines uint32_t etc */
41#ifdef linux
42#include <sys/param.h> /* attempt to define endianness */
43# include <endian.h> /* attempt to define endianness */
44#endif
45
46/*
47 * My best guess at if you are big-endian or little-endian. This may
48 * need adjustment.
49 */
50#if defined (__WIN32__) // windows is always little-endian except for NT
51# define HASH_LITTLE_ENDIAN 1
52# define HASH_BIG_ENDIAN 0
53#elif (defined(WORDS_BIGENDIAN))
54# define HASH_LITTLE_ENDIAN 0
55# define HASH_BIG_ENDIAN 1
56#elif (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
57 __BYTE_ORDER == __LITTLE_ENDIAN) || \
58 (defined(i386) || defined(__i386__) || defined(__i486__) || \
59 defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
60# define HASH_LITTLE_ENDIAN 1
61# define HASH_BIG_ENDIAN 0
62#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
63 __BYTE_ORDER == __BIG_ENDIAN) || \
64 (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
65# define HASH_LITTLE_ENDIAN 0
66# define HASH_BIG_ENDIAN 1
67#else
68# define HASH_LITTLE_ENDIAN 0
69# define HASH_BIG_ENDIAN 0
70#endif
71
72#define hashsize(n) ((uint32_t)1<<(n))
73#define hashmask(n) (hashsize(n)-1)
74#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
75
76#if 0
77static uint32_t hashword(const uint32_t *k, size_t length, uint32_t initval);
78static void hashword2 (const uint32_t *k, size_t length, uint32_t *pc, uint32_t *pb);
79static uint32_t hashlittle( const void *key, size_t length, uint32_t initval);
80static uint32_t hashbig( const void *key, size_t length, uint32_t initval);
81#endif
82
83void hashlittle2(const void *key, size_t length, uint32_t *pc, uint32_t *pb);
84
85/*
86-------------------------------------------------------------------------------
87mix -- mix 3 32-bit values reversibly.
88
89This is reversible, so any information in (a,b,c) before mix() is
90still in (a,b,c) after mix().
91
92If four pairs of (a,b,c) inputs are run through mix(), or through
93mix() in reverse, there are at least 32 bits of the output that
94are sometimes the same for one pair and different for another pair.
95This was tested for:
96* pairs that differed by one bit, by two bits, in any combination
97 of top bits of (a,b,c), or in any combination of bottom bits of
98 (a,b,c).
99* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
100 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
101 is commonly produced by subtraction) look like a single 1-bit
102 difference.
103* the base values were pseudorandom, all zero but one bit set, or
104 all zero plus a counter that starts at zero.
105
106Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
107satisfy this are
108 4 6 8 16 19 4
109 9 15 3 18 27 15
110 14 9 3 7 17 3
111Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
112for "differ" defined as + with a one-bit base and a two-bit delta. I
113used http://burtleburtle.net/bob/hash/avalanche.html to choose
114the operations, constants, and arrangements of the variables.
115
116This does not achieve avalanche. There are input bits of (a,b,c)
117that fail to affect some output bits of (a,b,c), especially of a. The
118most thoroughly mixed value is c, but it doesn't really even achieve
119avalanche in c.
120
121This allows some parallelism. Read-after-writes are good at doubling
122the number of bits affected, so the goal of mixing pulls in the opposite
123direction as the goal of parallelism. I did what I could. Rotates
124seem to cost as much as shifts on every machine I could lay my hands
125on, and rotates are much kinder to the top and bottom bits, so I used
126rotates.
127-------------------------------------------------------------------------------
128*/
129#define mix(a,b,c) \
130{ \
131 a -= c; a ^= rot(c, 4); c += b; \
132 b -= a; b ^= rot(a, 6); a += c; \
133 c -= b; c ^= rot(b, 8); b += a; \
134 a -= c; a ^= rot(c,16); c += b; \
135 b -= a; b ^= rot(a,19); a += c; \
136 c -= b; c ^= rot(b, 4); b += a; \
137}
138
139/*
140-------------------------------------------------------------------------------
141final -- final mixing of 3 32-bit values (a,b,c) into c
142
143Pairs of (a,b,c) values differing in only a few bits will usually
144produce values of c that look totally different. This was tested for
145* pairs that differed by one bit, by two bits, in any combination
146 of top bits of (a,b,c), or in any combination of bottom bits of
147 (a,b,c).
148* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
149 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
150 is commonly produced by subtraction) look like a single 1-bit
151 difference.
152* the base values were pseudorandom, all zero but one bit set, or
153 all zero plus a counter that starts at zero.
154
155These constants passed:
156 14 11 25 16 4 14 24
157 12 14 25 16 4 14 24
158and these came close:
159 4 8 15 26 3 22 24
160 10 8 15 26 3 22 24
161 11 8 15 26 3 22 24
162-------------------------------------------------------------------------------
163*/
164#define final(a,b,c) \
165{ \
166 c ^= b; c -= rot(b,14); \
167 a ^= c; a -= rot(c,11); \
168 b ^= a; b -= rot(a,25); \
169 c ^= b; c -= rot(b,16); \
170 a ^= c; a -= rot(c,4); \
171 b ^= a; b -= rot(a,14); \
172 c ^= b; c -= rot(b,24); \
173}
174
175#if 0
176/*
177--------------------------------------------------------------------
178 This works on all machines. To be useful, it requires
179 -- that the key be an array of uint32_t's, and
180 -- that the length be the number of uint32_t's in the key
181
182 The function hashword() is identical to hashlittle() on little-endian
183 machines, and identical to hashbig() on big-endian machines,
184 except that the length has to be measured in uint32_ts rather than in
185 bytes. hashlittle() is more complicated than hashword() only because
186 hashlittle() has to dance around fitting the key bytes into registers.
187--------------------------------------------------------------------
188*/
189static uint32_t hashword(
190const uint32_t *k, /* the key, an array of uint32_t values */
191size_t length, /* the length of the key, in uint32_ts */
192uint32_t initval) /* the previous hash, or an arbitrary value */
193{
194 uint32_t a,b,c;
195
196 /* Set up the internal state */
197 a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;
198
199 /*------------------------------------------------- handle most of the key */
200 while (length > 3)
201 {
202 a += k[0];
203 b += k[1];
204 c += k[2];
205 mix(a,b,c);
206 length -= 3;
207 k += 3;
208 }
209
210 /*------------------------------------------- handle the last 3 uint32_t's */
211 switch(length) /* all the case statements fall through */
212 {
213 case 3 : c+=k[2];
214 case 2 : b+=k[1];
215 case 1 : a+=k[0];
216 final(a,b,c);
217 case 0: /* case 0: nothing left to add */
218 break;
219 }
220 /*------------------------------------------------------ report the result */
221 return c;
222}
223#endif
224
225#if 0
226/*
227--------------------------------------------------------------------
228hashword2() -- same as hashword(), but take two seeds and return two
22932-bit values. pc and pb must both be nonnull, and *pc and *pb must
230both be initialized with seeds. If you pass in (*pb)==0, the output
231(*pc) will be the same as the return value from hashword().
232--------------------------------------------------------------------
233*/
234static void hashword2 (
235const uint32_t *k, /* the key, an array of uint32_t values */
236size_t length, /* the length of the key, in uint32_ts */
237uint32_t *pc, /* IN: seed OUT: primary hash value */
238uint32_t *pb) /* IN: more seed OUT: secondary hash value */
239{
240 uint32_t a,b,c;
241
242 /* Set up the internal state */
243 a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc;
244 c += *pb;
245
246 /*------------------------------------------------- handle most of the key */
247 while (length > 3)
248 {
249 a += k[0];
250 b += k[1];
251 c += k[2];
252 mix(a,b,c);
253 length -= 3;
254 k += 3;
255 }
256
257 /*------------------------------------------- handle the last 3 uint32_t's */
258 switch(length) /* all the case statements fall through */
259 {
260 case 3 : c+=k[2];
261 case 2 : b+=k[1];
262 case 1 : a+=k[0];
263 final(a,b,c);
264 case 0: /* case 0: nothing left to add */
265 break;
266 }
267 /*------------------------------------------------------ report the result */
268 *pc=c; *pb=b;
269}
270#endif
271
272/*
273-------------------------------------------------------------------------------
274hashlittle() -- hash a variable-length key into a 32-bit value
275 k : the key (the unaligned variable-length array of bytes)
276 length : the length of the key, counting by bytes
277 initval : can be any 4-byte value
278Returns a 32-bit value. Every bit of the key affects every bit of
279the return value. Two keys differing by one or two bits will have
280totally different hash values.
281
282The best hash table sizes are powers of 2. There is no need to do
283mod a prime (mod is sooo slow!). If you need less than 32 bits,
284use a bitmask. For example, if you need only 10 bits, do
285 h = (h & hashmask(10));
286In which case, the hash table should have hashsize(10) elements.
287
288If you are hashing n strings (uint8_t **)k, do it like this:
289 for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
290
291By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
292code any way you wish, private, educational, or commercial. It's free.
293
294Use for hash table lookup, or anything where one collision in 2^^32 is
295acceptable. Do NOT use for cryptographic purposes.
296-------------------------------------------------------------------------------
297*/
298#if 0
299static uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
300{
301 uint32_t a,b,c; /* internal state */
302 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
303
304 /* Set up the internal state */
305 a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
306
307 u.ptr = key;
308 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
309 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
310 // const uint8_t *k8;
311
312 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
313 while (length > 12)
314 {
315 a += k[0];
316 b += k[1];
317 c += k[2];
318 mix(a,b,c);
319 length -= 12;
320 k += 3;
321 }
322
323 /*----------------------------- handle the last (probably partial) block */
324 /*
325 * "k[2]&0xffffff" actually reads beyond the end of the string, but
326 * then masks off the part it's not allowed to read. Because the
327 * string is aligned, the masked-off tail is in the same word as the
328 * rest of the string. Every machine with memory protection I've seen
329 * does it on word boundaries, so is OK with this. But VALGRIND will
330 * still catch it and complain. The masking trick does make the hash
331 * noticeably faster for short strings (like English words).
332 */
333#ifndef VALGRIND
334
335 switch(length)
336 {
337 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
338 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
339 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
340 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
341 case 8 : b+=k[1]; a+=k[0]; break;
342 case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
343 case 6 : b+=k[1]&0xffff; a+=k[0]; break;
344 case 5 : b+=k[1]&0xff; a+=k[0]; break;
345 case 4 : a+=k[0]; break;
346 case 3 : a+=k[0]&0xffffff; break;
347 case 2 : a+=k[0]&0xffff; break;
348 case 1 : a+=k[0]&0xff; break;
349 case 0 : return c; /* zero length strings require no mixing */
350 }
351
352#else /* make valgrind happy */
353
354 k8 = (const uint8_t *)k;
355 switch(length)
356 {
357 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
358 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
359 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
360 case 9 : c+=k8[8]; /* fall through */
361 case 8 : b+=k[1]; a+=k[0]; break;
362 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
363 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
364 case 5 : b+=k8[4]; /* fall through */
365 case 4 : a+=k[0]; break;
366 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
367 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
368 case 1 : a+=k8[0]; break;
369 case 0 : return c;
370 }
371
372#endif /* !valgrind */
373
374 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
375 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
376 const uint8_t *k8;
377
378 /*--------------- all but last block: aligned reads and different mixing */
379 while (length > 12)
380 {
381 a += k[0] + (((uint32_t)k[1])<<16);
382 b += k[2] + (((uint32_t)k[3])<<16);
383 c += k[4] + (((uint32_t)k[5])<<16);
384 mix(a,b,c);
385 length -= 12;
386 k += 6;
387 }
388
389 /*----------------------------- handle the last (probably partial) block */
390 k8 = (const uint8_t *)k;
391 switch(length)
392 {
393 case 12: c+=k[4]+(((uint32_t)k[5])<<16);
394 b+=k[2]+(((uint32_t)k[3])<<16);
395 a+=k[0]+(((uint32_t)k[1])<<16);
396 break;
397 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
398 case 10: c+=k[4];
399 b+=k[2]+(((uint32_t)k[3])<<16);
400 a+=k[0]+(((uint32_t)k[1])<<16);
401 break;
402 case 9 : c+=k8[8]; /* fall through */
403 case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
404 a+=k[0]+(((uint32_t)k[1])<<16);
405 break;
406 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
407 case 6 : b+=k[2];
408 a+=k[0]+(((uint32_t)k[1])<<16);
409 break;
410 case 5 : b+=k8[4]; /* fall through */
411 case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
412 break;
413 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
414 case 2 : a+=k[0];
415 break;
416 case 1 : a+=k8[0];
417 break;
418 case 0 : return c; /* zero length requires no mixing */
419 }
420
421 } else { /* need to read the key one byte at a time */
422 const uint8_t *k = (const uint8_t *)key;
423
424 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
425 while (length > 12)
426 {
427 a += k[0];
428 a += ((uint32_t)k[1])<<8;
429 a += ((uint32_t)k[2])<<16;
430 a += ((uint32_t)k[3])<<24;
431 b += k[4];
432 b += ((uint32_t)k[5])<<8;
433 b += ((uint32_t)k[6])<<16;
434 b += ((uint32_t)k[7])<<24;
435 c += k[8];
436 c += ((uint32_t)k[9])<<8;
437 c += ((uint32_t)k[10])<<16;
438 c += ((uint32_t)k[11])<<24;
439 mix(a,b,c);
440 length -= 12;
441 k += 12;
442 }
443
444 /*-------------------------------- last block: affect all 32 bits of (c) */
445 switch(length) /* all the case statements fall through */
446 {
447 case 12: c+=((uint32_t)k[11])<<24;
448 case 11: c+=((uint32_t)k[10])<<16;
449 case 10: c+=((uint32_t)k[9])<<8;
450 case 9 : c+=k[8];
451 case 8 : b+=((uint32_t)k[7])<<24;
452 case 7 : b+=((uint32_t)k[6])<<16;
453 case 6 : b+=((uint32_t)k[5])<<8;
454 case 5 : b+=k[4];
455 case 4 : a+=((uint32_t)k[3])<<24;
456 case 3 : a+=((uint32_t)k[2])<<16;
457 case 2 : a+=((uint32_t)k[1])<<8;
458 case 1 : a+=k[0];
459 break;
460 case 0 : return c;
461 }
462 }
463
464 final(a,b,c);
465 return c;
466}
467#endif
468
469/*
470 * hashlittle2: return 2 32-bit hash values
471 *
472 * This is identical to hashlittle(), except it returns two 32-bit hash
473 * values instead of just one. This is good enough for hash table
474 * lookup with 2^^64 buckets, or if you want a second hash if you're not
475 * happy with the first, or if you want a probably-unique 64-bit ID for
476 * the key. *pc is better mixed than *pb, so use *pc first. If you want
477 * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
478 */
480 const void *key, /* the key to hash */
481 size_t length, /* length of the key */
482 uint32_t *pc, /* IN: primary initval, OUT: primary hash */
483 uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */
484{
485 uint32_t a,b,c; /* internal state */
486 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
487
488 /* Set up the internal state */
489 a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc;
490 c += *pb;
491
492 u.ptr = key;
493 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
494 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
495 // const uint8_t *k8;
496
497 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
498 while (length > 12)
499 {
500 a += k[0];
501 b += k[1];
502 c += k[2];
503 mix(a,b,c);
504 length -= 12;
505 k += 3;
506 }
507
508 /*----------------------------- handle the last (probably partial) block */
509 /*
510 * "k[2]&0xffffff" actually reads beyond the end of the string, but
511 * then masks off the part it's not allowed to read. Because the
512 * string is aligned, the masked-off tail is in the same word as the
513 * rest of the string. Every machine with memory protection I've seen
514 * does it on word boundaries, so is OK with this. But VALGRIND will
515 * still catch it and complain. The masking trick does make the hash
516 * noticeably faster for short strings (like English words).
517 */
518#ifndef VALGRIND
519
520 switch(length)
521 {
522 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
523 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
524 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
525 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
526 case 8 : b+=k[1]; a+=k[0]; break;
527 case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
528 case 6 : b+=k[1]&0xffff; a+=k[0]; break;
529 case 5 : b+=k[1]&0xff; a+=k[0]; break;
530 case 4 : a+=k[0]; break;
531 case 3 : a+=k[0]&0xffffff; break;
532 case 2 : a+=k[0]&0xffff; break;
533 case 1 : a+=k[0]&0xff; break;
534 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
535 }
536
537#else /* make valgrind happy */
538
539 k8 = (const uint8_t *)k;
540 switch(length)
541 {
542 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
543 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
544 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
545 case 9 : c+=k8[8]; /* fall through */
546 case 8 : b+=k[1]; a+=k[0]; break;
547 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
548 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
549 case 5 : b+=k8[4]; /* fall through */
550 case 4 : a+=k[0]; break;
551 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
552 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
553 case 1 : a+=k8[0]; break;
554 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
555 }
556
557#endif /* !valgrind */
558
559 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
560 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
561 const uint8_t *k8;
562
563 /*--------------- all but last block: aligned reads and different mixing */
564 while (length > 12)
565 {
566 a += k[0] + (((uint32_t)k[1])<<16);
567 b += k[2] + (((uint32_t)k[3])<<16);
568 c += k[4] + (((uint32_t)k[5])<<16);
569 mix(a,b,c);
570 length -= 12;
571 k += 6;
572 }
573
574 /*----------------------------- handle the last (probably partial) block */
575 k8 = (const uint8_t *)k;
576 switch(length)
577 {
578 case 12: c+=k[4]+(((uint32_t)k[5])<<16);
579 b+=k[2]+(((uint32_t)k[3])<<16);
580 a+=k[0]+(((uint32_t)k[1])<<16);
581 break;
582 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
583 case 10: c+=k[4];
584 b+=k[2]+(((uint32_t)k[3])<<16);
585 a+=k[0]+(((uint32_t)k[1])<<16);
586 break;
587 case 9 : c+=k8[8]; /* fall through */
588 case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
589 a+=k[0]+(((uint32_t)k[1])<<16);
590 break;
591 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
592 case 6 : b+=k[2];
593 a+=k[0]+(((uint32_t)k[1])<<16);
594 break;
595 case 5 : b+=k8[4]; /* fall through */
596 case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
597 break;
598 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
599 case 2 : a+=k[0];
600 break;
601 case 1 : a+=k8[0];
602 break;
603 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
604 }
605
606 } else { /* need to read the key one byte at a time */
607 const uint8_t *k = (const uint8_t *)key;
608
609 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
610 while (length > 12)
611 {
612 a += k[0];
613 a += ((uint32_t)k[1])<<8;
614 a += ((uint32_t)k[2])<<16;
615 a += ((uint32_t)k[3])<<24;
616 b += k[4];
617 b += ((uint32_t)k[5])<<8;
618 b += ((uint32_t)k[6])<<16;
619 b += ((uint32_t)k[7])<<24;
620 c += k[8];
621 c += ((uint32_t)k[9])<<8;
622 c += ((uint32_t)k[10])<<16;
623 c += ((uint32_t)k[11])<<24;
624 mix(a,b,c);
625 length -= 12;
626 k += 12;
627 }
628
629 /*-------------------------------- last block: affect all 32 bits of (c) */
630 switch(length) /* all the case statements fall through */
631 {
632 case 12: c+=((uint32_t)k[11])<<24; /* fall through */
633 case 11: c+=((uint32_t)k[10])<<16; /* fall through */
634 case 10: c+=((uint32_t)k[9])<<8; /* fall through */
635 case 9 : c+=k[8]; /* fall through */
636 case 8 : b+=((uint32_t)k[7])<<24; /* fall through */
637 case 7 : b+=((uint32_t)k[6])<<16; /* fall through */
638 case 6 : b+=((uint32_t)k[5])<<8; /* fall through */
639 case 5 : b+=k[4]; /* fall through */
640 case 4 : a+=((uint32_t)k[3])<<24; /* fall through */
641 case 3 : a+=((uint32_t)k[2])<<16; /* fall through */
642 case 2 : a+=((uint32_t)k[1])<<8; /* fall through */
643 case 1 : a+=k[0];
644 break;
645 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
646 }
647 }
648
649 final(a,b,c);
650 *pc=c; *pb=b;
651}
652
653
654#if 0
655/*
656 * hashbig():
657 * This is the same as hashword() on big-endian machines. It is different
658 * from hashlittle() on all machines. hashbig() takes advantage of
659 * big-endian byte ordering.
660 */
661static uint32_t hashbig( const void *key, size_t length, uint32_t initval)
662{
663 uint32_t a,b,c;
664 union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
665
666 /* Set up the internal state */
667 a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
668
669 u.ptr = key;
670 if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
671 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
672 // const uint8_t *k8;
673
674 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
675 while (length > 12)
676 {
677 a += k[0];
678 b += k[1];
679 c += k[2];
680 mix(a,b,c);
681 length -= 12;
682 k += 3;
683 }
684
685 /*----------------------------- handle the last (probably partial) block */
686 /*
687 * "k[2]<<8" actually reads beyond the end of the string, but
688 * then shifts out the part it's not allowed to read. Because the
689 * string is aligned, the illegal read is in the same word as the
690 * rest of the string. Every machine with memory protection I've seen
691 * does it on word boundaries, so is OK with this. But VALGRIND will
692 * still catch it and complain. The masking trick does make the hash
693 * noticeably faster for short strings (like English words).
694 */
695#ifndef VALGRIND
696
697 switch(length)
698 {
699 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
700 case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
701 case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
702 case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
703 case 8 : b+=k[1]; a+=k[0]; break;
704 case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
705 case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
706 case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
707 case 4 : a+=k[0]; break;
708 case 3 : a+=k[0]&0xffffff00; break;
709 case 2 : a+=k[0]&0xffff0000; break;
710 case 1 : a+=k[0]&0xff000000; break;
711 case 0 : return c; /* zero length strings require no mixing */
712 }
713
714#else /* make valgrind happy */
715
716 k8 = (const uint8_t *)k;
717 switch(length) /* all the case statements fall through */
718 {
719 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
720 case 11: c+=((uint32_t)k8[10])<<8; /* fall through */
721 case 10: c+=((uint32_t)k8[9])<<16; /* fall through */
722 case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */
723 case 8 : b+=k[1]; a+=k[0]; break;
724 case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */
725 case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */
726 case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */
727 case 4 : a+=k[0]; break;
728 case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */
729 case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */
730 case 1 : a+=((uint32_t)k8[0])<<24; break;
731 case 0 : return c;
732 }
733
734#endif /* !VALGRIND */
735
736 } else { /* need to read the key one byte at a time */
737 const uint8_t *k = (const uint8_t *)key;
738
739 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
740 while (length > 12)
741 {
742 a += ((uint32_t)k[0])<<24;
743 a += ((uint32_t)k[1])<<16;
744 a += ((uint32_t)k[2])<<8;
745 a += ((uint32_t)k[3]);
746 b += ((uint32_t)k[4])<<24;
747 b += ((uint32_t)k[5])<<16;
748 b += ((uint32_t)k[6])<<8;
749 b += ((uint32_t)k[7]);
750 c += ((uint32_t)k[8])<<24;
751 c += ((uint32_t)k[9])<<16;
752 c += ((uint32_t)k[10])<<8;
753 c += ((uint32_t)k[11]);
754 mix(a,b,c);
755 length -= 12;
756 k += 12;
757 }
758
759 /*-------------------------------- last block: affect all 32 bits of (c) */
760 switch(length) /* all the case statements fall through */
761 {
762 case 12: c+=k[11];
763 case 11: c+=((uint32_t)k[10])<<8;
764 case 10: c+=((uint32_t)k[9])<<16;
765 case 9 : c+=((uint32_t)k[8])<<24;
766 case 8 : b+=k[7];
767 case 7 : b+=((uint32_t)k[6])<<8;
768 case 6 : b+=((uint32_t)k[5])<<16;
769 case 5 : b+=((uint32_t)k[4])<<24;
770 case 4 : a+=k[3];
771 case 3 : a+=((uint32_t)k[2])<<8;
772 case 2 : a+=((uint32_t)k[1])<<16;
773 case 1 : a+=((uint32_t)k[0])<<24;
774 break;
775 case 0 : return c;
776 }
777 }
778
779 final(a,b,c);
780 return c;
781}
782#endif
783
#define HASH_BIG_ENDIAN
Definition lookup3.c:69
void hashlittle2(const void *key, size_t length, uint32_t *pc, uint32_t *pb)
Definition lookup3.c:479
#define HASH_LITTLE_ENDIAN
Definition lookup3.c:68
#define mix(a, b, c)
Definition lookup3.c:129