PostGIS  3.0.6dev-r@@SVN_REVISION@@
lookup3.c
Go to the documentation of this file.
1 /*
2 -------------------------------------------------------------------------------
3 lookup3.c, by Bob Jenkins, May 2006, Public Domain.
4 
5 These are functions for producing 32-bit hashes for hash table lookup.
6 hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
7 are externally useful functions. Routines to test the hash are included
8 if SELF_TEST is defined. You can use this free for any purpose. It's in
9 the public domain. It has no warranty.
10 
11 You probably want to use hashlittle(). hashlittle() and hashbig()
12 hash byte arrays. hashlittle() is is faster than hashbig() on
13 little-endian machines. Intel and AMD are little-endian machines.
14 On second thought, you probably want hashlittle2(), which is identical to
15 hashlittle() except it returns two 32-bit hashes for the price of one.
16 You could implement hashbig2() if you wanted but I haven't bothered here.
17 
18 If you want to find a hash of, say, exactly 7 integers, do
19  a = i1; b = i2; c = i3;
20  mix(a,b,c);
21  a += i4; b += i5; c += i6;
22  mix(a,b,c);
23  a += i7;
24  final(a,b,c);
25 then use c as the hash value. If you have a variable length array of
26 4-byte integers to hash, use hashword(). If you have a byte array (like
27 a character string), use hashlittle(). If you have several byte arrays, or
28 a mix of things, see the comments above hashlittle().
29 
30 Why is this so big? I read 12 bytes at a time into 3 4-byte integers,
31 then mix those integers. This is fast (you can do a lot more thorough
32 mixing with 12*3 instructions on 3 integers than you can with 3 instructions
33 on 1 byte), but shoehorning those bytes into integers efficiently is messy.
34 -------------------------------------------------------------------------------
35 */
36 #define SELF_TEST 1
37 
38 #include <stdio.h> /* defines printf for tests */
39 #include <time.h> /* defines time_t for timings in the test */
40 #include <stdint.h> /* defines uint32_t etc */
41 #include <sys/param.h> /* attempt to define endianness */
42 #ifdef linux
43 # include <endian.h> /* attempt to define endianness */
44 #endif
45 
46 /*
47  * My best guess at if you are big-endian or little-endian. This may
48  * need adjustment.
49  */
50 #if (defined(WORDS_BIGENDIAN))
51 # define HASH_LITTLE_ENDIAN 0
52 # define HASH_BIG_ENDIAN 1
53 #elif (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
54  __BYTE_ORDER == __LITTLE_ENDIAN) || \
55  (defined(i386) || defined(__i386__) || defined(__i486__) || \
56  defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
57 # define HASH_LITTLE_ENDIAN 1
58 # define HASH_BIG_ENDIAN 0
59 #elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
60  __BYTE_ORDER == __BIG_ENDIAN) || \
61  (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
62 # define HASH_LITTLE_ENDIAN 0
63 # define HASH_BIG_ENDIAN 1
64 #else
65 # define HASH_LITTLE_ENDIAN 0
66 # define HASH_BIG_ENDIAN 0
67 #endif
68 
69 #define hashsize(n) ((uint32_t)1<<(n))
70 #define hashmask(n) (hashsize(n)-1)
71 #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
72 
73 #if 0
74 static uint32_t hashword(const uint32_t *k, size_t length, uint32_t initval);
75 static void hashword2 (const uint32_t *k, size_t length, uint32_t *pc, uint32_t *pb);
76 static uint32_t hashlittle( const void *key, size_t length, uint32_t initval);
77 static uint32_t hashbig( const void *key, size_t length, uint32_t initval);
78 #endif
79 
80 void hashlittle2(const void *key, size_t length, uint32_t *pc, uint32_t *pb);
81 
82 /*
83 -------------------------------------------------------------------------------
84 mix -- mix 3 32-bit values reversibly.
85 
86 This is reversible, so any information in (a,b,c) before mix() is
87 still in (a,b,c) after mix().
88 
89 If four pairs of (a,b,c) inputs are run through mix(), or through
90 mix() in reverse, there are at least 32 bits of the output that
91 are sometimes the same for one pair and different for another pair.
92 This was tested for:
93 * pairs that differed by one bit, by two bits, in any combination
94  of top bits of (a,b,c), or in any combination of bottom bits of
95  (a,b,c).
96 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
97  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
98  is commonly produced by subtraction) look like a single 1-bit
99  difference.
100 * the base values were pseudorandom, all zero but one bit set, or
101  all zero plus a counter that starts at zero.
102 
103 Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
104 satisfy this are
105  4 6 8 16 19 4
106  9 15 3 18 27 15
107  14 9 3 7 17 3
108 Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
109 for "differ" defined as + with a one-bit base and a two-bit delta. I
110 used http://burtleburtle.net/bob/hash/avalanche.html to choose
111 the operations, constants, and arrangements of the variables.
112 
113 This does not achieve avalanche. There are input bits of (a,b,c)
114 that fail to affect some output bits of (a,b,c), especially of a. The
115 most thoroughly mixed value is c, but it doesn't really even achieve
116 avalanche in c.
117 
118 This allows some parallelism. Read-after-writes are good at doubling
119 the number of bits affected, so the goal of mixing pulls in the opposite
120 direction as the goal of parallelism. I did what I could. Rotates
121 seem to cost as much as shifts on every machine I could lay my hands
122 on, and rotates are much kinder to the top and bottom bits, so I used
123 rotates.
124 -------------------------------------------------------------------------------
125 */
126 #define mix(a,b,c) \
127 { \
128  a -= c; a ^= rot(c, 4); c += b; \
129  b -= a; b ^= rot(a, 6); a += c; \
130  c -= b; c ^= rot(b, 8); b += a; \
131  a -= c; a ^= rot(c,16); c += b; \
132  b -= a; b ^= rot(a,19); a += c; \
133  c -= b; c ^= rot(b, 4); b += a; \
134 }
135 
136 /*
137 -------------------------------------------------------------------------------
138 final -- final mixing of 3 32-bit values (a,b,c) into c
139 
140 Pairs of (a,b,c) values differing in only a few bits will usually
141 produce values of c that look totally different. This was tested for
142 * pairs that differed by one bit, by two bits, in any combination
143  of top bits of (a,b,c), or in any combination of bottom bits of
144  (a,b,c).
145 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
146  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
147  is commonly produced by subtraction) look like a single 1-bit
148  difference.
149 * the base values were pseudorandom, all zero but one bit set, or
150  all zero plus a counter that starts at zero.
151 
152 These constants passed:
153  14 11 25 16 4 14 24
154  12 14 25 16 4 14 24
155 and these came close:
156  4 8 15 26 3 22 24
157  10 8 15 26 3 22 24
158  11 8 15 26 3 22 24
159 -------------------------------------------------------------------------------
160 */
161 #define final(a,b,c) \
162 { \
163  c ^= b; c -= rot(b,14); \
164  a ^= c; a -= rot(c,11); \
165  b ^= a; b -= rot(a,25); \
166  c ^= b; c -= rot(b,16); \
167  a ^= c; a -= rot(c,4); \
168  b ^= a; b -= rot(a,14); \
169  c ^= b; c -= rot(b,24); \
170 }
171 
172 #if 0
173 /*
174 --------------------------------------------------------------------
175  This works on all machines. To be useful, it requires
176  -- that the key be an array of uint32_t's, and
177  -- that the length be the number of uint32_t's in the key
178 
179  The function hashword() is identical to hashlittle() on little-endian
180  machines, and identical to hashbig() on big-endian machines,
181  except that the length has to be measured in uint32_ts rather than in
182  bytes. hashlittle() is more complicated than hashword() only because
183  hashlittle() has to dance around fitting the key bytes into registers.
184 --------------------------------------------------------------------
185 */
186 static uint32_t hashword(
187 const uint32_t *k, /* the key, an array of uint32_t values */
188 size_t length, /* the length of the key, in uint32_ts */
189 uint32_t initval) /* the previous hash, or an arbitrary value */
190 {
191  uint32_t a,b,c;
192 
193  /* Set up the internal state */
194  a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;
195 
196  /*------------------------------------------------- handle most of the key */
197  while (length > 3)
198  {
199  a += k[0];
200  b += k[1];
201  c += k[2];
202  mix(a,b,c);
203  length -= 3;
204  k += 3;
205  }
206 
207  /*------------------------------------------- handle the last 3 uint32_t's */
208  switch(length) /* all the case statements fall through */
209  {
210  case 3 : c+=k[2];
211  case 2 : b+=k[1];
212  case 1 : a+=k[0];
213  final(a,b,c);
214  case 0: /* case 0: nothing left to add */
215  break;
216  }
217  /*------------------------------------------------------ report the result */
218  return c;
219 }
220 #endif
221 
222 #if 0
223 /*
224 --------------------------------------------------------------------
225 hashword2() -- same as hashword(), but take two seeds and return two
226 32-bit values. pc and pb must both be nonnull, and *pc and *pb must
227 both be initialized with seeds. If you pass in (*pb)==0, the output
228 (*pc) will be the same as the return value from hashword().
229 --------------------------------------------------------------------
230 */
231 static void hashword2 (
232 const uint32_t *k, /* the key, an array of uint32_t values */
233 size_t length, /* the length of the key, in uint32_ts */
234 uint32_t *pc, /* IN: seed OUT: primary hash value */
235 uint32_t *pb) /* IN: more seed OUT: secondary hash value */
236 {
237  uint32_t a,b,c;
238 
239  /* Set up the internal state */
240  a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc;
241  c += *pb;
242 
243  /*------------------------------------------------- handle most of the key */
244  while (length > 3)
245  {
246  a += k[0];
247  b += k[1];
248  c += k[2];
249  mix(a,b,c);
250  length -= 3;
251  k += 3;
252  }
253 
254  /*------------------------------------------- handle the last 3 uint32_t's */
255  switch(length) /* all the case statements fall through */
256  {
257  case 3 : c+=k[2];
258  case 2 : b+=k[1];
259  case 1 : a+=k[0];
260  final(a,b,c);
261  case 0: /* case 0: nothing left to add */
262  break;
263  }
264  /*------------------------------------------------------ report the result */
265  *pc=c; *pb=b;
266 }
267 #endif
268 
269 /*
270 -------------------------------------------------------------------------------
271 hashlittle() -- hash a variable-length key into a 32-bit value
272  k : the key (the unaligned variable-length array of bytes)
273  length : the length of the key, counting by bytes
274  initval : can be any 4-byte value
275 Returns a 32-bit value. Every bit of the key affects every bit of
276 the return value. Two keys differing by one or two bits will have
277 totally different hash values.
278 
279 The best hash table sizes are powers of 2. There is no need to do
280 mod a prime (mod is sooo slow!). If you need less than 32 bits,
281 use a bitmask. For example, if you need only 10 bits, do
282  h = (h & hashmask(10));
283 In which case, the hash table should have hashsize(10) elements.
284 
285 If you are hashing n strings (uint8_t **)k, do it like this:
286  for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
287 
288 By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
289 code any way you wish, private, educational, or commercial. It's free.
290 
291 Use for hash table lookup, or anything where one collision in 2^^32 is
292 acceptable. Do NOT use for cryptographic purposes.
293 -------------------------------------------------------------------------------
294 */
295 #if 0
296 static uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
297 {
298  uint32_t a,b,c; /* internal state */
299  union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
300 
301  /* Set up the internal state */
302  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
303 
304  u.ptr = key;
305  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
306  const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
307  // const uint8_t *k8;
308 
309  /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
310  while (length > 12)
311  {
312  a += k[0];
313  b += k[1];
314  c += k[2];
315  mix(a,b,c);
316  length -= 12;
317  k += 3;
318  }
319 
320  /*----------------------------- handle the last (probably partial) block */
321  /*
322  * "k[2]&0xffffff" actually reads beyond the end of the string, but
323  * then masks off the part it's not allowed to read. Because the
324  * string is aligned, the masked-off tail is in the same word as the
325  * rest of the string. Every machine with memory protection I've seen
326  * does it on word boundaries, so is OK with this. But VALGRIND will
327  * still catch it and complain. The masking trick does make the hash
328  * noticably faster for short strings (like English words).
329  */
330 #ifndef VALGRIND
331 
332  switch(length)
333  {
334  case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
335  case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
336  case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
337  case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
338  case 8 : b+=k[1]; a+=k[0]; break;
339  case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
340  case 6 : b+=k[1]&0xffff; a+=k[0]; break;
341  case 5 : b+=k[1]&0xff; a+=k[0]; break;
342  case 4 : a+=k[0]; break;
343  case 3 : a+=k[0]&0xffffff; break;
344  case 2 : a+=k[0]&0xffff; break;
345  case 1 : a+=k[0]&0xff; break;
346  case 0 : return c; /* zero length strings require no mixing */
347  }
348 
349 #else /* make valgrind happy */
350 
351  k8 = (const uint8_t *)k;
352  switch(length)
353  {
354  case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
355  case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
356  case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
357  case 9 : c+=k8[8]; /* fall through */
358  case 8 : b+=k[1]; a+=k[0]; break;
359  case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
360  case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
361  case 5 : b+=k8[4]; /* fall through */
362  case 4 : a+=k[0]; break;
363  case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
364  case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
365  case 1 : a+=k8[0]; break;
366  case 0 : return c;
367  }
368 
369 #endif /* !valgrind */
370 
371  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
372  const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
373  const uint8_t *k8;
374 
375  /*--------------- all but last block: aligned reads and different mixing */
376  while (length > 12)
377  {
378  a += k[0] + (((uint32_t)k[1])<<16);
379  b += k[2] + (((uint32_t)k[3])<<16);
380  c += k[4] + (((uint32_t)k[5])<<16);
381  mix(a,b,c);
382  length -= 12;
383  k += 6;
384  }
385 
386  /*----------------------------- handle the last (probably partial) block */
387  k8 = (const uint8_t *)k;
388  switch(length)
389  {
390  case 12: c+=k[4]+(((uint32_t)k[5])<<16);
391  b+=k[2]+(((uint32_t)k[3])<<16);
392  a+=k[0]+(((uint32_t)k[1])<<16);
393  break;
394  case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
395  case 10: c+=k[4];
396  b+=k[2]+(((uint32_t)k[3])<<16);
397  a+=k[0]+(((uint32_t)k[1])<<16);
398  break;
399  case 9 : c+=k8[8]; /* fall through */
400  case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
401  a+=k[0]+(((uint32_t)k[1])<<16);
402  break;
403  case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
404  case 6 : b+=k[2];
405  a+=k[0]+(((uint32_t)k[1])<<16);
406  break;
407  case 5 : b+=k8[4]; /* fall through */
408  case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
409  break;
410  case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
411  case 2 : a+=k[0];
412  break;
413  case 1 : a+=k8[0];
414  break;
415  case 0 : return c; /* zero length requires no mixing */
416  }
417 
418  } else { /* need to read the key one byte at a time */
419  const uint8_t *k = (const uint8_t *)key;
420 
421  /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
422  while (length > 12)
423  {
424  a += k[0];
425  a += ((uint32_t)k[1])<<8;
426  a += ((uint32_t)k[2])<<16;
427  a += ((uint32_t)k[3])<<24;
428  b += k[4];
429  b += ((uint32_t)k[5])<<8;
430  b += ((uint32_t)k[6])<<16;
431  b += ((uint32_t)k[7])<<24;
432  c += k[8];
433  c += ((uint32_t)k[9])<<8;
434  c += ((uint32_t)k[10])<<16;
435  c += ((uint32_t)k[11])<<24;
436  mix(a,b,c);
437  length -= 12;
438  k += 12;
439  }
440 
441  /*-------------------------------- last block: affect all 32 bits of (c) */
442  switch(length) /* all the case statements fall through */
443  {
444  case 12: c+=((uint32_t)k[11])<<24;
445  case 11: c+=((uint32_t)k[10])<<16;
446  case 10: c+=((uint32_t)k[9])<<8;
447  case 9 : c+=k[8];
448  case 8 : b+=((uint32_t)k[7])<<24;
449  case 7 : b+=((uint32_t)k[6])<<16;
450  case 6 : b+=((uint32_t)k[5])<<8;
451  case 5 : b+=k[4];
452  case 4 : a+=((uint32_t)k[3])<<24;
453  case 3 : a+=((uint32_t)k[2])<<16;
454  case 2 : a+=((uint32_t)k[1])<<8;
455  case 1 : a+=k[0];
456  break;
457  case 0 : return c;
458  }
459  }
460 
461  final(a,b,c);
462  return c;
463 }
464 #endif
465 
466 /*
467  * hashlittle2: return 2 32-bit hash values
468  *
469  * This is identical to hashlittle(), except it returns two 32-bit hash
470  * values instead of just one. This is good enough for hash table
471  * lookup with 2^^64 buckets, or if you want a second hash if you're not
472  * happy with the first, or if you want a probably-unique 64-bit ID for
473  * the key. *pc is better mixed than *pb, so use *pc first. If you want
474  * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
475  */
477  const void *key, /* the key to hash */
478  size_t length, /* length of the key */
479  uint32_t *pc, /* IN: primary initval, OUT: primary hash */
480  uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */
481 {
482  uint32_t a,b,c; /* internal state */
483  union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
484 
485  /* Set up the internal state */
486  a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc;
487  c += *pb;
488 
489  u.ptr = key;
490  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
491  const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
492  // const uint8_t *k8;
493 
494  /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
495  while (length > 12)
496  {
497  a += k[0];
498  b += k[1];
499  c += k[2];
500  mix(a,b,c);
501  length -= 12;
502  k += 3;
503  }
504 
505  /*----------------------------- handle the last (probably partial) block */
506  /*
507  * "k[2]&0xffffff" actually reads beyond the end of the string, but
508  * then masks off the part it's not allowed to read. Because the
509  * string is aligned, the masked-off tail is in the same word as the
510  * rest of the string. Every machine with memory protection I've seen
511  * does it on word boundaries, so is OK with this. But VALGRIND will
512  * still catch it and complain. The masking trick does make the hash
513  * noticably faster for short strings (like English words).
514  */
515 #ifndef VALGRIND
516 
517  switch(length)
518  {
519  case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
520  case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
521  case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
522  case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
523  case 8 : b+=k[1]; a+=k[0]; break;
524  case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
525  case 6 : b+=k[1]&0xffff; a+=k[0]; break;
526  case 5 : b+=k[1]&0xff; a+=k[0]; break;
527  case 4 : a+=k[0]; break;
528  case 3 : a+=k[0]&0xffffff; break;
529  case 2 : a+=k[0]&0xffff; break;
530  case 1 : a+=k[0]&0xff; break;
531  case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
532  }
533 
534 #else /* make valgrind happy */
535 
536  k8 = (const uint8_t *)k;
537  switch(length)
538  {
539  case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
540  case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
541  case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
542  case 9 : c+=k8[8]; /* fall through */
543  case 8 : b+=k[1]; a+=k[0]; break;
544  case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
545  case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
546  case 5 : b+=k8[4]; /* fall through */
547  case 4 : a+=k[0]; break;
548  case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
549  case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
550  case 1 : a+=k8[0]; break;
551  case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
552  }
553 
554 #endif /* !valgrind */
555 
556  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
557  const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
558  const uint8_t *k8;
559 
560  /*--------------- all but last block: aligned reads and different mixing */
561  while (length > 12)
562  {
563  a += k[0] + (((uint32_t)k[1])<<16);
564  b += k[2] + (((uint32_t)k[3])<<16);
565  c += k[4] + (((uint32_t)k[5])<<16);
566  mix(a,b,c);
567  length -= 12;
568  k += 6;
569  }
570 
571  /*----------------------------- handle the last (probably partial) block */
572  k8 = (const uint8_t *)k;
573  switch(length)
574  {
575  case 12: c+=k[4]+(((uint32_t)k[5])<<16);
576  b+=k[2]+(((uint32_t)k[3])<<16);
577  a+=k[0]+(((uint32_t)k[1])<<16);
578  break;
579  case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
580  case 10: c+=k[4];
581  b+=k[2]+(((uint32_t)k[3])<<16);
582  a+=k[0]+(((uint32_t)k[1])<<16);
583  break;
584  case 9 : c+=k8[8]; /* fall through */
585  case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
586  a+=k[0]+(((uint32_t)k[1])<<16);
587  break;
588  case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
589  case 6 : b+=k[2];
590  a+=k[0]+(((uint32_t)k[1])<<16);
591  break;
592  case 5 : b+=k8[4]; /* fall through */
593  case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
594  break;
595  case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
596  case 2 : a+=k[0];
597  break;
598  case 1 : a+=k8[0];
599  break;
600  case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
601  }
602 
603  } else { /* need to read the key one byte at a time */
604  const uint8_t *k = (const uint8_t *)key;
605 
606  /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
607  while (length > 12)
608  {
609  a += k[0];
610  a += ((uint32_t)k[1])<<8;
611  a += ((uint32_t)k[2])<<16;
612  a += ((uint32_t)k[3])<<24;
613  b += k[4];
614  b += ((uint32_t)k[5])<<8;
615  b += ((uint32_t)k[6])<<16;
616  b += ((uint32_t)k[7])<<24;
617  c += k[8];
618  c += ((uint32_t)k[9])<<8;
619  c += ((uint32_t)k[10])<<16;
620  c += ((uint32_t)k[11])<<24;
621  mix(a,b,c);
622  length -= 12;
623  k += 12;
624  }
625 
626  /*-------------------------------- last block: affect all 32 bits of (c) */
627  switch(length) /* all the case statements fall through */
628  {
629  case 12: c+=((uint32_t)k[11])<<24; /* fall through */
630  case 11: c+=((uint32_t)k[10])<<16; /* fall through */
631  case 10: c+=((uint32_t)k[9])<<8; /* fall through */
632  case 9 : c+=k[8]; /* fall through */
633  case 8 : b+=((uint32_t)k[7])<<24; /* fall through */
634  case 7 : b+=((uint32_t)k[6])<<16; /* fall through */
635  case 6 : b+=((uint32_t)k[5])<<8; /* fall through */
636  case 5 : b+=k[4]; /* fall through */
637  case 4 : a+=((uint32_t)k[3])<<24; /* fall through */
638  case 3 : a+=((uint32_t)k[2])<<16; /* fall through */
639  case 2 : a+=((uint32_t)k[1])<<8; /* fall through */
640  case 1 : a+=k[0];
641  break;
642  case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
643  }
644  }
645 
646  final(a,b,c);
647  *pc=c; *pb=b;
648 }
649 
650 
651 #if 0
652 /*
653  * hashbig():
654  * This is the same as hashword() on big-endian machines. It is different
655  * from hashlittle() on all machines. hashbig() takes advantage of
656  * big-endian byte ordering.
657  */
658 static uint32_t hashbig( const void *key, size_t length, uint32_t initval)
659 {
660  uint32_t a,b,c;
661  union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
662 
663  /* Set up the internal state */
664  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
665 
666  u.ptr = key;
667  if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
668  const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
669  // const uint8_t *k8;
670 
671  /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
672  while (length > 12)
673  {
674  a += k[0];
675  b += k[1];
676  c += k[2];
677  mix(a,b,c);
678  length -= 12;
679  k += 3;
680  }
681 
682  /*----------------------------- handle the last (probably partial) block */
683  /*
684  * "k[2]<<8" actually reads beyond the end of the string, but
685  * then shifts out the part it's not allowed to read. Because the
686  * string is aligned, the illegal read is in the same word as the
687  * rest of the string. Every machine with memory protection I've seen
688  * does it on word boundaries, so is OK with this. But VALGRIND will
689  * still catch it and complain. The masking trick does make the hash
690  * noticably faster for short strings (like English words).
691  */
692 #ifndef VALGRIND
693 
694  switch(length)
695  {
696  case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
697  case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
698  case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
699  case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
700  case 8 : b+=k[1]; a+=k[0]; break;
701  case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
702  case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
703  case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
704  case 4 : a+=k[0]; break;
705  case 3 : a+=k[0]&0xffffff00; break;
706  case 2 : a+=k[0]&0xffff0000; break;
707  case 1 : a+=k[0]&0xff000000; break;
708  case 0 : return c; /* zero length strings require no mixing */
709  }
710 
711 #else /* make valgrind happy */
712 
713  k8 = (const uint8_t *)k;
714  switch(length) /* all the case statements fall through */
715  {
716  case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
717  case 11: c+=((uint32_t)k8[10])<<8; /* fall through */
718  case 10: c+=((uint32_t)k8[9])<<16; /* fall through */
719  case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */
720  case 8 : b+=k[1]; a+=k[0]; break;
721  case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */
722  case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */
723  case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */
724  case 4 : a+=k[0]; break;
725  case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */
726  case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */
727  case 1 : a+=((uint32_t)k8[0])<<24; break;
728  case 0 : return c;
729  }
730 
731 #endif /* !VALGRIND */
732 
733  } else { /* need to read the key one byte at a time */
734  const uint8_t *k = (const uint8_t *)key;
735 
736  /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
737  while (length > 12)
738  {
739  a += ((uint32_t)k[0])<<24;
740  a += ((uint32_t)k[1])<<16;
741  a += ((uint32_t)k[2])<<8;
742  a += ((uint32_t)k[3]);
743  b += ((uint32_t)k[4])<<24;
744  b += ((uint32_t)k[5])<<16;
745  b += ((uint32_t)k[6])<<8;
746  b += ((uint32_t)k[7]);
747  c += ((uint32_t)k[8])<<24;
748  c += ((uint32_t)k[9])<<16;
749  c += ((uint32_t)k[10])<<8;
750  c += ((uint32_t)k[11]);
751  mix(a,b,c);
752  length -= 12;
753  k += 12;
754  }
755 
756  /*-------------------------------- last block: affect all 32 bits of (c) */
757  switch(length) /* all the case statements fall through */
758  {
759  case 12: c+=k[11];
760  case 11: c+=((uint32_t)k[10])<<8;
761  case 10: c+=((uint32_t)k[9])<<16;
762  case 9 : c+=((uint32_t)k[8])<<24;
763  case 8 : b+=k[7];
764  case 7 : b+=((uint32_t)k[6])<<8;
765  case 6 : b+=((uint32_t)k[5])<<16;
766  case 5 : b+=((uint32_t)k[4])<<24;
767  case 4 : a+=k[3];
768  case 3 : a+=((uint32_t)k[2])<<8;
769  case 2 : a+=((uint32_t)k[1])<<16;
770  case 1 : a+=((uint32_t)k[0])<<24;
771  break;
772  case 0 : return c;
773  }
774  }
775 
776  final(a,b,c);
777  return c;
778 }
779 #endif
780 
#define HASH_BIG_ENDIAN
Definition: lookup3.c:66
void hashlittle2(const void *key, size_t length, uint32_t *pc, uint32_t *pb)
Definition: lookup3.c:476
#define HASH_LITTLE_ENDIAN
Definition: lookup3.c:65
#define mix(a, b, c)
Definition: lookup3.c:126