/*
 * PostGIS 3.4.0dev-r@@SVN_REVISION@@ -- lookup3.c
 */
/*
-------------------------------------------------------------------------------
lookup3.c, by Bob Jenkins, May 2006, Public Domain.

These are functions for producing 32-bit hashes for hash table lookup.
hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
are externally useful functions.  Routines to test the hash are included
if SELF_TEST is defined.  You can use this free for any purpose.  It's in
the public domain.  It has no warranty.

You probably want to use hashlittle().  hashlittle() and hashbig()
hash byte arrays.  hashlittle() is faster than hashbig() on
little-endian machines.  Intel and AMD are little-endian machines.
On second thought, you probably want hashlittle2(), which is identical to
hashlittle() except it returns two 32-bit hashes for the price of one.
You could implement hashbig2() if you wanted but I haven't bothered here.

If you want to find a hash of, say, exactly 7 integers, do
  a = i1;  b = i2;  c = i3;
  mix(a,b,c);
  a += i4; b += i5; c += i6;
  mix(a,b,c);
  a += i7;
  final(a,b,c);
then use c as the hash value.  If you have a variable length array of
4-byte integers to hash, use hashword().  If you have a byte array (like
a character string), use hashlittle().  If you have several byte arrays, or
a mix of things, see the comments above hashlittle().

Why is this so big?  I read 12 bytes at a time into 3 4-byte integers,
then mix those integers.  This is fast (you can do a lot more thorough
mixing with 12*3 instructions on 3 integers than you can with 3 instructions
on 1 byte), but shoehorning those bytes into integers efficiently is messy.
-------------------------------------------------------------------------------
*/
/* NOTE(review): SELF_TEST is defined but the self-test drivers are not part
 * of this PostGIS copy of the file; the define (and <stdio.h>/<time.h>) is
 * kept only for fidelity with the upstream source. */
#define SELF_TEST 1

#include <stdio.h>      /* defines printf for tests */
#include <time.h>       /* defines time_t for timings in the test */
#include <stdint.h>     /* defines uint32_t etc */
#ifdef linux
# include <sys/param.h> /* attempt to define endianness */
# include <endian.h>    /* attempt to define endianness */
#endif

/*
 * My best guess at if you are big-endian or little-endian.  This may
 * need adjustment.  When neither macro ends up set to 1, the hash
 * functions fall back to the (slower but portable) byte-at-a-time path.
 */
#if defined (__WIN32__) /* windows is always little-endian except for NT */
# define HASH_LITTLE_ENDIAN 1
# define HASH_BIG_ENDIAN 0
#elif (defined(WORDS_BIGENDIAN))
# define HASH_LITTLE_ENDIAN 0
# define HASH_BIG_ENDIAN 1
#elif (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
       __BYTE_ORDER == __LITTLE_ENDIAN) || \
      (defined(i386) || defined(__i386__) || defined(__i486__) || \
       defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
# define HASH_LITTLE_ENDIAN 1
# define HASH_BIG_ENDIAN 0
#elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
       __BYTE_ORDER == __BIG_ENDIAN) || \
      (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
# define HASH_LITTLE_ENDIAN 0
# define HASH_BIG_ENDIAN 1
#else
# define HASH_LITTLE_ENDIAN 0
# define HASH_BIG_ENDIAN 0
#endif

/* hashsize(n): a power-of-two hash table size; hashmask(n): the matching
 * index mask.  rot(x,k): rotate the 32-bit value x left by k bits.
 * NOTE: rot() with k==0 or k==32 would shift by the full type width
 * (undefined behavior); every use in this file passes 1..31. */
#define hashsize(n) ((uint32_t)1<<(n))
#define hashmask(n) (hashsize(n)-1)
#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))

/* Unused variants are compiled out: PostGIS only needs hashlittle2(). */
#if 0
static uint32_t hashword(const uint32_t *k, size_t length, uint32_t initval);
static void hashword2 (const uint32_t *k, size_t length, uint32_t *pc, uint32_t *pb);
static uint32_t hashlittle( const void *key, size_t length, uint32_t initval);
static uint32_t hashbig( const void *key, size_t length, uint32_t initval);
#endif

void hashlittle2(const void *key, size_t length, uint32_t *pc, uint32_t *pb);

85 /*
86 -------------------------------------------------------------------------------
87 mix -- mix 3 32-bit values reversibly.
88 
89 This is reversible, so any information in (a,b,c) before mix() is
90 still in (a,b,c) after mix().
91 
92 If four pairs of (a,b,c) inputs are run through mix(), or through
93 mix() in reverse, there are at least 32 bits of the output that
94 are sometimes the same for one pair and different for another pair.
95 This was tested for:
96 * pairs that differed by one bit, by two bits, in any combination
97  of top bits of (a,b,c), or in any combination of bottom bits of
98  (a,b,c).
99 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
100  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
101  is commonly produced by subtraction) look like a single 1-bit
102  difference.
103 * the base values were pseudorandom, all zero but one bit set, or
104  all zero plus a counter that starts at zero.
105 
106 Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
107 satisfy this are
108  4 6 8 16 19 4
109  9 15 3 18 27 15
110  14 9 3 7 17 3
111 Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
112 for "differ" defined as + with a one-bit base and a two-bit delta. I
113 used http://burtleburtle.net/bob/hash/avalanche.html to choose
114 the operations, constants, and arrangements of the variables.
115 
116 This does not achieve avalanche. There are input bits of (a,b,c)
117 that fail to affect some output bits of (a,b,c), especially of a. The
118 most thoroughly mixed value is c, but it doesn't really even achieve
119 avalanche in c.
120 
121 This allows some parallelism. Read-after-writes are good at doubling
122 the number of bits affected, so the goal of mixing pulls in the opposite
123 direction as the goal of parallelism. I did what I could. Rotates
124 seem to cost as much as shifts on every machine I could lay my hands
125 on, and rotates are much kinder to the top and bottom bits, so I used
126 rotates.
127 -------------------------------------------------------------------------------
128 */
129 #define mix(a,b,c) \
130 { \
131  a -= c; a ^= rot(c, 4); c += b; \
132  b -= a; b ^= rot(a, 6); a += c; \
133  c -= b; c ^= rot(b, 8); b += a; \
134  a -= c; a ^= rot(c,16); c += b; \
135  b -= a; b ^= rot(a,19); a += c; \
136  c -= b; c ^= rot(b, 4); b += a; \
137 }
138 
139 /*
140 -------------------------------------------------------------------------------
141 final -- final mixing of 3 32-bit values (a,b,c) into c
142 
143 Pairs of (a,b,c) values differing in only a few bits will usually
144 produce values of c that look totally different. This was tested for
145 * pairs that differed by one bit, by two bits, in any combination
146  of top bits of (a,b,c), or in any combination of bottom bits of
147  (a,b,c).
148 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
149  the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
150  is commonly produced by subtraction) look like a single 1-bit
151  difference.
152 * the base values were pseudorandom, all zero but one bit set, or
153  all zero plus a counter that starts at zero.
154 
155 These constants passed:
156  14 11 25 16 4 14 24
157  12 14 25 16 4 14 24
158 and these came close:
159  4 8 15 26 3 22 24
160  10 8 15 26 3 22 24
161  11 8 15 26 3 22 24
162 -------------------------------------------------------------------------------
163 */
164 #define final(a,b,c) \
165 { \
166  c ^= b; c -= rot(b,14); \
167  a ^= c; a -= rot(c,11); \
168  b ^= a; b -= rot(a,25); \
169  c ^= b; c -= rot(b,16); \
170  a ^= c; a -= rot(c,4); \
171  b ^= a; b -= rot(a,14); \
172  c ^= b; c -= rot(b,24); \
173 }
174 
#if 0
/*
--------------------------------------------------------------------
 This works on all machines.  To be useful, it requires
 -- that the key be an array of uint32_t's, and
 -- that the length be the number of uint32_t's in the key

 The function hashword() is identical to hashlittle() on little-endian
 machines, and identical to hashbig() on big-endian machines,
 except that the length has to be measured in uint32_ts rather than in
 bytes.  hashlittle() is more complicated than hashword() only because
 hashlittle() has to dance around fitting the key bytes into registers.
--------------------------------------------------------------------
*/
static uint32_t hashword(
const uint32_t *k,                   /* the key, an array of uint32_t values */
size_t          length,              /* the length of the key, in uint32_ts */
uint32_t        initval)         /* the previous hash, or an arbitrary value */
{
  uint32_t a,b,c;

  /* Set up the internal state */
  a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;

  /*------------------------------------------------- handle most of the key */
  while (length > 3)
  {
    a += k[0];
    b += k[1];
    c += k[2];
    mix(a,b,c);
    length -= 3;
    k += 3;
  }

  /*------------------------------------------- handle the last 3 uint32_t's */
  switch(length)                     /* all the case statements fall through */
  {
  case 3 : c+=k[2];          /* fall through */
  case 2 : b+=k[1];          /* fall through */
  case 1 : a+=k[0];
    final(a,b,c);
    /* fall through */
  case 0:     /* case 0: nothing left to add */
    break;
  }
  /*------------------------------------------------------ report the result */
  return c;
}
#endif

#if 0
/*
--------------------------------------------------------------------
hashword2() -- same as hashword(), but take two seeds and return two
32-bit values.  pc and pb must both be nonnull, and *pc and *pb must
both be initialized with seeds.  If you pass in (*pb)==0, the output
(*pc) will be the same as the return value from hashword().
--------------------------------------------------------------------
*/
static void hashword2 (
const uint32_t *k,                   /* the key, an array of uint32_t values */
size_t          length,              /* the length of the key, in uint32_ts */
uint32_t       *pc,                      /* IN: seed OUT: primary hash value */
uint32_t       *pb)               /* IN: more seed OUT: secondary hash value */
{
  uint32_t a,b,c;

  /* Set up the internal state */
  a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc;
  c += *pb;

  /*------------------------------------------------- handle most of the key */
  while (length > 3)
  {
    a += k[0];
    b += k[1];
    c += k[2];
    mix(a,b,c);
    length -= 3;
    k += 3;
  }

  /*------------------------------------------- handle the last 3 uint32_t's */
  switch(length)                     /* all the case statements fall through */
  {
  case 3 : c+=k[2];          /* fall through */
  case 2 : b+=k[1];          /* fall through */
  case 1 : a+=k[0];
    final(a,b,c);
    /* fall through */
  case 0:     /* case 0: nothing left to add */
    break;
  }
  /*------------------------------------------------------ report the result */
  *pc=c; *pb=b;
}
#endif

/*
-------------------------------------------------------------------------------
hashlittle() -- hash a variable-length key into a 32-bit value
  k       : the key (the unaligned variable-length array of bytes)
  length  : the length of the key, counting by bytes
  initval : can be any 4-byte value
Returns a 32-bit value.  Every bit of the key affects every bit of
the return value.  Two keys differing by one or two bits will have
totally different hash values.

The best hash table sizes are powers of 2.  There is no need to do
mod a prime (mod is sooo slow!).  If you need less than 32 bits,
use a bitmask.  For example, if you need only 10 bits, do
  h = (h & hashmask(10));
In which case, the hash table should have hashsize(10) elements.

If you are hashing n strings (uint8_t **)k, do it like this:
  for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);

By Bob Jenkins, 2006.  bob_jenkins@burtleburtle.net.  You may use this
code any way you wish, private, educational, or commercial.  It's free.

Use for hash table lookup, or anything where one collision in 2^^32 is
acceptable.  Do NOT use for cryptographic purposes.
-------------------------------------------------------------------------------
*/
#if 0
static uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
{
  uint32_t a,b,c;                                          /* internal state */
  union { const void *ptr; size_t i; } u;     /* needed for Mac Powerbook G4 */

  /* Set up the internal state */
  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;

  u.ptr = key;
  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
#ifdef VALGRIND
    const uint8_t  *k8;       /* byte pointer for the valgrind-friendly tail */
#endif

    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += k[0];
      b += k[1];
      c += k[2];
      mix(a,b,c);
      length -= 12;
      k += 3;
    }

    /*----------------------------- handle the last (probably partial) block */
    /*
     * "k[2]&0xffffff" actually reads beyond the end of the string, but
     * then masks off the part it's not allowed to read.  Because the
     * string is aligned, the masked-off tail is in the same word as the
     * rest of the string.  Every machine with memory protection I've seen
     * does it on word boundaries, so is OK with this.  But VALGRIND will
     * still catch it and complain.  The masking trick does make the hash
     * noticably faster for short strings (like English words).
     */
#ifndef VALGRIND

    switch(length)
    {
    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
    case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
    case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
    case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
    case 8 : b+=k[1]; a+=k[0]; break;
    case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
    case 6 : b+=k[1]&0xffff; a+=k[0]; break;
    case 5 : b+=k[1]&0xff; a+=k[0]; break;
    case 4 : a+=k[0]; break;
    case 3 : a+=k[0]&0xffffff; break;
    case 2 : a+=k[0]&0xffff; break;
    case 1 : a+=k[0]&0xff; break;
    case 0 : return c;              /* zero length strings require no mixing */
    }

#else /* make valgrind happy */

    k8 = (const uint8_t *)k;
    switch(length)
    {
    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
    case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
    case 10: c+=((uint32_t)k8[9])<<8;    /* fall through */
    case 9 : c+=k8[8];                   /* fall through */
    case 8 : b+=k[1]; a+=k[0]; break;
    case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
    case 6 : b+=((uint32_t)k8[5])<<8;    /* fall through */
    case 5 : b+=k8[4];                   /* fall through */
    case 4 : a+=k[0]; break;
    case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
    case 2 : a+=((uint32_t)k8[1])<<8;    /* fall through */
    case 1 : a+=k8[0]; break;
    case 0 : return c;
    }

#endif /* !valgrind */

  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
    const uint16_t *k = (const uint16_t *)key;         /* read 16-bit chunks */
    const uint8_t  *k8;

    /*--------------- all but last block: aligned reads and different mixing */
    while (length > 12)
    {
      a += k[0] + (((uint32_t)k[1])<<16);
      b += k[2] + (((uint32_t)k[3])<<16);
      c += k[4] + (((uint32_t)k[5])<<16);
      mix(a,b,c);
      length -= 12;
      k += 6;
    }

    /*----------------------------- handle the last (probably partial) block */
    k8 = (const uint8_t *)k;
    switch(length)
    {
    case 12: c+=k[4]+(((uint32_t)k[5])<<16);
             b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 11: c+=((uint32_t)k8[10])<<16;  /* fall through */
    case 10: c+=k[4];
             b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 9 : c+=k8[8];                   /* fall through */
    case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 7 : b+=((uint32_t)k8[6])<<16;   /* fall through */
    case 6 : b+=k[2];
             a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 5 : b+=k8[4];                   /* fall through */
    case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
             break;
    case 3 : a+=((uint32_t)k8[2])<<16;   /* fall through */
    case 2 : a+=k[0];
             break;
    case 1 : a+=k8[0];
             break;
    case 0 : return c;                     /* zero length requires no mixing */
    }

  } else {                        /* need to read the key one byte at a time */
    const uint8_t *k = (const uint8_t *)key;

    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += k[0];
      a += ((uint32_t)k[1])<<8;
      a += ((uint32_t)k[2])<<16;
      a += ((uint32_t)k[3])<<24;
      b += k[4];
      b += ((uint32_t)k[5])<<8;
      b += ((uint32_t)k[6])<<16;
      b += ((uint32_t)k[7])<<24;
      c += k[8];
      c += ((uint32_t)k[9])<<8;
      c += ((uint32_t)k[10])<<16;
      c += ((uint32_t)k[11])<<24;
      mix(a,b,c);
      length -= 12;
      k += 12;
    }

    /*-------------------------------- last block: affect all 32 bits of (c) */
    switch(length)                   /* all the case statements fall through */
    {
    case 12: c+=((uint32_t)k[11])<<24;   /* fall through */
    case 11: c+=((uint32_t)k[10])<<16;   /* fall through */
    case 10: c+=((uint32_t)k[9])<<8;     /* fall through */
    case 9 : c+=k[8];                    /* fall through */
    case 8 : b+=((uint32_t)k[7])<<24;    /* fall through */
    case 7 : b+=((uint32_t)k[6])<<16;    /* fall through */
    case 6 : b+=((uint32_t)k[5])<<8;     /* fall through */
    case 5 : b+=k[4];                    /* fall through */
    case 4 : a+=((uint32_t)k[3])<<24;    /* fall through */
    case 3 : a+=((uint32_t)k[2])<<16;    /* fall through */
    case 2 : a+=((uint32_t)k[1])<<8;     /* fall through */
    case 1 : a+=k[0];
             break;
    case 0 : return c;
    }
  }

  final(a,b,c);
  return c;
}
#endif

469 /*
470  * hashlittle2: return 2 32-bit hash values
471  *
472  * This is identical to hashlittle(), except it returns two 32-bit hash
473  * values instead of just one. This is good enough for hash table
474  * lookup with 2^^64 buckets, or if you want a second hash if you're not
475  * happy with the first, or if you want a probably-unique 64-bit ID for
476  * the key. *pc is better mixed than *pb, so use *pc first. If you want
477  * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
478  */
480  const void *key, /* the key to hash */
481  size_t length, /* length of the key */
482  uint32_t *pc, /* IN: primary initval, OUT: primary hash */
483  uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */
484 {
485  uint32_t a,b,c; /* internal state */
486  union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
487 
488  /* Set up the internal state */
489  a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc;
490  c += *pb;
491 
492  u.ptr = key;
493  if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
494  const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
495  // const uint8_t *k8;
496 
497  /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
498  while (length > 12)
499  {
500  a += k[0];
501  b += k[1];
502  c += k[2];
503  mix(a,b,c);
504  length -= 12;
505  k += 3;
506  }
507 
508  /*----------------------------- handle the last (probably partial) block */
509  /*
510  * "k[2]&0xffffff" actually reads beyond the end of the string, but
511  * then masks off the part it's not allowed to read. Because the
512  * string is aligned, the masked-off tail is in the same word as the
513  * rest of the string. Every machine with memory protection I've seen
514  * does it on word boundaries, so is OK with this. But VALGRIND will
515  * still catch it and complain. The masking trick does make the hash
516  * noticably faster for short strings (like English words).
517  */
518 #ifndef VALGRIND
519 
520  switch(length)
521  {
522  case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
523  case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
524  case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
525  case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
526  case 8 : b+=k[1]; a+=k[0]; break;
527  case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
528  case 6 : b+=k[1]&0xffff; a+=k[0]; break;
529  case 5 : b+=k[1]&0xff; a+=k[0]; break;
530  case 4 : a+=k[0]; break;
531  case 3 : a+=k[0]&0xffffff; break;
532  case 2 : a+=k[0]&0xffff; break;
533  case 1 : a+=k[0]&0xff; break;
534  case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
535  }
536 
537 #else /* make valgrind happy */
538 
539  k8 = (const uint8_t *)k;
540  switch(length)
541  {
542  case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
543  case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
544  case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
545  case 9 : c+=k8[8]; /* fall through */
546  case 8 : b+=k[1]; a+=k[0]; break;
547  case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
548  case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
549  case 5 : b+=k8[4]; /* fall through */
550  case 4 : a+=k[0]; break;
551  case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
552  case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
553  case 1 : a+=k8[0]; break;
554  case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
555  }
556 
557 #endif /* !valgrind */
558 
559  } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
560  const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
561  const uint8_t *k8;
562 
563  /*--------------- all but last block: aligned reads and different mixing */
564  while (length > 12)
565  {
566  a += k[0] + (((uint32_t)k[1])<<16);
567  b += k[2] + (((uint32_t)k[3])<<16);
568  c += k[4] + (((uint32_t)k[5])<<16);
569  mix(a,b,c);
570  length -= 12;
571  k += 6;
572  }
573 
574  /*----------------------------- handle the last (probably partial) block */
575  k8 = (const uint8_t *)k;
576  switch(length)
577  {
578  case 12: c+=k[4]+(((uint32_t)k[5])<<16);
579  b+=k[2]+(((uint32_t)k[3])<<16);
580  a+=k[0]+(((uint32_t)k[1])<<16);
581  break;
582  case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
583  case 10: c+=k[4];
584  b+=k[2]+(((uint32_t)k[3])<<16);
585  a+=k[0]+(((uint32_t)k[1])<<16);
586  break;
587  case 9 : c+=k8[8]; /* fall through */
588  case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
589  a+=k[0]+(((uint32_t)k[1])<<16);
590  break;
591  case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
592  case 6 : b+=k[2];
593  a+=k[0]+(((uint32_t)k[1])<<16);
594  break;
595  case 5 : b+=k8[4]; /* fall through */
596  case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
597  break;
598  case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
599  case 2 : a+=k[0];
600  break;
601  case 1 : a+=k8[0];
602  break;
603  case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
604  }
605 
606  } else { /* need to read the key one byte at a time */
607  const uint8_t *k = (const uint8_t *)key;
608 
609  /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
610  while (length > 12)
611  {
612  a += k[0];
613  a += ((uint32_t)k[1])<<8;
614  a += ((uint32_t)k[2])<<16;
615  a += ((uint32_t)k[3])<<24;
616  b += k[4];
617  b += ((uint32_t)k[5])<<8;
618  b += ((uint32_t)k[6])<<16;
619  b += ((uint32_t)k[7])<<24;
620  c += k[8];
621  c += ((uint32_t)k[9])<<8;
622  c += ((uint32_t)k[10])<<16;
623  c += ((uint32_t)k[11])<<24;
624  mix(a,b,c);
625  length -= 12;
626  k += 12;
627  }
628 
629  /*-------------------------------- last block: affect all 32 bits of (c) */
630  switch(length) /* all the case statements fall through */
631  {
632  case 12: c+=((uint32_t)k[11])<<24; /* fall through */
633  case 11: c+=((uint32_t)k[10])<<16; /* fall through */
634  case 10: c+=((uint32_t)k[9])<<8; /* fall through */
635  case 9 : c+=k[8]; /* fall through */
636  case 8 : b+=((uint32_t)k[7])<<24; /* fall through */
637  case 7 : b+=((uint32_t)k[6])<<16; /* fall through */
638  case 6 : b+=((uint32_t)k[5])<<8; /* fall through */
639  case 5 : b+=k[4]; /* fall through */
640  case 4 : a+=((uint32_t)k[3])<<24; /* fall through */
641  case 3 : a+=((uint32_t)k[2])<<16; /* fall through */
642  case 2 : a+=((uint32_t)k[1])<<8; /* fall through */
643  case 1 : a+=k[0];
644  break;
645  case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
646  }
647  }
648 
649  final(a,b,c);
650  *pc=c; *pb=b;
651 }
652 
653 
#if 0
/*
 * hashbig():
 * This is the same as hashword() on big-endian machines.  It is different
 * from hashlittle() on all machines.  hashbig() takes advantage of
 * big-endian byte ordering.
 */
static uint32_t hashbig( const void *key, size_t length, uint32_t initval)
{
  uint32_t a,b,c;
  union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */

  /* Set up the internal state */
  a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;

  u.ptr = key;
  if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
    const uint32_t *k = (const uint32_t *)key;         /* read 32-bit chunks */
#ifdef VALGRIND
    const uint8_t  *k8;       /* byte pointer for the valgrind-friendly tail */
#endif

    /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += k[0];
      b += k[1];
      c += k[2];
      mix(a,b,c);
      length -= 12;
      k += 3;
    }

    /*----------------------------- handle the last (probably partial) block */
    /*
     * "k[2]<<8" actually reads beyond the end of the string, but
     * then shifts out the part it's not allowed to read.  Because the
     * string is aligned, the illegal read is in the same word as the
     * rest of the string.  Every machine with memory protection I've seen
     * does it on word boundaries, so is OK with this.  But VALGRIND will
     * still catch it and complain.  The masking trick does make the hash
     * noticably faster for short strings (like English words).
     */
#ifndef VALGRIND

    switch(length)
    {
    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
    case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
    case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
    case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
    case 8 : b+=k[1]; a+=k[0]; break;
    case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
    case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
    case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
    case 4 : a+=k[0]; break;
    case 3 : a+=k[0]&0xffffff00; break;
    case 2 : a+=k[0]&0xffff0000; break;
    case 1 : a+=k[0]&0xff000000; break;
    case 0 : return c;              /* zero length strings require no mixing */
    }

#else  /* make valgrind happy */

    k8 = (const uint8_t *)k;
    switch(length)                   /* all the case statements fall through */
    {
    case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
    case 11: c+=((uint32_t)k8[10])<<8;   /* fall through */
    case 10: c+=((uint32_t)k8[9])<<16;   /* fall through */
    case 9 : c+=((uint32_t)k8[8])<<24;   /* fall through */
    case 8 : b+=k[1]; a+=k[0]; break;
    case 7 : b+=((uint32_t)k8[6])<<8;    /* fall through */
    case 6 : b+=((uint32_t)k8[5])<<16;   /* fall through */
    case 5 : b+=((uint32_t)k8[4])<<24;   /* fall through */
    case 4 : a+=k[0]; break;
    case 3 : a+=((uint32_t)k8[2])<<8;    /* fall through */
    case 2 : a+=((uint32_t)k8[1])<<16;   /* fall through */
    case 1 : a+=((uint32_t)k8[0])<<24; break;
    case 0 : return c;
    }

#endif /* !VALGRIND */

  } else {                        /* need to read the key one byte at a time */
    const uint8_t *k = (const uint8_t *)key;

    /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
    while (length > 12)
    {
      a += ((uint32_t)k[0])<<24;
      a += ((uint32_t)k[1])<<16;
      a += ((uint32_t)k[2])<<8;
      a += ((uint32_t)k[3]);
      b += ((uint32_t)k[4])<<24;
      b += ((uint32_t)k[5])<<16;
      b += ((uint32_t)k[6])<<8;
      b += ((uint32_t)k[7]);
      c += ((uint32_t)k[8])<<24;
      c += ((uint32_t)k[9])<<16;
      c += ((uint32_t)k[10])<<8;
      c += ((uint32_t)k[11]);
      mix(a,b,c);
      length -= 12;
      k += 12;
    }

    /*-------------------------------- last block: affect all 32 bits of (c) */
    switch(length)                   /* all the case statements fall through */
    {
    case 12: c+=k[11];                   /* fall through */
    case 11: c+=((uint32_t)k[10])<<8;    /* fall through */
    case 10: c+=((uint32_t)k[9])<<16;    /* fall through */
    case 9 : c+=((uint32_t)k[8])<<24;    /* fall through */
    case 8 : b+=k[7];                    /* fall through */
    case 7 : b+=((uint32_t)k[6])<<8;     /* fall through */
    case 6 : b+=((uint32_t)k[5])<<16;    /* fall through */
    case 5 : b+=((uint32_t)k[4])<<24;    /* fall through */
    case 4 : a+=k[3];                    /* fall through */
    case 3 : a+=((uint32_t)k[2])<<8;     /* fall through */
    case 2 : a+=((uint32_t)k[1])<<16;    /* fall through */
    case 1 : a+=((uint32_t)k[0])<<24;
             break;
    case 0 : return c;
    }
  }

  final(a,b,c);
  return c;
}
#endif

/* end of lookup3.c */