Line data Source code
1 : /* hash - hashing table processing.
2 :
3 : Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006, 2007 Free
4 : Software Foundation, Inc.
5 :
6 : Written by Jim Meyering, 1992.
7 :
8 : This program is free software: you can redistribute it and/or modify
9 : it under the terms of the GNU General Public License as published by
10 : the Free Software Foundation; either version 3 of the License, or
11 : (at your option) any later version.
12 :
13 : This program is distributed in the hope that it will be useful,
14 : but WITHOUT ANY WARRANTY; without even the implied warranty of
15 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 : GNU General Public License for more details.
17 :
18 : You should have received a copy of the GNU General Public License
19 : along with this program. If not, see <http://www.gnu.org/licenses/>. */
20 :
21 : /* A generic hash table package. */
22 :
23 : /* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead
24 : of malloc. If you change USE_OBSTACK, you have to recompile! */
25 :
26 : #include <config.h>
27 :
28 : #include "hash.h"
29 : #include "xalloc.h"
30 :
31 : #include <limits.h>
32 : #include <stdio.h>
33 : #include <stdlib.h>
34 :
35 : #if USE_OBSTACK
36 : # include "obstack.h"
37 : # ifndef obstack_chunk_alloc
38 : # define obstack_chunk_alloc malloc
39 : # endif
40 : # ifndef obstack_chunk_free
41 : # define obstack_chunk_free free
42 : # endif
43 : #endif
44 :
45 : #ifndef SIZE_MAX
46 : # define SIZE_MAX ((size_t) -1)
47 : #endif
48 :
49 : struct hash_table
50 : {
51 : /* The array of buckets starts at BUCKET and extends to BUCKET_LIMIT-1,
52 : for a total of N_BUCKETS possible buckets. Among those, N_BUCKETS_USED
53 : are not empty, and there are N_ENTRIES active entries in the table. */
54 : struct hash_entry *bucket;
55 : struct hash_entry const *bucket_limit;
56 : size_t n_buckets;
57 : size_t n_buckets_used;
58 : size_t n_entries;
59 :
60 : /* Tuning arguments, kept in a physically separate structure. */
61 : const Hash_tuning *tuning;
62 :
63 : /* Three functions are given to `hash_initialize'; see the documentation
64 : block for that function. In a word, HASHER randomizes a user entry
65 : into a number from 0 up to some maximum minus 1; COMPARATOR returns
66 : true if two user entries compare equal; and DATA_FREER is the cleanup
67 : function for a user entry. */
68 : Hash_hasher hasher;
69 : Hash_comparator comparator;
70 : Hash_data_freer data_freer;
71 :
72 : /* A linked list of freed struct hash_entry structs. */
73 : struct hash_entry *free_entry_list;
74 :
75 : #if USE_OBSTACK
76 : /* Whenever obstacks are used, it is possible to allocate all overflowed
77 : entries into a single stack, so they all can be freed in a single
78 : operation. It is not clear if the speedup is worth the trouble. */
79 : struct obstack entry_stack;
80 : #endif
81 : };
82 :
83 : /* A hash table contains many internal entries, each holding a pointer to
84 : some user provided data (also called a user entry). The word `entry'
85 : refers indifferently to the internal entry or its associated user entry. A user
86 : entry's contents may be hashed by a randomization function (the hashing
87 : function, or just `hasher' for short) into a number (or `slot') between 0
88 : and the current table size minus 1. At each slot position in the hash table
89 : starts a linked chain of entries whose user data all hash to this
90 : slot. A bucket is the collection of all entries hashing to the same slot.
91 :
92 : A good `hasher' function will distribute entries rather evenly in buckets.
93 : In the ideal case, the length of each bucket is roughly the number of
94 : entries divided by the table size. Finding the slot for an entry is usually
95 : done in constant time by the `hasher', and the later finding of the precise
96 : entry is linear in the size of its bucket. Consequently, a
97 : larger hash table size (that is, a larger number of buckets) is prone to
98 : yielding shorter chains, *given* the `hasher' function behaves properly.
99 :
100 : Long buckets slow down the lookup algorithm. One might use big hash table
101 : sizes in the hope of reducing the average length of buckets, but this can
102 : become wasteful, as unused slots in the hash table take some space. The
103 : best bet is to make sure you are using a good `hasher' function (beware
104 : that those are not that easy to write! :-), and to use a table size
105 : larger than the actual number of entries. */
106 :
107 : /* If an insertion makes the ratio of nonempty buckets to table size larger
108 : than the growth threshold (a number between 0.0 and 1.0), then increase
109 : the table size by multiplying by the growth factor (a number greater than
110 : 1.0). The growth threshold defaults to 0.8, and the growth factor
111 : defaults to 1.414 (roughly the square root of 2), meaning that the table
112 : will have doubled in size every second time 80% of the buckets get used. */
113 : #define DEFAULT_GROWTH_THRESHOLD 0.8
114 : #define DEFAULT_GROWTH_FACTOR 1.414
115 :
116 : /* If a deletion empties a bucket and causes the ratio of used buckets to
117 : table size to become smaller than the shrink threshold (a number between
118 : 0.0 and 1.0), then shrink the table by multiplying by the shrink factor (a
119 : number greater than the shrink threshold but smaller than 1.0). The shrink
120 : threshold and factor default to 0.0 and 1.0, meaning that the table never
121 : shrinks. */
122 : #define DEFAULT_SHRINK_THRESHOLD 0.0
123 : #define DEFAULT_SHRINK_FACTOR 1.0
124 :
125 : /* Use this to initialize or reset a TUNING structure to
126 : some sensible values. */
127 : static const Hash_tuning default_tuning =
128 : {
129 : DEFAULT_SHRINK_THRESHOLD,
130 : DEFAULT_SHRINK_FACTOR,
131 : DEFAULT_GROWTH_THRESHOLD,
132 : DEFAULT_GROWTH_FACTOR,
133 : false
134 : };
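
/* Editorial sketch, not part of the original file: a caller wanting
   different behavior builds its own Hash_tuning, laid out exactly like
   `default_tuning' above, and passes it to `hash_initialize'.  The values
   below are illustrative only; they merely have to satisfy the
   `check_tuning' validation further down (note its 0.1 epsilon).  */

static const Hash_tuning example_tuning =
  {
    0.2f,    /* shrink_threshold: shrink once under 20% of buckets are used */
    0.5f,    /* shrink_factor: then halve the table size */
    0.85f,   /* growth_threshold: grow once over 85% of buckets are used */
    2.0f,    /* growth_factor: then double the table size */
    false    /* is_n_buckets: CANDIDATE counts entries, not buckets */
  };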
135 :
136 : /* Information and lookup. */
137 :
138 : /* The following few functions provide information about the overall hash
139 : table organization: the number of entries, number of buckets and maximum
140 : length of buckets. */
141 :
142 : /* Return the number of buckets in the hash table. The table size, the total
143 : number of buckets (used plus unused), and the maximum number of slots are
144 : all the same quantity.
145 :
146 : size_t
147 0 : hash_get_n_buckets (const Hash_table *table)
148 : {
149 0 : return table->n_buckets;
150 : }
151 :
152 : /* Return the number of slots in use (non-empty buckets). */
153 :
154 : size_t
155 0 : hash_get_n_buckets_used (const Hash_table *table)
156 : {
157 0 : return table->n_buckets_used;
158 : }
159 :
160 : /* Return the number of active entries. */
161 :
162 : size_t
163 0 : hash_get_n_entries (const Hash_table *table)
164 : {
165 0 : return table->n_entries;
166 : }
167 :
168 : /* Return the length of the longest chain (bucket). */
169 :
170 : size_t
171 0 : hash_get_max_bucket_length (const Hash_table *table)
172 : {
173 : struct hash_entry const *bucket;
174 0 : size_t max_bucket_length = 0;
175 :
176 0 : for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
177 : {
178 0 : if (bucket->data)
179 : {
180 0 : struct hash_entry const *cursor = bucket;
181 0 : size_t bucket_length = 1;
182 :
183 0 : while (cursor = cursor->next, cursor)
184 0 : bucket_length++;
185 :
186 0 : if (bucket_length > max_bucket_length)
187 0 : max_bucket_length = bucket_length;
188 : }
189 : }
190 :
191 0 : return max_bucket_length;
192 : }
193 :
194 : /* Do a mild validation of a hash table by traversing it and checking two
195 : statistics. */
196 :
197 : bool
198 0 : hash_table_ok (const Hash_table *table)
199 : {
200 : struct hash_entry const *bucket;
201 0 : size_t n_buckets_used = 0;
202 0 : size_t n_entries = 0;
203 :
204 0 : for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
205 : {
206 0 : if (bucket->data)
207 : {
208 0 : struct hash_entry const *cursor = bucket;
209 :
210 : /* Count bucket head. */
211 0 : n_buckets_used++;
212 0 : n_entries++;
213 :
214 : /* Count bucket overflow. */
215 0 : while (cursor = cursor->next, cursor)
216 0 : n_entries++;
217 : }
218 : }
219 :
220 0 : if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
221 0 : return true;
222 :
223 0 : return false;
224 : }
225 :
226 : void
227 0 : hash_print_statistics (const Hash_table *table, FILE *stream)
228 : {
229 0 : size_t n_entries = hash_get_n_entries (table);
230 0 : size_t n_buckets = hash_get_n_buckets (table);
231 0 : size_t n_buckets_used = hash_get_n_buckets_used (table);
232 0 : size_t max_bucket_length = hash_get_max_bucket_length (table);
233 :
234 0 : fprintf (stream, "# entries: %lu\n", (unsigned long int) n_entries);
235 0 : fprintf (stream, "# buckets: %lu\n", (unsigned long int) n_buckets);
236 0 : fprintf (stream, "# buckets used: %lu (%.2f%%)\n",
237 : (unsigned long int) n_buckets_used,
238 0 : (100.0 * n_buckets_used) / n_buckets);
239 0 : fprintf (stream, "max bucket length: %lu\n",
240 : (unsigned long int) max_bucket_length);
241 0 : }
242 :
243 : /* If ENTRY matches an entry already in the hash table, return the
244 : entry from the table. Otherwise, return NULL. */
245 :
246 : void *
247 22 : hash_lookup (const Hash_table *table, const void *entry)
248 : {
249 22 : struct hash_entry const *bucket
250 22 : = table->bucket + table->hasher (entry, table->n_buckets);
251 : struct hash_entry const *cursor;
252 :
253 22 : if (! (bucket < table->bucket_limit))
254 0 : abort ();
255 :
256 22 : if (bucket->data == NULL)
257 21 : return NULL;
258 :
259 1 : for (cursor = bucket; cursor; cursor = cursor->next)
260 1 : if (table->comparator (entry, cursor->data))
261 1 : return cursor->data;
262 :
263 0 : return NULL;
264 : }
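
/* Editorial usage sketch, not part of the original file: assuming a table
   whose user entries are NUL-terminated strings (see the example hasher
   further down), a membership test reduces to checking hash_lookup's
   result.  */

static bool
example_contains (const Hash_table *table, const char *key)
{
  return hash_lookup (table, key) != NULL;
}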
265 :
266 : /* Walking. */
267 :
268 : /* The functions in this page traverse the hash table and process the
269 : contained entries. For the traversal to work properly, the hash table
270 : should not be resized nor modified while any particular entry is being
271 : processed. In particular, entries should not be added or removed. */
272 :
273 : /* Return the first data in the table, or NULL if the table is empty. */
274 :
275 : void *
276 0 : hash_get_first (const Hash_table *table)
277 : {
278 : struct hash_entry const *bucket;
279 :
280 0 : if (table->n_entries == 0)
281 0 : return NULL;
282 :
283 0 : for (bucket = table->bucket; ; bucket++)
284 0 : if (! (bucket < table->bucket_limit))
285 0 : abort ();
286 0 : else if (bucket->data)
287 0 : return bucket->data;
288 : }
289 :
290 : /* Return the user data for the entry following ENTRY, where ENTRY has been
291 : returned by a previous call to either `hash_get_first' or `hash_get_next'.
292 : Return NULL if there are no more entries. */
293 :
294 : void *
295 0 : hash_get_next (const Hash_table *table, const void *entry)
296 : {
297 0 : struct hash_entry const *bucket
298 0 : = table->bucket + table->hasher (entry, table->n_buckets);
299 : struct hash_entry const *cursor;
300 :
301 0 : if (! (bucket < table->bucket_limit))
302 0 : abort ();
303 :
304 : /* Find next entry in the same bucket. */
305 0 : for (cursor = bucket; cursor; cursor = cursor->next)
306 0 : if (cursor->data == entry && cursor->next)
307 0 : return cursor->next->data;
308 :
309 : /* Find first entry in any subsequent bucket. */
310 0 : while (++bucket < table->bucket_limit)
311 0 : if (bucket->data)
312 0 : return bucket->data;
313 :
314 : /* None found. */
315 0 : return NULL;
316 : }
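
/* Editorial sketch, not part of the original file: hash_get_first and
   hash_get_next combine into a read-only traversal loop.  This assumes
   the user entries are NUL-terminated strings, and the table must not be
   modified while the loop runs.  */

static void
example_print_all (const Hash_table *table, FILE *stream)
{
  void *data;

  for (data = hash_get_first (table); data != NULL;
       data = hash_get_next (table, data))
    fprintf (stream, "%s\n", (char *) data);
}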
317 :
318 : /* Fill BUFFER with pointers to active user entries in the hash table, then
319 : return the number of pointers copied. Do not copy more than BUFFER_SIZE
320 : pointers. */
321 :
322 : size_t
323 0 : hash_get_entries (const Hash_table *table, void **buffer,
324 : size_t buffer_size)
325 : {
326 0 : size_t counter = 0;
327 : struct hash_entry const *bucket;
328 : struct hash_entry const *cursor;
329 :
330 0 : for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
331 : {
332 0 : if (bucket->data)
333 : {
334 0 : for (cursor = bucket; cursor; cursor = cursor->next)
335 : {
336 0 : if (counter >= buffer_size)
337 0 : return counter;
338 0 : buffer[counter++] = cursor->data;
339 : }
340 : }
341 : }
342 :
343 0 : return counter;
344 : }
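
/* Editorial sketch, not part of the original file: snapshotting every user
   entry into a malloc'd buffer, e.g. before code that needs to modify the
   table while working through its contents.  xnmalloc comes from
   "xalloc.h", which is already included above; the caller frees the
   returned buffer.  */

static void **
example_snapshot (const Hash_table *table, size_t *n)
{
  size_t n_entries = hash_get_n_entries (table);
  void **buffer = xnmalloc (n_entries, sizeof *buffer);

  *n = hash_get_entries (table, buffer, n_entries);
  return buffer;
}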
345 :
346 : /* Call a PROCESSOR function for each entry of a hash table, and return the
347 : number of entries for which the processor function returned success. The
348 : PROCESSOR_DATA pointer is made available, unchanged, to each call of the
349 : processor function. The PROCESSOR accepts two arguments: the first is
350 : the user entry being walked over, the second is the value of PROCESSOR_DATA
351 : as received. The walking continues for as long as the PROCESSOR function
352 : returns nonzero. When it returns zero, the walking is interrupted. */
353 :
354 : size_t
355 0 : hash_do_for_each (const Hash_table *table, Hash_processor processor,
356 : void *processor_data)
357 : {
358 0 : size_t counter = 0;
359 : struct hash_entry const *bucket;
360 : struct hash_entry const *cursor;
361 :
362 0 : for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
363 : {
364 0 : if (bucket->data)
365 : {
366 0 : for (cursor = bucket; cursor; cursor = cursor->next)
367 : {
368 0 : if (!(*processor) (cursor->data, processor_data))
369 0 : return counter;
370 0 : counter++;
371 : }
372 : }
373 : }
374 :
375 0 : return counter;
376 : }
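
/* Editorial sketch, not part of the original file: a PROCESSOR that writes
   each string entry to a stream, stopping the walk early if output fails.
   It would be invoked as
     hash_do_for_each (table, example_print_entry, stream);  */

static bool
example_print_entry (void *data, void *processor_data)
{
  FILE *stream = processor_data;

  /* Returning false interrupts the walk; returning true keeps it going.  */
  return fprintf (stream, "%s\n", (char *) data) >= 0;
}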
377 :
378 : /* Allocation and clean-up. */
379 :
380 : /* Return a hash index for a NUL-terminated STRING between 0 and N_BUCKETS-1.
381 : This is a convenience routine for constructing other hashing functions. */
382 :
383 : #if USE_DIFF_HASH
384 :
385 : /* About hashing, Paul Eggert wrote to me (FP) on 1994-01-01: "Please see
386 : B. J. McKenzie, R. Harries & T. Bell, Selecting a hashing algorithm,
387 : Software--practice & experience 20, 2 (Feb 1990), 209-224. Good hash
388 : algorithms tend to be domain-specific, so what's good for [diffutils'] io.c
389 : may not be good for your application." */
390 :
391 : size_t
392 : hash_string (const char *string, size_t n_buckets)
393 : {
394 : # define ROTATE_LEFT(Value, Shift) \
395 : ((Value) << (Shift) | (Value) >> ((sizeof (size_t) * CHAR_BIT) - (Shift)))
396 : # define HASH_ONE_CHAR(Value, Byte) \
397 : ((Byte) + ROTATE_LEFT (Value, 7))
398 :
399 : size_t value = 0;
400 : unsigned char ch;
401 :
402 : for (; (ch = *string); string++)
403 : value = HASH_ONE_CHAR (value, ch);
404 : return value % n_buckets;
405 :
406 : # undef ROTATE_LEFT
407 : # undef HASH_ONE_CHAR
408 : }
409 :
410 : #else /* not USE_DIFF_HASH */
411 :
412 : /* This one comes from `recode', and performs a bit better than the above as
413 : per a few experiments. It is inspired by a hashing routine found in the
414 : very old Cyber `snoop', itself written in typical Greg Mansfield style.
415 : (By the way, what happened to this excellent man? Is he still alive?) */
416 :
417 : size_t
418 0 : hash_string (const char *string, size_t n_buckets)
419 : {
420 0 : size_t value = 0;
421 : unsigned char ch;
422 :
423 0 : for (; (ch = *string); string++)
424 0 : value = (value * 31 + ch) % n_buckets;
425 0 : return value;
426 : }
427 :
428 : #endif /* not USE_DIFF_HASH */
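
/* Editorial sketch, not part of the original file: a Hash_hasher built on
   hash_string, suitable when the user entries are NUL-terminated
   strings.  */

static size_t
example_string_hasher (const void *entry, size_t n_buckets)
{
  return hash_string (entry, n_buckets);
}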
429 :
430 : /* Return true if CANDIDATE is a prime number. CANDIDATE should be an odd
431 : number at least equal to 11. */
432 :
433 : static bool
434 981 : is_prime (size_t candidate)
435 : {
436 981 : size_t divisor = 3;
437 981 : size_t square = divisor * divisor;
438 :
439 4389 : while (square < candidate && (candidate % divisor))
440 : {
441 2427 : divisor++;
442 2427 : square += 4 * divisor;
443 2427 : divisor++;
444 : }
445 :
446 981 : return (candidate % divisor ? true : false);
447 : }
448 :
449 : /* Round a given CANDIDATE number up to the next prime, and return that
450 : prime. Primes lower than 10 are merely skipped. */
451 :
452 : static size_t
453 484 : next_prime (size_t candidate)
454 : {
455 : /* Skip small primes. */
456 484 : if (candidate < 10)
457 2 : candidate = 10;
458 :
459 : /* Make it definitely odd. */
460 484 : candidate |= 1;
461 :
462 1465 : while (!is_prime (candidate))
463 497 : candidate += 2;
464 :
465 484 : return candidate;
466 : }
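
/* Editorial worked example: next_prime (10) and next_prime (11) both
   return 11, while next_prime (25) rejects 25 and 27 through is_prime and
   returns 29.  */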
467 :
468 : void
469 0 : hash_reset_tuning (Hash_tuning *tuning)
470 : {
471 0 : *tuning = default_tuning;
472 0 : }
473 :
474 : /* For the given hash TABLE, check the user supplied tuning structure for
475 : reasonable values, and return true if there is no gross error with it.
476 : Otherwise, definitively reset the TUNING field to some acceptable default
477 : in the hash table (that is, the user loses the right to further modify
478 : tuning arguments), and return false. */
479 :
480 : static bool
481 514 : check_tuning (Hash_table *table)
482 : {
483 514 : const Hash_tuning *tuning = table->tuning;
484 :
485 : /* Be a bit stricter than mathematics would require, so that
486 : rounding errors in size calculations do not cause allocations to
487 : fail to grow or shrink as they should. The smallest allocation
488 : is 11 (due to next_prime's algorithm), so an epsilon of 0.1
489 : should be good enough. */
490 514 : float epsilon = 0.1f;
491 :
492 514 : if (epsilon < tuning->growth_threshold
493 514 : && tuning->growth_threshold < 1 - epsilon
494 514 : && 1 + epsilon < tuning->growth_factor
495 514 : && 0 <= tuning->shrink_threshold
496 514 : && tuning->shrink_threshold + epsilon < tuning->shrink_factor
497 514 : && tuning->shrink_factor <= 1
498 514 : && tuning->shrink_threshold + epsilon < tuning->growth_threshold)
499 514 : return true;
500 :
501 0 : table->tuning = &default_tuning;
502 0 : return false;
503 : }
504 :
505 : /* Allocate and return a new hash table, or NULL upon failure. The initial
506 : number of buckets is automatically selected so as to _guarantee_ that you
507 : may insert at least CANDIDATE different user entries before any growth of
508 : the hash table size occurs. So, if you have a reasonably tight a priori
509 : upper bound on the number of entries you intend to insert in the hash table,
510 : you may save some table memory and insertion time by specifying it here. If
511 : the IS_N_BUCKETS field of the TUNING structure is true, the CANDIDATE
512 : argument has its meaning changed to the wanted number of buckets.
513 :
514 : TUNING points to a structure of user-supplied values, in case some fine
515 : tuning is wanted over the default behavior of the hasher. If TUNING is
516 : NULL, the default tuning parameters are used instead.
517 :
518 : A user-supplied HASHER function is required. It accepts two
519 : arguments ENTRY and TABLE_SIZE. It computes, by hashing ENTRY contents, a
520 : slot number for that entry which should be in the range 0..TABLE_SIZE-1.
521 : This slot number is then returned.
522 :
523 : A user-supplied COMPARATOR function is also required. It accepts two
524 : arguments pointing to user data, it then returns true for a pair of entries
525 : that compare equal, or false otherwise. This function is internally called
526 : on entries which are already known to hash to the same bucket index.
527 :
528 : The user-supplied DATA_FREER function, when not NULL, may be later called
529 : with the user data as an argument, just before the entry containing the
530 : data gets freed. This happens from within `hash_free' or `hash_clear'.
531 : You should specify this function only if you want these functions to free
532 : all of your `data' data. This is typically the case when your data is
533 : simply an auxiliary struct that you have malloc'd to aggregate several
534 : values. */
535 :
536 : Hash_table *
537 484 : hash_initialize (size_t candidate, const Hash_tuning *tuning,
538 : Hash_hasher hasher, Hash_comparator comparator,
539 : Hash_data_freer data_freer)
540 : {
541 : Hash_table *table;
542 :
543 484 : if (hasher == NULL || comparator == NULL)
544 0 : return NULL;
545 :
546 484 : table = malloc (sizeof *table);
547 484 : if (table == NULL)
548 0 : return NULL;
549 :
550 484 : if (!tuning)
551 454 : tuning = &default_tuning;
552 484 : table->tuning = tuning;
553 484 : if (!check_tuning (table))
554 : {
555 : /* Fail if the tuning options are invalid. This is the only occasion
556 : when the user gets some feedback about it. Once the table is created,
557 : if the user provides invalid tuning options, we silently revert to
558 : using the defaults, and ignore further requests to change the tuning
559 : options. */
560 0 : goto fail;
561 : }
562 :
563 484 : if (!tuning->is_n_buckets)
564 : {
565 484 : float new_candidate = candidate / tuning->growth_threshold;
566 484 : if (SIZE_MAX <= new_candidate)
567 0 : goto fail;
568 484 : candidate = new_candidate;
569 : }
570 :
571 484 : if (xalloc_oversized (candidate, sizeof *table->bucket))
572 0 : goto fail;
573 484 : table->n_buckets = next_prime (candidate);
574 484 : if (xalloc_oversized (table->n_buckets, sizeof *table->bucket))
575 0 : goto fail;
576 :
577 484 : table->bucket = calloc (table->n_buckets, sizeof *table->bucket);
578 484 : if (table->bucket == NULL)
579 0 : goto fail;
580 484 : table->bucket_limit = table->bucket + table->n_buckets;
581 484 : table->n_buckets_used = 0;
582 484 : table->n_entries = 0;
583 :
584 484 : table->hasher = hasher;
585 484 : table->comparator = comparator;
586 484 : table->data_freer = data_freer;
587 :
588 484 : table->free_entry_list = NULL;
589 : #if USE_OBSTACK
590 : obstack_init (&table->entry_stack);
591 : #endif
592 484 : return table;
593 :
594 0 : fail:
595 0 : free (table);
596 0 : return NULL;
597 : }
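
/* Editorial usage sketch, not part of the original file: creating a table
   of malloc'd NUL-terminated strings, reusing example_string_hasher from
   above.  <string.h> is included here only because this example wants
   strcmp; hash.c itself does not otherwise need it.  */

#include <string.h>

static bool
example_string_comparator (const void *a, const void *b)
{
  return strcmp (a, b) == 0;
}

static Hash_table *
example_new_string_table (size_t expected_entries)
{
  /* A NULL tuning selects default_tuning; passing free as DATA_FREER lets
     hash_free and hash_clear release each stored string.  */
  return hash_initialize (expected_entries, NULL, example_string_hasher,
                          example_string_comparator, free);
}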
598 :
599 : /* Make all buckets empty, placing any chained entries on the free list.
600 : Apply the user-specified function data_freer (if any) to the data of any
601 : affected entries. */
602 :
603 : void
604 0 : hash_clear (Hash_table *table)
605 : {
606 : struct hash_entry *bucket;
607 :
608 0 : for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
609 : {
610 0 : if (bucket->data)
611 : {
612 : struct hash_entry *cursor;
613 : struct hash_entry *next;
614 :
615 : /* Free the bucket overflow. */
616 0 : for (cursor = bucket->next; cursor; cursor = next)
617 : {
618 0 : if (table->data_freer)
619 0 : (*table->data_freer) (cursor->data);
620 0 : cursor->data = NULL;
621 :
622 0 : next = cursor->next;
623 : /* Relinking is done one entry at a time, as it is to be expected
624 : that overflows are either rare or short. */
625 0 : cursor->next = table->free_entry_list;
626 0 : table->free_entry_list = cursor;
627 : }
628 :
629 : /* Free the bucket head. */
630 0 : if (table->data_freer)
631 0 : (*table->data_freer) (bucket->data);
632 0 : bucket->data = NULL;
633 0 : bucket->next = NULL;
634 : }
635 : }
636 :
637 0 : table->n_buckets_used = 0;
638 0 : table->n_entries = 0;
639 0 : }
640 :
641 : /* Reclaim all storage associated with a hash table. If a data_freer
642 : function has been supplied by the user when the hash table was created,
643 : this function applies it to the data of each entry before freeing that
644 : entry. */
645 :
646 : void
647 209 : hash_free (Hash_table *table)
648 : {
649 : struct hash_entry *bucket;
650 : struct hash_entry *cursor;
651 : struct hash_entry *next;
652 :
653 : /* Call the user data_freer function. */
654 209 : if (table->data_freer && table->n_entries)
655 : {
656 1476 : for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
657 : {
658 1463 : if (bucket->data)
659 : {
660 92 : for (cursor = bucket; cursor; cursor = cursor->next)
661 : {
662 46 : (*table->data_freer) (cursor->data);
663 : }
664 : }
665 : }
666 : }
667 :
668 : #if USE_OBSTACK
669 :
670 : obstack_free (&table->entry_stack, NULL);
671 :
672 : #else
673 :
674 : /* Free all bucket overflow entries. */
675 26814 : for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
676 : {
677 26605 : for (cursor = bucket->next; cursor; cursor = next)
678 : {
679 0 : next = cursor->next;
680 0 : free (cursor);
681 : }
682 : }
683 :
684 : /* Also reclaim the internal list of previously freed entries. */
685 221 : for (cursor = table->free_entry_list; cursor; cursor = next)
686 : {
687 12 : next = cursor->next;
688 12 : free (cursor);
689 : }
690 :
691 : #endif
692 :
693 : /* Free the remainder of the hash table structure. */
694 209 : free (table->bucket);
695 209 : free (table);
696 209 : }
697 :
698 : /* Insertion and deletion. */
699 :
700 : /* Get a new hash entry for a bucket overflow, possibly by recycling a
701 : previously freed one. If this is not possible, allocate a new one. */
702 :
703 : static struct hash_entry *
704 1275 : allocate_entry (Hash_table *table)
705 : {
706 : struct hash_entry *new;
707 :
708 1275 : if (table->free_entry_list)
709 : {
710 1263 : new = table->free_entry_list;
711 1263 : table->free_entry_list = new->next;
712 : }
713 : else
714 : {
715 : #if USE_OBSTACK
716 : new = obstack_alloc (&table->entry_stack, sizeof *new);
717 : #else
718 12 : new = malloc (sizeof *new);
719 : #endif
720 : }
721 :
722 1275 : return new;
723 : }
724 :
725 : /* Free a hash entry which was part of some bucket overflow,
726 : saving it for later recycling. */
727 :
728 : static void
729 1275 : free_entry (Hash_table *table, struct hash_entry *entry)
730 : {
731 1275 : entry->data = NULL;
732 1275 : entry->next = table->free_entry_list;
733 1275 : table->free_entry_list = entry;
734 1275 : }
735 :
736 : /* This private function is used to help with insertion and deletion. When
737 : ENTRY matches an entry in the table, return a pointer to the corresponding
738 : user data and set *BUCKET_HEAD to the head of the selected bucket.
739 : Otherwise, return NULL. When DELETE is true and ENTRY matches an entry in
740 : the table, unlink the matching entry. */
741 :
742 : static void *
743 80808 : hash_find_entry (Hash_table *table, const void *entry,
744 : struct hash_entry **bucket_head, bool delete)
745 : {
746 80808 : struct hash_entry *bucket
747 80808 : = table->bucket + table->hasher (entry, table->n_buckets);
748 : struct hash_entry *cursor;
749 :
750 80808 : if (! (bucket < table->bucket_limit))
751 0 : abort ();
752 :
753 80808 : *bucket_head = bucket;
754 :
755 : /* Test for empty bucket. */
756 80808 : if (bucket->data == NULL)
757 39723 : return NULL;
758 :
759 : /* See if the entry is the first in the bucket. */
760 41085 : if ((*table->comparator) (entry, bucket->data))
761 : {
762 38685 : void *data = bucket->data;
763 :
764 38685 : if (delete)
765 : {
766 38636 : if (bucket->next)
767 : {
768 0 : struct hash_entry *next = bucket->next;
769 :
770 : /* Bump the first overflow entry into the bucket head, then save
771 : the previous first overflow entry for later recycling. */
772 0 : *bucket = *next;
773 0 : free_entry (table, next);
774 : }
775 : else
776 : {
777 38636 : bucket->data = NULL;
778 : }
779 : }
780 :
781 38685 : return data;
782 : }
783 :
784 : /* Scan the bucket overflow. */
785 2526 : for (cursor = bucket; cursor->next; cursor = cursor->next)
786 : {
787 1317 : if ((*table->comparator) (entry, cursor->next->data))
788 : {
789 1191 : void *data = cursor->next->data;
790 :
791 1191 : if (delete)
792 : {
793 1191 : struct hash_entry *next = cursor->next;
794 :
795 : /* Unlink the entry to delete, then save the freed entry for later
796 : recycling. */
797 1191 : cursor->next = next->next;
798 1191 : free_entry (table, next);
799 : }
800 :
801 1191 : return data;
802 : }
803 : }
804 :
805 : /* No entry found. */
806 1209 : return NULL;
807 : }
808 :
809 : /* For an already existing hash table, change the number of buckets through
810 : specifying CANDIDATE. The contents of the hash table are preserved. The
811 : new number of buckets is automatically selected so as to _guarantee_ that
812 : the table may receive at least CANDIDATE different user entries, including
813 : those already in the table, before any other growth of the hash table size
814 : occurs. If TUNING->IS_N_BUCKETS is true, then CANDIDATE specifies the
815 : exact number of buckets desired. */
816 :
817 : bool
818 30 : hash_rehash (Hash_table *table, size_t candidate)
819 : {
820 : Hash_table *new_table;
821 : struct hash_entry *bucket;
822 : struct hash_entry *cursor;
823 : struct hash_entry *next;
824 :
825 30 : new_table = hash_initialize (candidate, table->tuning, table->hasher,
826 : table->comparator, table->data_freer);
827 30 : if (new_table == NULL)
828 0 : return false;
829 :
830 : /* Merely carry the old table's spare entry space over into the new table. */
831 : #if USE_OBSTACK
832 : obstack_free (&new_table->entry_stack, NULL);
833 : new_table->entry_stack = table->entry_stack;
834 : #endif
835 30 : new_table->free_entry_list = table->free_entry_list;
836 :
837 10296 : for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
838 10266 : if (bucket->data)
839 16542 : for (cursor = bucket; cursor; cursor = next)
840 : {
841 8313 : void *data = cursor->data;
842 8313 : struct hash_entry *new_bucket
843 8313 : = (new_table->bucket
844 8313 : + new_table->hasher (data, new_table->n_buckets));
845 :
846 8313 : if (! (new_bucket < new_table->bucket_limit))
847 0 : abort ();
848 :
849 8313 : next = cursor->next;
850 :
851 8313 : if (new_bucket->data)
852 : {
853 75 : if (cursor == bucket)
854 : {
855 : /* Allocate or recycle an entry, when moving from a bucket
856 : header into a bucket overflow. */
857 75 : struct hash_entry *new_entry = allocate_entry (new_table);
858 :
859 75 : if (new_entry == NULL)
860 0 : return false;
861 :
862 75 : new_entry->data = data;
863 75 : new_entry->next = new_bucket->next;
864 75 : new_bucket->next = new_entry;
865 : }
866 : else
867 : {
868 : /* Merely relink an existing entry, when moving from a
869 : bucket overflow into a bucket overflow. */
870 0 : cursor->next = new_bucket->next;
871 0 : new_bucket->next = cursor;
872 : }
873 : }
874 : else
875 : {
876 : /* Free an existing entry, when moving from a bucket
877 : overflow into a bucket header. Also take care of the
878 : simple case of moving from a bucket header into a bucket
879 : header. */
880 8238 : new_bucket->data = data;
881 8238 : new_table->n_buckets_used++;
882 8238 : if (cursor != bucket)
883 84 : free_entry (new_table, cursor);
884 : }
885 : }
886 :
887 30 : free (table->bucket);
888 30 : table->bucket = new_table->bucket;
889 30 : table->bucket_limit = new_table->bucket_limit;
890 30 : table->n_buckets = new_table->n_buckets;
891 30 : table->n_buckets_used = new_table->n_buckets_used;
892 30 : table->free_entry_list = new_table->free_entry_list;
893 : /* table->n_entries already holds its value. */
894 : #if USE_OBSTACK
895 : table->entry_stack = new_table->entry_stack;
896 : #endif
897 30 : free (new_table);
898 :
899 30 : return true;
900 : }
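
/* Editorial sketch, not part of the original file: pre-sizing a table
   before a bulk insertion, so the insertions themselves never trigger an
   automatic grow-and-rehash.  With the default tuning (is_n_buckets
   false), N_EXPECTED counts entries rather than buckets.  */

static bool
example_reserve (Hash_table *table, size_t n_expected)
{
  return hash_rehash (table, n_expected);
}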
901 :
902 : /* If ENTRY matches an entry already in the hash table, return the pointer
903 : to the entry from the table. Otherwise, insert ENTRY and return ENTRY.
904 : Return NULL if the storage required for insertion cannot be allocated. */
905 :
906 : void *
907 39933 : hash_insert (Hash_table *table, const void *entry)
908 : {
909 : void *data;
910 : struct hash_entry *bucket;
911 :
912 : /* The caller cannot insert a NULL entry. */
913 39933 : if (! entry)
914 0 : abort ();
915 :
916 : /* If there's a matching entry already in the table, return that. */
917 39933 : if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL)
918 49 : return data;
919 :
920 : /* ENTRY is not matched, it should be inserted. */
921 :
922 39884 : if (bucket->data)
923 : {
924 1200 : struct hash_entry *new_entry = allocate_entry (table);
925 :
926 1200 : if (new_entry == NULL)
927 0 : return NULL;
928 :
929 : /* Add ENTRY in the overflow of the bucket. */
930 :
931 1200 : new_entry->data = (void *) entry;
932 1200 : new_entry->next = bucket->next;
933 1200 : bucket->next = new_entry;
934 1200 : table->n_entries++;
935 1200 : return (void *) entry;
936 : }
937 :
938 : /* Add ENTRY right in the bucket head. */
939 :
940 38684 : bucket->data = (void *) entry;
941 38684 : table->n_entries++;
942 38684 : table->n_buckets_used++;
943 :
944 : /* If the growth threshold of the buckets in use has been reached, increase
945 : the table size and rehash. There's no point in checking the number of
946 : entries: if the hashing function is ill-conditioned, rehashing is not
947 : likely to improve it. */
948 :
949 77368 : if (table->n_buckets_used
950 38684 : > table->tuning->growth_threshold * table->n_buckets)
951 : {
952 : /* Check more fully, before starting real work. If tuning arguments
953 : became invalid, the second check will rely on proper defaults. */
954 30 : check_tuning (table);
955 60 : if (table->n_buckets_used
956 30 : > table->tuning->growth_threshold * table->n_buckets)
957 : {
958 30 : const Hash_tuning *tuning = table->tuning;
959 30 : float candidate =
960 30 : (tuning->is_n_buckets
961 0 : ? (table->n_buckets * tuning->growth_factor)
962 60 : : (table->n_buckets * tuning->growth_factor
963 30 : * tuning->growth_threshold));
964 :
965 30 : if (SIZE_MAX <= candidate)
966 0 : return NULL;
967 :
968 : /* If the rehash fails, arrange to return NULL. */
969 30 : if (!hash_rehash (table, candidate))
970 0 : entry = NULL;
971 : }
972 : }
973 :
974 38684 : return (void *) entry;
975 : }
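
/* Editorial usage sketch, not part of the original file: inserting a
   freshly allocated copy of NAME and distinguishing the three possible
   outcomes.  xstrdup and xalloc_die are assumed available from
   "xalloc.h", which is already included above.  */

static char *
example_intern (Hash_table *table, const char *name)
{
  char *copy = xstrdup (name);
  char *stored = hash_insert (table, copy);

  if (stored == NULL)
    xalloc_die ();       /* allocation failed inside hash_insert */
  if (stored != copy)
    free (copy);         /* an equal entry was already in the table */
  return stored;
}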
976 :
977 : /* If ENTRY is already in the table, remove it and return the just-deleted
978 : data (the user may want to deallocate its storage). If ENTRY is not in the
979 : table, don't modify the table and return NULL. */
980 :
981 : void *
982 40875 : hash_delete (Hash_table *table, const void *entry)
983 : {
984 : void *data;
985 : struct hash_entry *bucket;
986 :
987 40875 : data = hash_find_entry (table, entry, &bucket, true);
988 40875 : if (!data)
989 1048 : return NULL;
990 :
991 39827 : table->n_entries--;
992 39827 : if (!bucket->data)
993 : {
994 38636 : table->n_buckets_used--;
995 :
996 : /* If the shrink threshold of the buckets in use has been reached,
997 : rehash into a smaller table. */
998 :
999 77272 : if (table->n_buckets_used
1000 38636 : < table->tuning->shrink_threshold * table->n_buckets)
1001 : {
1002 : /* Check more fully, before starting real work. If tuning arguments
1003 : became invalid, the second check will rely on proper defaults. */
1004 0 : check_tuning (table);
1005 0 : if (table->n_buckets_used
1006 0 : < table->tuning->shrink_threshold * table->n_buckets)
1007 : {
1008 0 : const Hash_tuning *tuning = table->tuning;
1009 0 : size_t candidate =
1010 0 : (tuning->is_n_buckets
1011 0 : ? table->n_buckets * tuning->shrink_factor
1012 0 : : (table->n_buckets * tuning->shrink_factor
1013 0 : * tuning->growth_threshold));
1014 :
1015 0 : hash_rehash (table, candidate);
1016 : }
1017 : }
1018 : }
1019 :
1020 39827 : return data;
1021 : }
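
/* Editorial usage sketch, not part of the original file: removing an entry
   and releasing its storage, since hash_delete hands the deleted user data
   back to the caller instead of freeing it.  */

static bool
example_remove (Hash_table *table, const char *name)
{
  void *data = hash_delete (table, name);

  if (data == NULL)
    return false;        /* nothing in the table matched NAME */
  free (data);           /* entries were malloc'd copies, as in example_intern */
  return true;
}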
1022 :
1023 : /* Testing. */
1024 :
1025 : #if TESTING
1026 :
1027 : void
1028 : hash_print (const Hash_table *table)
1029 : {
1030 : struct hash_entry const *bucket;
1031 :
1032 : for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
1033 : {
1034 : struct hash_entry *cursor;
1035 :
1036 : if (bucket->data)
1037 : printf ("%lu:\n", (unsigned long int) (bucket - table->bucket));
1038 :
1039 : for (cursor = bucket; cursor; cursor = cursor->next)
1040 : {
1041 : char const *s = cursor->data;
1042 : /* FIXME */
1043 : if (s)
1044 : printf (" %s\n", s);
1045 : }
1046 : }
1047 : }
1048 :
1049 : #endif /* TESTING */