LCOV - code coverage report
Current view: top level - lib - hash.c (source / functions) Hit Total Coverage
Test: coreutils.info Lines: 177 321 55.1 %
Date: 2018-01-30 Functions: 12 25 48.0 %

          Line data    Source code
       1             : /* hash - hashing table processing.
       2             : 
       3             :    Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006, 2007 Free
       4             :    Software Foundation, Inc.
       5             : 
       6             :    Written by Jim Meyering, 1992.
       7             : 
       8             :    This program is free software: you can redistribute it and/or modify
       9             :    it under the terms of the GNU General Public License as published by
      10             :    the Free Software Foundation; either version 3 of the License, or
      11             :    (at your option) any later version.
      12             : 
      13             :    This program is distributed in the hope that it will be useful,
      14             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      15             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      16             :    GNU General Public License for more details.
      17             : 
      18             :    You should have received a copy of the GNU General Public License
      19             :    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
      20             : 
      21             : /* A generic hash table package.  */
      22             : 
      23             : /* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead
      24             :    of malloc.  If you change USE_OBSTACK, you have to recompile!  */
      25             : 
      26             : #include <config.h>
      27             : 
      28             : #include "hash.h"
      29             : #include "xalloc.h"
      30             : 
      31             : #include <limits.h>
      32             : #include <stdio.h>
      33             : #include <stdlib.h>
      34             : 
      35             : #if USE_OBSTACK
      36             : # include "obstack.h"
      37             : # ifndef obstack_chunk_alloc
      38             : #  define obstack_chunk_alloc malloc
      39             : # endif
      40             : # ifndef obstack_chunk_free
      41             : #  define obstack_chunk_free free
      42             : # endif
      43             : #endif
      44             : 
      45             : #ifndef SIZE_MAX
      46             : # define SIZE_MAX ((size_t) -1)
      47             : #endif
      48             : 
      49             : struct hash_table
      50             :   {
      51             :     /* The array of buckets starts at BUCKET and extends to BUCKET_LIMIT-1,
      52             :        for a possibility of N_BUCKETS.  Among those, N_BUCKETS_USED buckets
      53             :        are not empty, there are N_ENTRIES active entries in the table.  */
      54             :     struct hash_entry *bucket;
      55             :     struct hash_entry const *bucket_limit;
      56             :     size_t n_buckets;
      57             :     size_t n_buckets_used;
      58             :     size_t n_entries;
      59             : 
      60             :     /* Tuning arguments, kept in a physicaly separate structure.  */
      61             :     const Hash_tuning *tuning;
      62             : 
      63             :     /* Three functions are given to `hash_initialize', see the documentation
      64             :        block for this function.  In a word, HASHER randomizes a user entry
      65             :        into a number up from 0 up to some maximum minus 1; COMPARATOR returns
      66             :        true if two user entries compare equally; and DATA_FREER is the cleanup
      67             :        function for a user entry.  */
      68             :     Hash_hasher hasher;
      69             :     Hash_comparator comparator;
      70             :     Hash_data_freer data_freer;
      71             : 
      72             :     /* A linked list of freed struct hash_entry structs.  */
      73             :     struct hash_entry *free_entry_list;
      74             : 
      75             : #if USE_OBSTACK
      76             :     /* Whenever obstacks are used, it is possible to allocate all overflowed
      77             :        entries into a single stack, so they all can be freed in a single
      78             :        operation.  It is not clear if the speedup is worth the trouble.  */
      79             :     struct obstack entry_stack;
      80             : #endif
      81             :   };
      82             : 
      83             : /* A hash table contains many internal entries, each holding a pointer to
      84             :    some user provided data (also called a user entry).  An entry indistinctly
      85             :    refers to both the internal entry and its associated user entry.  A user
      86             :    entry contents may be hashed by a randomization function (the hashing
      87             :    function, or just `hasher' for short) into a number (or `slot') between 0
      88             :    and the current table size.  At each slot position in the hash table,
      89             :    starts a linked chain of entries for which the user data all hash to this
      90             :    slot.  A bucket is the collection of all entries hashing to the same slot.
      91             : 
      92             :    A good `hasher' function will distribute entries rather evenly in buckets.
      93             :    In the ideal case, the length of each bucket is roughly the number of
      94             :    entries divided by the table size.  Finding the slot for a data is usually
      95             :    done in constant time by the `hasher', and the later finding of a precise
      96             :    entry is linear in time with the size of the bucket.  Consequently, a
      97             :    larger hash table size (that is, a larger number of buckets) is prone to
      98             :    yielding shorter chains, *given* the `hasher' function behaves properly.
      99             : 
     100             :    Long buckets slow down the lookup algorithm.  One might use big hash table
     101             :    sizes in hope to reduce the average length of buckets, but this might
     102             :    become inordinate, as unused slots in the hash table take some space.  The
     103             :    best bet is to make sure you are using a good `hasher' function (beware
     104             :    that those are not that easy to write! :-), and to use a table size
     105             :    larger than the actual number of entries.  */
     106             : 
     107             : /* If an insertion makes the ratio of nonempty buckets to table size larger
     108             :    than the growth threshold (a number between 0.0 and 1.0), then increase
     109             :    the table size by multiplying by the growth factor (a number greater than
     110             :    1.0).  The growth threshold defaults to 0.8, and the growth factor
     111             :    defaults to 1.414, meaning that the table will have doubled its size
     112             :    every second time 80% of the buckets get used.  */
     113             : #define DEFAULT_GROWTH_THRESHOLD 0.8
     114             : #define DEFAULT_GROWTH_FACTOR 1.414
     115             : 
     116             : /* If a deletion empties a bucket and causes the ratio of used buckets to
     117             :    table size to become smaller than the shrink threshold (a number between
     118             :    0.0 and 1.0), then shrink the table by multiplying by the shrink factor (a
     119             :    number greater than the shrink threshold but smaller than 1.0).  The shrink
     120             :    threshold and factor default to 0.0 and 1.0, meaning that the table never
     121             :    shrinks.  */
     122             : #define DEFAULT_SHRINK_THRESHOLD 0.0
     123             : #define DEFAULT_SHRINK_FACTOR 1.0
     124             : 
     125             : /* Use this to initialize or reset a TUNING structure to
     126             :    some sensible values. */
     127             : static const Hash_tuning default_tuning =
     128             :   {
     129             :     DEFAULT_SHRINK_THRESHOLD,
     130             :     DEFAULT_SHRINK_FACTOR,
     131             :     DEFAULT_GROWTH_THRESHOLD,
     132             :     DEFAULT_GROWTH_FACTOR,
     133             :     false
     134             :   };
     135             : 
     136             : /* Information and lookup.  */
     137             : 
     138             : /* The following few functions provide information about the overall hash
     139             :    table organization: the number of entries, number of buckets and maximum
     140             :    length of buckets.  */
     141             : 
     142             : /* Return the number of buckets in the hash table.  The table size, the total
     143             :    number of buckets (used plus unused), or the maximum number of slots, are
     144             :    the same quantity.  */
     145             : 
     146             : size_t
     147           0 : hash_get_n_buckets (const Hash_table *table)
     148             : {
     149           0 :   return table->n_buckets;
     150             : }
     151             : 
     152             : /* Return the number of slots in use (non-empty buckets).  */
     153             : 
     154             : size_t
     155           0 : hash_get_n_buckets_used (const Hash_table *table)
     156             : {
     157           0 :   return table->n_buckets_used;
     158             : }
     159             : 
     160             : /* Return the number of active entries.  */
     161             : 
     162             : size_t
     163           0 : hash_get_n_entries (const Hash_table *table)
     164             : {
     165           0 :   return table->n_entries;
     166             : }
     167             : 
     168             : /* Return the length of the longest chain (bucket).  */
     169             : 
     170             : size_t
     171           0 : hash_get_max_bucket_length (const Hash_table *table)
     172             : {
     173             :   struct hash_entry const *bucket;
     174           0 :   size_t max_bucket_length = 0;
     175             : 
     176           0 :   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
     177             :     {
     178           0 :       if (bucket->data)
     179             :         {
     180           0 :           struct hash_entry const *cursor = bucket;
     181           0 :           size_t bucket_length = 1;
     182             : 
     183           0 :           while (cursor = cursor->next, cursor)
     184           0 :             bucket_length++;
     185             : 
     186           0 :           if (bucket_length > max_bucket_length)
     187           0 :             max_bucket_length = bucket_length;
     188             :         }
     189             :     }
     190             : 
     191           0 :   return max_bucket_length;
     192             : }
     193             : 
     194             : /* Do a mild validation of a hash table, by traversing it and checking two
     195             :    statistics.  */
     196             : 
     197             : bool
     198           0 : hash_table_ok (const Hash_table *table)
     199             : {
     200             :   struct hash_entry const *bucket;
     201           0 :   size_t n_buckets_used = 0;
     202           0 :   size_t n_entries = 0;
     203             : 
     204           0 :   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
     205             :     {
     206           0 :       if (bucket->data)
     207             :         {
     208           0 :           struct hash_entry const *cursor = bucket;
     209             : 
     210             :           /* Count bucket head.  */
     211           0 :           n_buckets_used++;
     212           0 :           n_entries++;
     213             : 
     214             :           /* Count bucket overflow.  */
     215           0 :           while (cursor = cursor->next, cursor)
     216           0 :             n_entries++;
     217             :         }
     218             :     }
     219             : 
     220           0 :   if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
     221           0 :     return true;
     222             : 
     223           0 :   return false;
     224             : }
     225             : 
     226             : void
     227           0 : hash_print_statistics (const Hash_table *table, FILE *stream)
     228             : {
     229           0 :   size_t n_entries = hash_get_n_entries (table);
     230           0 :   size_t n_buckets = hash_get_n_buckets (table);
     231           0 :   size_t n_buckets_used = hash_get_n_buckets_used (table);
     232           0 :   size_t max_bucket_length = hash_get_max_bucket_length (table);
     233             : 
     234           0 :   fprintf (stream, "# entries:         %lu\n", (unsigned long int) n_entries);
     235           0 :   fprintf (stream, "# buckets:         %lu\n", (unsigned long int) n_buckets);
     236           0 :   fprintf (stream, "# buckets used:    %lu (%.2f%%)\n",
     237             :            (unsigned long int) n_buckets_used,
     238           0 :            (100.0 * n_buckets_used) / n_buckets);
     239           0 :   fprintf (stream, "max bucket length: %lu\n",
     240             :            (unsigned long int) max_bucket_length);
     241           0 : }
     242             : 
     243             : /* If ENTRY matches an entry already in the hash table, return the
     244             :    entry from the table.  Otherwise, return NULL.  */
     245             : 
     246             : void *
     247          22 : hash_lookup (const Hash_table *table, const void *entry)
     248             : {
     249          22 :   struct hash_entry const *bucket
     250          22 :     = table->bucket + table->hasher (entry, table->n_buckets);
     251             :   struct hash_entry const *cursor;
     252             : 
     253          22 :   if (! (bucket < table->bucket_limit))
     254           0 :     abort ();
     255             : 
     256          22 :   if (bucket->data == NULL)
     257          21 :     return NULL;
     258             : 
     259           1 :   for (cursor = bucket; cursor; cursor = cursor->next)
     260           1 :     if (table->comparator (entry, cursor->data))
     261           1 :       return cursor->data;
     262             : 
     263           0 :   return NULL;
     264             : }
     265             : 
     266             : /* Walking.  */
     267             : 
     268             : /* The functions in this page traverse the hash table and process the
     269             :    contained entries.  For the traversal to work properly, the hash table
     270             :    should not be resized nor modified while any particular entry is being
     271             :    processed.  In particular, entries should not be added or removed.  */
     272             : 
     273             : /* Return the first data in the table, or NULL if the table is empty.  */
     274             : 
     275             : void *
     276           0 : hash_get_first (const Hash_table *table)
     277             : {
     278             :   struct hash_entry const *bucket;
     279             : 
     280           0 :   if (table->n_entries == 0)
     281           0 :     return NULL;
     282             : 
     283           0 :   for (bucket = table->bucket; ; bucket++)
     284           0 :     if (! (bucket < table->bucket_limit))
     285           0 :       abort ();
     286           0 :     else if (bucket->data)
     287           0 :       return bucket->data;
     288             : }
     289             : 
     290             : /* Return the user data for the entry following ENTRY, where ENTRY has been
     291             :    returned by a previous call to either `hash_get_first' or `hash_get_next'.
     292             :    Return NULL if there are no more entries.  */
     293             : 
     294             : void *
     295           0 : hash_get_next (const Hash_table *table, const void *entry)
     296             : {
     297           0 :   struct hash_entry const *bucket
     298           0 :     = table->bucket + table->hasher (entry, table->n_buckets);
     299             :   struct hash_entry const *cursor;
     300             : 
     301           0 :   if (! (bucket < table->bucket_limit))
     302           0 :     abort ();
     303             : 
     304             :   /* Find next entry in the same bucket.  */
     305           0 :   for (cursor = bucket; cursor; cursor = cursor->next)
     306           0 :     if (cursor->data == entry && cursor->next)
     307           0 :       return cursor->next->data;
     308             : 
     309             :   /* Find first entry in any subsequent bucket.  */
     310           0 :   while (++bucket < table->bucket_limit)
     311           0 :     if (bucket->data)
     312           0 :       return bucket->data;
     313             : 
     314             :   /* None found.  */
     315           0 :   return NULL;
     316             : }
     317             : 
     318             : /* Fill BUFFER with pointers to active user entries in the hash table, then
     319             :    return the number of pointers copied.  Do not copy more than BUFFER_SIZE
     320             :    pointers.  */
     321             : 
     322             : size_t
     323           0 : hash_get_entries (const Hash_table *table, void **buffer,
     324             :                   size_t buffer_size)
     325             : {
     326           0 :   size_t counter = 0;
     327             :   struct hash_entry const *bucket;
     328             :   struct hash_entry const *cursor;
     329             : 
     330           0 :   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
     331             :     {
     332           0 :       if (bucket->data)
     333             :         {
     334           0 :           for (cursor = bucket; cursor; cursor = cursor->next)
     335             :             {
     336           0 :               if (counter >= buffer_size)
     337           0 :                 return counter;
     338           0 :               buffer[counter++] = cursor->data;
     339             :             }
     340             :         }
     341             :     }
     342             : 
     343           0 :   return counter;
     344             : }
     345             : 
     346             : /* Call a PROCESSOR function for each entry of a hash table, and return the
     347             :    number of entries for which the processor function returned success.  A
     348             :    pointer to some PROCESSOR_DATA which will be made available to each call to
     349             :    the processor function.  The PROCESSOR accepts two arguments: the first is
     350             :    the user entry being walked into, the second is the value of PROCESSOR_DATA
     351             :    as received.  The walking continue for as long as the PROCESSOR function
     352             :    returns nonzero.  When it returns zero, the walking is interrupted.  */
     353             : 
     354             : size_t
     355           0 : hash_do_for_each (const Hash_table *table, Hash_processor processor,
     356             :                   void *processor_data)
     357             : {
     358           0 :   size_t counter = 0;
     359             :   struct hash_entry const *bucket;
     360             :   struct hash_entry const *cursor;
     361             : 
     362           0 :   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
     363             :     {
     364           0 :       if (bucket->data)
     365             :         {
     366           0 :           for (cursor = bucket; cursor; cursor = cursor->next)
     367             :             {
     368           0 :               if (!(*processor) (cursor->data, processor_data))
     369           0 :                 return counter;
     370           0 :               counter++;
     371             :             }
     372             :         }
     373             :     }
     374             : 
     375           0 :   return counter;
     376             : }
     377             : 
     378             : /* Allocation and clean-up.  */
     379             : 
     380             : /* Return a hash index for a NUL-terminated STRING between 0 and N_BUCKETS-1.
     381             :    This is a convenience routine for constructing other hashing functions.  */
     382             : 
     383             : #if USE_DIFF_HASH
     384             : 
     385             : /* About hashings, Paul Eggert writes to me (FP), on 1994-01-01: "Please see
     386             :    B. J. McKenzie, R. Harries & T. Bell, Selecting a hashing algorithm,
     387             :    Software--practice & experience 20, 2 (Feb 1990), 209-224.  Good hash
     388             :    algorithms tend to be domain-specific, so what's good for [diffutils'] io.c
     389             :    may not be good for your application."  */
     390             : 
     391             : size_t
     392             : hash_string (const char *string, size_t n_buckets)
     393             : {
     394             : # define ROTATE_LEFT(Value, Shift) \
     395             :   ((Value) << (Shift) | (Value) >> ((sizeof (size_t) * CHAR_BIT) - (Shift)))
     396             : # define HASH_ONE_CHAR(Value, Byte) \
     397             :   ((Byte) + ROTATE_LEFT (Value, 7))
     398             : 
     399             :   size_t value = 0;
     400             :   unsigned char ch;
     401             : 
     402             :   for (; (ch = *string); string++)
     403             :     value = HASH_ONE_CHAR (value, ch);
     404             :   return value % n_buckets;
     405             : 
     406             : # undef ROTATE_LEFT
     407             : # undef HASH_ONE_CHAR
     408             : }
     409             : 
     410             : #else /* not USE_DIFF_HASH */
     411             : 
     412             : /* This one comes from `recode', and performs a bit better than the above as
     413             :    per a few experiments.  It is inspired from a hashing routine found in the
     414             :    very old Cyber `snoop', itself written in typical Greg Mansfield style.
     415             :    (By the way, what happened to this excellent man?  Is he still alive?)  */
     416             : 
     417             : size_t
     418           0 : hash_string (const char *string, size_t n_buckets)
     419             : {
     420           0 :   size_t value = 0;
     421             :   unsigned char ch;
     422             : 
     423           0 :   for (; (ch = *string); string++)
     424           0 :     value = (value * 31 + ch) % n_buckets;
     425           0 :   return value;
     426             : }
     427             : 
     428             : #endif /* not USE_DIFF_HASH */
     429             : 
     430             : /* Return true if CANDIDATE is a prime number.  CANDIDATE should be an odd
     431             :    number at least equal to 11.  */
     432             : 
     433             : static bool
     434         981 : is_prime (size_t candidate)
     435             : {
     436         981 :   size_t divisor = 3;
     437         981 :   size_t square = divisor * divisor;
     438             : 
     439        4389 :   while (square < candidate && (candidate % divisor))
     440             :     {
     441        2427 :       divisor++;
     442        2427 :       square += 4 * divisor;
     443        2427 :       divisor++;
     444             :     }
     445             : 
     446         981 :   return (candidate % divisor ? true : false);
     447             : }
     448             : 
     449             : /* Round a given CANDIDATE number up to the nearest prime, and return that
     450             :    prime.  Primes lower than 10 are merely skipped.  */
     451             : 
     452             : static size_t
     453         484 : next_prime (size_t candidate)
     454             : {
     455             :   /* Skip small primes.  */
     456         484 :   if (candidate < 10)
     457           2 :     candidate = 10;
     458             : 
     459             :   /* Make it definitely odd.  */
     460         484 :   candidate |= 1;
     461             : 
     462        1465 :   while (!is_prime (candidate))
     463         497 :     candidate += 2;
     464             : 
     465         484 :   return candidate;
     466             : }
     467             : 
     468             : void
     469           0 : hash_reset_tuning (Hash_tuning *tuning)
     470             : {
     471           0 :   *tuning = default_tuning;
     472           0 : }
     473             : 
     474             : /* For the given hash TABLE, check the user supplied tuning structure for
     475             :    reasonable values, and return true if there is no gross error with it.
     476             :    Otherwise, definitively reset the TUNING field to some acceptable default
     477             :    in the hash table (that is, the user loses the right of further modifying
     478             :    tuning arguments), and return false.  */
     479             : 
     480             : static bool
     481         514 : check_tuning (Hash_table *table)
     482             : {
     483         514 :   const Hash_tuning *tuning = table->tuning;
     484             : 
     485             :   /* Be a bit stricter than mathematics would require, so that
     486             :      rounding errors in size calculations do not cause allocations to
     487             :      fail to grow or shrink as they should.  The smallest allocation
     488             :      is 11 (due to next_prime's algorithm), so an epsilon of 0.1
     489             :      should be good enough.  */
     490         514 :   float epsilon = 0.1f;
     491             : 
     492         514 :   if (epsilon < tuning->growth_threshold
     493         514 :       && tuning->growth_threshold < 1 - epsilon
     494         514 :       && 1 + epsilon < tuning->growth_factor
     495         514 :       && 0 <= tuning->shrink_threshold
     496         514 :       && tuning->shrink_threshold + epsilon < tuning->shrink_factor
     497         514 :       && tuning->shrink_factor <= 1
     498         514 :       && tuning->shrink_threshold + epsilon < tuning->growth_threshold)
     499         514 :     return true;
     500             : 
     501           0 :   table->tuning = &default_tuning;
     502           0 :   return false;
     503             : }
     504             : 
     505             : /* Allocate and return a new hash table, or NULL upon failure.  The initial
     506             :    number of buckets is automatically selected so as to _guarantee_ that you
     507             :    may insert at least CANDIDATE different user entries before any growth of
     508             :    the hash table size occurs.  So, if have a reasonably tight a-priori upper
     509             :    bound on the number of entries you intend to insert in the hash table, you
     510             :    may save some table memory and insertion time, by specifying it here.  If
     511             :    the IS_N_BUCKETS field of the TUNING structure is true, the CANDIDATE
     512             :    argument has its meaning changed to the wanted number of buckets.
     513             : 
     514             :    TUNING points to a structure of user-supplied values, in case some fine
     515             :    tuning is wanted over the default behavior of the hasher.  If TUNING is
     516             :    NULL, the default tuning parameters are used instead.
     517             : 
     518             :    The user-supplied HASHER function should be provided.  It accepts two
     519             :    arguments ENTRY and TABLE_SIZE.  It computes, by hashing ENTRY contents, a
     520             :    slot number for that entry which should be in the range 0..TABLE_SIZE-1.
     521             :    This slot number is then returned.
     522             : 
     523             :    The user-supplied COMPARATOR function should be provided.  It accepts two
     524             :    arguments pointing to user data, it then returns true for a pair of entries
     525             :    that compare equal, or false otherwise.  This function is internally called
     526             :    on entries which are already known to hash to the same bucket index.
     527             : 
     528             :    The user-supplied DATA_FREER function, when not NULL, may be later called
     529             :    with the user data as an argument, just before the entry containing the
     530             :    data gets freed.  This happens from within `hash_free' or `hash_clear'.
     531             :    You should specify this function only if you want these functions to free
     532             :    all of your `data' data.  This is typically the case when your data is
     533             :    simply an auxiliary struct that you have malloc'd to aggregate several
     534             :    values.  */
     535             : 
     536             : Hash_table *
     537         484 : hash_initialize (size_t candidate, const Hash_tuning *tuning,
     538             :                  Hash_hasher hasher, Hash_comparator comparator,
     539             :                  Hash_data_freer data_freer)
     540             : {
     541             :   Hash_table *table;
     542             : 
     543         484 :   if (hasher == NULL || comparator == NULL)
     544           0 :     return NULL;
     545             : 
     546         484 :   table = malloc (sizeof *table);
     547         484 :   if (table == NULL)
     548           0 :     return NULL;
     549             : 
     550         484 :   if (!tuning)
     551         454 :     tuning = &default_tuning;
     552         484 :   table->tuning = tuning;
     553         484 :   if (!check_tuning (table))
     554             :     {
     555             :       /* Fail if the tuning options are invalid.  This is the only occasion
     556             :          when the user gets some feedback about it.  Once the table is created,
     557             :          if the user provides invalid tuning options, we silently revert to
     558             :          using the defaults, and ignore further request to change the tuning
     559             :          options.  */
     560           0 :       goto fail;
     561             :     }
     562             : 
     563         484 :   if (!tuning->is_n_buckets)
     564             :     {
     565         484 :       float new_candidate = candidate / tuning->growth_threshold;
     566         484 :       if (SIZE_MAX <= new_candidate)
     567           0 :         goto fail;
     568         484 :       candidate = new_candidate;
     569             :     }
     570             : 
     571         484 :   if (xalloc_oversized (candidate, sizeof *table->bucket))
     572           0 :     goto fail;
     573         484 :   table->n_buckets = next_prime (candidate);
     574         484 :   if (xalloc_oversized (table->n_buckets, sizeof *table->bucket))
     575           0 :     goto fail;
     576             : 
     577         484 :   table->bucket = calloc (table->n_buckets, sizeof *table->bucket);
     578         484 :   if (table->bucket == NULL)
     579           0 :     goto fail;
     580         484 :   table->bucket_limit = table->bucket + table->n_buckets;
     581         484 :   table->n_buckets_used = 0;
     582         484 :   table->n_entries = 0;
     583             : 
     584         484 :   table->hasher = hasher;
     585         484 :   table->comparator = comparator;
     586         484 :   table->data_freer = data_freer;
     587             : 
     588         484 :   table->free_entry_list = NULL;
     589             : #if USE_OBSTACK
     590             :   obstack_init (&table->entry_stack);
     591             : #endif
     592         484 :   return table;
     593             : 
     594           0 :  fail:
     595           0 :   free (table);
     596           0 :   return NULL;
     597             : }
     598             : 
     599             : /* Make all buckets empty, placing any chained entries on the free list.
     600             :    Apply the user-specified function data_freer (if any) to the datas of any
     601             :    affected entries.  */
     602             : 
     603             : void
     604           0 : hash_clear (Hash_table *table)
     605             : {
     606             :   struct hash_entry *bucket;
     607             : 
     608           0 :   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
     609             :     {
     610           0 :       if (bucket->data)
     611             :         {
     612             :           struct hash_entry *cursor;
     613             :           struct hash_entry *next;
     614             : 
     615             :           /* Free the bucket overflow.  */
     616           0 :           for (cursor = bucket->next; cursor; cursor = next)
     617             :             {
     618           0 :               if (table->data_freer)
     619           0 :                 (*table->data_freer) (cursor->data);
     620           0 :               cursor->data = NULL;
     621             : 
     622           0 :               next = cursor->next;
     623             :               /* Relinking is done one entry at a time, as it is to be expected
     624             :                  that overflows are either rare or short.  */
     625           0 :               cursor->next = table->free_entry_list;
     626           0 :               table->free_entry_list = cursor;
     627             :             }
     628             : 
     629             :           /* Free the bucket head.  */
     630           0 :           if (table->data_freer)
     631           0 :             (*table->data_freer) (bucket->data);
     632           0 :           bucket->data = NULL;
     633           0 :           bucket->next = NULL;
     634             :         }
     635             :     }
     636             : 
     637           0 :   table->n_buckets_used = 0;
     638           0 :   table->n_entries = 0;
     639           0 : }
     640             : 
     641             : /* Reclaim all storage associated with a hash table.  If a data_freer
     642             :    function has been supplied by the user when the hash table was created,
     643             :    this function applies it to the data of each entry before freeing that
     644             :    entry.  */
     645             : 
     646             : void
     647         209 : hash_free (Hash_table *table)
     648             : {
     649             :   struct hash_entry *bucket;
     650             :   struct hash_entry *cursor;
     651             :   struct hash_entry *next;
     652             : 
     653             :   /* Call the user data_freer function.  */
     654         209 :   if (table->data_freer && table->n_entries)
     655             :     {
     656        1476 :       for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
     657             :         {
     658        1463 :           if (bucket->data)
     659             :             {
     660          92 :               for (cursor = bucket; cursor; cursor = cursor->next)
     661             :                 {
     662          46 :                   (*table->data_freer) (cursor->data);
     663             :                 }
     664             :             }
     665             :         }
     666             :     }
     667             : 
     668             : #if USE_OBSTACK
     669             : 
     670             :   obstack_free (&table->entry_stack, NULL);
     671             : 
     672             : #else
     673             : 
     674             :   /* Free all bucket overflowed entries.  */
     675       26814 :   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
     676             :     {
     677       26605 :       for (cursor = bucket->next; cursor; cursor = next)
     678             :         {
     679           0 :           next = cursor->next;
     680           0 :           free (cursor);
     681             :         }
     682             :     }
     683             : 
     684             :   /* Also reclaim the internal list of previously freed entries.  */
     685         221 :   for (cursor = table->free_entry_list; cursor; cursor = next)
     686             :     {
     687          12 :       next = cursor->next;
     688          12 :       free (cursor);
     689             :     }
     690             : 
     691             : #endif
     692             : 
     693             :   /* Free the remainder of the hash table structure.  */
     694         209 :   free (table->bucket);
     695         209 :   free (table);
     696         209 : }
     697             : 
     698             : /* Insertion and deletion.  */
     699             : 
     700             : /* Get a new hash entry for a bucket overflow, possibly by reclying a
     701             :    previously freed one.  If this is not possible, allocate a new one.  */
     702             : 
     703             : static struct hash_entry *
     704        1275 : allocate_entry (Hash_table *table)
     705             : {
     706             :   struct hash_entry *new;
     707             : 
     708        1275 :   if (table->free_entry_list)
     709             :     {
     710        1263 :       new = table->free_entry_list;
     711        1263 :       table->free_entry_list = new->next;
     712             :     }
     713             :   else
     714             :     {
     715             : #if USE_OBSTACK
     716             :       new = obstack_alloc (&table->entry_stack, sizeof *new);
     717             : #else
     718          12 :       new = malloc (sizeof *new);
     719             : #endif
     720             :     }
     721             : 
     722        1275 :   return new;
     723             : }
     724             : 
     725             : /* Free a hash entry which was part of some bucket overflow,
     726             :    saving it for later recycling.  */
     727             : 
     728             : static void
     729        1275 : free_entry (Hash_table *table, struct hash_entry *entry)
     730             : {
     731        1275 :   entry->data = NULL;
     732        1275 :   entry->next = table->free_entry_list;
     733        1275 :   table->free_entry_list = entry;
     734        1275 : }
     735             : 
     736             : /* This private function is used to help with insertion and deletion.  When
     737             :    ENTRY matches an entry in the table, return a pointer to the corresponding
     738             :    user data and set *BUCKET_HEAD to the head of the selected bucket.
     739             :    Otherwise, return NULL.  When DELETE is true and ENTRY matches an entry in
     740             :    the table, unlink the matching entry.  */
     741             : 
     742             : static void *
     743       80808 : hash_find_entry (Hash_table *table, const void *entry,
     744             :                  struct hash_entry **bucket_head, bool delete)
     745             : {
     746       80808 :   struct hash_entry *bucket
     747       80808 :     = table->bucket + table->hasher (entry, table->n_buckets);
     748             :   struct hash_entry *cursor;
     749             : 
     750       80808 :   if (! (bucket < table->bucket_limit))
     751           0 :     abort ();
     752             : 
     753       80808 :   *bucket_head = bucket;
     754             : 
     755             :   /* Test for empty bucket.  */
     756       80808 :   if (bucket->data == NULL)
     757       39723 :     return NULL;
     758             : 
     759             :   /* See if the entry is the first in the bucket.  */
     760       41085 :   if ((*table->comparator) (entry, bucket->data))
     761             :     {
     762       38685 :       void *data = bucket->data;
     763             : 
     764       38685 :       if (delete)
     765             :         {
     766       38636 :           if (bucket->next)
     767             :             {
     768           0 :               struct hash_entry *next = bucket->next;
     769             : 
     770             :               /* Bump the first overflow entry into the bucket head, then save
     771             :                  the previous first overflow entry for later recycling.  */
     772           0 :               *bucket = *next;
     773           0 :               free_entry (table, next);
     774             :             }
     775             :           else
     776             :             {
     777       38636 :               bucket->data = NULL;
     778             :             }
     779             :         }
     780             : 
     781       38685 :       return data;
     782             :     }
     783             : 
     784             :   /* Scan the bucket overflow.  */
     785        2526 :   for (cursor = bucket; cursor->next; cursor = cursor->next)
     786             :     {
     787        1317 :       if ((*table->comparator) (entry, cursor->next->data))
     788             :         {
     789        1191 :           void *data = cursor->next->data;
     790             : 
     791        1191 :           if (delete)
     792             :             {
     793        1191 :               struct hash_entry *next = cursor->next;
     794             : 
     795             :               /* Unlink the entry to delete, then save the freed entry for later
     796             :                  recycling.  */
     797        1191 :               cursor->next = next->next;
     798        1191 :               free_entry (table, next);
     799             :             }
     800             : 
     801        1191 :           return data;
     802             :         }
     803             :     }
     804             : 
     805             :   /* No entry found.  */
     806        1209 :   return NULL;
     807             : }
     808             : 
     809             : /* For an already existing hash table, change the number of buckets through
     810             :    specifying CANDIDATE.  The contents of the hash table are preserved.  The
     811             :    new number of buckets is automatically selected so as to _guarantee_ that
     812             :    the table may receive at least CANDIDATE different user entries, including
     813             :    those already in the table, before any other growth of the hash table size
     814             :    occurs.  If TUNING->IS_N_BUCKETS is true, then CANDIDATE specifies the
     815             :    exact number of buckets desired.  */
     816             : 
     817             : bool
     818          30 : hash_rehash (Hash_table *table, size_t candidate)
     819             : {
     820             :   Hash_table *new_table;
     821             :   struct hash_entry *bucket;
     822             :   struct hash_entry *cursor;
     823             :   struct hash_entry *next;
     824             : 
     825          30 :   new_table = hash_initialize (candidate, table->tuning, table->hasher,
     826             :                                table->comparator, table->data_freer);
     827          30 :   if (new_table == NULL)
     828           0 :     return false;
     829             : 
     830             :   /* Merely reuse the extra old space into the new table.  */
     831             : #if USE_OBSTACK
     832             :   obstack_free (&new_table->entry_stack, NULL);
     833             :   new_table->entry_stack = table->entry_stack;
     834             : #endif
     835          30 :   new_table->free_entry_list = table->free_entry_list;
     836             : 
     837       10296 :   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
     838       10266 :     if (bucket->data)
     839       16542 :       for (cursor = bucket; cursor; cursor = next)
     840             :         {
     841        8313 :           void *data = cursor->data;
     842        8313 :           struct hash_entry *new_bucket
     843        8313 :             = (new_table->bucket
     844        8313 :                + new_table->hasher (data, new_table->n_buckets));
     845             : 
     846        8313 :           if (! (new_bucket < new_table->bucket_limit))
     847           0 :             abort ();
     848             : 
     849        8313 :           next = cursor->next;
     850             : 
     851        8313 :           if (new_bucket->data)
     852             :             {
     853          75 :               if (cursor == bucket)
     854             :                 {
     855             :                   /* Allocate or recycle an entry, when moving from a bucket
     856             :                      header into a bucket overflow.  */
     857          75 :                   struct hash_entry *new_entry = allocate_entry (new_table);
     858             : 
     859          75 :                   if (new_entry == NULL)
     860           0 :                     return false;
     861             : 
     862          75 :                   new_entry->data = data;
     863          75 :                   new_entry->next = new_bucket->next;
     864          75 :                   new_bucket->next = new_entry;
     865             :                 }
     866             :               else
     867             :                 {
     868             :                   /* Merely relink an existing entry, when moving from a
     869             :                      bucket overflow into a bucket overflow.  */
     870           0 :                   cursor->next = new_bucket->next;
     871           0 :                   new_bucket->next = cursor;
     872             :                 }
     873             :             }
     874             :           else
     875             :             {
     876             :               /* Free an existing entry, when moving from a bucket
     877             :                  overflow into a bucket header.  Also take care of the
     878             :                  simple case of moving from a bucket header into a bucket
     879             :                  header.  */
     880        8238 :               new_bucket->data = data;
     881        8238 :               new_table->n_buckets_used++;
     882        8238 :               if (cursor != bucket)
     883          84 :                 free_entry (new_table, cursor);
     884             :             }
     885             :         }
     886             : 
     887          30 :   free (table->bucket);
     888          30 :   table->bucket = new_table->bucket;
     889          30 :   table->bucket_limit = new_table->bucket_limit;
     890          30 :   table->n_buckets = new_table->n_buckets;
     891          30 :   table->n_buckets_used = new_table->n_buckets_used;
     892          30 :   table->free_entry_list = new_table->free_entry_list;
     893             :   /* table->n_entries already holds its value.  */
     894             : #if USE_OBSTACK
     895             :   table->entry_stack = new_table->entry_stack;
     896             : #endif
     897          30 :   free (new_table);
     898             : 
     899          30 :   return true;
     900             : }
     901             : 
     902             : /* If ENTRY matches an entry already in the hash table, return the pointer
     903             :    to the entry from the table.  Otherwise, insert ENTRY and return ENTRY.
     904             :    Return NULL if the storage required for insertion cannot be allocated.  */
     905             : 
     906             : void *
     907       39933 : hash_insert (Hash_table *table, const void *entry)
     908             : {
     909             :   void *data;
     910             :   struct hash_entry *bucket;
     911             : 
     912             :   /* The caller cannot insert a NULL entry.  */
     913       39933 :   if (! entry)
     914           0 :     abort ();
     915             : 
     916             :   /* If there's a matching entry already in the table, return that.  */
     917       39933 :   if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL)
     918          49 :     return data;
     919             : 
     920             :   /* ENTRY is not matched, it should be inserted.  */
     921             : 
     922       39884 :   if (bucket->data)
     923             :     {
     924        1200 :       struct hash_entry *new_entry = allocate_entry (table);
     925             : 
     926        1200 :       if (new_entry == NULL)
     927           0 :         return NULL;
     928             : 
     929             :       /* Add ENTRY in the overflow of the bucket.  */
     930             : 
     931        1200 :       new_entry->data = (void *) entry;
     932        1200 :       new_entry->next = bucket->next;
     933        1200 :       bucket->next = new_entry;
     934        1200 :       table->n_entries++;
     935        1200 :       return (void *) entry;
     936             :     }
     937             : 
     938             :   /* Add ENTRY right in the bucket head.  */
     939             : 
     940       38684 :   bucket->data = (void *) entry;
     941       38684 :   table->n_entries++;
     942       38684 :   table->n_buckets_used++;
     943             : 
     944             :   /* If the growth threshold of the buckets in use has been reached, increase
     945             :      the table size and rehash.  There's no point in checking the number of
     946             :      entries:  if the hashing function is ill-conditioned, rehashing is not
     947             :      likely to improve it.  */
     948             : 
     949       77368 :   if (table->n_buckets_used
     950       38684 :       > table->tuning->growth_threshold * table->n_buckets)
     951             :     {
     952             :       /* Check more fully, before starting real work.  If tuning arguments
     953             :          became invalid, the second check will rely on proper defaults.  */
     954          30 :       check_tuning (table);
     955          60 :       if (table->n_buckets_used
     956          30 :           > table->tuning->growth_threshold * table->n_buckets)
     957             :         {
     958          30 :           const Hash_tuning *tuning = table->tuning;
     959          30 :           float candidate =
     960          30 :             (tuning->is_n_buckets
     961           0 :              ? (table->n_buckets * tuning->growth_factor)
     962          60 :              : (table->n_buckets * tuning->growth_factor
     963          30 :                 * tuning->growth_threshold));
     964             : 
     965          30 :           if (SIZE_MAX <= candidate)
     966           0 :             return NULL;
     967             : 
     968             :           /* If the rehash fails, arrange to return NULL.  */
     969          30 :           if (!hash_rehash (table, candidate))
     970           0 :             entry = NULL;
     971             :         }
     972             :     }
     973             : 
     974       38684 :   return (void *) entry;
     975             : }
     976             : 
     977             : /* If ENTRY is already in the table, remove it and return the just-deleted
     978             :    data (the user may want to deallocate its storage).  If ENTRY is not in the
     979             :    table, don't modify the table and return NULL.  */
     980             : 
     981             : void *
     982       40875 : hash_delete (Hash_table *table, const void *entry)
     983             : {
     984             :   void *data;
     985             :   struct hash_entry *bucket;
     986             : 
     987       40875 :   data = hash_find_entry (table, entry, &bucket, true);
     988       40875 :   if (!data)
     989        1048 :     return NULL;
     990             : 
     991       39827 :   table->n_entries--;
     992       39827 :   if (!bucket->data)
     993             :     {
     994       38636 :       table->n_buckets_used--;
     995             : 
     996             :       /* If the shrink threshold of the buckets in use has been reached,
     997             :          rehash into a smaller table.  */
     998             : 
     999       77272 :       if (table->n_buckets_used
    1000       38636 :           < table->tuning->shrink_threshold * table->n_buckets)
    1001             :         {
    1002             :           /* Check more fully, before starting real work.  If tuning arguments
    1003             :              became invalid, the second check will rely on proper defaults.  */
    1004           0 :           check_tuning (table);
    1005           0 :           if (table->n_buckets_used
    1006           0 :               < table->tuning->shrink_threshold * table->n_buckets)
    1007             :             {
    1008           0 :               const Hash_tuning *tuning = table->tuning;
    1009           0 :               size_t candidate =
    1010           0 :                 (tuning->is_n_buckets
    1011           0 :                  ? table->n_buckets * tuning->shrink_factor
    1012           0 :                  : (table->n_buckets * tuning->shrink_factor
    1013           0 :                     * tuning->growth_threshold));
    1014             : 
    1015           0 :               hash_rehash (table, candidate);
    1016             :             }
    1017             :         }
    1018             :     }
    1019             : 
    1020       39827 :   return data;
    1021             : }
    1022             : 
    1023             : /* Testing.  */
    1024             : 
    1025             : #if TESTING
    1026             : 
    1027             : void
    1028             : hash_print (const Hash_table *table)
    1029             : {
    1030             :   struct hash_entry const *bucket;
    1031             : 
    1032             :   for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    1033             :     {
    1034             :       struct hash_entry *cursor;
    1035             : 
    1036             :       if (bucket)
    1037             :         printf ("%lu:\n", (unsigned long int) (bucket - table->bucket));
    1038             : 
    1039             :       for (cursor = bucket; cursor; cursor = cursor->next)
    1040             :         {
    1041             :           char const *s = cursor->data;
    1042             :           /* FIXME */
    1043             :           if (s)
    1044             :             printf ("  %s\n", s);
    1045             :         }
    1046             :     }
    1047             : }
    1048             : 
    1049             : #endif /* TESTING */

Generated by: LCOV version 1.10