hash_table.h

Go to the documentation of this file.
00001 /*****************************************************************************/
00002 /*!
00003  *\file hash_table.h
00004  *\brief hash table implementation
00005  *
00006  * Author: Alexander Fuchs
00007  *
00008  * Created: Thu Oct 19 11:04:00 2006
00009  *
00010  * <hr>
00011  *
00012  * License to use, copy, modify, sell and/or distribute this software
00013  * and its documentation for any purpose is hereby granted without
00014  * royalty, subject to the terms and conditions defined in the \ref
00015  * LICENSE file provided with this distribution.
00016  * 
00017  * <hr>
00018  */
00019 /*****************************************************************************/
00020 
00021 /*
00022  * Copyright (c) 1996,1997
00023  * Silicon Graphics Computer Systems, Inc.
00024  *
00025  * Permission to use, copy, modify, distribute and sell this software
00026  * and its documentation for any purpose is hereby granted without fee,
00027  * provided that the above copyright notice appear in all copies and
00028  * that both that copyright notice and this permission notice appear
00029  * in supporting documentation.  Silicon Graphics makes no
00030  * representations about the suitability of this software for any
00031  * purpose.  It is provided "as is" without express or implied warranty.
00032  *
00033  *
00034  * Copyright (c) 1994
00035  * Hewlett-Packard Company
00036  *
00037  * Permission to use, copy, modify, distribute and sell this software
00038  * and its documentation for any purpose is hereby granted without fee,
00039  * provided that the above copyright notice appear in all copies and
00040  * that both that copyright notice and this permission notice appear
00041  * in supporting documentation.  Hewlett-Packard Company makes no
00042  * representations about the suitability of this software for any
00043  * purpose.  It is provided "as is" without express or implied warranty.
00044  *
00045  */
00046 
00047 // this implementation is in essence a subset of the SGI implementation:
00048 // http://www.sgi.com/tech/stl/stl_hashtable.h
00049 
00050 #ifndef _cvc3__hash__hash_table_h_
00051 #define _cvc3__hash__hash_table_h_
00052 
00053 #include <vector>
00054 #include <string>
00055 #include <functional>
00056 #include <algorithm>
00057 #include "hash_fun.h"
00058 
00059 // For some reason, including debug.h doesn't work--so redeclare macros here
00060 
00061 #ifdef DEBUG
00062 #define DebugAssert(cond, str) if(!(cond)) \
00063  CVC3::debugError(__FILE__, __LINE__, #cond, str)
00064 namespace CVC3 {
00065 extern void debugError(const std::string& file, int line,
00066                        const std::string& cond, const std::string& msg);
00067 }
00068 #else
00069 #define DebugAssert(cond, str)
00070 #endif
00071 
00072 namespace Hash {
00073   // get size_t from hash_fun, which gets it from cstddef
00074   typedef size_t size_type;
00075 
00076     /// primes for increasing the hash table size
00077 
00078   // Note: assumes size_type is unsigned and at least 32 bits.
00079   const size_type num_primes = 28;
00080   
00081   static const size_type prime_list[num_primes] = {
00082     53ul,         97ul,         193ul,       389ul,       769ul,
00083     1543ul,       3079ul,       6151ul,      12289ul,     24593ul,
00084     49157ul,      98317ul,      196613ul,    393241ul,    786433ul,
00085     1572869ul,    3145739ul,    6291469ul,   12582917ul,  25165843ul,
00086     50331653ul,   100663319ul,  201326611ul, 402653189ul, 805306457ul, 
00087     1610612741ul, 3221225473ul, 4294967291ul
00088   };
00089   
00090   inline size_type next_prime(size_type n)
00091   {
00092     const size_type* first = prime_list;
00093     const size_type* last = prime_list + (size_type)num_primes;
00094     const size_type* pos = std::lower_bound(first, last, n);
00095     return pos == last ? *(last - 1) : *pos;
00096   }
00097 
00098 
00099   /*! template to instante to hash map and hash set
00100 
00101   based on the sgi implementation:
00102   http://www.sgi.com/tech/stl/HashedAssociativeContainer.html
00103 
00104     _Key: hash key type
00105     _Data: key + value data to store
00106     _HashFcn: functional class providing a hash function: int(_Key)
00107       Note: in some STL implementations hash is already part of
00108       some extension an in namespace std or stdext, in some it is not.
00109       So we assume that it is not available. :TODO:
00110     _EqualKey: functional class providing a comparison function: bool(_Key, _Key)
00111       returns true iff two keys are considered to be equal
00112     _ExtractKey: extracts key from _Data: _Key(_Data)
00113   */
00114   //
00115   // can't use debug.h as debug.h uses hash maps...
00116   //
00117   // implemented as an array of lists (buckets)
00118   template <class _Key, class _Value,
00119       class _HashFcn, class _EqualKey, class _ExtractKey>
00120   class hash_table {
00121 
00122   /// types
00123 
00124   public:
00125     // interface typedefs
00126     typedef Hash::size_type size_type;
00127     typedef _Key key_type;
00128     typedef _Value value_type;
00129     typedef _HashFcn hasher;
00130     typedef _EqualKey key_equal;
00131 
00132   protected:
00133     // a bucket is a list of values
00134     // using an STL list makes it more complicated to have a nice end iterator,
00135     // as iterators of different lists can not be compared
00136     // (at least in the MS STL).
00137     // so instead we implement our own single-linked list here,
00138     // where NULL is the end iterator for all lists.
00139     struct BucketNode {
00140       BucketNode(BucketNode* next, const value_type& value)
00141   : d_next(next), d_value(value)
00142       { };
00143       BucketNode* d_next;
00144       value_type d_value;
00145     };
00146     typedef BucketNode Bucket;
00147     
00148     // the buckets are kept in an array
00149     typedef std::vector<Bucket*> Data;
00150     typedef typename Data::iterator data_iter;
00151     typedef typename Data::const_iterator data_const_iter;
00152 
00153 
00154   public:
00155     // iterators
00156     class iterator;
00157     friend class iterator;
00158     class const_iterator;
00159     friend class const_iterator;
00160 
00161 
00162 
00163   /// variables
00164 
00165   protected:
00166     /// template parameters
00167 
00168     // the hash function for a key
00169     hasher d_hash;
00170 
00171     // the equality function between keys
00172     key_equal d_equal;
00173 
00174     // extraction of key from data
00175     _ExtractKey d_extractKey;
00176         
00177 
00178     // the current number of elements - stored for efficiency
00179     size_type d_size;
00180 
00181     // the hash table - an array of buckets
00182     Data d_data;
00183 
00184 
00185   /// methods
00186 
00187   protected:
00188 
00189     /// template parameters
00190     
00191 
00192     // the hash function for a key
00193     size_type hash(const key_type& key) const {
00194       return d_hash(key);
00195     }
00196 
00197     // equality between keys
00198     bool equal(const key_type& key1, const key_type& key2) const {
00199       return d_equal(key1, key2);
00200     }
00201 
00202     // extraction of a key
00203     const key_type& extractKey(const value_type& value) const {
00204       return d_extractKey(value);
00205     }
00206 
00207 
00208     /// bucket retrieval
00209 
00210     // returns the index in the array which contains the bucket
00211     // with the keys mapping to the same hash value as key
00212     size_type getBucketIndex(const key_type& key) const {
00213       return (hash(key) % d_data.size());
00214     }
00215 
00216     Bucket* getBucketByKey(const key_type& key) {
00217       return getBucketByIndex(getBucketIndex(key));
00218     }
00219     
00220     const Bucket* getBucketByKey(const key_type& key) const {
00221       return getBucketByIndex(getBucketIndex(key));
00222     }
00223 
00224     Bucket* getBucketByIndex(const size_type index) {
00225       DebugAssert(index < d_data.size(),"hash_table::getBucketByIndex");
00226       return d_data.at(index);
00227     }
00228 
00229     const Bucket* getBucketByIndex(const size_type index) const {
00230       DebugAssert(index < d_data.size(),"hash_table::getBucketByIndex (const)");
00231       return d_data.at(index);
00232     }
00233 
00234 
00235 
00236     /// resize
00237 
00238     // increase the size of the table, typically if the load factor is too high
00239     void resize() {
00240       if (load_factor() > 1) {
00241   // create new table with doubled size size
00242   //size_type size = 2 * d_data.size();
00243   // this is simple, but might not be efficient for bad hash functions,
00244   // which for example mainly hash to values which contain 2 as a factor.
00245   // thus, instead we go from a prime to a prime of more or less double size.
00246   size_type size = next_prime(d_data.size() + 1);
00247   Data copy(size, NULL);
00248   
00249   // move entries to new table
00250   for (size_type i = 0; i < d_data.size(); ++i) {
00251     // head of current bucket to move
00252     BucketNode* bucket = d_data[i];
00253     while (bucket != NULL) {
00254       // move head of old bucket
00255       BucketNode* current = bucket;
00256       bucket = bucket->d_next;
00257 
00258       // move old head to new bucket
00259       size_type new_index = hash(extractKey(current->d_value)) % size;
00260       BucketNode* new_bucket = copy[new_index];
00261       current->d_next = new_bucket;
00262       copy[new_index] = current;
00263     }
00264     d_data[i] = NULL;
00265   }
00266 
00267   d_data.swap(copy);
00268       }
00269     }
00270 
00271 
00272   public:
00273     /// constructors
00274 
00275     // default size is 16 buckets
00276     hash_table() :
00277       d_hash(_HashFcn()), d_equal(_EqualKey()), d_extractKey(_ExtractKey()),
00278       d_size(0), d_data(16)
00279     {
00280       init();
00281     };
00282     
00283     // specifiy initial number of buckets - must be positive
00284     hash_table(size_type initial_capacity) : 
00285       d_hash(_HashFcn()), d_equal(_EqualKey()), d_extractKey(_ExtractKey()),
00286       d_size(0), d_data(initial_capacity)
00287     {
00288       init();
00289     };
00290 
00291     // specifiy initial number of buckets and hash function
00292     hash_table(size_type initial_capacity, const _HashFcn& hash) : 
00293       d_hash(hash), d_equal(_EqualKey()), d_extractKey(_ExtractKey()),
00294       d_size(0), d_data(initial_capacity)
00295     {
00296       init();
00297     };
00298 
00299     // specifiy initial number of buckets, hash and equal function
00300     hash_table(size_type initial_capacity,
00301        const _HashFcn& hash, const _EqualKey& equal) : 
00302       d_hash(hash), d_equal(equal), d_extractKey(_ExtractKey()),
00303       d_size(0), d_data(initial_capacity)
00304     {
00305       init();
00306     };
00307 
00308     // copy hash table
00309     hash_table(const hash_table& other) :
00310       d_hash(other.d_hash), d_equal(other.d_equal), d_extractKey(other.d_extractKey),
00311       d_size(other.d_size), d_data(0)
00312     {
00313       assignTable(other.d_data);
00314     };
00315 
00316     ~hash_table() {
00317       clear();
00318     }
00319 
00320     // assign hash table
00321     hash_table& operator=(const hash_table& other) {
00322       if (this != &other) {
00323   clear();
00324 
00325   d_hash = other.d_hash;
00326   d_equal = other.d_equal;
00327   d_extractKey = other.d_extractKey;
00328   d_size = other.d_size;
00329   assignTable(other.d_data);
00330       }
00331 
00332       return *this;
00333     }
00334     
00335     
00336     // replaces the current hash table with the given one
00337     void assignTable(const Data& data) {
00338       // copy elements:
00339       // default assignment operator does not work if value_type contains
00340       // constants, which should be the case as the key should be constant.
00341       // so not even shrinking a vector is possible,
00342       // so create a new table instead and swap with the current one.
00343       Data tmp(data.size());
00344       d_data.swap(tmp);
00345       
00346       // for each bucket ...
00347       for (size_type i = 0; i < data.size(); ++i) {
00348   // .. copy each element
00349   DebugAssert(i < d_data.size(),"hash_table::operator=");
00350   
00351     // copy bucket if not empty
00352   Bucket* source = data[i];
00353   if (source != NULL) {
00354     // set first element
00355     Bucket* target = new BucketNode(NULL, source->d_value);
00356     d_data[i] = target;
00357     source = source->d_next;  
00358     
00359     // copy remaining nodes
00360     while (source != NULL) {
00361       target->d_next = new BucketNode(NULL, source->d_value);
00362       target = target->d_next;
00363       source = source->d_next;  
00364     }
00365   }
00366       }
00367     }
00368 
00369     void swap(hash_table& other) {
00370       std::swap(d_hash, other.d_hash);
00371       std::swap(d_equal, other.d_equal);
00372       std::swap(d_extractKey, other.d_extractKey);
00373       std::swap(d_size, other.d_size);
00374       d_data.swap(other.d_data);
00375     }
00376 
00377     // sets all buckets to NULL
00378     void init() {
00379       for (size_type i = 0; i < d_data.size(); ++i) {
00380   d_data[i] = NULL;
00381       }
00382     }
00383 
00384     // empty all buckets
00385     void clear() {
00386       d_size = 0;
00387     
00388       for (size_type i = 0; i < d_data.size(); ++i) {
00389   BucketNode* head = d_data[i];
00390   while (head != NULL) {
00391     BucketNode* next = head->d_next;
00392     delete head;
00393     head = next;
00394   }
00395   d_data[i] = NULL;
00396       }
00397     }
00398 
00399 
00400 
00401     /// operations
00402 
00403     
00404     // returns end iterator if key was not bound
00405     iterator find(const key_type& key) {
00406       for (BucketNode* node = getBucketByKey(key); node != NULL; node = node->d_next) {
00407   if (equal(extractKey(node->d_value), key)) {
00408     return iterator(this, node);
00409   }
00410       }
00411       return end();      
00412     }
00413 
00414     // const version of find
00415     const_iterator find(const key_type& key) const {
00416       for (const BucketNode* node = getBucketByKey(key); node != NULL; node = node->d_next) {
00417   if (equal(extractKey(node->d_value), key)) {
00418     return const_iterator(this, node);
00419   }
00420       }
00421       return end();      
00422     }
00423 
00424 
00425     // adds the mapping from key to data, if key is still unbound
00426     // otherwise returns false
00427     std::pair<iterator, bool> insert(const value_type& value) {
00428       // resize in case we insert
00429       resize();
00430 
00431       const key_type& key = extractKey(value);
00432       size_type index = getBucketIndex(key);
00433 
00434       // check if key is already bound
00435       for (BucketNode* node = d_data[index]; node != NULL; node = node->d_next) {
00436   if (equal(extractKey(node->d_value), key)) {
00437     return std::make_pair(end(), false);
00438   }
00439       }
00440 
00441       // insert new value
00442       ++d_size;
00443       d_data[index] = new BucketNode(d_data[index], value);
00444       return std::make_pair(iterator(this, d_data[index]), true);
00445     }
00446 
00447     // if key in value is already bound,
00448     // returns that bindings,
00449     // otherwise inserts value and returns it.
00450     value_type& find_or_insert(const value_type& value) {
00451       // resize in case we insert
00452       resize();
00453 
00454       const key_type& key = extractKey(value);
00455       size_type index = getBucketIndex(key);
00456 
00457       // check if key is already bound
00458       for (BucketNode* node = d_data[index]; node != NULL; node = node->d_next) {
00459   if (equal(extractKey(node->d_value), key)) {
00460     return node->d_value;
00461   }
00462       }
00463 
00464       // insert new value
00465       ++d_size;
00466       d_data[index] = new BucketNode(d_data[index], value);
00467       return d_data[index]->d_value;
00468     }
00469 
00470 
00471     // removes binding of key
00472     // returns number of keys removed,
00473     // i.e. 1 if key was bound, 0 if key was not bound.
00474     size_type erase(const key_type& key) {
00475       size_type index = getBucketIndex(key);
00476 
00477       // keep track of the node previous to the current one
00478       BucketNode* prev = NULL;
00479       for (BucketNode* node = d_data[index]; node != NULL; node = node->d_next) {
00480   if (equal(extractKey(node->d_value), key)) {
00481     --d_size;
00482     
00483     // remove the bucket's head
00484     if (prev == NULL) {
00485       d_data[index] = node->d_next;
00486     }
00487     // remove within the list;
00488     else {
00489       prev->d_next = node->d_next;
00490     }
00491     delete node;
00492     return 1;
00493   }
00494 
00495   prev = node;
00496       }
00497       
00498       return 0;
00499     }
00500 
00501 
00502 
00503     /// status
00504 
00505     // is the key bound?
00506     bool contains(const key_type& key) const {
00507       return (find(key) != end());
00508     }
00509 
00510     // returns the number of times a key is bound,
00511     // i.e. 0 or 1
00512     size_type count(const _Key& key) const {
00513       if (contains(key)) {
00514   return 1;
00515       }
00516       else {
00517   return 0;
00518       }
00519     }
00520   
00521     // is the hash table empty?
00522     bool empty() const {
00523       return (d_size == 0);
00524     }
00525 
00526     // the number of elements in the hash table
00527     size_type size() const {
00528       return d_size;
00529     }
00530 
00531     // the number of buckets in the hash table
00532     size_type bucket_count() const {
00533       return d_data.size();
00534     }
00535 
00536     // returns the average number of elements per bucket
00537     float load_factor() const {
00538       return (float(d_size) / float(d_data.size()));
00539     }
00540 
00541 
00542 
00543     /// iterators
00544 
00545     // returns forward iterator to iterate over all key/data pairs
00546     iterator begin() {
00547       if (d_size > 0) {
00548   // find first non-empty bucket
00549   size_type index = 0;
00550   while (d_data[index] == NULL) {
00551     ++index;
00552   }
00553   
00554   return iterator(this, d_data[index]);
00555       }
00556       else {
00557   return end();
00558       }
00559     }
00560 
00561     // const version of begin
00562     const_iterator begin() const {
00563       if (d_size > 0) {
00564   // find first non-empty bucket
00565   size_type index = 0;
00566   while (d_data[index] == NULL) {
00567     ++index;
00568   }
00569   
00570   return const_iterator(this, d_data[index]);
00571       }
00572       else {
00573   return end();
00574       }
00575     }
00576 
00577 
00578     // returns end iterator
00579     iterator end() {
00580       return iterator(this, NULL);
00581     }
00582     
00583     // const version of end
00584     const_iterator end() const {
00585       return const_iterator(this, NULL);
00586     }
00587 
00588 
00589 
00590 
00591     /// inner classes
00592 
00593 
00594 
00595 
00596     // iterator over hashtable elements
00597 
00598     // modifying the hash table leaves iterator intact,
00599     // unless the key in the value it points to is modified/deleted
00600     class iterator {
00601       friend class hash_table;
00602       friend class const_iterator;
00603 
00604       /// variables
00605       
00606     protected:
00607       // the hash table of this iterator
00608       hash_table* d_hash_table;
00609       // iterator points to current element in some bucket
00610       BucketNode* d_node;
00611 
00612 
00613       /// methods
00614     protected:
00615       // used by hash_table to create an iterator
00616       iterator(hash_table* hash_table, BucketNode* node)
00617   : d_hash_table(hash_table), d_node(node)
00618       { }
00619 
00620     public:
00621       // public default constructor,
00622       // leaves the iterator in undefined state.
00623       iterator()
00624   : d_hash_table(NULL), d_node(NULL)
00625       { }
00626 
00627       // copy constructor
00628       iterator(const iterator& other)
00629   : d_hash_table(other.d_hash_table), d_node(other.d_node)
00630       { }
00631 
00632       // assignment
00633       iterator& operator=(const iterator& other) {
00634   if (this != &other) {
00635     d_hash_table = other.d_hash_table;
00636     d_node = other.d_node;
00637   }
00638 
00639   return *this;
00640       }
00641 
00642       // go to next data - pre-increment
00643       iterator& operator++() {
00644   // must not be the end iterator
00645   DebugAssert(d_node != NULL, "hash operator++");
00646 
00647   // get current bucket index
00648   size_type index = d_hash_table->getBucketIndex(d_hash_table->extractKey(d_node->d_value));
00649   
00650   // go to next entry in bucket
00651   d_node = d_node->d_next;
00652 
00653   // while current bucket empty
00654   while (d_node == NULL) {
00655     // go to next bucket
00656     ++index;
00657 
00658     // all buckets exhausted
00659     if (index == d_hash_table->d_data.size()) {
00660       // unfortunately this does not work, as end() returns a tmp value
00661       // return d_hash_table->end();
00662       *this = d_hash_table->end();
00663       return *this;
00664     }
00665     DebugAssert(index < d_hash_table->d_data.size(),
00666                       "hash operator++ 2");
00667 
00668     d_node = d_hash_table->getBucketByIndex(index);
00669   }
00670   
00671   return *this;
00672       };
00673 
00674       // go to next data - post-increment
00675       iterator operator++(int) {
00676   iterator tmp = *this;
00677   ++(*this);
00678   return tmp;
00679       }
00680 
00681       value_type& operator*() const {
00682   return d_node->d_value;
00683       }
00684 
00685       value_type* operator->() const {
00686   return &(operator*());
00687       }
00688 
00689       // are two iterator identical?
00690       bool operator==(const iterator& other) const {
00691   // if the same bucket iterator, then it must be the same hash table
00692   DebugAssert(d_node == NULL || d_node != other.d_node || d_hash_table == other.d_hash_table, "hash operator==");
00693   return (d_node == other.d_node);
00694       }
00695 
00696       // negation of ==
00697       bool operator!=(const iterator& other) const {
00698   return !(*this == other);
00699       }
00700     };
00701 
00702 
00703 
00704 
00705     // const iterator over hashtable elements
00706 
00707     // modifying the hash table leaves iterator intact,
00708     // unless the key in the value it points to is modified/deleted
00709     class const_iterator {
00710       friend class hash_table;
00711 
00712       /// variables
00713       
00714     protected:
00715       // the hash table of this iterator
00716       const hash_table* d_hash_table;
00717       // iterator points to current element in some bucket
00718       const BucketNode* d_node;
00719 
00720 
00721       /// methods
00722     protected:
00723       // used by hash_table to create an iterator
00724       const_iterator(hash_table const* hash_table, const BucketNode* node)
00725   : d_hash_table(hash_table), d_node(node)
00726       { }
00727 
00728     public:
00729       // public default constructor,
00730       // leaves the iterator in undefined state.
00731       const_iterator()
00732   : d_hash_table(NULL), d_node(NULL)
00733       { }
00734 
00735       // copy constructor
00736       const_iterator(const const_iterator& other)
00737   : d_hash_table(other.d_hash_table), d_node(other.d_node)
00738       { }
00739 
00740       // conversion constructor from non-const iterator
00741       const_iterator(const iterator& other)
00742   : d_hash_table(other.d_hash_table), d_node(other.d_node)
00743       { }
00744 
00745       // assignment
00746       const_iterator& operator=(const const_iterator& other) {
00747   if (this != &other) {
00748     d_hash_table = other.d_hash_table;
00749     d_node = other.d_node;
00750   }
00751 
00752   return *this;
00753       }
00754 
00755       // go to next data - pre-increment
00756       const_iterator& operator++() {
00757   // must not be the end iterator
00758   DebugAssert(d_node != NULL, "");
00759 
00760   // get current bucket index
00761   size_type index = d_hash_table->getBucketIndex(d_hash_table->extractKey(d_node->d_value));
00762   
00763   // go to next entry in bucket
00764   d_node = d_node->d_next;
00765 
00766   // while current bucket empty
00767   while (d_node == NULL) {
00768     // go to next bucket
00769     ++index;
00770 
00771     // all buckets exhausted
00772     if (index == d_hash_table->d_data.size()) {
00773       // unfortunately this does not work, as end() returns a tmp value
00774       // return d_hash_table->end();
00775       *this = d_hash_table->end();
00776       return *this;
00777     }
00778     DebugAssert(index < d_hash_table->d_data.size(),"");
00779 
00780     d_node = d_hash_table->getBucketByIndex(index);
00781   }
00782   
00783   return *this;
00784       };
00785 
00786       // go to next data - post-increment
00787       const_iterator operator++(int) {
00788   const_iterator tmp = *this;
00789   ++(*this);
00790   return tmp;
00791       }
00792 
00793       const value_type& operator*() const {
00794   return d_node->d_value;
00795       }
00796 
00797       const value_type* operator->() const {
00798   return &(operator*());
00799       }
00800 
00801       // are two iterator identical?
00802       bool operator==(const const_iterator& other) const {
00803   // if the same bucket iterator, then it must be the same hash table
00804   DebugAssert(d_node == NULL || d_node != other.d_node || d_hash_table == other.d_hash_table,"");
00805   return (d_node == other.d_node);
00806       }
00807 
00808       // negation of ==
00809       bool operator!=(const const_iterator& other) const {
00810   return !(*this == other);
00811       }
00812     };
00813   };
00814 
00815 }
00816 
00817 #endif

Generated on Tue Jul 3 14:33:37 2007 for CVC3 by  doxygen 1.5.1