Save This Page
Home » mahout-collections-1.0-src » org.apache.mahout.math.map » [javadoc | source]
    1   /*
    2   Copyright ? 1999 CERN - European Organization for Nuclear Research.
    3   Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
    4   is hereby granted without fee, provided that the above copyright notice appear in all copies and 
    5   that both that copyright notice and this permission notice appear in supporting documentation. 
    6   CERN makes no representations about the suitability of this software for any purpose. 
    7   It is provided "as is" without expressed or implied warranty.
    8   */
    9   package org.apache.mahout.math.map;
   10   
   11   /**
   12    * Status: Experimental; Do not use for production yet. Hash map holding (key,value) associations of type
   13    * <tt>(int-->int)</tt>; Automatically grows and shrinks as needed; Implemented using open addressing with double
   14    * hashing. First see the <a href="package-summary.html">package summary</a> and javadoc <a
   15    * href="package-tree.html">tree view</a> to get the broad picture.
   16    *
   17    * Implements open addressing with double hashing, using "Brent's variation". Brent's variation slows insertions a bit
   18    * down (not much) but reduces probes (collisions) for successful searches, in particular for large load factors. (It
   19    * does not improve unsuccessful searches.) See D. Knuth, Searching and Sorting, 3rd ed., p.533-545
   20    *
   21    * @author wolfgang.hoschek@cern.ch
   22    * @version 1.0, 09/24/99
   23    * @see java.util.HashMap
   24    */
   25   class QuickOpenIntIntHashMap extends OpenIntIntHashMap {
   26     //public int totalProbesSaved = 0; // benchmark only
   27   
   28     /** Constructs an empty map with default capacity and default load factors. */
   29     QuickOpenIntIntHashMap() {
   30       this(defaultCapacity);
   31     }
   32   
   33     /**
   34      * Constructs an empty map with the specified initial capacity and default load factors.
   35      *
   36      * @param initialCapacity the initial capacity of the map.
   37      * @throws IllegalArgumentException if the initial capacity is less than zero.
   38      */
   39     QuickOpenIntIntHashMap(int initialCapacity) {
   40       this(initialCapacity, defaultMinLoadFactor, defaultMaxLoadFactor);
   41     }
   42   
   43     /**
   44      * Constructs an empty map with the specified initial capacity and the specified minimum and maximum load factor.
   45      *
   46      * @param initialCapacity the initial capacity.
   47      * @param minLoadFactor   the minimum load factor.
   48      * @param maxLoadFactor   the maximum load factor.
   49      * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) ||
   50      *                                  (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >=
   51      *                                  maxLoadFactor)</tt>.
   52      */
   53     QuickOpenIntIntHashMap(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
   54       setUp(initialCapacity, minLoadFactor, maxLoadFactor);
   55     }
   56   
   57     /**
   58      * Associates the given key with the given value. Replaces any old <tt>(key,someOtherValue)</tt> association, if
   59      * existing.
   60      *
   61      * @param key   the key the value shall be associated with.
   62      * @param value the value to be associated.
   63      * @return <tt>true</tt> if the receiver did not already contain such a key; <tt>false</tt> if the receiver did
   64      *         already contain such a key - the new value has now replaced the formerly associated value.
   65      */
   66     @Override
   67     public boolean put(int key, int value) {
   68       /*
   69          This is open addressing with double hashing, using "Brent's variation".
   70          Brent's variation slows insertions a bit down (not much) but reduces probes (collisions) for successful searches, in particular for large load factors.
   71          (It does not improve unsuccessful searches.)
   72          See D. Knuth, Searching and Sorting, 3rd ed., p.533-545
   73   
   74          h1(key) = hash % M
   75          h2(key) = decrement = Max(1, hash/M % M)
   76          M is prime = capacity = table.length
   77          probing positions are table[(h1-j*h2) % M] for j=0,1,...
   78          (M and h2 could also be chosen differently, but h2 is required to be relative prime to M.)
   79       */
   80   
   81       int[] tab = table;
   82       byte[] stat = state;
   83       int length = tab.length;
   84   
   85       int hash = HashFunctions.hash(key) & 0x7FFFFFFF;
   86       int i = hash % length;
   87       int decrement = (hash / length) % length;
   88       if (decrement == 0) {
   89         decrement = 1;
   90       }
   91   
   92       // stop if we find a removed or free slot, or if we find the key itself
   93       // do NOT skip over removed slots (yes, open addressing is like that...)
   94       //int comp = comparisons;
   95       int t = 0;  // the number of probes
   96       int p0 = i; // the first position to probe
   97       while (stat[i] == FULL && tab[i] != key) {
   98         t++;
   99         i -= decrement;
  100         //hashCollisions++;
  101         if (i < 0) {
  102           i += length;
  103         }
  104       }
  105       if (stat[i] == FULL) {
  106         // key already contained at slot i.
  107         this.values[i] = value;
  108         return false;
  109       }
  110       // not already contained, should be inserted at slot i.
  111   
  112       if (this.distinct > this.highWaterMark) {
  113         int newCapacity = chooseGrowCapacity(this.distinct + 1, this.minLoadFactor, this.maxLoadFactor);
  114         rehash(newCapacity);
  115         return put(key, value);
  116       }
  117   
  118       /*
  119       Brent's variation does a local reorganization to reduce probes. It essentially means:
  120       We test whether it is possible to move the association we probed first (table[p0]) out of the way.
  121       If this is possible, it will reduce probes for the key to be inserted, since it takes its place; it gets hit earlier.
  122       However, future probes for the key that we move out of the way will increase.
  123       Thus we only move it out of the way, if we have a net gain, that is, if we save more probes than we loose.
  124       For the first probe we safe more than we loose if the number of probes we needed was >=2 (t>=2).
  125       If the first probe cannot be moved out of the way, we try the next probe (p1).
  126       Now we safe more than we loose if t>=3.
  127       We repeat this until we find that we cannot gain or that we can indeed move p(x) out of the way.
  128   
  129       Note: Under the great majority of insertions t<=1, so the loop is entered very infrequently.
  130       */
  131       while (t > 1) {
  132         int key0 = tab[p0];
  133         hash = HashFunctions.hash(key0) & 0x7FFFFFFF;
  134         decrement = (hash / length) % length;
  135         if (decrement == 0) {
  136           decrement = 1;
  137         }
  138         int pc = p0 - decrement; // pc = (p0-j*decrement) % M, j=1,2,..
  139         if (pc < 0) {
  140           pc += length;
  141         }
  142   
  143         if (stat[pc] != FREE) { // not a free slot, continue searching for free slot to move to, or break.
  144           p0 = pc;
  145           t--;
  146         } else { // free or removed slot found, now move...
  147           tab[pc] = key0;
  148           stat[pc] = FULL;
  149           values[pc] = values[p0];
  150           i = p0; // prepare to insert: table[p0]=key
  151           t = 0; // break loop
  152         }
  153       }
  154   
  155       this.table[i] = key;
  156       this.values[i] = value;
  157       if (this.state[i] == FREE) {
  158         this.freeEntries--;
  159       }
  160       this.state[i] = FULL;
  161       this.distinct++;
  162   
  163       if (this.freeEntries < 1) { //delta
  164         int newCapacity = chooseGrowCapacity(this.distinct + 1, this.minLoadFactor, this.maxLoadFactor);
  165         rehash(newCapacity);
  166       }
  167   
  168       return true;
  169     }
  170   
  171     /**
  172      * Rehashes the contents of the receiver into a new table with a smaller or larger capacity. This method is called
  173      * automatically when the number of keys in the receiver exceeds the high water mark or falls below the low water
  174      * mark.
  175      */
  176     @Override
  177     protected void rehash(int newCapacity) {
  178       int oldCapacity = table.length;
  179       //if (oldCapacity == newCapacity) return;
  180   
  181       int[] oldTable = table;
  182       int[] oldValues = values;
  183       byte[] oldState = state;
  184   
  185       int[] newTable = new int[newCapacity];
  186       int[] newValues = new int[newCapacity];
  187       byte[] newState = new byte[newCapacity];
  188   
  189       this.lowWaterMark = chooseLowWaterMark(newCapacity, this.minLoadFactor);
  190       this.highWaterMark = chooseHighWaterMark(newCapacity, this.maxLoadFactor);
  191   
  192       this.table = newTable;
  193       this.values = newValues;
  194       this.state = newState;
  195       this.freeEntries = newCapacity - this.distinct; // delta
  196   
  197       int tmp = this.distinct;
  198       this.distinct = Integer.MIN_VALUE; // switch of watermarks
  199       for (int i = oldCapacity; i-- > 0;) {
  200         if (oldState[i] == FULL) {
  201           put(oldTable[i], oldValues[i]);
  202           /*
  203           int element = oldTable[i];
  204           int index = indexOfInsertion(element);
  205           newTable[index]=element;
  206           newValues[index]=oldValues[i];
  207           newState[index]=FULL;
  208           */
  209         }
  210       }
  211       this.distinct = tmp;
  212     }
  213   }

Save This Page
Home » mahout-collections-1.0-src » org.apache.mahout.math.map » [javadoc | source]