Save This Page
Home » mahout-collections-1.0-src » org.apache.mahout.math.map » [javadoc | source]
    1   /**
    2    * Licensed to the Apache Software Foundation (ASF) under one
    3    * or more contributor license agreements. See the NOTICE file
    4    * distributed with this work for additional information
    5    * regarding copyright ownership. The ASF licenses this file
    6    * to you under the Apache License, Version 2.0 (the
    7    * "License"); you may not use this file except in compliance
    8    * with the License. You may obtain a copy of the License at
    9    *
   10    * http://www.apache.org/licenses/LICENSE-2.0
   11    *
   12    * Unless required by applicable law or agreed to in writing,
   13    * software distributed under the License is distributed on an
   14    * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
   15    * KIND, either express or implied. See the License for the
   16    * specific language governing permissions and limitations
   17    * under the License.
   18    */
   19   
   20   /*
   21   Copyright � 1999 CERN - European Organization for Nuclear Research.
   22   Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
   23   is hereby granted without fee, provided that the above copyright notice appear in all copies and 
   24   that both that copyright notice and this permission notice appear in supporting documentation. 
   25   CERN makes no representations about the suitability of this software for any purpose. 
   26   It is provided "as is" without expressed or implied warranty.
   27   */
   28   package org.apache.mahout.math.map;
   29   
   30   import java.util.ArrayList;
   31   import java.util.Arrays;
   32   import java.util.Collection;
   33   import java.util.List;
   34   import java.util.Map;
   35   import java.util.Set;
   36   
   37   import org.apache.mahout.math.function.ObjectObjectProcedure;
   38   import org.apache.mahout.math.function.ObjectProcedure;
   39   import org.apache.mahout.math.set.AbstractSet;
   40   import org.apache.mahout.math.set.OpenHashSet;
   41   
   42   /**
   43     * Open hash map. This implements Map, but it does not respect several aspects of the Map contract
   44     * that impose the very sorts of performance penalities that this class exists to avoid.
   45     * {@link #entrySet}, {@link #values}, and {@link #keySet()} do <strong>not</strong> return
   46     * collections that share storage with the main map, and changes to those returned objects
   47     * are <strong>not</strong> reflected in the container.
   48    **/
   49   public class OpenHashMap<K,V> extends AbstractSet implements Map<K,V> {
   50     protected static final byte FREE = 0;
   51     protected static final byte FULL = 1;
   52     protected static final byte REMOVED = 2;
   53     protected static final Object NO_KEY_VALUE = null;
   54   
   55     /** The hash table keys. */
   56     protected Object[] table;
   57   
   58     /** The hash table values. */
   59     protected Object[] values;
   60   
   61     /** The state of each hash table entry (FREE, FULL, REMOVED). */
   62     protected byte[] state;
   63   
   64     /** The number of table entries in state==FREE. */
   65     protected int freeEntries;
   66   
   67   
   68     /** Constructs an empty map with default capacity and default load factors. */
   69     public OpenHashMap() {
   70       this(defaultCapacity);
   71     }
   72   
   73     /**
   74      * Constructs an empty map with the specified initial capacity and default load factors.
   75      *
   76      * @param initialCapacity the initial capacity of the map.
   77      * @throws IllegalArgumentException if the initial capacity is less than zero.
   78      */
   79     public OpenHashMap(int initialCapacity) {
   80       this(initialCapacity, defaultMinLoadFactor, defaultMaxLoadFactor);
   81     }
   82   
   83     /**
   84      * Constructs an empty map with the specified initial capacity and the specified minimum and maximum load factor.
   85      *
   86      * @param initialCapacity the initial capacity.
   87      * @param minLoadFactor   the minimum load factor.
   88      * @param maxLoadFactor   the maximum load factor.
   89      * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) ||
   90      *                                  (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >=
   91      *                                  maxLoadFactor)</tt>.
   92      */
   93     public OpenHashMap(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
   94       setUp(initialCapacity, minLoadFactor, maxLoadFactor);
   95     }
   96   
   97     /** Removes all (key,value) associations from the receiver. Implicitly calls <tt>trimToSize()</tt>. */
   98     @Override
   99     public void clear() {
  100       Arrays.fill(this.state, FREE);
  101       distinct = 0;
  102       freeEntries = table.length; // delta
  103       trimToSize();
  104     }
  105   
  106     /**
  107      * Returns a deep copy of the receiver.
  108      *
  109      * @return a deep copy of the receiver.
  110      */
  111     @Override
  112     @SuppressWarnings("unchecked")
  113     public Object clone() {
  114       OpenHashMap<K,V> copy = (OpenHashMap<K,V>) super.clone();
  115       copy.table = copy.table.clone();
  116       copy.values = copy.values.clone();
  117       copy.state = copy.state.clone();
  118       return copy;
  119     }
  120   
  121     /**
  122      * Returns <tt>true</tt> if the receiver contains the specified key.
  123      *
  124      * @return <tt>true</tt> if the receiver contains the specified key.
  125      */
  126     @SuppressWarnings("unchecked")
  127     @Override
  128     public boolean containsKey(Object key) {
  129       return indexOfKey((K)key) >= 0;
  130     }
  131   
  132     /**
  133      * Returns <tt>true</tt> if the receiver contains the specified value.
  134      *
  135      * @return <tt>true</tt> if the receiver contains the specified value.
  136      */
  137     @SuppressWarnings("unchecked")
  138     @Override
  139     public boolean containsValue(Object value) {
  140       return indexOfValue((V)value) >= 0;
  141     }
  142   
  143     /**
  144      * Ensures that the receiver can hold at least the specified number of associations without needing to allocate new
  145      * internal memory. If necessary, allocates new internal memory and increases the capacity of the receiver. <p> This
  146      * method never need be called; it is for performance tuning only. Calling this method before <tt>put()</tt>ing a
  147      * large number of associations boosts performance, because the receiver will grow only once instead of potentially
  148      * many times and hash collisions get less probable.
  149      *
  150      * @param minCapacity the desired minimum capacity.
  151      */
  152     @Override
  153     public void ensureCapacity(int minCapacity) {
  154       if (table.length < minCapacity) {
  155         int newCapacity = nextPrime(minCapacity);
  156         rehash(newCapacity);
  157       }
  158     }
  159   
  160     /**
  161      * Applies a procedure to each key of the receiver, if any. Note: Iterates over the keys in no particular order.
  162      * Subclasses can define a particular order, for example, "sorted by key". All methods which <i>can</i> be expressed
  163      * in terms of this method (most methods can) <i>must guarantee</i> to use the <i>same</i> order defined by this
  164      * method, even if it is no particular order. This is necessary so that, for example, methods <tt>keys</tt> and
  165      * <tt>values</tt> will yield association pairs, not two uncorrelated lists.
  166      *
  167      * @param procedure the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise
  168      *                  continues.
  169      * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise.
  170      */
  171     @SuppressWarnings("unchecked")
  172     public boolean forEachKey(ObjectProcedure<K> procedure) {
  173       for (int i = table.length; i-- > 0;) {
  174         if (state[i] == FULL) {
  175           if (!procedure.apply((K)table[i])) {
  176             return false;
  177           }
  178         }
  179       }
  180       return true;
  181     }
  182   
  183     /**
  184      * Applies a procedure to each (key,value) pair of the receiver, if any. Iteration order is guaranteed to be
  185      * <i>identical</i> to the order used by method {@link #forEachKey(ObjectProcedure)}.
  186      *
  187      * @param procedure the procedure to be applied. Stops iteration if the procedure returns <tt>false</tt>, otherwise
  188      *                  continues.
  189      * @return <tt>false</tt> if the procedure stopped before all keys where iterated over, <tt>true</tt> otherwise.
  190      */
  191       @SuppressWarnings("unchecked")
  192     public boolean forEachPair(ObjectObjectProcedure<K,V> procedure) {
  193       for (int i = table.length; i-- > 0;) {
  194         if (state[i] == FULL) {
  195           if (!procedure.apply((K)table[i], (V)values[i])) {
  196             return false;
  197           }
  198         }
  199       }
  200       return true;
  201     }
  202   
  203     /**
  204      * Returns the value associated with the specified key. It is often a good idea to first check with {@link
  205      * #containsKey(Object)} whether the given key has a value associated or not, i.e. whether there exists an association
  206      * for the given key or not.
  207      *
  208      * @param key the key to be searched for.
  209      * @return the value associated with the specified key; <tt>0</tt> if no such key is present.
  210      */
  211     @SuppressWarnings("unchecked")
  212     @Override
  213     public V get(Object key) {
  214       int i = indexOfKey((K)key);
  215       if (i < 0) {
  216         return null;
  217       } //not contained
  218       return (V)values[i];
  219     }
  220   
  221     /**
  222      * @param key the key to be added to the receiver.
  223      * @return the index where the key would need to be inserted, if it is not already contained. Returns -index-1 if the
  224      *         key is already contained at slot index. Therefore, if the returned index < 0, then it is already contained
  225      *         at slot -index-1. If the returned index >= 0, then it is NOT already contained and should be inserted at
  226      *         slot index.
  227      */
  228     protected int indexOfInsertion(K key) {
  229       Object[] tab = table;
  230       byte[] stat = state;
  231       int length = tab.length;
  232   
  233       int hash = key.hashCode() & 0x7FFFFFFF;
  234       int i = hash % length;
  235       int decrement = hash % (length - 2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
  236       //int decrement = (hash / length) % length;
  237       if (decrement == 0) {
  238         decrement = 1;
  239       }
  240   
  241       // stop if we find a removed or free slot, or if we find the key itself
  242       // do NOT skip over removed slots (yes, open addressing is like that...)
  243       while (stat[i] == FULL && !equalsMindTheNull(key, tab[i])) {
  244         i -= decrement;
  245         //hashCollisions++;
  246         if (i < 0) {
  247           i += length;
  248         }
  249       }
  250   
  251       if (stat[i] == REMOVED) {
  252         // stop if we find a free slot, or if we find the key itself.
  253         // do skip over removed slots (yes, open addressing is like that...)
  254         // assertion: there is at least one FREE slot.
  255         int j = i;
  256         while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
  257           i -= decrement;
  258           //hashCollisions++;
  259           if (i < 0) {
  260             i += length;
  261           }
  262         }
  263         if (stat[i] == FREE) {
  264           i = j;
  265         }
  266       }
  267   
  268   
  269       if (stat[i] == FULL) {
  270         // key already contained at slot i.
  271         // return a negative number identifying the slot.
  272         return -i - 1;
  273       }
  274       // not already contained, should be inserted at slot i.
  275       // return a number >= 0 identifying the slot.
  276       return i;
  277     }
  278   
  279     /**
  280      * @param key the key to be searched in the receiver.
  281      * @return the index where the key is contained in the receiver, returns -1 if the key was not found.
  282      */
  283     protected int indexOfKey(K key) {
  284       Object[] tab = table;
  285       byte[] stat = state;
  286       int length = tab.length;
  287   
  288       int hash = key.hashCode() & 0x7FFFFFFF;
  289       int i = hash % length;
  290       int decrement = hash % (length - 2); // double hashing, see http://www.eece.unm.edu/faculty/heileman/hash/node4.html
  291       //int decrement = (hash / length) % length;
  292       if (decrement == 0) {
  293         decrement = 1;
  294       }
  295   
  296       // stop if we find a free slot, or if we find the key itself.
  297       // do skip over removed slots (yes, open addressing is like that...)
  298       while (stat[i] != FREE && (stat[i] == REMOVED || !equalsMindTheNull(key, tab[i]))) {
  299         i -= decrement;
  300         //hashCollisions++;
  301         if (i < 0) {
  302           i += length;
  303         }
  304       }
  305   
  306       if (stat[i] == FREE) {
  307         return -1;
  308       } // not found
  309       return i; //found, return index where key is contained
  310     }
  311   
  312     /**
  313      * @param value the value to be searched in the receiver.
  314      * @return the index where the value is contained in the receiver, returns -1 if the value was not found.
  315      */
  316     protected int indexOfValue(V value) {
  317       Object[] val = values;
  318       byte[] stat = state;
  319   
  320       for (int i = stat.length; --i >= 0;) {
  321         if (stat[i] == FULL && equalsMindTheNull(val[i], value)) {
  322           return i;
  323         }
  324       }
  325   
  326       return -1; // not found
  327     }
  328   
  329     /**
  330      * Fills all keys contained in the receiver into the specified list. Fills the list, starting at index 0. After this
  331      * call returns the specified list has a new size that equals <tt>this.size()</tt>. 
  332      * This method can be used
  333      * to iterate over the keys of the receiver.
  334      *
  335      * @param list the list to be filled, can have any size.
  336      */
  337     @SuppressWarnings("unchecked")
  338     public void keys(List<K> list) {
  339       list.clear();
  340     
  341   
  342       Object [] tab = table;
  343       byte[] stat = state;
  344   
  345       for (int i = tab.length; i-- > 0;) {
  346         if (stat[i] == FULL) {
  347           list.add((K)tab[i]);
  348         }
  349       }
  350     }
  351   
  352     /**
  353      * Associates the given key with the given value. Replaces any old <tt>(key,someOtherValue)</tt> association, if
  354      * existing.
  355      *
  356      * @param key   the key the value shall be associated with.
  357      * @param value the value to be associated.
  358      * @return <tt>true</tt> if the receiver did not already contain such a key; <tt>false</tt> if the receiver did
  359      *         already contain such a key - the new value has now replaced the formerly associated value.
  360      */
  361     @SuppressWarnings("unchecked")
  362     @Override
  363     public V put(K key, V value) {
  364       int i = indexOfInsertion(key);
  365       if (i < 0) { //already contained
  366         i = -i - 1;
  367         V previous = (V) this.values[i];
  368         this.values[i] = value;
  369         return previous;
  370       }
  371   
  372       if (this.distinct > this.highWaterMark) {
  373         int newCapacity = chooseGrowCapacity(this.distinct + 1, this.minLoadFactor, this.maxLoadFactor);
  374         rehash(newCapacity);
  375         return put(key, value);
  376       }
  377   
  378       this.table[i] = key;
  379       this.values[i] = value;
  380       if (this.state[i] == FREE) {
  381         this.freeEntries--;
  382       }
  383       this.state[i] = FULL;
  384       this.distinct++;
  385   
  386       if (this.freeEntries < 1) { //delta
  387         int newCapacity = chooseGrowCapacity(this.distinct + 1, this.minLoadFactor, this.maxLoadFactor);
  388         rehash(newCapacity);
  389       }
  390   
  391       return null;
  392     }
  393   
  394     /**
  395      * Rehashes the contents of the receiver into a new table with a smaller or larger capacity. This method is called
  396      * automatically when the number of keys in the receiver exceeds the high water mark or falls below the low water
  397      * mark.
  398      */
  399     @SuppressWarnings("unchecked")
  400     protected void rehash(int newCapacity) {
  401       int oldCapacity = table.length;
  402       //if (oldCapacity == newCapacity) return;
  403   
  404       Object[] oldTable = table;
  405       Object[] oldValues = values;
  406       byte[] oldState = state;
  407   
  408       Object[] newTable = new Object[newCapacity];
  409       Object[] newValues = new Object[newCapacity];
  410       byte[] newState = new byte[newCapacity];
  411   
  412       this.lowWaterMark = chooseLowWaterMark(newCapacity, this.minLoadFactor);
  413       this.highWaterMark = chooseHighWaterMark(newCapacity, this.maxLoadFactor);
  414   
  415       this.table = newTable;
  416       this.values = newValues;
  417       this.state = newState;
  418       this.freeEntries = newCapacity - this.distinct; // delta
  419   
  420       for (int i = oldCapacity; i-- > 0;) {
  421         if (oldState[i] == FULL) {
  422           Object element = oldTable[i];
  423           int index = indexOfInsertion((K)element);
  424           newTable[index] = element;
  425           newValues[index] = oldValues[i];
  426           newState[index] = FULL;
  427         }
  428       }
  429     }
  430   
  431     /**
  432      * Removes the given key with its associated element from the receiver, if present.
  433      *
  434      * @param key the key to be removed from the receiver.
  435      * @return <tt>true</tt> if the receiver contained the specified key, <tt>false</tt> otherwise.
  436      */
  437     @SuppressWarnings("unchecked")
  438     @Override
  439     public V remove(Object key) {
  440       int i = indexOfKey((K)key);
  441       if (i < 0) {
  442         return null;
  443       }
  444       // key not contained
  445       V removed = (V) values[i];
  446   
  447       this.state[i] = REMOVED;
  448       //this.values[i]=0; // delta
  449       this.distinct--;
  450   
  451       if (this.distinct < this.lowWaterMark) {
  452         int newCapacity = chooseShrinkCapacity(this.distinct, this.minLoadFactor, this.maxLoadFactor);
  453         rehash(newCapacity);
  454       }
  455   
  456       return removed;
  457     }
  458   
  459     /**
  460      * Initializes the receiver.
  461      *
  462      * @param initialCapacity the initial capacity of the receiver.
  463      * @param minLoadFactor   the minLoadFactor of the receiver.
  464      * @param maxLoadFactor   the maxLoadFactor of the receiver.
  465      * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) ||
  466      *                                  (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >=
  467      *                                  maxLoadFactor)</tt>.
  468      */
  469     @Override
  470     protected void setUp(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
  471       int capacity = initialCapacity;
  472       super.setUp(capacity, minLoadFactor, maxLoadFactor);
  473       capacity = nextPrime(capacity);
  474       if (capacity == 0) {
  475         capacity = 1;
  476       } // open addressing needs at least one FREE slot at any time.
  477   
  478       this.table = new Object[capacity];
  479       this.values = new Object[capacity];
  480       this.state = new byte[capacity];
  481   
  482       // memory will be exhausted long before this pathological case happens, anyway.
  483       this.minLoadFactor = minLoadFactor;
  484       if (capacity == PrimeFinder.largestPrime) {
  485         this.maxLoadFactor = 1.0;
  486       } else {
  487         this.maxLoadFactor = maxLoadFactor;
  488       }
  489   
  490       this.distinct = 0;
  491       this.freeEntries = capacity; // delta
  492   
  493       // lowWaterMark will be established upon first expansion.
  494       // establishing it now (upon instance construction) would immediately make the table shrink upon first put(...).
  495       // After all the idea of an "initialCapacity" implies violating lowWaterMarks when an object is young.
  496       // See ensureCapacity(...)
  497       this.lowWaterMark = 0;
  498       this.highWaterMark = chooseHighWaterMark(capacity, this.maxLoadFactor);
  499     }
  500   
  501     /**
  502      * Trims the capacity of the receiver to be the receiver's current size. Releases any superfluous internal memory. An
  503      * application can use this operation to minimize the storage of the receiver.
  504      */
  505     @Override
  506     public void trimToSize() {
  507       // * 1.2 because open addressing's performance exponentially degrades beyond that point
  508       // so that even rehashing the table can take very long
  509       int newCapacity = nextPrime((int) (1 + 1.2 * size()));
  510       if (table.length > newCapacity) {
  511         rehash(newCapacity);
  512       }
  513     }
  514   
  515     /**
  516      * Access for unit tests.
  517      * @param capacity
  518      * @param minLoadFactor
  519      * @param maxLoadFactor
  520      */
  521     void getInternalFactors(int[] capacity, 
  522         double[] minLoadFactor, 
  523         double[] maxLoadFactor) {
  524       capacity[0] = table.length;
  525       minLoadFactor[0] = this.minLoadFactor;
  526       maxLoadFactor[0] = this.maxLoadFactor;
  527     }
  528   
  529     private class MapEntry implements Map.Entry<K,V> {
  530       private final K key;
  531       private final V value;
  532       
  533       MapEntry(K key, V value) {
  534         this.key = key;
  535         this.value = value;
  536       }
  537   
  538       @Override
  539       public K getKey() {
  540         return key;
  541       }
  542   
  543       @Override
  544       public V getValue() {
  545         return value;
  546       }
  547   
  548       @Override
  549       public V setValue(V value) {
  550         throw new UnsupportedOperationException("Map.Entry.setValue not supported for OpenHashMap");
  551       }
  552       
  553     }
  554   
  555     /**
  556      * Allocate a set to contain Map.Entry objects for the pairs and return it.
  557      */
  558     @Override
  559     public Set<java.util.Map.Entry<K,V>> entrySet() {
  560       final Set<Entry<K, V>> entries = new OpenHashSet<Map.Entry<K,V>>();
  561       forEachPair(new ObjectObjectProcedure<K,V>() {
  562   
  563         @Override
  564         public boolean apply(K key, V value) {
  565           entries.add(new MapEntry(key, value));
  566           return true;
  567         }});
  568       return entries;
  569     }
  570   
  571     /**
  572      * Allocate a set to contain keys and return it.
  573      * This violates the 'backing' provisions of the map interface.
  574      */
  575     @Override
  576     public Set<K> keySet() {
  577       final Set<K> keys = new OpenHashSet<K>();
  578       forEachKey(new ObjectProcedure<K>() {
  579   
  580         @Override
  581         public boolean apply(K element) {
  582           keys.add(element);
  583           return true;
  584         }});
  585       return keys;
  586     }
  587   
  588     @Override
  589     public void putAll(Map<? extends K,? extends V> m) {
  590       for (Map.Entry<? extends K, ? extends V> e : m.entrySet()) {
  591         put(e.getKey(), e.getValue());
  592       }
  593     }
  594   
  595     /**
  596      * Allocate a list to contain the values and return it.
  597      * This violates the 'backing' provision of the Map interface.
  598      */
  599     @Override
  600     public Collection<V> values() {
  601       final List<V> valueList = new ArrayList<V>();
  602       forEachPair(new ObjectObjectProcedure<K,V>() {
  603   
  604         @Override
  605         public boolean apply(K key, V value) {
  606           valueList.add(value);
  607           return true;
  608         }});
  609       return valueList;
  610     }
  611   
  612     @SuppressWarnings("unchecked")
  613     @Override
  614     public boolean equals(Object obj) {
  615       if (! (obj instanceof OpenHashMap)) {
  616         return false;
  617       }
  618       final OpenHashMap<K,V> o = (OpenHashMap<K,V>) obj;
  619       if (o.size() != size()) {
  620         return false;
  621       }
  622       final boolean[] equal = new boolean[1];
  623       equal[0] = true;
  624       forEachPair(new ObjectObjectProcedure<K,V>() {
  625   
  626         @Override
  627         public boolean apply(K key, V value) {
  628           Object ov = o.get(key);
  629           if (!value.equals(ov)) {
  630             equal[0] = false;
  631             return false;
  632           }
  633           return true;
  634         }});
  635       return equal[0];
  636     }
  637   
  638     @Override
  639     public String toString() {
  640       final StringBuilder sb = new StringBuilder();
  641       sb.append('{');
  642       forEachPair(new ObjectObjectProcedure<K,V>() {
  643   
  644         @Override
  645         public boolean apply(K key, V value) {
  646           sb.append('[');
  647           sb.append(key);
  648           sb.append(" -> ");
  649           sb.append(value);
  650           sb.append("] ");
  651           return true;
  652         }});
  653       sb.append('}');
  654       return sb.toString();
  655     }
  656   }

Save This Page
Home » mahout-collections-1.0-src » org.apache.mahout.math.map » [javadoc | source]