Save This Page
Home » mahout-collections-1.0-src » org.apache.mahout.math.set » [javadoc | source]
    1   /**
    2    * Licensed to the Apache Software Foundation (ASF) under one or more
    3    * contributor license agreements.  See the NOTICE file distributed with
    4    * this work for additional information regarding copyright ownership.
    5    * The ASF licenses this file to You under the Apache License, Version 2.0
    6    * (the "License"); you may not use this file except in compliance with
    7    * the License.  You may obtain a copy of the License at
    8    *
    9    *     http://www.apache.org/licenses/LICENSE-2.0
   10    *
   11    * Unless required by applicable law or agreed to in writing, software
   12    * distributed under the License is distributed on an "AS IS" BASIS,
   13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   14    * See the License for the specific language governing permissions and
   15    * limitations under the License.
   16    */
   17   /*
   18   Copyright 1999 CERN - European Organization for Nuclear Research.
   19   Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
   20   is hereby granted without fee, provided that the above copyright notice appear in all copies and 
   21   that both that copyright notice and this permission notice appear in supporting documentation. 
   22   CERN makes no representations about the suitability of this software for any purpose. 
   23   It is provided "as is" without expressed or implied warranty.
   24   */
   25   package org.apache.mahout.math.set;
   26   
   27   import org.apache.mahout.math.PersistentObject;
   28   import org.apache.mahout.math.map.PrimeFinder;
   29   
   30   public abstract class AbstractSet extends PersistentObject {
   31     //public static boolean debug = false; // debug only
   32   
   33     /** The number of distinct associations in the map; its "size()". */
   34     protected int distinct;
   35   
   36     /**
   37      * The table capacity c=table.length always satisfies the invariant <tt>c * minLoadFactor <= s <= c *
   38      * maxLoadFactor</tt>, where s=size() is the number of associations currently contained. The term "c * minLoadFactor"
   39      * is called the "lowWaterMark", "c * maxLoadFactor" is called the "highWaterMark". In other words, the table capacity
   40      * (and proportionally the memory used by this class) oscillates within these constraints. The terms are precomputed
   41      * and cached to avoid recalculating them each time put(..) or removeKey(...) is called.
   42      */
   43     protected int lowWaterMark;
   44     protected int highWaterMark;
   45   
   46     /** The minimum load factor for the hashtable. */
   47     protected double minLoadFactor;
   48   
   49     /** The maximum load factor for the hashtable. */
   50     protected double maxLoadFactor;
   51   
   52     // these are public access for unit tests.
   53     public static final int defaultCapacity = 277;
   54     public static final double defaultMinLoadFactor = 0.2;
   55     public static final double defaultMaxLoadFactor = 0.5;
   56   
   57     /**
   58      * Chooses a new prime table capacity optimized for growing that (approximately) satisfies the invariant <tt>c *
   59      * minLoadFactor <= size <= c * maxLoadFactor</tt> and has at least one FREE slot for the given size.
   60      */
   61     protected int chooseGrowCapacity(int size, double minLoad, double maxLoad) {
   62       return nextPrime(Math.max(size + 1, (int) ((4 * size / (3 * minLoad + maxLoad)))));
   63     }
   64   
   65     /**
   66      * Returns new high water mark threshold based on current capacity and maxLoadFactor.
   67      *
   68      * @return int the new threshold.
   69      */
   70     protected int chooseHighWaterMark(int capacity, double maxLoad) {
   71       return Math.min(capacity - 2, (int) (capacity * maxLoad)); //makes sure there is always at least one FREE slot
   72     }
   73   
   74     /**
   75      * Returns new low water mark threshold based on current capacity and minLoadFactor.
   76      *
   77      * @return int the new threshold.
   78      */
   79     protected int chooseLowWaterMark(int capacity, double minLoad) {
   80       return (int) (capacity * minLoad);
   81     }
   82   
   83     /**
   84      * Chooses a new prime table capacity neither favoring shrinking nor growing, that (approximately) satisfies the
   85      * invariant <tt>c * minLoadFactor <= size <= c * maxLoadFactor</tt> and has at least one FREE slot for the given
   86      * size.
   87      */
   88     protected int chooseMeanCapacity(int size, double minLoad, double maxLoad) {
   89       return nextPrime(Math.max(size + 1, (int) ((2 * size / (minLoad + maxLoad)))));
   90     }
   91   
   92     /**
   93      * Chooses a new prime table capacity optimized for shrinking that (approximately) satisfies the invariant <tt>c *
   94      * minLoadFactor <= size <= c * maxLoadFactor</tt> and has at least one FREE slot for the given size.
   95      */
   96     protected int chooseShrinkCapacity(int size, double minLoad, double maxLoad) {
   97       return nextPrime(Math.max(size + 1, (int) ((4 * size / (minLoad + 3 * maxLoad)))));
   98     }
   99   
  100     /** Removes all (key,value) associations from the receiver. */
  101     public abstract void clear();
  102   
  103     /**
  104      * Ensures that the receiver can hold at least the specified number of elements without needing to allocate new
  105      * internal memory. If necessary, allocates new internal memory and increases the capacity of the receiver. <p> This
  106      * method never need be called; it is for performance tuning only. Calling this method before <tt>put()</tt>ing a
  107      * large number of associations boosts performance, because the receiver will grow only once instead of potentially
  108      * many times. <p> <b>This default implementation does nothing.</b> Override this method if necessary.
  109      *
  110      * @param minCapacity the desired minimum capacity.
  111      */
  112     public void ensureCapacity(int minCapacity) {
  113     }
  114   
  115     /**
  116      * Returns <tt>true</tt> if the receiver contains no (key,value) associations.
  117      *
  118      * @return <tt>true</tt> if the receiver contains no (key,value) associations.
  119      */
  120     public boolean isEmpty() {
  121       return distinct == 0;
  122     }
  123   
  124     /**
  125      * Returns a prime number which is <code>&gt;= desiredCapacity</code> and very close to <code>desiredCapacity</code>
  126      * (within 11% if <code>desiredCapacity &gt;= 1000</code>).
  127      *
  128      * @param desiredCapacity the capacity desired by the user.
  129      * @return the capacity which should be used for a hashtable.
  130      */
  131     protected int nextPrime(int desiredCapacity) {
  132       return PrimeFinder.nextPrime(desiredCapacity);
  133     }
  134   
  135     /**
  136      * Initializes the receiver. You will almost certainly need to override this method in subclasses to initialize the
  137      * hash table.
  138      *
  139      * @param initialCapacity the initial capacity of the receiver.
  140      * @param minLoadFactor   the minLoadFactor of the receiver.
  141      * @param maxLoadFactor   the maxLoadFactor of the receiver.
  142      * @throws IllegalArgumentException if <tt>initialCapacity < 0 || (minLoadFactor < 0.0 || minLoadFactor >= 1.0) ||
  143      *                                  (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) || (minLoadFactor >=
  144      *                                  maxLoadFactor)</tt>.
  145      */
  146     protected void setUp(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
  147       if (initialCapacity < 0) {
  148         throw new IllegalArgumentException("Initial Capacity must not be less than zero: " + initialCapacity);
  149       }
  150       if (minLoadFactor < 0.0 || minLoadFactor >= 1.0) {
  151         throw new IllegalArgumentException("Illegal minLoadFactor: " + minLoadFactor);
  152       }
  153       if (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) {
  154         throw new IllegalArgumentException("Illegal maxLoadFactor: " + maxLoadFactor);
  155       }
  156       if (minLoadFactor >= maxLoadFactor) {
  157         throw new IllegalArgumentException(
  158             "Illegal minLoadFactor: " + minLoadFactor + " and maxLoadFactor: " + maxLoadFactor);
  159       }
  160     }
  161   
  162     /**
  163      * Returns the number of (key,value) associations currently contained.
  164      *
  165      * @return the number of (key,value) associations currently contained.
  166      */
  167     public int size() {
  168       return distinct;
  169     }
  170   
  171     /**
  172      * Trims the capacity of the receiver to be the receiver's current size. Releases any superfluous internal memory. An
  173      * application can use this operation to minimize the storage of the receiver. <p> This default implementation does
  174      * nothing. Override this method if necessary.
  175      */
  176     public void trimToSize() {
  177     }
  178     
  179     protected static boolean equalsMindTheNull(Object a, Object b) {
  180       if (a == null && b == null) {
  181         return true;
  182       }
  183       if (a == null || b == null) {
  184         return false;
  185       }
  186       return a.equals(b);
  187     }
  188   }

Save This Page
Home » mahout-collections-1.0-src » org.apache.mahout.math.set » [javadoc | source]