Save This Page
Home » mahout-collections-1.0-src » org.apache.mahout.math.map » [javadoc | source]
    1   /**
    2    * Licensed to the Apache Software Foundation (ASF) under one
    3    * or more contributor license agreements. See the NOTICE file
    4    * distributed with this work for additional information
    5    * regarding copyright ownership. The ASF licenses this file
    6    * to you under the Apache License, Version 2.0 (the
    7    * "License"); you may not use this file except in compliance
    8    * with the License. You may obtain a copy of the License at
    9    *
   10    * http://www.apache.org/licenses/LICENSE-2.0
   11    *
   12    * Unless required by applicable law or agreed to in writing,
   13    * software distributed under the License is distributed on an
   14    * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
   15    * KIND, either express or implied. See the License for the
   16    * specific language governing permissions and limitations
   17    * under the License.
   18    */
   19   
   20   package org.apache.mahout.math.map;
   21   
   22   import java.util.Arrays;
   23   
   24   /**
   25    * Not of interest for users; only for implementors of hashtables.
   26    * Used to keep hash table capacities prime numbers.
   27    *
   28    * <p>Choosing prime numbers as hash table capacities is a good idea to keep them working fast,
   29    * particularly under hash table expansions.
   30    *
   31    * <p>However, JDK 1.2, JGL 3.1 and many other toolkits do nothing to keep capacities prime.
   32    * This class provides efficient means to choose prime capacities.
   33    *
   34    * <p>Choosing a prime is <tt>O(log 300)</tt> (binary search in a list of 300 int's).
   35    * Memory requirements: 1 KB static memory.
   36    *
   37    */
   38   public class PrimeFinder {
   39   
   40     /** The largest prime this class can generate; currently equal to <tt>Integer.MAX_VALUE</tt>. */
   41     public static final int largestPrime = Integer.MAX_VALUE; //yes, it is prime.
   42   
   43     /**
   44      * The prime number list consists of 11 chunks. Each chunk contains prime numbers. A chunk starts with a prime P1. The
   45      * next element is a prime P2. P2 is the smallest prime for which holds: P2 >= 2*P1. The next element is P3, for which
   46      * the same holds with respect to P2, and so on.
   47      *
   48      * Chunks are chosen such that for any desired capacity >= 1000 the list includes a prime number <= desired capacity *
   49      * 1.11 (11%). For any desired capacity >= 200 the list includes a prime number <= desired capacity * 1.16 (16%). For
   50      * any desired capacity >= 16 the list includes a prime number <= desired capacity * 1.21 (21%).
   51      *
   52      * Therefore, primes can be retrieved which are quite close to any desired capacity, which in turn avoids wasting
   53      * memory. For example, the list includes 1039,1117,1201,1277,1361,1439,1523,1597,1759,1907,2081. So if you need a
   54      * prime >= 1040, you will find a prime <= 1040*1.11=1154.
   55      *
   56      * Chunks are chosen such that they are optimized for a hashtable growthfactor of 2.0; If your hashtable has such a
   57      * growthfactor then, after initially "rounding to a prime" upon hashtable construction, it will later expand to prime
   58      * capacities such that there exist no better primes.
   59      *
   60      * In total these are about 32*10=320 numbers -> 1 KB of static memory needed. If you are stingy, then delete every
   61      * second or fourth chunk.
   62      */
   63   
   64     private static final int[] primeCapacities = {
   65         //chunk #0
   66         largestPrime,
   67   
   68         //chunk #1
   69         5, 11, 23, 47, 97, 197, 397, 797, 1597, 3203, 6421, 12853, 25717, 51437, 102877, 205759,
   70         411527, 823117, 1646237, 3292489, 6584983, 13169977, 26339969, 52679969, 105359939,
   71         210719881, 421439783, 842879579, 1685759167,
   72   
   73         //chunk #2
   74         433, 877, 1759, 3527, 7057, 14143, 28289, 56591, 113189, 226379, 452759, 905551, 1811107,
   75         3622219, 7244441, 14488931, 28977863, 57955739, 115911563, 231823147, 463646329, 927292699,
   76         1854585413,
   77   
   78         //chunk #3
   79         953, 1907, 3821, 7643, 15287, 30577, 61169, 122347, 244703, 489407, 978821, 1957651, 3915341,
   80         7830701, 15661423, 31322867, 62645741, 125291483, 250582987, 501165979, 1002331963,
   81         2004663929,
   82   
   83         //chunk #4
   84         1039, 2081, 4177, 8363, 16729, 33461, 66923, 133853, 267713, 535481, 1070981, 2141977, 4283963,
   85         8567929, 17135863, 34271747, 68543509, 137087021, 274174111, 548348231, 1096696463,
   86   
   87         //chunk #5
   88         31, 67, 137, 277, 557, 1117, 2237, 4481, 8963, 17929, 35863, 71741, 143483, 286973, 573953,
   89         1147921, 2295859, 4591721, 9183457, 18366923, 36733847, 73467739, 146935499, 293871013,
   90         587742049, 1175484103,
   91   
   92         //chunk #6
   93         599, 1201, 2411, 4831, 9677, 19373, 38747, 77509, 155027, 310081, 620171, 1240361, 2480729,
   94         4961459, 9922933, 19845871, 39691759, 79383533, 158767069, 317534141, 635068283, 1270136683,
   95   
   96         //chunk #7
   97         311, 631, 1277, 2557, 5119, 10243, 20507, 41017, 82037, 164089, 328213, 656429, 1312867,
   98         2625761, 5251529, 10503061, 21006137, 42012281, 84024581, 168049163, 336098327, 672196673,
   99         1344393353,
  100   
  101         //chunk #8
  102         3, 7, 17, 37, 79, 163, 331, 673, 1361, 2729, 5471, 10949, 21911, 43853, 87719, 175447, 350899,
  103         701819, 1403641, 2807303, 5614657, 11229331, 22458671, 44917381, 89834777, 179669557,
  104         359339171, 718678369, 1437356741,
  105   
  106         //chunk #9
  107         43, 89, 179, 359, 719, 1439, 2879, 5779, 11579, 23159, 46327, 92657, 185323, 370661, 741337,
  108         1482707, 2965421, 5930887, 11861791, 23723597, 47447201, 94894427, 189788857, 379577741,
  109         759155483, 1518310967,
  110   
  111         //chunk #10
  112         379, 761, 1523, 3049, 6101, 12203, 24407, 48817, 97649, 195311, 390647, 781301, 1562611,
  113         3125257, 6250537, 12501169, 25002389, 50004791, 100009607, 200019221, 400038451, 800076929,
  114         1600153859
  115     };
  116   
  117   
  118     static { //initializer
  119       // The above prime numbers are formatted for human readability.
  120       // To find numbers fast, we sort them once and for all.
  121   
  122       Arrays.sort(primeCapacities);
  123     }
  124   
  125     /** Makes this class non instantiable, but still let's others inherit from it. */
  126     private PrimeFinder() {
  127     }
  128   
  129     /**
  130      * Returns a prime number which is <code>&gt;= desiredCapacity</code> and very close to <code>desiredCapacity</code>
  131      * (within 11% if <code>desiredCapacity &gt;= 1000</code>).
  132      *
  133      * @param desiredCapacity the capacity desired by the user.
  134      * @return the capacity which should be used for a hashtable.
  135      */
  136     public static int nextPrime(int desiredCapacity) {
  137       int i = java.util.Arrays.binarySearch(primeCapacities, desiredCapacity);
  138       if (i < 0) {
  139         // desired capacity not found, choose next prime greater than desired capacity
  140         i = -i - 1; // remember the semantics of binarySearch...
  141       }
  142       return primeCapacities[i];
  143     }
  144   
  145     /** Tests correctness. */
  146     private static void statistics(int from, int to) {
  147       // check that primes contain no accidental errors
  148       for (int i = 0; i < primeCapacities.length - 1; i++) {
  149         if (primeCapacities[i] >= primeCapacities[i + 1]) {
  150           throw new IllegalStateException(
  151               "primes are unsorted or contain duplicates; detected at " + i + '@' + primeCapacities[i]);
  152         }
  153       }
  154   
  155       double accDeviation = 0.0;
  156       double maxDeviation = -1.0;
  157   
  158       for (int i = from; i <= to; i++) {
  159         int primeCapacity = nextPrime(i);
  160         //log.info(primeCapacity);
  161         double deviation = (primeCapacity - i) / (double) i;
  162   
  163         if (deviation > maxDeviation) {
  164           maxDeviation = deviation;
  165         }
  166   
  167         accDeviation += deviation;
  168       }
  169       long width = 1 + (long) to - (long) from;
  170   
  171       double meanDeviation = accDeviation / width;
  172     }
  173   }

Save This Page
Home » mahout-collections-1.0-src » org.apache.mahout.math.map » [javadoc | source]