Save This Page
Home » mahout-collections-1.0-src » org.apache.mahout.math.bitvector » [javadoc | source]
    1   /*
    2   Copyright 1999 CERN - European Organization for Nuclear Research.
    3   Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
    4   is hereby granted without fee, provided that the above copyright notice appear in all copies and 
    5   that both that copyright notice and this permission notice appear in supporting documentation. 
    6   CERN makes no representations about the suitability of this software for any purpose. 
    7   It is provided "as is" without expressed or implied warranty.
    8   */
    9   package org.apache.mahout.math.bitvector;
   10   
   11   /**
   12    * Implements quick non polymorphic non bounds checking low level bitvector operations.
   13    * Includes some operations that interpret sub-bitstrings as long integers.
   14    * <p>
   15    * <b>WARNING: Methods of this class do not check preconditions.</b>
   16    * Provided with invalid parameters these method may return (or set) invalid values without throwing any exception.
   17    * <b>You should only use this class when performance is critical and you are absolutely sure that indexes are within bounds.</b>
   18    * <p>   
   19    * A bitvector is modelled as a long array, i.e. <tt>long[] bits</tt> holds bits of a bitvector.
   20    * Each long value holds 64 bits.
   21    * The i-th bit is stored in bits[i/64] at
   22    * bit position i % 64 (where bit position 0 refers to the least
   23    * significant bit and 63 refers to the most significant bit).
   24    *
   25    * @see     BitVector
   26    * @see     BitMatrix
   27    * @see     java.util.BitSet
   28    */
   29   
   30   /** @deprecated until unit tests are in place.  Until this time, this class/interface is unsupported. */
   31   @Deprecated
   32   public class QuickBitVector {
   33   
   34     private static final int ADDRESS_BITS_PER_UNIT = 6; // 64=2^6
   35     protected static final int BITS_PER_UNIT = 64; // = 1 << ADDRESS_BITS_PER_UNIT
   36     private static final int BIT_INDEX_MASK = 63; // = BITS_PER_UNIT - 1;
   37   
   38     private static final long[] pows = precomputePows(); //precompute bitmasks for speed
   39   
   40     /** Makes this class non instantiable, but still inheritable. */
   41     private QuickBitVector() {
   42     }
   43   
   44     /**
   45      * Returns a bit mask with bits in the specified range set to 1, all the rest set to 0. In other words, returns a bit
   46      * mask having 0,1,2,3,...,64 bits set. If <tt>to-from+1==0</tt> then returns zero (<tt>0L</tt>). Precondition (not
   47      * checked): <tt>to-from+1 &gt;= 0 && to-from+1 &lt;= 64</tt>.
   48      *
   49      * @param from index of start bit (inclusive)
   50      * @param to   index of end bit (inclusive).
   51      * @return the bit mask having all bits between <tt>from</tt> and <tt>to</tt> set to 1.
   52      */
   53     private static long bitMaskWithBitsSetFromTo(int from, int to) {
   54       return pows[to - from + 1] << from;
   55   
   56       // This turned out to be slower:
   57       // 0xffffffffffffffffL == ~0L == -1L == all 64 bits set.
   58       // int width;
   59       // return (width=to-from+1) == 0 ? 0L : (0xffffffffffffffffL >>> (BITS_PER_UNIT-width)) << from;
   60     }
   61   
   62     /**
   63      * Changes the bit with index <tt>bitIndex</tt> in the bitvector <tt>bits</tt> to the "clear" (<tt>false</tt>) state.
   64      *
   65      * @param bits     the bitvector.
   66      * @param bitIndex the index of the bit to be cleared.
   67      */
   68     public static void clear(long[] bits, int bitIndex) {
   69       bits[bitIndex >> ADDRESS_BITS_PER_UNIT] &= ~(1L << (bitIndex & BIT_INDEX_MASK));
   70     }
   71   
   72     /**
   73      * Returns from the bitvector the value of the bit with the specified index. The value is <tt>true</tt> if the bit
   74      * with the index <tt>bitIndex</tt> is currently set; otherwise, returns <tt>false</tt>.
   75      *
   76      * @param bits     the bitvector.
   77      * @param bitIndex the bit index.
   78      * @return the value of the bit with the specified index.
   79      */
   80     public static boolean get(long[] bits, int bitIndex) {
   81       return ((bits[bitIndex >> ADDRESS_BITS_PER_UNIT] & (1L << (bitIndex & BIT_INDEX_MASK))) != 0);
   82     }
   83   
   84     /**
   85      * Returns a long value representing bits of a bitvector from index <tt>from</tt> to index <tt>to</tt>. Bits are
   86      * returned as a long value with the return value having bit 0 set to bit <code>from</code>, ..., bit
   87      * <code>to-from</code> set to bit <code>to</code>. All other bits of return value are set to 0. If <tt>from &gt;
   88      * to</tt> then returns zero (<tt>0L</tt>). Precondition (not checked): <tt>to-from+1 &lt;= 64</tt>.
   89      *
   90      * @param bits the bitvector.
   91      * @param from index of start bit (inclusive).
   92      * @param to   index of end bit (inclusive).
   93      * @return the specified bits as long value.
   94      */
   95     public static long getLongFromTo(long[] bits, int from, int to) {
   96       if (from > to) {
   97         return 0L;
   98       }
   99   
  100       int fromIndex = from >> ADDRESS_BITS_PER_UNIT; //equivalent to from/64
  101       int toIndex = to >> ADDRESS_BITS_PER_UNIT;
  102       int fromOffset = from & BIT_INDEX_MASK; //equivalent to from%64
  103       int toOffset = to & BIT_INDEX_MASK;
  104       //this is equivalent to the above, but slower:
  105       //final int fromIndex=from/BITS_PER_UNIT;
  106       //final int toIndex=to/BITS_PER_UNIT;
  107       //final int fromOffset=from%BITS_PER_UNIT;
  108       //final int toOffset=to%BITS_PER_UNIT;
  109   
  110   
  111       long mask;
  112       if (fromIndex ==
  113           toIndex) { //range does not cross unit boundaries; value to retrieve is contained in one single long value.
  114         mask = bitMaskWithBitsSetFromTo(fromOffset, toOffset);
  115         return (bits[fromIndex] & mask) >>> fromOffset;
  116   
  117       }
  118   
  119       //range crosses unit boundaries; value to retrieve is spread over two long values.
  120       //get part from first long value
  121       mask = bitMaskWithBitsSetFromTo(fromOffset, BIT_INDEX_MASK);
  122       long x1 = (bits[fromIndex] & mask) >>> fromOffset;
  123   
  124       //get part from second long value
  125       mask = bitMaskWithBitsSetFromTo(0, toOffset);
  126       long x2 = (bits[toIndex] & mask) << (BITS_PER_UNIT - fromOffset);
  127   
  128       //combine
  129       return x1 | x2;
  130     }
  131   
  132     /**
  133      * Returns the index of the least significant bit in state "true". Returns 32 if no bit is in state "true". Examples:
  134      * <pre>
  135      * 0x80000000 --> 31
  136      * 0x7fffffff --> 0
  137      * 0x00000001 --> 0
  138      * 0x00000000 --> 32
  139      * </pre>
  140      */
  141     public static int leastSignificantBit(int value) {
  142       int i = -1;
  143       while (++i < 32 && (((1 << i) & value)) == 0) {
  144       }
  145       return i;
  146     }
  147   
  148     /**
  149      * Constructs a low level bitvector that holds <tt>size</tt> elements, with each element taking
  150      * <tt>bitsPerElement</tt> bits.
  151      *
  152      * @param size           the number of elements to be stored in the bitvector (must be &gt;= 0).
  153      * @param bitsPerElement the number of bits one single element takes.
  154      * @return a low level bitvector.
  155      */
  156     public static long[] makeBitVector(int size, int bitsPerElement) {
  157       int nBits = size * bitsPerElement;
  158       int unitIndex = (nBits - 1) >> ADDRESS_BITS_PER_UNIT;
  159       return new long[unitIndex + 1];
  160     }
  161   
  162     /**
  163      * Returns the index of the most significant bit in state "true". Returns -1 if no bit is in state "true". Examples:
  164      * <pre>
  165      * 0x80000000 --> 31
  166      * 0x7fffffff --> 30
  167      * 0x00000001 --> 0
  168      * 0x00000000 --> -1
  169      * </pre>
  170      */
  171     public static int mostSignificantBit(int value) {
  172       int i = 32;
  173       while (--i >= 0 && (((1 << i) & value)) == 0) {
  174       }
  175       return i;
  176     }
  177   
  178     /** Returns the index within the unit that contains the given bitIndex. */
  179     protected static int offset(int bitIndex) {
  180       return bitIndex & BIT_INDEX_MASK;
  181       //equivalent to bitIndex%64
  182     }
  183   
  184     /**
  185      * Initializes a table with numbers having 1,2,3,...,64 bits set. pows[i] has bits [0..i-1] set. pows[64] == -1L ==
  186      * ~0L == has all 64 bits set --> correct. to speedup calculations in subsequent methods.
  187      */
  188     private static long[] precomputePows() {
  189       long[] pows = new long[BITS_PER_UNIT + 1];
  190       long value = ~0L;
  191       for (int i = BITS_PER_UNIT + 1; --i >= 1;) {
  192         pows[i] = value >>> (BITS_PER_UNIT - i);
  193       }
  194       pows[0] = 0L;
  195       return pows;
  196     }
  197   
  198     /**
  199      * Sets the bit with index <tt>bitIndex</tt> in the bitvector <tt>bits</tt> to the state specified by <tt>value</tt>.
  200      *
  201      * @param bits     the bitvector.
  202      * @param bitIndex the index of the bit to be changed.
  203      * @param value    the value to be stored in the bit.
  204      */
  205     public static void put(long[] bits, int bitIndex, boolean value) {
  206       if (value) {
  207         set(bits, bitIndex);
  208       } else {
  209         clear(bits, bitIndex);
  210       }
  211     }
  212   
  213     /**
  214      * Sets bits of a bitvector from index <code>from</code> to index <code>to</code> to the bits of <code>value</code>.
  215      * Bit <code>from</code> is set to bit 0 of <code>value</code>, ..., bit <code>to</code> is set to bit
  216      * <code>to-from</code> of <code>value</code>. All other bits stay unaffected. If <tt>from &gt; to</tt> then does
  217      * nothing. Precondition (not checked): <tt>to-from+1 &lt;= 64</tt>.
  218      *
  219      * @param bits  the bitvector.
  220      * @param value the value to be copied into the bitvector.
  221      * @param from  index of start bit (inclusive).
  222      * @param to    index of end bit (inclusive).
  223      */
  224     public static void putLongFromTo(long[] bits, long value, int from, int to) {
  225       if (from > to) {
  226         return;
  227       }
  228   
  229       int fromIndex = from >> ADDRESS_BITS_PER_UNIT; //equivalent to from/64
  230       int toIndex = to >> ADDRESS_BITS_PER_UNIT;
  231       int fromOffset = from & BIT_INDEX_MASK; //equivalent to from%64
  232       int toOffset = to & BIT_INDEX_MASK;
  233       /*
  234       this is equivalent to the above, but slower:
  235       int fromIndex=from/BITS_PER_UNIT;
  236       int toIndex=to/BITS_PER_UNIT;
  237       int fromOffset=from%BITS_PER_UNIT;
  238       int toOffset=to%BITS_PER_UNIT;
  239       */
  240   
  241       //make sure all unused bits to the left are cleared.
  242       long mask = bitMaskWithBitsSetFromTo(to - from + 1, BIT_INDEX_MASK);
  243       long cleanValue = value & (~mask);
  244   
  245       long shiftedValue;
  246   
  247       if (fromIndex == toIndex) { //range does not cross unit boundaries; should go into one single long value.
  248         shiftedValue = cleanValue << fromOffset;
  249         mask = bitMaskWithBitsSetFromTo(fromOffset, toOffset);
  250         bits[fromIndex] = (bits[fromIndex] & (~mask)) | shiftedValue;
  251         return;
  252   
  253       }
  254   
  255       //range crosses unit boundaries; value should go into two long values.
  256       //copy into first long value.
  257       shiftedValue = cleanValue << fromOffset;
  258       mask = bitMaskWithBitsSetFromTo(fromOffset, BIT_INDEX_MASK);
  259       bits[fromIndex] = (bits[fromIndex] & (~mask)) | shiftedValue;
  260   
  261       //copy into second long value.
  262       shiftedValue = cleanValue >>> (BITS_PER_UNIT - fromOffset);
  263       mask = bitMaskWithBitsSetFromTo(0, toOffset);
  264       bits[toIndex] = (bits[toIndex] & (~mask)) | shiftedValue;
  265     }
  266   
  267     /**
  268      * Changes the bit with index <tt>bitIndex</tt> in the bitvector <tt>bits</tt> to the "set" (<tt>true</tt>) state.
  269      *
  270      * @param bits     the bitvector.
  271      * @param bitIndex the index of the bit to be set.
  272      */
  273     public static void set(long[] bits, int bitIndex) {
  274       bits[bitIndex >> ADDRESS_BITS_PER_UNIT] |= 1L << (bitIndex & BIT_INDEX_MASK);
  275     }
  276   
  277     /** Returns the index of the unit that contains the given bitIndex. */
  278     protected static int unit(int bitIndex) {
  279       return bitIndex >> ADDRESS_BITS_PER_UNIT;
  280       //equivalent to bitIndex/64
  281     }
  282   }

Save This Page
Home » mahout-collections-1.0-src » org.apache.mahout.math.bitvector » [javadoc | source]