Home » lucene-3.0.1-src » org.apache » lucene » search » [javadoc | source]

    1   package org.apache.lucene.search;
    2   
    3   /**
    4    * Licensed to the Apache Software Foundation (ASF) under one or more
    5    * contributor license agreements.  See the NOTICE file distributed with
    6    * this work for additional information regarding copyright ownership.
    7    * The ASF licenses this file to You under the Apache License, Version 2.0
    8    * (the "License"); you may not use this file except in compliance with
    9    * the License.  You may obtain a copy of the License at
   10    *
   11    *     http://www.apache.org/licenses/LICENSE-2.0
   12    *
   13    * Unless required by applicable law or agreed to in writing, software
   14    * distributed under the License is distributed on an "AS IS" BASIS,
   15    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   16    * See the License for the specific language governing permissions and
   17    * limitations under the License.
   18    */
   19   
   20   import org.apache.lucene.index.IndexReader;
   21   import org.apache.lucene.util.NumericUtils;
   22   import org.apache.lucene.util.RamUsageEstimator;
   23   import org.apache.lucene.document.NumericField; // for javadocs
   24   import org.apache.lucene.analysis.NumericTokenStream; // for javadocs
   25   
   26   import java.io.IOException;
   27   import java.io.Serializable;
   28   import java.io.PrintStream;
   29   
   30   import java.text.DecimalFormat;
   31   
   32   /**
   33    * Expert: Maintains caches of term values.
   34    *
   35    * <p>Created: May 19, 2004 11:13:14 AM
   36    *
   37    * @since   lucene 1.4
   38    * @see org.apache.lucene.util.FieldCacheSanityChecker
   39    */
   40   public interface FieldCache {
   41   
   42     public static final class CreationPlaceholder {
   43       Object value;
   44     }
   45   
   46     /** Indicator for StringIndex values in the cache. */
   47     // NOTE: the value assigned to this constant must not be
   48     // the same as any of those in SortField!!
   49     public static final int STRING_INDEX = -1;
   50   
   51   
   52     /** Expert: Stores term text values and document ordering data. */
   53     public static class StringIndex {
   54   	  
   55       public int binarySearchLookup(String key) {
   56         // this special case is the reason that Arrays.binarySearch() isn't useful.
   57         if (key == null)
   58           return 0;
   59   	  
   60         int low = 1;
   61         int high = lookup.length-1;
   62   
   63         while (low <= high) {
   64           int mid = (low + high) >>> 1;
   65           int cmp = lookup[mid].compareTo(key);
   66   
   67           if (cmp < 0)
   68             low = mid + 1;
   69           else if (cmp > 0)
   70             high = mid - 1;
   71           else
   72             return mid; // key found
   73         }
   74         return -(low + 1);  // key not found.
   75       }
   76   	
   77       /** All the term values, in natural order. */
   78       public final String[] lookup;
   79   
   80       /** For each document, an index into the lookup array. */
   81       public final int[] order;
   82   
   83       /** Creates one of these objects */
   84       public StringIndex (int[] values, String[] lookup) {
   85         this.order = values;
   86         this.lookup = lookup;
   87       }
   88     }
   89   
   /**
    * Marker interface that every parser interface extends. A custom parser is
    * handed over through
    * {@link SortField#SortField(String, FieldCache.Parser)}.
    * It declares no methods of its own and is {@link Serializable}.
    */
   public interface Parser extends Serializable {}
   97   
   98     /** Interface to parse bytes from document fields.
   99      * @see FieldCache#getBytes(IndexReader, String, FieldCache.ByteParser)
  100      */
  101     public interface ByteParser extends Parser {
  102       /** Return a single Byte representation of this field's value. */
  103       public byte parseByte(String string);
  104     }
  105   
  106     /** Interface to parse shorts from document fields.
  107      * @see FieldCache#getShorts(IndexReader, String, FieldCache.ShortParser)
  108      */
  109     public interface ShortParser extends Parser {
  110       /** Return a short representation of this field's value. */
  111       public short parseShort(String string);
  112     }
  113   
  114     /** Interface to parse ints from document fields.
  115      * @see FieldCache#getInts(IndexReader, String, FieldCache.IntParser)
  116      */
  117     public interface IntParser extends Parser {
  118       /** Return an integer representation of this field's value. */
  119       public int parseInt(String string);
  120     }
  121   
  122     /** Interface to parse floats from document fields.
  123      * @see FieldCache#getFloats(IndexReader, String, FieldCache.FloatParser)
  124      */
  125     public interface FloatParser extends Parser {
  126       /** Return an float representation of this field's value. */
  127       public float parseFloat(String string);
  128     }
  129   
  130     /** Interface to parse long from document fields.
  131      * @see FieldCache#getLongs(IndexReader, String, FieldCache.LongParser)
  132      */
  133     public interface LongParser extends Parser {
  134       /** Return an long representation of this field's value. */
  135       public long parseLong(String string);
  136     }
  137   
  138     /** Interface to parse doubles from document fields.
  139      * @see FieldCache#getDoubles(IndexReader, String, FieldCache.DoubleParser)
  140      */
  141     public interface DoubleParser extends Parser {
  142       /** Return an long representation of this field's value. */
  143       public double parseDouble(String string);
  144     }
  145   
  146     /** Expert: The cache used internally by sorting and range query classes. */
  147     public static FieldCache DEFAULT = new FieldCacheImpl();
  148     
  149     /** The default parser for byte values, which are encoded by {@link Byte#toString(byte)} */
  150     public static final ByteParser DEFAULT_BYTE_PARSER = new ByteParser() {
  151       public byte parseByte(String value) {
  152         return Byte.parseByte(value);
  153       }
  154       protected Object readResolve() {
  155         return DEFAULT_BYTE_PARSER;
  156       }
  157       @Override
  158       public String toString() { 
  159         return FieldCache.class.getName()+".DEFAULT_BYTE_PARSER"; 
  160       }
  161     };
  162   
  163     /** The default parser for short values, which are encoded by {@link Short#toString(short)} */
  164     public static final ShortParser DEFAULT_SHORT_PARSER = new ShortParser() {
  165       public short parseShort(String value) {
  166         return Short.parseShort(value);
  167       }
  168       protected Object readResolve() {
  169         return DEFAULT_SHORT_PARSER;
  170       }
  171       @Override
  172       public String toString() { 
  173         return FieldCache.class.getName()+".DEFAULT_SHORT_PARSER"; 
  174       }
  175     };
  176   
  177     /** The default parser for int values, which are encoded by {@link Integer#toString(int)} */
  178     public static final IntParser DEFAULT_INT_PARSER = new IntParser() {
  179       public int parseInt(String value) {
  180         return Integer.parseInt(value);
  181       }
  182       protected Object readResolve() {
  183         return DEFAULT_INT_PARSER;
  184       }
  185       @Override
  186       public String toString() { 
  187         return FieldCache.class.getName()+".DEFAULT_INT_PARSER"; 
  188       }
  189     };
  190   
  191     /** The default parser for float values, which are encoded by {@link Float#toString(float)} */
  192     public static final FloatParser DEFAULT_FLOAT_PARSER = new FloatParser() {
  193       public float parseFloat(String value) {
  194         return Float.parseFloat(value);
  195       }
  196       protected Object readResolve() {
  197         return DEFAULT_FLOAT_PARSER;
  198       }
  199       @Override
  200       public String toString() { 
  201         return FieldCache.class.getName()+".DEFAULT_FLOAT_PARSER"; 
  202       }
  203     };
  204   
  205     /** The default parser for long values, which are encoded by {@link Long#toString(long)} */
  206     public static final LongParser DEFAULT_LONG_PARSER = new LongParser() {
  207       public long parseLong(String value) {
  208         return Long.parseLong(value);
  209       }
  210       protected Object readResolve() {
  211         return DEFAULT_LONG_PARSER;
  212       }
  213       @Override
  214       public String toString() { 
  215         return FieldCache.class.getName()+".DEFAULT_LONG_PARSER"; 
  216       }
  217     };
  218   
  219     /** The default parser for double values, which are encoded by {@link Double#toString(double)} */
  220     public static final DoubleParser DEFAULT_DOUBLE_PARSER = new DoubleParser() {
  221       public double parseDouble(String value) {
  222         return Double.parseDouble(value);
  223       }
  224       protected Object readResolve() {
  225         return DEFAULT_DOUBLE_PARSER;
  226       }
  227       @Override
  228       public String toString() { 
  229         return FieldCache.class.getName()+".DEFAULT_DOUBLE_PARSER"; 
  230       }
  231     };
  232   
  233     /**
  234      * A parser instance for int values encoded by {@link NumericUtils#intToPrefixCoded(int)}, e.g. when indexed
  235      * via {@link NumericField}/{@link NumericTokenStream}.
  236      */
  237     public static final IntParser NUMERIC_UTILS_INT_PARSER=new IntParser(){
  238       public int parseInt(String val) {
  239         final int shift = val.charAt(0)-NumericUtils.SHIFT_START_INT;
  240         if (shift>0 && shift<=31)
  241           throw new FieldCacheImpl.StopFillCacheException();
  242         return NumericUtils.prefixCodedToInt(val);
  243       }
  244       protected Object readResolve() {
  245         return NUMERIC_UTILS_INT_PARSER;
  246       }
  247       @Override
  248       public String toString() { 
  249         return FieldCache.class.getName()+".NUMERIC_UTILS_INT_PARSER"; 
  250       }
  251     };
  252   
  253     /**
  254      * A parser instance for float values encoded with {@link NumericUtils}, e.g. when indexed
  255      * via {@link NumericField}/{@link NumericTokenStream}.
  256      */
  257     public static final FloatParser NUMERIC_UTILS_FLOAT_PARSER=new FloatParser(){
  258       public float parseFloat(String val) {
  259         final int shift = val.charAt(0)-NumericUtils.SHIFT_START_INT;
  260         if (shift>0 && shift<=31)
  261           throw new FieldCacheImpl.StopFillCacheException();
  262         return NumericUtils.sortableIntToFloat(NumericUtils.prefixCodedToInt(val));
  263       }
  264       protected Object readResolve() {
  265         return NUMERIC_UTILS_FLOAT_PARSER;
  266       }
  267       @Override
  268       public String toString() { 
  269         return FieldCache.class.getName()+".NUMERIC_UTILS_FLOAT_PARSER"; 
  270       }
  271     };
  272   
  273     /**
  274      * A parser instance for long values encoded by {@link NumericUtils#longToPrefixCoded(long)}, e.g. when indexed
  275      * via {@link NumericField}/{@link NumericTokenStream}.
  276      */
  277     public static final LongParser NUMERIC_UTILS_LONG_PARSER = new LongParser(){
  278       public long parseLong(String val) {
  279         final int shift = val.charAt(0)-NumericUtils.SHIFT_START_LONG;
  280         if (shift>0 && shift<=63)
  281           throw new FieldCacheImpl.StopFillCacheException();
  282         return NumericUtils.prefixCodedToLong(val);
  283       }
  284       protected Object readResolve() {
  285         return NUMERIC_UTILS_LONG_PARSER;
  286       }
  287       @Override
  288       public String toString() { 
  289         return FieldCache.class.getName()+".NUMERIC_UTILS_LONG_PARSER"; 
  290       }
  291     };
  292   
  293     /**
  294      * A parser instance for double values encoded with {@link NumericUtils}, e.g. when indexed
  295      * via {@link NumericField}/{@link NumericTokenStream}.
  296      */
  297     public static final DoubleParser NUMERIC_UTILS_DOUBLE_PARSER = new DoubleParser(){
  298       public double parseDouble(String val) {
  299         final int shift = val.charAt(0)-NumericUtils.SHIFT_START_LONG;
  300         if (shift>0 && shift<=63)
  301           throw new FieldCacheImpl.StopFillCacheException();
  302         return NumericUtils.sortableLongToDouble(NumericUtils.prefixCodedToLong(val));
  303       }
  304       protected Object readResolve() {
  305         return NUMERIC_UTILS_DOUBLE_PARSER;
  306       }
  307       @Override
  308       public String toString() { 
  309         return FieldCache.class.getName()+".NUMERIC_UTILS_DOUBLE_PARSER"; 
  310       }
  311     };
  312     
  313     /** Checks the internal cache for an appropriate entry, and if none is
  314      * found, reads the terms in <code>field</code> as a single byte and returns an array
  315      * of size <code>reader.maxDoc()</code> of the value each document
  316      * has in the given field.
  317      * @param reader  Used to get field values.
  318      * @param field   Which field contains the single byte values.
  319      * @return The values in the given field for each document.
  320      * @throws IOException  If any error occurs.
  321      */
  322     public byte[] getBytes (IndexReader reader, String field)
  323     throws IOException;
  324   
  325     /** Checks the internal cache for an appropriate entry, and if none is found,
  326      * reads the terms in <code>field</code> as bytes and returns an array of
  327      * size <code>reader.maxDoc()</code> of the value each document has in the
  328      * given field.
  329      * @param reader  Used to get field values.
  330      * @param field   Which field contains the bytes.
  331      * @param parser  Computes byte for string values.
  332      * @return The values in the given field for each document.
  333      * @throws IOException  If any error occurs.
  334      */
  335     public byte[] getBytes (IndexReader reader, String field, ByteParser parser)
  336     throws IOException;
  337   
  338     /** Checks the internal cache for an appropriate entry, and if none is
  339      * found, reads the terms in <code>field</code> as shorts and returns an array
  340      * of size <code>reader.maxDoc()</code> of the value each document
  341      * has in the given field.
  342      * @param reader  Used to get field values.
  343      * @param field   Which field contains the shorts.
  344      * @return The values in the given field for each document.
  345      * @throws IOException  If any error occurs.
  346      */
  347     public short[] getShorts (IndexReader reader, String field)
  348     throws IOException;
  349   
  350     /** Checks the internal cache for an appropriate entry, and if none is found,
  351      * reads the terms in <code>field</code> as shorts and returns an array of
  352      * size <code>reader.maxDoc()</code> of the value each document has in the
  353      * given field.
  354      * @param reader  Used to get field values.
  355      * @param field   Which field contains the shorts.
  356      * @param parser  Computes short for string values.
  357      * @return The values in the given field for each document.
  358      * @throws IOException  If any error occurs.
  359      */
  360     public short[] getShorts (IndexReader reader, String field, ShortParser parser)
  361     throws IOException;
  362   
  363     /** Checks the internal cache for an appropriate entry, and if none is
  364      * found, reads the terms in <code>field</code> as integers and returns an array
  365      * of size <code>reader.maxDoc()</code> of the value each document
  366      * has in the given field.
  367      * @param reader  Used to get field values.
  368      * @param field   Which field contains the integers.
  369      * @return The values in the given field for each document.
  370      * @throws IOException  If any error occurs.
  371      */
  372     public int[] getInts (IndexReader reader, String field)
  373     throws IOException;
  374   
  375     /** Checks the internal cache for an appropriate entry, and if none is found,
  376      * reads the terms in <code>field</code> as integers and returns an array of
  377      * size <code>reader.maxDoc()</code> of the value each document has in the
  378      * given field.
  379      * @param reader  Used to get field values.
  380      * @param field   Which field contains the integers.
  381      * @param parser  Computes integer for string values.
  382      * @return The values in the given field for each document.
  383      * @throws IOException  If any error occurs.
  384      */
  385     public int[] getInts (IndexReader reader, String field, IntParser parser)
  386     throws IOException;
  387   
  388     /** Checks the internal cache for an appropriate entry, and if
  389      * none is found, reads the terms in <code>field</code> as floats and returns an array
  390      * of size <code>reader.maxDoc()</code> of the value each document
  391      * has in the given field.
  392      * @param reader  Used to get field values.
  393      * @param field   Which field contains the floats.
  394      * @return The values in the given field for each document.
  395      * @throws IOException  If any error occurs.
  396      */
  397     public float[] getFloats (IndexReader reader, String field)
  398     throws IOException;
  399   
  400     /** Checks the internal cache for an appropriate entry, and if
  401      * none is found, reads the terms in <code>field</code> as floats and returns an array
  402      * of size <code>reader.maxDoc()</code> of the value each document
  403      * has in the given field.
  404      * @param reader  Used to get field values.
  405      * @param field   Which field contains the floats.
  406      * @param parser  Computes float for string values.
  407      * @return The values in the given field for each document.
  408      * @throws IOException  If any error occurs.
  409      */
  410     public float[] getFloats (IndexReader reader, String field,
  411                               FloatParser parser) throws IOException;
  412     
  413     /**
  414      * Checks the internal cache for an appropriate entry, and if none is
  415      * found, reads the terms in <code>field</code> as longs and returns an array
  416      * of size <code>reader.maxDoc()</code> of the value each document
  417      * has in the given field.
  418      *
  419      * @param reader Used to get field values.
  420      * @param field  Which field contains the longs.
  421      * @return The values in the given field for each document.
  422      * @throws java.io.IOException If any error occurs.
  423      */
  424     public long[] getLongs(IndexReader reader, String field)
  425             throws IOException;
  426   
  427     /**
  428      * Checks the internal cache for an appropriate entry, and if none is found,
  429      * reads the terms in <code>field</code> as longs and returns an array of
  430      * size <code>reader.maxDoc()</code> of the value each document has in the
  431      * given field.
  432      *
  433      * @param reader Used to get field values.
  434      * @param field  Which field contains the longs.
  435      * @param parser Computes integer for string values.
  436      * @return The values in the given field for each document.
  437      * @throws IOException If any error occurs.
  438      */
  439     public long[] getLongs(IndexReader reader, String field, LongParser parser)
  440             throws IOException;
  441   
  442   
  443     /**
  444      * Checks the internal cache for an appropriate entry, and if none is
  445      * found, reads the terms in <code>field</code> as integers and returns an array
  446      * of size <code>reader.maxDoc()</code> of the value each document
  447      * has in the given field.
  448      *
  449      * @param reader Used to get field values.
  450      * @param field  Which field contains the doubles.
  451      * @return The values in the given field for each document.
  452      * @throws IOException If any error occurs.
  453      */
  454     public double[] getDoubles(IndexReader reader, String field)
  455             throws IOException;
  456   
  457     /**
  458      * Checks the internal cache for an appropriate entry, and if none is found,
  459      * reads the terms in <code>field</code> as doubles and returns an array of
  460      * size <code>reader.maxDoc()</code> of the value each document has in the
  461      * given field.
  462      *
  463      * @param reader Used to get field values.
  464      * @param field  Which field contains the doubles.
  465      * @param parser Computes integer for string values.
  466      * @return The values in the given field for each document.
  467      * @throws IOException If any error occurs.
  468      */
  469     public double[] getDoubles(IndexReader reader, String field, DoubleParser parser)
  470             throws IOException;
  471   
  472     /** Checks the internal cache for an appropriate entry, and if none
  473      * is found, reads the term values in <code>field</code> and returns an array
  474      * of size <code>reader.maxDoc()</code> containing the value each document
  475      * has in the given field.
  476      * @param reader  Used to get field values.
  477      * @param field   Which field contains the strings.
  478      * @return The values in the given field for each document.
  479      * @throws IOException  If any error occurs.
  480      */
  481     public String[] getStrings (IndexReader reader, String field)
  482     throws IOException;
  483   
  484     /** Checks the internal cache for an appropriate entry, and if none
  485      * is found reads the term values in <code>field</code> and returns
  486      * an array of them in natural order, along with an array telling
  487      * which element in the term array each document uses.
  488      * @param reader  Used to get field values.
  489      * @param field   Which field contains the strings.
  490      * @return Array of terms and index into the array for each document.
  491      * @throws IOException  If any error occurs.
  492      */
  493     public StringIndex getStringIndex (IndexReader reader, String field)
  494     throws IOException;
  495   
  496     /**
  497      * EXPERT: A unique Identifier/Description for each item in the FieldCache. 
  498      * Can be useful for logging/debugging.
  499      * <p>
  500      * <b>EXPERIMENTAL API:</b> This API is considered extremely advanced 
  501      * and experimental.  It may be removed or altered w/o warning in future 
  502      * releases 
  503      * of Lucene.
  504      * </p>
  505      */
  506     public static abstract class CacheEntry {
  507       public abstract Object getReaderKey();
  508       public abstract String getFieldName();
  509       public abstract Class getCacheType();
  510       public abstract Object getCustom();
  511       public abstract Object getValue();
  512       private String size = null;
  513       protected final void setEstimatedSize(String size) {
  514         this.size = size;
  515       }
  516       /** 
  517        * @see #estimateSize(RamUsageEstimator)
  518        */
  519       public void estimateSize() {
  520         estimateSize(new RamUsageEstimator(false)); // doesn't check for interned
  521       }
  522       /** 
  523        * Computes (and stores) the estimated size of the cache Value 
  524        * @see #getEstimatedSize
  525        */
  526       public void estimateSize(RamUsageEstimator ramCalc) {
  527         long size = ramCalc.estimateRamUsage(getValue());
  528         setEstimatedSize(RamUsageEstimator.humanReadableUnits
  529                          (size, new DecimalFormat("0.#")));
  530                           
  531       }
  532       /**
  533        * The most recently estimated size of the value, null unless 
  534        * estimateSize has been called.
  535        */
  536       public final String getEstimatedSize() {
  537         return size;
  538       }
  539       
  540       
  541       @Override
  542       public String toString() {
  543         StringBuilder b = new StringBuilder();
  544         b.append("'").append(getReaderKey()).append("'=>");
  545         b.append("'").append(getFieldName()).append("',");
  546         b.append(getCacheType()).append(",").append(getCustom());
  547         b.append("=>").append(getValue().getClass().getName()).append("#");
  548         b.append(System.identityHashCode(getValue()));
  549         
  550         String s = getEstimatedSize();
  551         if(null != s) {
  552           b.append(" (size =~ ").append(s).append(')');
  553         }
  554   
  555         return b.toString();
  556       }
  557     
  558     }
  559   
  560     /**
  561      * EXPERT: Generates an array of CacheEntry objects representing all items 
  562      * currently in the FieldCache.
  563      * <p>
  564      * NOTE: These CacheEntry objects maintain a strong reference to the 
  565      * Cached Values.  Maintaining references to a CacheEntry the IndexReader 
  566      * associated with it has garbage collected will prevent the Value itself
  567      * from being garbage collected when the Cache drops the WeakRefrence.
  568      * </p>
  569      * <p>
  570      * <b>EXPERIMENTAL API:</b> This API is considered extremely advanced 
  571      * and experimental.  It may be removed or altered w/o warning in future 
  572      * releases 
  573      * of Lucene.
  574      * </p>
  575      */
  576     public abstract CacheEntry[] getCacheEntries();
  577   
  578     /**
  579      * <p>
  580      * EXPERT: Instructs the FieldCache to forcibly expunge all entries 
  581      * from the underlying caches.  This is intended only to be used for 
  582      * test methods as a way to ensure a known base state of the Cache 
  583      * (with out needing to rely on GC to free WeakReferences).  
  584      * It should not be relied on for "Cache maintenance" in general 
  585      * application code.
  586      * </p>
  587      * <p>
  588      * <b>EXPERIMENTAL API:</b> This API is considered extremely advanced 
  589      * and experimental.  It may be removed or altered w/o warning in future 
  590      * releases 
  591      * of Lucene.
  592      * </p>
  593      */
  594     public abstract void purgeAllCaches();
  595   
  596     /**
  597      * If non-null, FieldCacheImpl will warn whenever
  598      * entries are created that are not sane according to
  599      * {@link org.apache.lucene.util.FieldCacheSanityChecker}.
  600      */
  601     public void setInfoStream(PrintStream stream);
  602   
  603     /** counterpart of {@link #setInfoStream(PrintStream)} */
  604     public PrintStream getInfoStream();
  605   }

Home » lucene-3.0.1-src » org.apache » lucene » search » [javadoc | source]