package org.apache.lucene.analysis;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.Reader;
import java.io.IOException;
import java.io.Closeable;
import java.lang.reflect.Method;

import org.apache.lucene.util.CloseableThreadLocal;
import org.apache.lucene.store.AlreadyClosedException;

import org.apache.lucene.document.Fieldable;
   30   /** An Analyzer builds TokenStreams, which analyze text.  It thus represents a
   31    *  policy for extracting index terms from text.
   32    *  <p>
   33    *  Typical implementations first build a Tokenizer, which breaks the stream of
   34    *  characters from the Reader into raw Tokens.  One or more TokenFilters may
   35    *  then be applied to the output of the Tokenizer.
   36    */
   37   public abstract class Analyzer implements Closeable {
   38     /** Creates a TokenStream which tokenizes all the text in the provided
   39      * Reader.  Must be able to handle null field name for
   40      * backward compatibility.
   41      */
   42     public abstract TokenStream tokenStream(String fieldName, Reader reader);
   43   
   44     /** Creates a TokenStream that is allowed to be re-used
   45      *  from the previous time that the same thread called
   46      *  this method.  Callers that do not need to use more
   47      *  than one TokenStream at the same time from this
   48      *  analyzer should use this method for better
   49      *  performance.
   50      */
   51     public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
   52       return tokenStream(fieldName, reader);
   53     }
   54   
   55     private CloseableThreadLocal<Object> tokenStreams = new CloseableThreadLocal<Object>();
   56   
   57     /** Used by Analyzers that implement reusableTokenStream
   58      *  to retrieve previously saved TokenStreams for re-use
   59      *  by the same thread. */
   60     protected Object getPreviousTokenStream() {
   61       try {
   62         return tokenStreams.get();
   63       } catch (NullPointerException npe) {
   64         if (tokenStreams == null) {
   65           throw new AlreadyClosedException("this Analyzer is closed");
   66         } else {
   67           throw npe;
   68         }
   69       }
   70     }
   71   
   72     /** Used by Analyzers that implement reusableTokenStream
   73      *  to save a TokenStream for later re-use by the same
   74      *  thread. */
   75     protected void setPreviousTokenStream(Object obj) {
   76       try {
   77         tokenStreams.set(obj);
   78       } catch (NullPointerException npe) {
   79         if (tokenStreams == null) {
   80           throw new AlreadyClosedException("this Analyzer is closed");
   81         } else {
   82           throw npe;
   83         }
   84       }
   85     }
   86   
   87     /** @deprecated */
   88     protected boolean overridesTokenStreamMethod = false;
   89   
   90     /** @deprecated This is only present to preserve
   91      *  back-compat of classes that subclass a core analyzer
   92      *  and override tokenStream but not reusableTokenStream */
   93     protected void setOverridesTokenStreamMethod(Class<? extends Analyzer> baseClass) {
   94       try {
   95         Method m = this.getClass().getMethod("tokenStream", String.class, Reader.class);
   96         overridesTokenStreamMethod = m.getDeclaringClass() != baseClass;
   97       } catch (NoSuchMethodException nsme) {
   98         // cannot happen, as baseClass is subclass of Analyzer through generics
   99         overridesTokenStreamMethod = false;
  100       }
  101     }
  102   
  103   
  104     /**
  105      * Invoked before indexing a Fieldable instance if
  106      * terms have already been added to that field.  This allows custom
  107      * analyzers to place an automatic position increment gap between
  108      * Fieldable instances using the same field name.  The default value
  109      * position increment gap is 0.  With a 0 position increment gap and
  110      * the typical default token position increment of 1, all terms in a field,
  111      * including across Fieldable instances, are in successive positions, allowing
  112      * exact PhraseQuery matches, for instance, across Fieldable instance boundaries.
  113      *
  114      * @param fieldName Fieldable name being indexed.
  115      * @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)}
  116      */
  117     public int getPositionIncrementGap(String fieldName) {
  118       return 0;
  119     }
  120   
  121     /**
  122      * Just like {@link #getPositionIncrementGap}, except for
  123      * Token offsets instead.  By default this returns 1 for
  124      * tokenized fields and, as if the fields were joined
  125      * with an extra space character, and 0 for un-tokenized
  126      * fields.  This method is only called if the field
  127      * produced at least one token for indexing.
  128      *
  129      * @param field the field just indexed
  130      * @return offset gap, added to the next token emitted from {@link #tokenStream(String,Reader)}
  131      */
  132     public int getOffsetGap(Fieldable field) {
  133       if (field.isTokenized())
  134         return 1;
  135       else
  136         return 0;
  137     }
  138   
  139     /** Frees persistent resources used by this Analyzer */
  140     public void close() {
  141       tokenStreams.close();
  142       tokenStreams = null;
  143     }
  144   }
