Save This Page
Home » lucene-3.0.1-src » org.apache » lucene » search » highlight » [javadoc | source]
    1   package org.apache.lucene.search.highlight;
    2   
    3   
    4   /**
    5    * Licensed to the Apache Software Foundation (ASF) under one or more
    6    * contributor license agreements.  See the NOTICE file distributed with
    7    * this work for additional information regarding copyright ownership.
    8    * The ASF licenses this file to You under the Apache License, Version 2.0
    9    * (the "License"); you may not use this file except in compliance with
   10    * the License.  You may obtain a copy of the License at
   11    *
   12    *     http://www.apache.org/licenses/LICENSE-2.0
   13    *
   14    * Unless required by applicable law or agreed to in writing, software
   15    * distributed under the License is distributed on an "AS IS" BASIS,
   16    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   17    * See the License for the specific language governing permissions and
   18    * limitations under the License.
   19    */
   20   import java.util.List;
   21   
   22   import org.apache.lucene.analysis.TokenStream;
   23   import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
   24   import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
   25   import org.apache.lucene.analysis.tokenattributes.TermAttribute;
   26   import org.apache.lucene.search.spans.Spans;
   27   
   28   
   29   /**
   30    * {@link Fragmenter} implementation which breaks text up into same-size
   31    * fragments but does not split up {@link Spans}. This is a simple sample class.
   32    */
   33   public class SimpleSpanFragmenter implements Fragmenter {
   34     private static final int DEFAULT_FRAGMENT_SIZE = 100;
   35     private int fragmentSize;
   36     private int currentNumFrags;
   37     private int position = -1;
   38     private QueryScorer queryScorer;
   39     private int waitForPos = -1;
   40     private int textSize;
   41     private TermAttribute termAtt;
   42     private PositionIncrementAttribute posIncAtt;
   43     private OffsetAttribute offsetAtt;
   44   
   45     /**
   46      * @param queryScorer QueryScorer that was used to score hits
   47      */
   48     public SimpleSpanFragmenter(QueryScorer queryScorer) {
   49       this(queryScorer, DEFAULT_FRAGMENT_SIZE);
   50     }
   51   
   52     /**
   53      * @param queryScorer QueryScorer that was used to score hits
   54      * @param fragmentSize size in bytes of each fragment
   55      */
   56     public SimpleSpanFragmenter(QueryScorer queryScorer, int fragmentSize) {
   57       this.fragmentSize = fragmentSize;
   58       this.queryScorer = queryScorer;
   59     }
   60     
   61     /* (non-Javadoc)
   62      * @see org.apache.lucene.search.highlight.Fragmenter#isNewFragment()
   63      */
   64     public boolean isNewFragment() {
   65       position += posIncAtt.getPositionIncrement();
   66   
   67       if (waitForPos == position) {
   68         waitForPos = -1;
   69       } else if (waitForPos != -1) {
   70         return false;
   71       }
   72   
   73       WeightedSpanTerm wSpanTerm = queryScorer.getWeightedSpanTerm(termAtt.term());
   74   
   75       if (wSpanTerm != null) {
   76         List<PositionSpan> positionSpans = wSpanTerm.getPositionSpans();
   77   
   78         for (int i = 0; i < positionSpans.size(); i++) {
   79           if (positionSpans.get(i).start == position) {
   80             waitForPos = positionSpans.get(i).end + 1;
   81             break;
   82           }
   83         }
   84       }
   85   
   86       boolean isNewFrag = offsetAtt.endOffset() >= (fragmentSize * currentNumFrags)
   87           && (textSize - offsetAtt.endOffset()) >= (fragmentSize >>> 1);
   88       
   89       if (isNewFrag) {
   90         currentNumFrags++;
   91       }
   92   
   93       return isNewFrag;
   94     }
   95   
   96   
   97     /* (non-Javadoc)
   98      * @see org.apache.lucene.search.highlight.Fragmenter#start(java.lang.String, org.apache.lucene.analysis.TokenStream)
   99      */
  100     public void start(String originalText, TokenStream tokenStream) {
  101       position = -1;
  102       currentNumFrags = 1;
  103       textSize = originalText.length();
  104       termAtt = tokenStream.addAttribute(TermAttribute.class);
  105       posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
  106       offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
  107     }
  108   }

Save This Page
Home » lucene-3.0.1-src » org.apache » lucene » search » highlight » [javadoc | source]