Save This Page
Home » lucene-3.0.1-src » org.apache » lucene » search » highlight » [javadoc | source]
    1   package org.apache.lucene.search.highlight;
    2   
    3   /**
    4    * Licensed to the Apache Software Foundation (ASF) under one or more
    5    * contributor license agreements.  See the NOTICE file distributed with
    6    * this work for additional information regarding copyright ownership.
    7    * The ASF licenses this file to You under the Apache License, Version 2.0
    8    * (the "License"); you may not use this file except in compliance with
    9    * the License.  You may obtain a copy of the License at
   10    *
   11    *     http://www.apache.org/licenses/LICENSE-2.0
   12    *
   13    * Unless required by applicable law or agreed to in writing, software
   14    * distributed under the License is distributed on an "AS IS" BASIS,
   15    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   16    * See the License for the specific language governing permissions and
   17    * limitations under the License.
   18    */
   19   
   20   import java.util.HashMap;
   21   import java.util.HashSet;
   22   
   23   import org.apache.lucene.analysis.TokenStream;
   24   import org.apache.lucene.analysis.tokenattributes.TermAttribute;
   25   import org.apache.lucene.index.IndexReader;
   26   import org.apache.lucene.search.Query;
   27   
   28   /**
   29    * {@link Scorer} implementation which scores text fragments by the number of
   30    * unique query terms found. This class uses the {@link QueryTermExtractor}
   31    * class to process determine the query terms and their boosts to be used.
   32    */
   33   // TODO: provide option to boost score of fragments near beginning of document
   34   // based on fragment.getFragNum()
   35   public class QueryTermScorer implements Scorer {
   36     
   37     TextFragment currentTextFragment = null;
   38     HashSet<String> uniqueTermsInFragment;
   39   
   40     float totalScore = 0;
   41     float maxTermWeight = 0;
   42     private HashMap<String,WeightedTerm> termsToFind;
   43   
   44     private TermAttribute termAtt;
   45   
   46     /**
   47      * 
   48      * @param query a Lucene query (ideally rewritten using query.rewrite before
   49      *        being passed to this class and the searcher)
   50      */
   51     public QueryTermScorer(Query query) {
   52       this(QueryTermExtractor.getTerms(query));
   53     }
   54   
   55     /**
   56      * 
   57      * @param query a Lucene query (ideally rewritten using query.rewrite before
   58      *        being passed to this class and the searcher)
   59      * @param fieldName the Field name which is used to match Query terms
   60      */
   61     public QueryTermScorer(Query query, String fieldName) {
   62       this(QueryTermExtractor.getTerms(query, false, fieldName));
   63     }
   64   
   65     /**
   66      * 
   67      * @param query a Lucene query (ideally rewritten using query.rewrite before
   68      *        being passed to this class and the searcher)
   69      * @param reader used to compute IDF which can be used to a) score selected
   70      *        fragments better b) use graded highlights eg set font color
   71      *        intensity
   72      * @param fieldName the field on which Inverse Document Frequency (IDF)
   73      *        calculations are based
   74      */
   75     public QueryTermScorer(Query query, IndexReader reader, String fieldName) {
   76       this(QueryTermExtractor.getIdfWeightedTerms(query, reader, fieldName));
   77     }
   78   
   79     public QueryTermScorer(WeightedTerm[] weightedTerms) {
   80       termsToFind = new HashMap<String,WeightedTerm>();
   81       for (int i = 0; i < weightedTerms.length; i++) {
   82         WeightedTerm existingTerm = termsToFind
   83             .get(weightedTerms[i].term);
   84         if ((existingTerm == null)
   85             || (existingTerm.weight < weightedTerms[i].weight)) {
   86           // if a term is defined more than once, always use the highest scoring
   87           // weight
   88           termsToFind.put(weightedTerms[i].term, weightedTerms[i]);
   89           maxTermWeight = Math.max(maxTermWeight, weightedTerms[i].getWeight());
   90         }
   91       }
   92     }
   93   
   94     /* (non-Javadoc)
   95      * @see org.apache.lucene.search.highlight.Scorer#init(org.apache.lucene.analysis.TokenStream)
   96      */
   97     public TokenStream init(TokenStream tokenStream) {
   98       termAtt = tokenStream.addAttribute(TermAttribute.class);
   99       return null;
  100     }
  101   
  102     /*
  103      * (non-Javadoc)
  104      * 
  105      * @see
  106      * org.apache.lucene.search.highlight.FragmentScorer#startFragment(org.apache
  107      * .lucene.search.highlight.TextFragment)
  108      */
  109     public void startFragment(TextFragment newFragment) {
  110       uniqueTermsInFragment = new HashSet<String>();
  111       currentTextFragment = newFragment;
  112       totalScore = 0;
  113   
  114     }
  115   
  116   
  117     /* (non-Javadoc)
  118      * @see org.apache.lucene.search.highlight.Scorer#getTokenScore()
  119      */
  120     public float getTokenScore() {
  121       String termText = termAtt.term();
  122   
  123       WeightedTerm queryTerm = termsToFind.get(termText);
  124       if (queryTerm == null) {
  125         // not a query term - return
  126         return 0;
  127       }
  128       // found a query term - is it unique in this doc?
  129       if (!uniqueTermsInFragment.contains(termText)) {
  130         totalScore += queryTerm.getWeight();
  131         uniqueTermsInFragment.add(termText);
  132       }
  133       return queryTerm.getWeight();
  134     }
  135   
  136   
  137     /* (non-Javadoc)
  138      * @see org.apache.lucene.search.highlight.Scorer#getFragmentScore()
  139      */
  140     public float getFragmentScore() {
  141       return totalScore;
  142     }
  143   
  144     /*
  145      * (non-Javadoc)
  146      * 
  147      * @see
  148      * org.apache.lucene.search.highlight.FragmentScorer#allFragmentsProcessed()
  149      */
  150     public void allFragmentsProcessed() {
  151       // this class has no special operations to perform at end of processing
  152     }
  153   
  154     /**
  155      * 
  156      * @return The highest weighted term (useful for passing to GradientFormatter
  157      *         to set top end of coloring scale.
  158      */
  159     public float getMaxTermWeight() {
  160       return maxTermWeight;
  161     }
  162   }

Save This Page
Home » lucene-3.0.1-src » org.apache » lucene » search » highlight » [javadoc | source]