Save This Page
Home » lucene-3.0.1-src » org.apache » lucene » search » highlight » [javadoc | source]
    1   package org.apache.lucene.search.highlight;
    2   
    3   import java.io.IOException;
    4   import java.util.HashMap;
    5   import java.util.HashSet;
    6   import java.util.Map;
    7   import java.util.Set;
    8   
    9   import org.apache.lucene.analysis.CachingTokenFilter;
   10   import org.apache.lucene.analysis.Token;
   11   import org.apache.lucene.index.IndexReader;
   12   import org.apache.lucene.search.Query;
   13   
   14   
   15   /**
   16    * {@link Scorer} implementation which scores text fragments by the number of
   17    * unique query terms found. This class converts appropriate Querys to
   18    * SpanQuerys and attempts to score only those terms that participated in
   19    * generating the 'hit' on the document.
   20    */
   21   public class SpanScorer implements Scorer {
   22     private float totalScore;
   23     private Set foundTerms;
   24     private Map fieldWeightedSpanTerms;
   25     private float maxTermWeight;
   26     private int position = -1;
   27     private String defaultField;
   28     private static boolean highlightCnstScrRngQuery;
   29   
   30     /**
   31      * @param query
   32      *            Query to use for highlighting
   33      * @param field
   34      *            Field to highlight - pass null to ignore fields
   35      * @param tokenStream
   36      *            of source text to be highlighted
   37      * @throws IOException
   38      */
   39     public SpanScorer(Query query, String field,
   40       CachingTokenFilter cachingTokenFilter) throws IOException {
   41       init(query, field, cachingTokenFilter, null);
   42     }
   43   
   44     /**
   45      * @param query
   46      *            Query to use for highlighting
   47      * @param field
   48      *            Field to highlight - pass null to ignore fields
   49      * @param tokenStream
   50      *            of source text to be highlighted
   51      * @param reader
   52      * @throws IOException
   53      */
   54     public SpanScorer(Query query, String field,
   55       CachingTokenFilter cachingTokenFilter, IndexReader reader)
   56       throws IOException {
   57       init(query, field, cachingTokenFilter, reader);
   58     }
   59   
   60     /**
   61      * As above, but with ability to pass in an <tt>IndexReader</tt>
   62      */
   63     public SpanScorer(Query query, String field,
   64       CachingTokenFilter cachingTokenFilter, IndexReader reader, String defaultField)
   65       throws IOException {
   66       this.defaultField = defaultField.intern();
   67       init(query, field, cachingTokenFilter, reader);
   68     }
   69   
   70     /**
   71      * @param defaultField - The default field for queries with the field name unspecified
   72      */
   73     public SpanScorer(Query query, String field,
   74       CachingTokenFilter cachingTokenFilter, String defaultField) throws IOException {
   75       this.defaultField = defaultField.intern();
   76       init(query, field, cachingTokenFilter, null);
   77     }
   78   
   79     /**
   80      * @param weightedTerms
   81      */
   82     public SpanScorer(WeightedSpanTerm[] weightedTerms) {
   83       this.fieldWeightedSpanTerms = new HashMap(weightedTerms.length);
   84   
   85       for (int i = 0; i < weightedTerms.length; i++) {
   86         WeightedSpanTerm existingTerm = (WeightedSpanTerm) fieldWeightedSpanTerms.get(weightedTerms[i].term);
   87   
   88         if ((existingTerm == null) ||
   89               (existingTerm.weight < weightedTerms[i].weight)) {
   90           // if a term is defined more than once, always use the highest
   91           // scoring weight
   92           fieldWeightedSpanTerms.put(weightedTerms[i].term, weightedTerms[i]);
   93           maxTermWeight = Math.max(maxTermWeight, weightedTerms[i].getWeight());
   94         }
   95       }
   96     }
   97   
   98     /*
   99      * (non-Javadoc)
  100      *
  101      * @see org.apache.lucene.search.highlight.Scorer#getFragmentScore()
  102      */
  103     public float getFragmentScore() {
  104       return totalScore;
  105     }
  106   
  107     /**
  108      *
  109      * @return The highest weighted term (useful for passing to
  110      *         GradientFormatter to set top end of coloring scale.
  111      */
  112     public float getMaxTermWeight() {
  113       return maxTermWeight;
  114     }
  115   
  116     /*
  117      * (non-Javadoc)
  118      *
  119      * @see org.apache.lucene.search.highlight.Scorer#getTokenScore(org.apache.lucene.analysis.Token,
  120      *      int)
  121      */
  122     public float getTokenScore(Token token) {
  123       position += token.getPositionIncrement();
  124       String termText = token.term();
  125   
  126       WeightedSpanTerm weightedSpanTerm;
  127   
  128       if ((weightedSpanTerm = (WeightedSpanTerm) fieldWeightedSpanTerms.get(
  129                 termText)) == null) {
  130         return 0;
  131       }
  132   
  133       if (weightedSpanTerm.positionSensitive &&
  134             !weightedSpanTerm.checkPosition(position)) {
  135         return 0;
  136       }
  137   
  138       float score = weightedSpanTerm.getWeight();
  139   
  140       // found a query term - is it unique in this doc?
  141       if (!foundTerms.contains(termText)) {
  142         totalScore += score;
  143         foundTerms.add(termText);
  144       }
  145   
  146       return score;
  147     }
  148   
  149     /**
  150      * Retrieve the WeightedSpanTerm for the specified token. Useful for passing
  151      * Span information to a Fragmenter.
  152      *
  153      * @param token
  154      * @return WeightedSpanTerm for token
  155      */
  156     public WeightedSpanTerm getWeightedSpanTerm(String token) {
  157       return (WeightedSpanTerm) fieldWeightedSpanTerms.get(token);
  158     }
  159   
  160     /**
  161      * @param query
  162      * @param field
  163      * @param tokenStream
  164      * @param reader
  165      * @throws IOException
  166      */
  167     private void init(Query query, String field,
  168       CachingTokenFilter cachingTokenFilter, IndexReader reader)
  169       throws IOException {
  170       WeightedSpanTermExtractor qse = defaultField == null ? new WeightedSpanTermExtractor()
  171         : new WeightedSpanTermExtractor(defaultField);
  172       
  173       qse.setHighlightCnstScrRngQuery(highlightCnstScrRngQuery);
  174   
  175       if (reader == null) {
  176         this.fieldWeightedSpanTerms = qse.getWeightedSpanTerms(query,
  177             cachingTokenFilter, field);
  178       } else {
  179         this.fieldWeightedSpanTerms = qse.getWeightedSpanTermsWithScores(query,
  180             cachingTokenFilter, field, reader);
  181       }
  182     }
  183   
  184     /**
  185      * @return whether ConstantScoreRangeQuerys are set to be highlighted
  186      */
  187     public static boolean isHighlightCnstScrRngQuery() {
  188       return highlightCnstScrRngQuery;
  189     }
  190   
  191     /**
  192      * If you call Highlighter#getBestFragment() more than once you must reset
  193      * the SpanScorer between each call.
  194      */
  195     public void reset() {
  196       position = -1;
  197     }
  198   
  199     /**
  200      * Turns highlighting of ConstantScoreRangeQuery on/off. ConstantScoreRangeQuerys cannot be
  201      * highlighted if you rewrite the query first. Must be called before SpanScorer construction.
  202      * 
  203      * @param highlightCnstScrRngQuery
  204      */
  205     public static void setHighlightCnstScrRngQuery(boolean highlight) {
  206       highlightCnstScrRngQuery = highlight;
  207     }
  208   
  209     /*
  210      * (non-Javadoc)
  211      *
  212      * @see org.apache.lucene.search.highlight.Scorer#startFragment(org.apache.lucene.search.highlight.TextFragment)
  213      */
  214     public void startFragment(TextFragment newFragment) {
  215       foundTerms = new HashSet();
  216       totalScore = 0;
  217     }
  218   }

Save This Page
Home » lucene-3.0.1-src » org.apache » lucene » search » highlight » [javadoc | source]