Home » lucene-3.0.1-src » org.apache » lucene » search » [javadoc | source]

    1   package org.apache.lucene.search;
    2   
    3   /**
    4    * Licensed to the Apache Software Foundation (ASF) under one or more
    5    * contributor license agreements.  See the NOTICE file distributed with
    6    * this work for additional information regarding copyright ownership.
    7    * The ASF licenses this file to You under the Apache License, Version 2.0
    8    * (the "License"); you may not use this file except in compliance with
    9    * the License.  You may obtain a copy of the License at
   10    *
   11    *     http://www.apache.org/licenses/LICENSE-2.0
   12    *
   13    * Unless required by applicable law or agreed to in writing, software
   14    * distributed under the License is distributed on an "AS IS" BASIS,
   15    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   16    * See the License for the specific language governing permissions and
   17    * limitations under the License.
   18    */
   19   
   20   import java.io.IOException;
   21   import java.util.Set;
   22   import java.util.ArrayList;
   23   
   24   import org.apache.lucene.index.Term;
   25   import org.apache.lucene.index.TermPositions;
   26   import org.apache.lucene.index.IndexReader;
   27   import org.apache.lucene.search.Explanation.IDFExplanation;
   28   import org.apache.lucene.util.ToStringUtils;
   29   
   30   /** A Query that matches documents containing a particular sequence of terms.
   31    * A PhraseQuery is built by QueryParser for input like <code>"new york"</code>.
   32    * 
   33    * <p>This query may be combined with other terms or queries with a {@link BooleanQuery}.
   34    */
   35   public class PhraseQuery extends Query {
   36     private String field;
   37     private ArrayList<Term> terms = new ArrayList<Term>(4);
   38     private ArrayList<Integer> positions = new ArrayList<Integer>(4);
   39     private int maxPosition = 0;
   40     private int slop = 0;
   41   
   42     /** Constructs an empty phrase query. */
   43     public PhraseQuery() {}
   44   
   45     /** Sets the number of other words permitted between words in query phrase.
   46       If zero, then this is an exact phrase search.  For larger values this works
   47       like a <code>WITHIN</code> or <code>NEAR</code> operator.
   48   
   49       <p>The slop is in fact an edit-distance, where the units correspond to
   50       moves of terms in the query phrase out of position.  For example, to switch
   51       the order of two words requires two moves (the first move places the words
   52       atop one another), so to permit re-orderings of phrases, the slop must be
   53       at least two.
   54   
   55       <p>More exact matches are scored higher than sloppier matches, thus search
   56       results are sorted by exactness.
   57   
   58       <p>The slop is zero by default, requiring exact matches.*/
   59     public void setSlop(int s) { slop = s; }
   60     /** Returns the slop.  See setSlop(). */
   61     public int getSlop() { return slop; }
   62   
   63     /**
   64      * Adds a term to the end of the query phrase.
   65      * The relative position of the term is the one immediately after the last term added.
   66      */
   67     public void add(Term term) {
   68       int position = 0;
   69       if(positions.size() > 0)
   70           position = positions.get(positions.size()-1).intValue() + 1;
   71   
   72       add(term, position);
   73     }
   74   
   75     /**
   76      * Adds a term to the end of the query phrase.
   77      * The relative position of the term within the phrase is specified explicitly.
   78      * This allows e.g. phrases with more than one term at the same position
   79      * or phrases with gaps (e.g. in connection with stopwords).
   80      * 
   81      * @param term
   82      * @param position
   83      */
   84     public void add(Term term, int position) {
   85         if (terms.size() == 0)
   86             field = term.field();
   87         else if (term.field() != field)
   88             throw new IllegalArgumentException("All phrase terms must be in the same field: " + term);
   89   
   90         terms.add(term);
   91         positions.add(Integer.valueOf(position));
   92         if (position > maxPosition) maxPosition = position;
   93     }
   94   
   95     /** Returns the set of terms in this phrase. */
   96     public Term[] getTerms() {
   97       return terms.toArray(new Term[0]);
   98     }
   99   
  100     /**
  101      * Returns the relative positions of terms in this phrase.
  102      */
  103     public int[] getPositions() {
  104         int[] result = new int[positions.size()];
  105         for(int i = 0; i < positions.size(); i++)
  106             result[i] = positions.get(i).intValue();
  107         return result;
  108     }
  109   
  110     private class PhraseWeight extends Weight {
  111       private Similarity similarity;
  112       private float value;
  113       private float idf;
  114       private float queryNorm;
  115       private float queryWeight;
  116       private IDFExplanation idfExp;
  117   
  118       public PhraseWeight(Searcher searcher)
  119         throws IOException {
  120         this.similarity = getSimilarity(searcher);
  121   
  122         idfExp = similarity.idfExplain(terms, searcher);
  123         idf = idfExp.getIdf();
  124       }
  125   
  126       @Override
  127       public String toString() { return "weight(" + PhraseQuery.this + ")"; }
  128   
  129       @Override
  130       public Query getQuery() { return PhraseQuery.this; }
  131   
  132       @Override
  133       public float getValue() { return value; }
  134   
  135       @Override
  136       public float sumOfSquaredWeights() {
  137         queryWeight = idf * getBoost();             // compute query weight
  138         return queryWeight * queryWeight;           // square it
  139       }
  140   
  141       @Override
  142       public void normalize(float queryNorm) {
  143         this.queryNorm = queryNorm;
  144         queryWeight *= queryNorm;                   // normalize query weight
  145         value = queryWeight * idf;                  // idf for document 
  146       }
  147   
  148       @Override
  149       public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException {
  150         if (terms.size() == 0)			  // optimize zero-term case
  151           return null;
  152   
  153         TermPositions[] tps = new TermPositions[terms.size()];
  154         for (int i = 0; i < terms.size(); i++) {
  155           TermPositions p = reader.termPositions(terms.get(i));
  156           if (p == null)
  157             return null;
  158           tps[i] = p;
  159         }
  160   
  161         if (slop == 0)				  // optimize exact case
  162           return new ExactPhraseScorer(this, tps, getPositions(), similarity,
  163                                        reader.norms(field));
  164         else
  165           return
  166             new SloppyPhraseScorer(this, tps, getPositions(), similarity, slop,
  167                                    reader.norms(field));
  168   
  169       }
  170   
  171       @Override
  172       public Explanation explain(IndexReader reader, int doc)
  173         throws IOException {
  174   
  175         Explanation result = new Explanation();
  176         result.setDescription("weight("+getQuery()+" in "+doc+"), product of:");
  177   
  178         StringBuilder docFreqs = new StringBuilder();
  179         StringBuilder query = new StringBuilder();
  180         query.append('\"');
  181         docFreqs.append(idfExp.explain());
  182         for (int i = 0; i < terms.size(); i++) {
  183           if (i != 0) {
  184             query.append(" ");
  185           }
  186   
  187           Term term = terms.get(i);
  188   
  189           query.append(term.text());
  190         }
  191         query.append('\"');
  192   
  193         Explanation idfExpl =
  194           new Explanation(idf, "idf(" + field + ":" + docFreqs + ")");
  195   
  196         // explain query weight
  197         Explanation queryExpl = new Explanation();
  198         queryExpl.setDescription("queryWeight(" + getQuery() + "), product of:");
  199   
  200         Explanation boostExpl = new Explanation(getBoost(), "boost");
  201         if (getBoost() != 1.0f)
  202           queryExpl.addDetail(boostExpl);
  203         queryExpl.addDetail(idfExpl);
  204   
  205         Explanation queryNormExpl = new Explanation(queryNorm,"queryNorm");
  206         queryExpl.addDetail(queryNormExpl);
  207   
  208         queryExpl.setValue(boostExpl.getValue() *
  209                            idfExpl.getValue() *
  210                            queryNormExpl.getValue());
  211   
  212         result.addDetail(queryExpl);
  213   
  214         // explain field weight
  215         Explanation fieldExpl = new Explanation();
  216         fieldExpl.setDescription("fieldWeight("+field+":"+query+" in "+doc+
  217                                  "), product of:");
  218   
  219         PhraseScorer scorer = (PhraseScorer) scorer(reader, true, false);
  220         if (scorer == null) {
  221           return new Explanation(0.0f, "no matching docs");
  222         }
  223         Explanation tfExplanation = new Explanation();
  224         int d = scorer.advance(doc);
  225         float phraseFreq = (d == doc) ? scorer.currentFreq() : 0.0f;
  226         tfExplanation.setValue(similarity.tf(phraseFreq));
  227         tfExplanation.setDescription("tf(phraseFreq=" + phraseFreq + ")");
  228         
  229         fieldExpl.addDetail(tfExplanation);
  230         fieldExpl.addDetail(idfExpl);
  231   
  232         Explanation fieldNormExpl = new Explanation();
  233         byte[] fieldNorms = reader.norms(field);
  234         float fieldNorm =
  235           fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 1.0f;
  236         fieldNormExpl.setValue(fieldNorm);
  237         fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
  238         fieldExpl.addDetail(fieldNormExpl);
  239   
  240         fieldExpl.setValue(tfExplanation.getValue() *
  241                            idfExpl.getValue() *
  242                            fieldNormExpl.getValue());
  243   
  244         result.addDetail(fieldExpl);
  245   
  246         // combine them
  247         result.setValue(queryExpl.getValue() * fieldExpl.getValue());
  248   
  249         if (queryExpl.getValue() == 1.0f)
  250           return fieldExpl;
  251   
  252         return result;
  253       }
  254     }
  255   
  256     @Override
  257     public Weight createWeight(Searcher searcher) throws IOException {
  258       if (terms.size() == 1) {			  // optimize one-term case
  259         Term term = terms.get(0);
  260         Query termQuery = new TermQuery(term);
  261         termQuery.setBoost(getBoost());
  262         return termQuery.createWeight(searcher);
  263       }
  264       return new PhraseWeight(searcher);
  265     }
  266   
  267     /**
  268      * @see org.apache.lucene.search.Query#extractTerms(Set)
  269      */
  270     @Override
  271     public void extractTerms(Set<Term> queryTerms) {
  272       queryTerms.addAll(terms);
  273     }
  274   
  275     /** Prints a user-readable version of this query. */
  276     @Override
  277     public String toString(String f) {
  278       StringBuilder buffer = new StringBuilder();
  279       if (field != null && !field.equals(f)) {
  280         buffer.append(field);
  281         buffer.append(":");
  282       }
  283   
  284       buffer.append("\"");
  285       String[] pieces = new String[maxPosition + 1];
  286       for (int i = 0; i < terms.size(); i++) {
  287         int pos = positions.get(i).intValue();
  288         String s = pieces[pos];
  289         if (s == null) {
  290           s = (terms.get(i)).text();
  291         } else {
  292           s = s + "|" + (terms.get(i)).text();
  293         }
  294         pieces[pos] = s;
  295       }
  296       for (int i = 0; i < pieces.length; i++) {
  297         if (i > 0) {
  298           buffer.append(' ');
  299         }
  300         String s = pieces[i];
  301         if (s == null) {
  302           buffer.append('?');
  303         } else {
  304           buffer.append(s);
  305         }
  306       }
  307       buffer.append("\"");
  308   
  309       if (slop != 0) {
  310         buffer.append("~");
  311         buffer.append(slop);
  312       }
  313   
  314       buffer.append(ToStringUtils.boost(getBoost()));
  315   
  316       return buffer.toString();
  317     }
  318   
  319     /** Returns true iff <code>o</code> is equal to this. */
  320     @Override
  321     public boolean equals(Object o) {
  322       if (!(o instanceof PhraseQuery))
  323         return false;
  324       PhraseQuery other = (PhraseQuery)o;
  325       return (this.getBoost() == other.getBoost())
  326         && (this.slop == other.slop)
  327         &&  this.terms.equals(other.terms)
  328         && this.positions.equals(other.positions);
  329     }
  330   
  331     /** Returns a hash code value for this object.*/
  332     @Override
  333     public int hashCode() {
  334       return Float.floatToIntBits(getBoost())
  335         ^ slop
  336         ^ terms.hashCode()
  337         ^ positions.hashCode();
  338     }
  339   
  340   }

Home » lucene-3.0.1-src » org.apache » lucene » search » [javadoc | source]