package org.apache.lucene.search;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Set;
import java.util.ArrayList;

import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Explanation.IDFExplanation;
import org.apache.lucene.util.ToStringUtils;

/** A Query that matches documents containing a particular sequence of terms.
 * A PhraseQuery is built by QueryParser for input like <code>"new york"</code>.
 *
 * <p>This query may be combined with other terms or queries with a {@link BooleanQuery}.
 */
public class PhraseQuery extends Query {
  /** Field shared by every term of the phrase; set when the first term is added. */
  private String field;
  /** Terms of the phrase, in the order they were added. */
  private ArrayList<Term> terms = new ArrayList<Term>(4);
  /** Relative position of each term; parallel to {@link #terms}. */
  private ArrayList<Integer> positions = new ArrayList<Integer>(4);
  /** Largest position passed to {@link #add(Term, int)} so far (never below zero). */
  private int maxPosition = 0;
  private int slop = 0;

  /** Constructs an empty phrase query. */
  public PhraseQuery() {}

  /** Sets the number of other words permitted between words in query phrase.
    If zero, then this is an exact phrase search.  For larger values this works
    like a <code>WITHIN</code> or <code>NEAR</code> operator.

    <p>The slop is in fact an edit-distance, where the units correspond to
    moves of terms in the query phrase out of position.  For example, to switch
    the order of two words requires two moves (the first move places the words
    atop one another), so to permit re-orderings of phrases, the slop must be
    at least two.

    <p>More exact matches are scored higher than sloppier matches, thus search
    results are sorted by exactness.

    <p>The slop is zero by default, requiring exact matches.*/
  public void setSlop(int s) { slop = s; }

  /** Returns the slop.  See setSlop(). */
  public int getSlop() { return slop; }

  /**
   * Adds a term to the end of the query phrase.
   * The relative position of the term is the one immediately after the last term added.
   *
   * @param term the term to append
   */
  public void add(Term term) {
    int position = 0;
    if (positions.size() > 0) {
      position = positions.get(positions.size() - 1).intValue() + 1;
    }

    add(term, position);
  }

  /**
   * Adds a term to the end of the query phrase.
   * The relative position of the term within the phrase is specified explicitly.
   * This allows e.g. phrases with more than one term at the same position
   * or phrases with gaps (e.g. in connection with stopwords).
   *
   * @param term the term to append; its field must equal the field of the
   *        terms already added
   * @param position the relative position of the term within the phrase
   * @throws IllegalArgumentException if the term's field differs from the
   *         field of the terms already in the phrase
   */
  public void add(Term term, int position) {
    String termField = term.field();
    if (terms.size() == 0) {
      field = termField;
    } else if (termField == null ? field != null : !termField.equals(field)) {
      // Compared by value rather than by reference so the check does not
      // depend on field names being interned.
      throw new IllegalArgumentException("All phrase terms must be in the same field: " + term);
    }

    terms.add(term);
    positions.add(Integer.valueOf(position));
    if (position > maxPosition) {
      maxPosition = position;
    }
  }

  /** Returns the terms of this phrase as a new array, in the order they were added. */
  public Term[] getTerms() {
    return terms.toArray(new Term[0]);
  }

  /**
   * Returns the relative positions of terms in this phrase, as a new array
   * parallel to {@link #getTerms()}.
   */
  public int[] getPositions() {
    int[] result = new int[positions.size()];
    for (int i = 0; i < positions.size(); i++) {
      result[i] = positions.get(i).intValue();
    }
    return result;
  }

  /**
   * Weight for the enclosing phrase query. Holds the idf obtained from the
   * similarity at construction time and the query weight derived from it.
   */
  private class PhraseWeight extends Weight {
    private Similarity similarity;
    private float value;
    private float idf;
    private float queryNorm;
    private float queryWeight;
    private IDFExplanation idfExp;

    /** Captures the searcher's similarity and the idf of the phrase terms. */
    public PhraseWeight(Searcher searcher)
      throws IOException {
      this.similarity = getSimilarity(searcher);

      idfExp = similarity.idfExplain(terms, searcher);
      idf = idfExp.getIdf();
    }

    @Override
    public String toString() { return "weight(" + PhraseQuery.this + ")"; }

    @Override
    public Query getQuery() { return PhraseQuery.this; }

    @Override
    public float getValue() { return value; }

    @Override
    public float sumOfSquaredWeights() {
      queryWeight = idf * getBoost();             // compute query weight
      return queryWeight * queryWeight;           // square it
    }

    @Override
    public void normalize(float queryNorm) {
      this.queryNorm = queryNorm;
      queryWeight *= queryNorm;                   // normalize query weight
      value = queryWeight * idf;                  // idf for document
    }

    /**
     * Builds a scorer over the positions of every phrase term.
     *
     * @return an exact scorer when the slop is zero, a sloppy scorer
     *         otherwise, or <code>null</code> when the phrase has no terms or
     *         the reader returns no positions for one of them
     */
    @Override
    public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException {
      if (terms.size() == 0) {                    // optimize zero-term case
        return null;
      }

      TermPositions[] tps = new TermPositions[terms.size()];
      for (int i = 0; i < terms.size(); i++) {
        TermPositions p = reader.termPositions(terms.get(i));
        if (p == null) {
          return null;
        }
        tps[i] = p;
      }

      if (slop == 0) {                            // optimize exact case
        return new ExactPhraseScorer(this, tps, getPositions(), similarity,
                                     reader.norms(field));
      }
      return new SloppyPhraseScorer(this, tps, getPositions(), similarity, slop,
                                    reader.norms(field));
    }

    /**
     * Explains the score of <code>doc</code> as the product of a query weight
     * (boost, idf, queryNorm) and a field weight (tf, idf, fieldNorm).
     * Only the field weight explanation is returned when the query weight is
     * exactly 1.
     */
    @Override
    public Explanation explain(IndexReader reader, int doc)
      throws IOException {

      Explanation result = new Explanation();
      result.setDescription("weight(" + getQuery() + " in " + doc + "), product of:");

      // Quoted, space-separated term texts, used in the field weight description.
      StringBuilder query = new StringBuilder();
      query.append('\"');
      for (int i = 0; i < terms.size(); i++) {
        if (i != 0) {
          query.append(" ");
        }
        query.append(terms.get(i).text());
      }
      query.append('\"');

      String docFreqs = idfExp.explain();
      Explanation idfExpl =
        new Explanation(idf, "idf(" + field + ":" + docFreqs + ")");

      // explain query weight
      Explanation queryExpl = new Explanation();
      queryExpl.setDescription("queryWeight(" + getQuery() + "), product of:");

      Explanation boostExpl = new Explanation(getBoost(), "boost");
      if (getBoost() != 1.0f) {
        queryExpl.addDetail(boostExpl);
      }
      queryExpl.addDetail(idfExpl);

      Explanation queryNormExpl = new Explanation(queryNorm, "queryNorm");
      queryExpl.addDetail(queryNormExpl);

      queryExpl.setValue(boostExpl.getValue() *
                         idfExpl.getValue() *
                         queryNormExpl.getValue());

      result.addDetail(queryExpl);

      // explain field weight
      Explanation fieldExpl = new Explanation();
      fieldExpl.setDescription("fieldWeight(" + field + ":" + query + " in " + doc +
                               "), product of:");

      PhraseScorer scorer = (PhraseScorer) scorer(reader, true, false);
      if (scorer == null) {
        return new Explanation(0.0f, "no matching docs");
      }
      Explanation tfExplanation = new Explanation();
      int d = scorer.advance(doc);
      // A phrase frequency is only reported when the scorer lands exactly on doc.
      float phraseFreq = (d == doc) ? scorer.currentFreq() : 0.0f;
      tfExplanation.setValue(similarity.tf(phraseFreq));
      tfExplanation.setDescription("tf(phraseFreq=" + phraseFreq + ")");

      fieldExpl.addDetail(tfExplanation);
      fieldExpl.addDetail(idfExpl);

      Explanation fieldNormExpl = new Explanation();
      byte[] fieldNorms = reader.norms(field);
      float fieldNorm =
        fieldNorms != null ? Similarity.decodeNorm(fieldNorms[doc]) : 1.0f;
      fieldNormExpl.setValue(fieldNorm);
      fieldNormExpl.setDescription("fieldNorm(field=" + field + ", doc=" + doc + ")");
      fieldExpl.addDetail(fieldNormExpl);

      fieldExpl.setValue(tfExplanation.getValue() *
                         idfExpl.getValue() *
                         fieldNormExpl.getValue());

      result.addDetail(fieldExpl);

      // combine them
      result.setValue(queryExpl.getValue() * fieldExpl.getValue());

      if (queryExpl.getValue() == 1.0f) {
        return fieldExpl;
      }

      return result;
    }
  }

  /**
   * Creates the weight for this query. A phrase with exactly one term is
   * delegated to a {@link TermQuery} carrying the same boost.
   */
  @Override
  public Weight createWeight(Searcher searcher) throws IOException {
    if (terms.size() == 1) {                      // optimize one-term case
      Term term = terms.get(0);
      Query termQuery = new TermQuery(term);
      termQuery.setBoost(getBoost());
      return termQuery.createWeight(searcher);
    }
    return new PhraseWeight(searcher);
  }

  /**
   * @see org.apache.lucene.search.Query#extractTerms(Set)
   */
  @Override
  public void extractTerms(Set<Term> queryTerms) {
    queryTerms.addAll(terms);
  }

  /**
   * Prints a user-readable version of this query. Terms sharing a position
   * are joined with <code>|</code>, positions without a term are printed as
   * <code>?</code>, and a non-zero slop is appended as <code>~slop</code>.
   */
  @Override
  public String toString(String f) {
    StringBuilder buffer = new StringBuilder();
    if (field != null && !field.equals(f)) {
      buffer.append(field);
      buffer.append(":");
    }

    buffer.append("\"");

    // add(Term, int) accepts any int, so a position may be negative. Shift
    // indices by the lowest position (capped at zero) so such a position
    // cannot index before the start of the array; output for non-negative
    // positions is unaffected because the shift is then zero.
    int base = 0;
    for (int i = 0; i < positions.size(); i++) {
      base = Math.min(base, positions.get(i).intValue());
    }

    String[] pieces = new String[maxPosition - base + 1];
    for (int i = 0; i < terms.size(); i++) {
      int slot = positions.get(i).intValue() - base;
      String text = terms.get(i).text();
      String existing = pieces[slot];
      pieces[slot] = (existing == null) ? text : existing + "|" + text;
    }
    for (int i = 0; i < pieces.length; i++) {
      if (i > 0) {
        buffer.append(' ');
      }
      String s = pieces[i];
      if (s == null) {
        buffer.append('?');
      } else {
        buffer.append(s);
      }
    }
    buffer.append("\"");

    if (slop != 0) {
      buffer.append("~");
      buffer.append(slop);
    }

    buffer.append(ToStringUtils.boost(getBoost()));

    return buffer.toString();
  }

  /**
   * Returns true iff <code>o</code> is equal to this. Boosts are compared by
   * bit pattern so the result stays consistent with {@link #hashCode()}.
   */
  @Override
  public boolean equals(Object o) {
    if (!(o instanceof PhraseQuery)) {
      return false;
    }
    PhraseQuery other = (PhraseQuery) o;
    return (Float.floatToIntBits(this.getBoost()) == Float.floatToIntBits(other.getBoost()))
      && (this.slop == other.slop)
      && this.terms.equals(other.terms)
      && this.positions.equals(other.positions);
  }

  /** Returns a hash code value for this object.*/
  @Override
  public int hashCode() {
    return Float.floatToIntBits(getBoost())
      ^ slop
      ^ terms.hashCode()
      ^ positions.hashCode();
  }

}