Save This Page
Home » lucene-3.0.1-src » org.apache » lucene » benchmark » quality » [javadoc | source]
    1   /**
    2    * Licensed to the Apache Software Foundation (ASF) under one or more
    3    * contributor license agreements.  See the NOTICE file distributed with
    4    * this work for additional information regarding copyright ownership.
    5    * The ASF licenses this file to You under the Apache License, Version 2.0
    6    * (the "License"); you may not use this file except in compliance with
    7    * the License.  You may obtain a copy of the License at
    8    *
    9    *     http://www.apache.org/licenses/LICENSE-2.0
   10    *
   11    * Unless required by applicable law or agreed to in writing, software
   12    * distributed under the License is distributed on an "AS IS" BASIS,
   13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   14    * See the License for the specific language governing permissions and
   15    * limitations under the License.
   16    */
   17   package org.apache.lucene.benchmark.quality;
   18   
   19   import java.io.IOException;
   20   import java.io.PrintWriter;
   21   
   22   import org.apache.lucene.benchmark.quality.utils.DocNameExtractor;
   23   import org.apache.lucene.benchmark.quality.utils.SubmissionReport;
   24   import org.apache.lucene.search.Query;
   25   import org.apache.lucene.search.ScoreDoc;
   26   import org.apache.lucene.search.Searcher;
   27   import org.apache.lucene.search.TopDocs;
   28   
   29   /**
   30    * Main entry point for running a quality benchmark.
   31    * <p>
   32    * There are two main configurations for running a quality benchmark: <ul>
   33    * <li>Against existing judgements.</li>
   34    * <li>For submission (e.g. for a contest).</li>
   35    * </ul>
   36    * The first configuration requires a non null
   37    * {@link org.apache.lucene.benchmark.quality.Judge Judge}. 
   38    * The second configuration requires a non null 
   39    * {@link org.apache.lucene.benchmark.quality.utils.SubmissionReport SubmissionLogger}.
   40    */
   41   public class QualityBenchmark {
   42   
   43     /** Quality Queries that this quality benchmark would execute. */
   44     protected QualityQuery qualityQueries[];
   45     
   46     /** Parser for turning QualityQueries into Lucene Queries. */
   47     protected QualityQueryParser qqParser;
   48     
   49     /** Index to be searched. */
   50     protected Searcher searcher;
   51   
   52     /** index field to extract doc name for each search result; used for judging the results. */  
   53     protected String docNameField;
   54     
   55     /** maximal number of queries that this quality benchmark runs. Default: maxint. Useful for debugging. */
   56     private int maxQueries = Integer.MAX_VALUE;
   57     
   58     /** maximal number of results to collect for each query. Default: 1000. */
   59     private int maxResults = 1000;
   60   
   61     /**
   62      * Create a QualityBenchmark.
   63      * @param qqs quality queries to run.
   64      * @param qqParser parser for turning QualityQueries into Lucene Queries. 
   65      * @param searcher index to be searched.
   66      * @param docNameField name of field containing the document name.
   67      *        This allows to extract the doc name for search results,
   68      *        and is important for judging the results.  
   69      */
   70     public QualityBenchmark(QualityQuery qqs[], QualityQueryParser qqParser, 
   71         Searcher searcher, String docNameField) {
   72       this.qualityQueries = qqs;
   73       this.qqParser = qqParser;
   74       this.searcher = searcher;
   75       this.docNameField = docNameField;
   76     }
   77   
   78     /**
   79      * Run the quality benchmark.
   80      * @param judge the judge that can tell if a certain result doc is relevant for a certain quality query. 
   81      *        If null, no judgements would be made. Usually null for a submission run. 
   82      * @param submitRep submission report is created if non null.
   83      * @param qualityLog If not null, quality run data would be printed for each query.
   84      * @return QualityStats of each quality query that was executed.
   85      * @throws Exception if quality benchmark failed to run.
   86      */
   87     public  QualityStats [] execute(Judge judge, SubmissionReport submitRep, 
   88                                     PrintWriter qualityLog) throws Exception {
   89       int nQueries = Math.min(maxQueries, qualityQueries.length);
   90       QualityStats stats[] = new QualityStats[nQueries]; 
   91       for (int i=0; i<nQueries; i++) {
   92         QualityQuery qq = qualityQueries[i];
   93         // generate query
   94         Query q = qqParser.parse(qq);
   95         // search with this query 
   96         long t1 = System.currentTimeMillis();
   97         TopDocs td = searcher.search(q,null,maxResults);
   98         long searchTime = System.currentTimeMillis()-t1;
   99         //most likely we either submit or judge, but check both 
  100         if (judge!=null) {
  101           stats[i] = analyzeQueryResults(qq, q, td, judge, qualityLog, searchTime);
  102         }
  103         if (submitRep!=null) {
  104           submitRep.report(qq,td,docNameField,searcher);
  105         }
  106       } 
  107       if (submitRep!=null) {
  108         submitRep.flush();
  109       }
  110       return stats;
  111     }
  112     
  113     /* Analyze/judge results for a single quality query; optionally log them. */  
  114     private QualityStats analyzeQueryResults(QualityQuery qq, Query q, TopDocs td, Judge judge, PrintWriter logger, long searchTime) throws IOException {
  115       QualityStats stts = new QualityStats(judge.maxRecall(qq),searchTime);
  116       ScoreDoc sd[] = td.scoreDocs;
  117       long t1 = System.currentTimeMillis(); // extraction of first doc name we measure also construction of doc name extractor, just in case.
  118       DocNameExtractor xt = new DocNameExtractor(docNameField);
  119       for (int i=0; i<sd.length; i++) {
  120         String docName = xt.docName(searcher,sd[i].doc);
  121         long docNameExtractTime = System.currentTimeMillis() - t1;
  122         t1 = System.currentTimeMillis();
  123         boolean isRelevant = judge.isRelevant(docName,qq);
  124         stts.addResult(i+1,isRelevant, docNameExtractTime);
  125       }
  126       if (logger!=null) {
  127         logger.println(qq.getQueryID()+"  -  "+q);
  128         stts.log(qq.getQueryID()+" Stats:",1,logger,"  ");
  129       }
  130       return stts;
  131     }
  132   
  133     /**
  134      * @return the maximum number of quality queries to run. Useful at debugging.
  135      */
  136     public int getMaxQueries() {
  137       return maxQueries;
  138     }
  139   
  140     /**
  141      * Set the maximum number of quality queries to run. Useful at debugging.
  142      */
  143     public void setMaxQueries(int maxQueries) {
  144       this.maxQueries = maxQueries;
  145     }
  146   
  147     /**
  148      * @return the maximum number of results to collect for each quality query.
  149      */
  150     public int getMaxResults() {
  151       return maxResults;
  152     }
  153   
  154     /**
  155      * set the maximum number of results to collect for each quality query.
  156      */
  157     public void setMaxResults(int maxResults) {
  158       this.maxResults = maxResults;
  159     }
  160   
  161   }

Save This Page
Home » lucene-3.0.1-src » org.apache » lucene » benchmark » quality » [javadoc | source]