package org.apache.lucene.analysis.nl;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.LineNumberReader;
import java.util.HashMap;

/**
 * Loads a text file and adds every line as an entry to a HashMap. Every line
 * should contain only one word. If the file is not found, or any error occurs
 * while reading it, an empty table is returned.
 *
 * @deprecated use {@link org.apache.lucene.analysis.WordlistLoader} instead
 */
public class WordlistLoader {
  /**
   * @param path     Path to the wordlist
   * @param wordfile Name of the wordlist
   * @deprecated use {@link org.apache.lucene.analysis.WordlistLoader#getWordSet(File)} instead
   */
  public static HashMap getWordtable(String path, String wordfile) {
    if (path == null || wordfile == null) {
      return new HashMap();
    }
    return getWordtable(new File(path, wordfile));
  }

  /**
   * @param wordfile Complete path to the wordlist
   * @deprecated use {@link org.apache.lucene.analysis.WordlistLoader#getWordSet(File)} instead
   */
  public static HashMap getWordtable(String wordfile) {
    if (wordfile == null) {
      return new HashMap();
    }
    return getWordtable(new File(wordfile));
  }

  /**
   * Reads a stem dictionary. Each line contains:
   * word \t stem
   * (i.e. two tab-separated entries per line)
   *
   * @return Stem dictionary that overrules the stemming algorithm
   * @deprecated use {@link org.apache.lucene.analysis.WordlistLoader#getStemDict(File)} instead
   */
  public static HashMap getStemDict(File wordstemfile) {
    if (wordstemfile == null) {
      return new HashMap();
    }
    HashMap result = new HashMap();
    try {
      LineNumberReader lnr = new LineNumberReader(new FileReader(wordstemfile));
      String line;
      String[] wordstem;
      while ((line = lnr.readLine()) != null) {
        wordstem = line.split("\t", 2);
        result.put(wordstem[0], wordstem[1]);
      }
    } catch (IOException e) {
      // On error, return the entries read so far
    }
    return result;
  }
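  // Illustrative sketch of the stem-dictionary format consumed by getStemDict,
  // using hypothetical file contents: a file whose lines are
  //
  //   fietsen<TAB>fiets
  //   huizen<TAB>huis
  //
  // produces a map {"fietsen" -> "fiets", "huizen" -> "huis"} that a stemmer can
  // consult to overrule the algorithmic stemming for exactly those words.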

  /**
   * @param wordfile File containing the wordlist
   * @deprecated use {@link org.apache.lucene.analysis.WordlistLoader#getWordSet(File)} instead
   */
  public static HashMap getWordtable(File wordfile) {
    if (wordfile == null) {
      return new HashMap();
    }
    HashMap result = null;
    try {
      LineNumberReader lnr = new LineNumberReader(new FileReader(wordfile));
      String word = null;
      String[] stopwords = new String[100];
      int wordcount = 0;
      // Read every line into a growable array of words
      while ((word = lnr.readLine()) != null) {
        wordcount++;
        if (wordcount == stopwords.length) {
          String[] tmp = new String[stopwords.length + 50];
          System.arraycopy(stopwords, 0, tmp, 0, wordcount);
          stopwords = tmp;
        }
        stopwords[wordcount - 1] = word;
      }
      result = makeWordTable(stopwords, wordcount);
    } catch (IOException e) {
      // On error, use an empty table
      result = new HashMap();
    }
    return result;
  }

  /**
   * Builds the wordlist table.
   *
   * @param words  Words that were read
   * @param length Number of words that were read into <tt>words</tt>
   */
  private static HashMap makeWordTable(String[] words, int length) {
    HashMap table = new HashMap(length);
    for (int i = 0; i < length; i++) {
      table.put(words[i], words[i]);
    }
    return table;
  }
}
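Every method in this class is deprecated in favor of the shared loader in org.apache.lucene.analysis. The following is a minimal migration sketch, assuming the 3.0.1 signatures referenced by the @deprecated tags above: getWordSet(File) returning the words as a HashSet and getStemDict(File) returning a word-to-stem HashMap, both declaring IOException instead of silently returning an empty collection. The wrapper class and file names are hypothetical.

import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;

import org.apache.lucene.analysis.WordlistLoader;

public class WordlistLoaderMigration {
  public static void main(String[] args) throws IOException {
    // Old, deprecated style: one word per line is loaded into a HashMap whose
    // keys and values are both the word itself; errors yield an empty table.
    HashMap oldTable =
        org.apache.lucene.analysis.nl.WordlistLoader.getWordtable("conf", "stopwords_nl.txt");

    // Replacement for getWordtable: the shared loader returns the words as a set
    // and propagates I/O problems to the caller.
    HashSet<String> stopWords = WordlistLoader.getWordSet(new File("conf", "stopwords_nl.txt"));

    // Replacement for getStemDict: same tab-separated "word \t stem" file format.
    HashMap<String, String> stemOverrides =
        WordlistLoader.getStemDict(new File("conf", "stemdict_nl.txt"));

    System.out.println(oldTable.size() + " stop words (old), "
        + stopWords.size() + " stop words (new), "
        + stemOverrides.size() + " stem overrides");
  }
}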
