Home » lucene-3.0.1-src » org.apache.lucene.analysis.cn.smart.hhmm » [javadoc | source]

    1   /**
    2    * Licensed to the Apache Software Foundation (ASF) under one or more
    3    * contributor license agreements.  See the NOTICE file distributed with
    4    * this work for additional information regarding copyright ownership.
    5    * The ASF licenses this file to You under the Apache License, Version 2.0
    6    * (the "License"); you may not use this file except in compliance with
    7    * the License.  You may obtain a copy of the License at
    8    *
    9    *     http://www.apache.org/licenses/LICENSE-2.0
   10    *
   11    * Unless required by applicable law or agreed to in writing, software
   12    * distributed under the License is distributed on an "AS IS" BASIS,
   13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   14    * See the License for the specific language governing permissions and
   15    * limitations under the License.
   16    */
   17   
   18   package org.apache.lucene.analysis.cn.smart.hhmm;
   19   
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
   24   
   25   /**
   26    * Graph representing possible tokens at each start offset in the sentence.
   27    * <p>
   28    * For each start offset, a list of possible tokens is stored.
   29    * </p>
   30    * <p><font color="#FF0000">
   31    * WARNING: The status of the analyzers/smartcn <b>analysis.cn.smart</b> package is experimental. 
   32    * The APIs and file formats introduced here might change in the future and will not be 
   33    * supported anymore in such a case.</font>
   34    * </p>
   35    */
   36   class SegGraph {
   37   
   38     /**
   39      * Map of start offsets to ArrayList of tokens at that position
   40      */
   41     private Map<Integer,ArrayList<SegToken>> tokenListTable = new HashMap<Integer,ArrayList<SegToken>>();
   42   
   43     private int maxStart = -1;
   44   
   45     /**
   46      * Returns true if a mapping for the specified start offset exists
   47      * 
   48      * @param s startOffset
   49      * @return true if there are tokens for the startOffset
   50      */
   51     public boolean isStartExist(int s) {
   52       return tokenListTable.get(s) != null;
   53     }
   54   
   55     /**
   56      * Get the list of tokens at the specified start offset
   57      * 
   58      * @param s startOffset
   59      * @return List of tokens at the specified start offset.
   60      */
   61     public List<SegToken> getStartList(int s) {
   62       return tokenListTable.get(s);
   63     }
   64   
   65     /**
   66      * Get the highest start offset in the map
   67      * 
   68      * @return maximum start offset, or -1 if the map is empty.
   69      */
   70     public int getMaxStart() {
   71       return maxStart;
   72     }
   73   
   74     /**
   75      * Set the {@link SegToken#index} for each token, based upon its order by startOffset. 
   76      * @return a {@link List} of these ordered tokens.
   77      */
   78     public List<SegToken> makeIndex() {
   79       List<SegToken> result = new ArrayList<SegToken>();
   80       int s = -1, count = 0, size = tokenListTable.size();
   81       List<SegToken> tokenList;
   82       short index = 0;
   83       while (count < size) {
   84         if (isStartExist(s)) {
   85           tokenList = tokenListTable.get(s);
   86           for (SegToken st : tokenList) {
   87             st.index = index;
   88             result.add(st);
   89             index++;
   90           }
   91           count++;
   92         }
   93         s++;
   94       }
   95       return result;
   96     }
   97   
   98     /**
   99      * Add a {@link SegToken} to the mapping, creating a new mapping at the token's startOffset if one does not exist. 
  100      * @param token {@link SegToken}
  101      */
  102     public void addToken(SegToken token) {
  103       int s = token.startOffset;
  104       if (!isStartExist(s)) {
  105         ArrayList<SegToken> newlist = new ArrayList<SegToken>();
  106         newlist.add(token);
  107         tokenListTable.put(s, newlist);
  108       } else {
  109         List<SegToken> tokenList = tokenListTable.get(s);
  110         tokenList.add(token);
  111       }
  112       if (s > maxStart)
  113         maxStart = s;
  114     }
  115   
  116     /**
  117      * Return a {@link List} of all tokens in the map, ordered by startOffset.
  118      * 
  119      * @return {@link List} of all tokens in the map.
  120      */
  121     public List<SegToken> toTokenList() {
  122       List<SegToken> result = new ArrayList<SegToken>();
  123       int s = -1, count = 0, size = tokenListTable.size();
  124       List<SegToken> tokenList;
  125   
  126       while (count < size) {
  127         if (isStartExist(s)) {
  128           tokenList = tokenListTable.get(s);
  129           for (SegToken st : tokenList) {
  130             result.add(st);
  131           }
  132           count++;
  133         }
  134         s++;
  135       }
  136       return result;
  137     }
  138   
  139     @Override
  140     public String toString() {
  141       List<SegToken> tokenList = this.toTokenList();
  142       StringBuilder sb = new StringBuilder();
  143       for (SegToken t : tokenList) {
  144         sb.append(t + "\n");
  145       }
  146       return sb.toString();
  147     }
  148   }

Home » lucene-3.0.1-src » org.apache.lucene.analysis.cn.smart.hhmm » [javadoc | source]