Save This Page
Home » lucene-3.0.1-src » org.apache » lucene » analysis » [javadoc | source]
    1   /**
    2    * Licensed to the Apache Software Foundation (ASF) under one or more
    3    * contributor license agreements.  See the NOTICE file distributed with
    4    * this work for additional information regarding copyright ownership.
    5    * The ASF licenses this file to You under the Apache License, Version 2.0
    6    * (the "License"); you may not use this file except in compliance with
    7    * the License.  You may obtain a copy of the License at
    8    *
    9    *     http://www.apache.org/licenses/LICENSE-2.0
   10    *
   11    * Unless required by applicable law or agreed to in writing, software
   12    * distributed under the License is distributed on an "AS IS" BASIS,
   13    * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   14    * See the License for the specific language governing permissions and
   15    * limitations under the License.
   16    */
   17   
   18   package org.apache.lucene.analysis;
   19   
   20   import java.io.IOException;
   21   import java.io.Reader;
   22   import java.util.LinkedList;
   23   
   24   /**
   25    * Simplistic {@link CharFilter} that applies the mappings
   26    * contained in a {@link NormalizeCharMap} to the character
   27    * stream, and correcting the resulting changes to the
   28    * offsets.
   29    */
   30   public class MappingCharFilter extends BaseCharFilter {
   31   
   32     private final NormalizeCharMap normMap;
   33     private LinkedList<Character> buffer;
   34     private String replacement;
   35     private int charPointer;
   36     private int nextCharCounter;
   37   
   38     /** Default constructor that takes a {@link CharStream}. */
   39     public MappingCharFilter(NormalizeCharMap normMap, CharStream in) {
   40       super(in);
   41       this.normMap = normMap;
   42     }
   43   
   44     /** Easy-use constructor that takes a {@link Reader}. */
   45     public MappingCharFilter(NormalizeCharMap normMap, Reader in) {
   46       super(CharReader.get(in));
   47       this.normMap = normMap;
   48     }
   49   
   50     @Override
   51     public int read() throws IOException {
   52       while(true) {
   53         if (replacement != null && charPointer < replacement.length()) {
   54           return replacement.charAt(charPointer++);
   55         }
   56   
   57         int firstChar = nextChar();
   58         if (firstChar == -1) return -1;
   59         NormalizeCharMap nm = normMap.submap != null ?
   60           normMap.submap.get(Character.valueOf((char) firstChar)) : null;
   61         if (nm == null) return firstChar;
   62         NormalizeCharMap result = match(nm);
   63         if (result == null) return firstChar;
   64         replacement = result.normStr;
   65         charPointer = 0;
   66         if (result.diff != 0) {
   67           int prevCumulativeDiff = getLastCumulativeDiff();
   68           if (result.diff < 0) {
   69             for(int i = 0; i < -result.diff ; i++)
   70               addOffCorrectMap(nextCharCounter + i - prevCumulativeDiff, prevCumulativeDiff - 1 - i);
   71           } else {
   72             addOffCorrectMap(nextCharCounter - result.diff - prevCumulativeDiff, prevCumulativeDiff + result.diff);
   73           }
   74         }
   75       }
   76     }
   77   
   78     private int nextChar() throws IOException {
   79       nextCharCounter++;
   80       if (buffer != null && !buffer.isEmpty()) {
   81         return buffer.removeFirst().charValue();
   82       }
   83       return input.read();
   84     }
   85   
   86     private void pushChar(int c) {
   87       nextCharCounter--;
   88       if(buffer == null)
   89         buffer = new LinkedList<Character>();
   90       buffer.addFirst(Character.valueOf((char) c));
   91     }
   92   
   93     private void pushLastChar(int c) {
   94       if (buffer == null) {
   95         buffer = new LinkedList<Character>();
   96       }
   97       buffer.addLast(Character.valueOf((char) c));
   98     }
   99   
  100     private NormalizeCharMap match(NormalizeCharMap map) throws IOException {
  101       NormalizeCharMap result = null;
  102       if (map.submap != null) {
  103         int chr = nextChar();
  104         if (chr != -1) {
  105           NormalizeCharMap subMap = map.submap.get(Character.valueOf((char) chr));
  106           if (subMap != null) {
  107             result = match(subMap);
  108           }
  109           if (result == null) {
  110             pushChar(chr);
  111           }
  112         }
  113       }
  114       if (result == null && map.normStr != null) {
  115         result = map;
  116       }
  117       return result;
  118     }
  119   
  120     @Override
  121     public int read(char[] cbuf, int off, int len) throws IOException {
  122       char[] tmp = new char[len];
  123       int l = input.read(tmp, 0, len);
  124       if (l != -1) {
  125         for(int i = 0; i < l; i++)
  126           pushLastChar(tmp[i]);
  127       }
  128       l = 0;
  129       for(int i = off; i < off + len; i++) {
  130         int c = read();
  131         if (c == -1) break;
  132         cbuf[i] = (char) c;
  133         l++;
  134       }
  135       return l == 0 ? -1 : l;
  136     }
  137   }

Save This Page
Home » lucene-3.0.1-src » org.apache » lucene » analysis » [javadoc | source]