Roll back the wrong version of morphology and add an interface for morphology

git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@88 d817d54c-26ab-11de-abc9-2f7d1455ff7a
2009-11-17 14:03:59 +00:00
parent 16613c543b
commit 1273cf96ed
19 changed files with 263 additions and 1145 deletions
@@ -31,9 +31,9 @@ import java.util.*;
 public class DictonaryReader {
    private String fileName;
    private String fileEncoding = "windows-1251";
-    protected List<List<FlexiaModel>> wordsFlexias = new ArrayList<List<FlexiaModel>>();
-    protected List<List<String>> wordPrefixes = new ArrayList<List<String>>();
-    protected Set<String> ingnoredForm = new HashSet<String>();
+    private List<List<FlexiaModel>> wordsFlexias = new ArrayList<List<FlexiaModel>>();
+    private List<List<String>> wordPrefixes = new ArrayList<List<String>>();
+    private Set<String> ingnoredForm = new HashSet<String>();

    public DictonaryReader(String fileName, Set<String> ingnoredForm) {
        this.fileName = fileName;
@@ -57,7 +57,7 @@ public class DictonaryReader {
    }


-    protected void readWords(BufferedReader reader, WordProccessor wordProccessor) throws IOException {
+    private void readWords(BufferedReader reader, WordProccessor wordProccessor) throws IOException {
        String s = reader.readLine();
        int count = Integer.valueOf(s);
        for (int i = 0; i < count; i++) {
@@ -81,7 +81,7 @@ public class DictonaryReader {
    }


-    protected void sckipBlock(BufferedReader reader) throws IOException {
+    private void sckipBlock(BufferedReader reader) throws IOException {
        String s = reader.readLine();
        int count = Integer.valueOf(s);
        for (int i = 0; i < count; i++) {
@@ -90,7 +90,7 @@ public class DictonaryReader {
    }


-    protected void readPrefix(BufferedReader reader) throws IOException {
+    private void readPrefix(BufferedReader reader) throws IOException {
        String s = reader.readLine();
        int count = Integer.valueOf(s);
        for (int i = 0; i < count; i++) {
@@ -99,7 +99,7 @@ public class DictonaryReader {
        }
    }

-    protected  void readFlexias(BufferedReader reader) throws IOException {
+    private void readFlexias(BufferedReader reader) throws IOException {
        String s = reader.readLine();
        int count = Integer.valueOf(s);
        for (int i = 0; i < count; i++) {
@@ -112,7 +112,7 @@ public class DictonaryReader {
        }
    }

-    protected  void addFlexia(ArrayList<FlexiaModel> flexiaModelArrayList, String line) {
+    private void addFlexia(ArrayList<FlexiaModel> flexiaModelArrayList, String line) {
        String[] fl = line.split("\\*");
        // we inored all forms thats
        if (fl.length == 3) {
@@ -60,28 +60,6 @@ public class FlexiaModel {

    @Override
    public String toString() {
-        return prefix + " " + suffix + " " + code;
-    }
-
-    @Override
-    public boolean equals(Object o) {
-        if (this == o) return true;
-        if (o == null || getClass() != o.getClass()) return false;
-
-        FlexiaModel that = (FlexiaModel) o;
-
-        if (code != null ? !code.equals(that.code) : that.code != null) return false;
-        if (prefix != null ? !prefix.equals(that.prefix) : that.prefix != null) return false;
-        if (suffix != null ? !suffix.equals(that.suffix) : that.suffix != null) return false;
-
-        return true;
-    }
-
-    @Override
-    public int hashCode() {
-        int result = code != null ? code.hashCode() : 0;
-        result = 31 * result + (suffix != null ? suffix.hashCode() : 0);
-        result = 31 * result + (prefix != null ? prefix.hashCode() : 0);
-        return result;
+        return prefix + " " + suffix;
    }
 }
@@ -1,139 +0,0 @@
-/**
- * Copyright 2009 Alexander Kuznetsov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.morphology.dictionary;
-
-import org.apache.lucene.morphology.PrefixRule;
-
-import java.util.*;
-import java.io.*;
-
-
-public class PrefixesRulesBuilder extends DictonaryReader {
-    private GrammaReader grammaInfo;
-
-    private Map<FlexiaModel,Set<FlexiaModel>> rules = new HashMap<FlexiaModel,Set<FlexiaModel>>();
-
-    public PrefixesRulesBuilder(String fileName, String fileEncoding, Set<String> ingnoredForm) throws IOException {
-        super(fileName, fileEncoding, ingnoredForm);
-        grammaInfo = new GrammaReader("dictonary/Dicts/Morph/rgramtab.tab");
-    }
-
-    @Override
-    public void proccess(WordProccessor wordProccessor) throws IOException {
-        super.proccess(wordProccessor);
-        System.out.println(rules.size());
-        System.out.println(rules);
-    }
-
-    public List<PrefixRule> getPrefixRules(){
-        List<PrefixRule> prefixRules = new ArrayList<PrefixRule>();
-        for(FlexiaModel key:rules.keySet()){
-            PrefixRule prefixRule = new PrefixRule();
-            prefixRule.setPrefix(key.getPrefix());
-            prefixRule.setLastLetter(key.getSuffix().charAt(0));
-            HashSet<Short> map = new HashSet<Short>();
-            for(FlexiaModel fm:rules.get(key)){
-                int gi = grammaInfo.getGrammInversIndex().get(fm.getCode());
-                map.add((short) gi);
-            }
-            prefixRule.setForms(map);
-            prefixRules.add(prefixRule);
-        }
-        return prefixRules;
-    }
-
-    @Override
-    protected void readWords(BufferedReader reader, WordProccessor wordProccessor) throws IOException {
-        sckipBlock(reader);
-    }
-
-
-
-    @Override
-    protected void readPrefix(BufferedReader reader) throws IOException {
-        sckipBlock(reader);
-    }
-
-    @Override
-    protected void readFlexias(BufferedReader reader) throws IOException {
-        super.readFlexias(reader);
-        //todo research flesias
-        for(List<FlexiaModel> fmList:wordsFlexias){
-            research(fmList);
-        }
-    }
-
-    private void research(List<FlexiaModel> models) {
-        for(FlexiaModel fm:models){
-            if(fm.getPrefix().length() > 0){
-                testFlexia(models, fm);
-            }
-        }
-    }
-
-    private void testFlexia(List<FlexiaModel> models, FlexiaModel fm) {
-        for(FlexiaModel com:models){
-            if(com.getSuffix().equals(fm.getSuffix()) && com.getPrefix().length() == 0){
-                Set<FlexiaModel> models1 = rules.get(convertForKey(fm));
-                if(models1 == null){
-                    models1 = new HashSet<FlexiaModel>();
-                    rules.put(convertForKey(fm),models1);
-                }
-                models1.add(convert(com));
-            }
-        }
-    }
-
-    private FlexiaModel convert(FlexiaModel fm){
-        String suf = fm.getSuffix();
-        //if(suf.length() == 1) System.out.println(fm);
-        return new FlexiaModel(fm.getCode(),""+ suf.charAt(suf.length()-1),fm.getPrefix());
-    }
-
-    private FlexiaModel convertForKey(FlexiaModel fm){
-        String suf = fm.getSuffix();
-        //if(suf.length() == 1) System.out.println(fm);
-        return new FlexiaModel("pr",""+ suf.charAt(suf.length()-1),fm.getPrefix());
-    }
-
-    protected void addFlexia(ArrayList<FlexiaModel> flexiaModelArrayList, String line) {
-        String[] fl = line.split("\\*");
-        if (fl.length == 3) {
-            flexiaModelArrayList.add(new FlexiaModel(fl[1], fl[0].toLowerCase(), fl[2].toLowerCase()));
-        }
-        if (fl.length == 2) flexiaModelArrayList.add(new FlexiaModel(fl[1], fl[0].toLowerCase(), ""));
-    }
-
-    public void savePrefixes(String fileName) throws IOException {
-        OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(fileName), "UTF-8");
-        List<PrefixRule> prefixRuleList = getPrefixRules();
-        writer.write(prefixRuleList.size()+"\n");
-        for(PrefixRule pr: prefixRuleList){
-            writePrefixRule(writer, pr);
-        }
-        writer.close();
-    }
-
-    private void writePrefixRule(OutputStreamWriter writer, PrefixRule pr) throws IOException {
-        writer.write(pr.getPrefix()+"\n");
-        writer.write(pr.getLastLetter()+"\n");
-        HashSet<Short> formInfo = pr.getForms();
-        writer.write(formInfo.size()+"\n");
-        for(Short s:formInfo){
-            writer.write(s+"\n");
-        }
-    }
-}
@@ -19,7 +19,7 @@ package org.apache.lucene.morphology.dictionary;

 import org.apache.lucene.morphology.Heuristic;
 import org.apache.lucene.morphology.LetterDecoderEncoder;
-import org.apache.lucene.morphology.Morphology;
+import org.apache.lucene.morphology.MorphologyImpl;

 import java.io.IOException;
 import java.util.*;
@@ -119,7 +119,7 @@ public class StatiticsCollector implements WordProccessor {
                prevSet = currentSet;
            }
        }
-        Morphology morphology = new Morphology(ints, rulesId, heuristics, grammaReader.getGrammaInfoAsArray());
+        MorphologyImpl morphology = new MorphologyImpl(ints, rulesId, heuristics, grammaReader.getGrammaInfoAsArray());
        morphology.writeToFile(fileName);
    }

@@ -1,39 +0,0 @@
-/**
- * Copyright 2009 Alexander Kuznetsov 
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.morphology.generator;
-
-import org.apache.lucene.morphology.dictionary.*;
-
-import java.io.IOException;
-import java.util.HashSet;
-
-
-public class RussianPrefixesBuilder {
-    public static void main(String[] args) throws IOException {
-
-        PrefixesRulesBuilder dictonaryReader = new PrefixesRulesBuilder("dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd", "windows-1251",new HashSet<String>());
-
-
-        dictonaryReader.proccess(new WordProccessor() {
-            public void proccess(WordCard wordCard) throws IOException {
-
-            }
-        });
-
-        dictonaryReader.savePrefixes("russian/src/main/resources/org/apache/lucene/morphology/russian/prefixes.info");
-    }
-}
@@ -1,372 +0,0 @@
-[ ть
-  у
-  ем
-  ешь
-  ете
-  ет
-  ут
-
-  ла
-  ло
-  ли
-  я
-  ши
-  ем
-  емте
- по ай
-  ь
- по айте
-  ьте
-  ущий
-  ущего
-  ущему
-  ущего
-  ущий
-  ущим
-  ущем
-  ущая
-  ущей
-  ущей
-  ущую
-  ущей
-  ущею
-  ущей
-  ущее
-  ущего
-  ущему
-  ущее
-  ущим
-  ущем
-  ущие
-  ущих
-  ущим
-  ущих
-  ущие
-  ущими
-  ущих
-  ший
-  шего
-  шему
-  шего
-  ший
-  шим
-  шем
-  шая
-  шей
-  шей
-  шую
-  шей
-  шею
-  шей
-  шее
-  шего
-  шему
-  шее
-  шим
-  шем
-  шие
-  ших
-  шим
-  ших
-  шие
-  шими
-  ших]
-[ большой
-  большого
-  большому
-  большого
-  большой
-  большим
-  большом
-  большая
-  большой
-  большой
-  большую
-  большой
-  большою
-  большой
-  большое
-  большого
-  большому
-  большое
-  большим
-  большом
-  большие
-  больших
-  большим
-  больших
-  большие
-  большими
-  больших
-  велик
-  велика
-  велико
-  велики
-  больше
- по больше
- наи больший
- наи большего
- наи большему
- наи большего
- наи больший
- наи большим
- наи большем
- наи большая
- наи большей
- наи большей
- наи большую
- наи большей
- наи большею
- наи большей
- наи большее
- наи большего
- наи большему
- наи большее
- наи большим
- наи большем
- наи большие
- наи больших
- наи большим
- наи больших
- наи большие
- наи большими
- наи больших]
-[ вероятный
-  вероятного
-  вероятному
-  вероятного
-  вероятный
-  вероятным
-  вероятном
-  вероятная
-  вероятной
-  вероятной
-  вероятную
-  вероятной
-  вероятною
-  вероятной
-  вероятное
-  вероятного
-  вероятному
-  вероятное
-  вероятным
-  вероятном
-  вероятные
-  вероятных
-  вероятным
-  вероятных
-  вероятные
-  вероятными
-  вероятных
-  вероятен
-  вероятна
-  вероятно
-  вероятны
-  вероятнее
-  вероятней
- по вероятнее
- по вероятней
-  вероятнейший
- наи невероятнейший
-  вероятнейшего
- наи невероятнейшего
-  вероятнейшему
- наи невероятнейшему
-  вероятнейшего
- наи невероятнейшего
-  вероятнейший
- наи невероятнейший
-  вероятнейшим
- наи невероятнейшим
-  вероятнейшем
- наи невероятнейшем
-  вероятнейшая
- наи невероятнейшая
-  вероятнейшей
- наи невероятнейшей
-  вероятнейшей
- наи невероятнейшей
-  вероятнейшую
- наи невероятнейшую
-  вероятнейшей
-  вероятнейшею
- наи невероятнейшей
- наи невероятнейшею
-  вероятнейшей
- наи невероятнейшей
-  вероятнейшее
- наи невероятнейшее
-  вероятнейшего
- наи невероятнейшего
-  вероятнейшему
- наи невероятнейшему
-  вероятнейшее
- наи невероятнейшее
-  вероятнейшим
- наи невероятнейшим
-  вероятнейшем
- наи невероятнейшем
-  вероятнейшие
- наи невероятнейшие
-  вероятнейших
- наи невероятнейших
-  вероятнейшим
- наи невероятнейшим
-  вероятнейших
- наи невероятнейших
-  вероятнейшие
- наи невероятнейшие
-  вероятнейшими
- наи невероятнейшими
-  вероятнейших
- наи невероятнейших]
-[ аленький
-  аленького
-  аленькому
-  аленького
-  аленький
-  аленьким
-  аленьком
-  аленькая
-  аленькой
-  аленькой
-  аленькую
-  аленькой
-  аленькою
-  аленькой
-  аленькое
-  аленького
-  аленькому
-  аленькое
-  аленьким
-  аленьком
-  аленькие
-  аленьких
-  аленьким
-  аленьких
-  аленькие
-  аленькими
-  аленьких
-  ал
-  ала
-  ало
-  алы
-  еньше
- по еньше
-  алейший
- наи еньший
-  алейшего
- наи еньшего
-  алейшему
- наи еньшему
-  алейшего
- наи еньшего
-  алейший
- наи еньший
-  алейшим
- наи еньшим
-  алейшем
- наи еньшем
-  алейшая
- наи еньшая
-  алейшей
- наи еньшей
-  алейшей
- наи еньшей
-  алейшую
- наи еньшую
-  алейшей
-  алейшею
- наи еньшей
- наи еньшею
-  алейшей
- наи еньшей
-  алейшее
- наи еньшее
-  алейшего
- наи еньшего
-  алейшему
- наи еньшему
-  алейшее
- наи еньшее
-  алейшим
- наи еньшим
-  алейшем
- наи еньшем
-  алейшие
- наи еньшие
-  алейших
- наи еньших
-  алейшим
- наи еньшим
-  алейших
- наи еньших
-  алейшие
- наи еньшие
-  алейшими
- наи еньшими
-  алейших
- наи еньших]
-[ ьный
-  ьного
-  ьному
-  ьного
-  ьный
-  ьным
-  ьном
-  ьная
-  ьной
-  ьной
-  ьную
-  ьной
-  ьною
-  ьной
-  ьное
-  ьного
-  ьному
-  ьное
-  ьным
-  ьном
-  ьные
-  ьных
-  ьным
-  ьных
-  ьные
-  ьными
-  ьных
-  ен
-  ьна
-  ьно
-  ьны
-  ьны
-  ьнее
-  ьней
- по ьнее
- по ьней
- наи ьнейший
- наи ьнейшего
- наи ьнейшему
- наи ьнейшего
- наи ьнейший
- наи ьнейшим
- наи ьнейшем
- наи ьнейшая
- наи ьнейшей
- наи ьнейшей
- наи ьнейшую
- наи ьнейшей
- наи ьнейшею
- наи ьнейшей
- наи ьнейшее
- наи ьнейшего
- наи ьнейшему
- наи ьнейшее
- наи ьнейшим
- наи ьнейшем
- наи ьнейшие
- наи ьнейших
- наи ьнейшим
- наи ьнейших
- наи ьнейшие
- наи ьнейшими
- наи ьнейших]
@@ -15,12 +15,12 @@
 */
 package org.apache.lucene.morphology.english;

-import org.apache.lucene.morphology.Morphology;
+import org.apache.lucene.morphology.MorphologyImpl;

 import java.io.IOException;


-public class EnglishMorphology extends Morphology {
+public class EnglishMorphology extends MorphologyImpl {

    public EnglishMorphology() throws IOException {
        super(EnglishLuceneMorphology.class.getResourceAsStream("/org/apache/lucene/morphology/english/morph.info"), new EnglishLetterDecoderEncoder());
@@ -23,7 +23,7 @@ import java.util.ArrayList;
 import java.util.List;


-public class LuceneMorphology extends MorphologyWithPrefix {
+public class LuceneMorphology extends MorphologyImpl {

    public LuceneMorphology(String fileName, LetterDecoderEncoder decoderEncoder) throws IOException {
        super(fileName, decoderEncoder);
@@ -33,13 +33,15 @@ public class LuceneMorphology extends MorphologyWithPrefix {
        super(inputStream, decoderEncoder);
    }

-    public LuceneMorphology(InputStream morphFormInputStream, InputStream prefixesInputStream, LetterDecoderEncoder decoderEncoder) throws IOException {
-        super(morphFormInputStream, prefixesInputStream, decoderEncoder);
-    }
-
    @Override
-    protected String createForm(String form, String grammaInfo) {
-        return form;
+    public List<String> getMorhInfo(String s) {
+        ArrayList<String> result = new ArrayList<String>();
+        int[] ints = decoderEncoder.encodeToArray(revertWord(s));
+        int ruleId = findRuleId(ints);
+        for (Heuristic h : rules[rulesId[ruleId]]) {
+            result.add(h.transofrmWord(s));
+        }
+        return result;
    }

    protected void readRules(BufferedReader bufferedReader) throws IOException {
@@ -15,200 +15,11 @@
 */
 package org.apache.lucene.morphology;

-
-import java.io.*;
-import java.util.ArrayList;
-import java.util.HashSet;
 import java.util.List;


-public class Morphology {
-    protected int[][] separators;
-    protected short[] rulesId;
-    protected Heuristic[][] rules;
-    protected String[] grammaInfo;
-    protected LetterDecoderEncoder decoderEncoder;
+public interface Morphology {

-
-    public Morphology(String fileName, LetterDecoderEncoder decoderEncoder) throws IOException {
-        readFromFile(fileName);
-        this.decoderEncoder = decoderEncoder;
-    }
-
-    public Morphology(InputStream inputStream, LetterDecoderEncoder decoderEncoder) throws IOException {
-        readFromInputStream(inputStream);
-        this.decoderEncoder = decoderEncoder;
-    }
-
-    public Morphology(int[][] separators, short[] rulesId, Heuristic[][] rules, String[] grammaInfo) {
-        this.separators = separators;
-        this.rulesId = rulesId;
-        this.rules = rules;
-        this.grammaInfo = grammaInfo;
-    }
-
-    public int[][] getSeparators() {
-        return separators;
-    }
-
-    public short[] getRulesId() {
-        return rulesId;
-    }
-
-    public Heuristic[][] getRules() {
-        return rules;
-    }
-
-    public String[] getGrammaInfo() {
-        return grammaInfo;
-    }
-
-    public List<String> getMorhInfo(String s) {
-        ArrayList<String> result = new ArrayList<String>();
-        int[] ints = decoderEncoder.encodeToArray(revertWord(s));
-        int ruleId = findRuleId(ints);
-        for (Heuristic h : rules[rulesId[ruleId]]) {
-            result.add(createForm(h.transofrmWord(s),grammaInfo[h.getFormMorphInfo()]));
-        }
-        return result;
-    }
-
-    protected String createForm(String form,String grammaInfo){
-        return form+"|"+grammaInfo;
-    }
-
-    protected int findRuleId(int[] ints) {
-        int low = 0;
-        int high = separators.length - 1;
-        int mid = 0;
-        while (low <= high) {
-            mid = (low + high) >>> 1;
-            int[] midVal = separators[mid];
-
-            int comResult = compareToInts(ints, midVal);
-            if (comResult > 0)
-                low = mid + 1;
-            else if (comResult < 0)
-                high = mid - 1;
-            else
-                break;
-        }
-        if (compareToInts(ints, separators[mid]) >= 0) {
-            return mid;
-        } else {
-            return mid - 1;
-        }
+    List<String> getMorhInfo(String s);
    
 }
-
-    private int compareToInts(int[] i1, int[] i2) {
-        int minLength = Math.min(i1.length, i2.length);
-        for (int i = 0; i < minLength; i++) {
-            int i3 = i1[i] < i2[i] ? -1 : (i1[i] == i2[i] ? 0 : 1);
-            if (i3 != 0) return i3;
-        }
-        return i1.length - i2.length;
-    }
-
-    public void writeToFile(String fileName) throws IOException {
-        OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(fileName), "UTF-8");
-        writer.write(separators.length + "\n");
-        for (int[] i : separators) {
-            writer.write(i.length + "\n");
-            for (int j : i) {
-                writer.write(j + "\n");
-            }
-        }
-        for (short i : rulesId) {
-            writer.write(i + "\n");
-        }
-        writer.write(rules.length + "\n");
-        for (Heuristic[] heuristics : rules) {
-            writer.write(heuristics.length + "\n");
-            for (Heuristic heuristic : heuristics) {
-                writer.write(heuristic.toString() + "\n");
-            }
-        }
-        writer.write(grammaInfo.length + "\n");
-        for (String s : grammaInfo) {
-            writer.write(s + "\n");
-        }
-        writer.close();
-    }
-
-    public void readFromFile(String fileName) throws IOException {
-        FileInputStream inputStream = new FileInputStream(fileName);
-        readFromInputStream(inputStream);
-    }
-
-    private void readFromInputStream(InputStream inputStream) throws IOException {
-        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"));
-        String s = bufferedReader.readLine();
-        Integer amount = Integer.valueOf(s);
-
-        readSeparators(bufferedReader, amount);
-
-        readRulesId(bufferedReader, amount);
-
-        readRules(bufferedReader);
-        readGrammaInfo(bufferedReader);
-        bufferedReader.close();
-    }
-
-    private void readGrammaInfo(BufferedReader bufferedReader) throws IOException {
-        String s;
-        Integer amount;
-        s = bufferedReader.readLine();
-        amount = Integer.valueOf(s);
-        grammaInfo = new String[amount];
-        for (int i = 0; i < amount; i++) {
-            grammaInfo[i] = bufferedReader.readLine();
-        }
-    }
-
-    protected void readRules(BufferedReader bufferedReader) throws IOException {
-        String s;
-        Integer amount;
-        s = bufferedReader.readLine();
-        amount = Integer.valueOf(s);
-        rules = new Heuristic[amount][];
-        for (int i = 0; i < amount; i++) {
-            String s1 = bufferedReader.readLine();
-            Integer ruleLenght = Integer.valueOf(s1);
-            rules[i] = new Heuristic[ruleLenght];
-            for (int j = 0; j < ruleLenght; j++) {
-                rules[i][j] = new Heuristic(bufferedReader.readLine());
-            }
-        }
-    }
-
-    private void readRulesId(BufferedReader bufferedReader, Integer amount) throws IOException {
-        rulesId = new short[amount];
-        for (int i = 0; i < amount; i++) {
-            String s1 = bufferedReader.readLine();
-            rulesId[i] = Short.valueOf(s1);
-        }
-    }
-
-    private void readSeparators(BufferedReader bufferedReader, Integer amount) throws IOException {
-        HashSet intetger = new HashSet<Integer>();
-        separators = new int[amount][];
-        for (int i = 0; i < amount; i++) {
-            String s1 = bufferedReader.readLine();
-            Integer wordLenght = Integer.valueOf(s1);
-            separators[i] = new int[wordLenght];
-            for (int j = 0; j < wordLenght; j++) {
-                separators[i][j] = Integer.valueOf(bufferedReader.readLine());
-            }
-            intetger.add(separators[i][0]);
-        }
-    }
-
-    protected String revertWord(String s) {
-        String result = "";
-        for (int i = 1; i <= s.length(); i++) {
-            result += s.charAt(s.length() - i);
-        }
-        return result;
-    }
-}
@@ -0,0 +1,210 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.morphology;
+
+
+import java.io.*;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+
+
+public class MorphologyImpl implements Morphology {
+    protected int[][] separators;
+    protected short[] rulesId;
+    protected Heuristic[][] rules;
+    protected String[] grammaInfo;
+    protected LetterDecoderEncoder decoderEncoder;
+
+
+    public MorphologyImpl(String fileName, LetterDecoderEncoder decoderEncoder) throws IOException {
+        readFromFile(fileName);
+        this.decoderEncoder = decoderEncoder;
+    }
+
+    public MorphologyImpl(InputStream inputStream, LetterDecoderEncoder decoderEncoder) throws IOException {
+        readFromInputStream(inputStream);
+        this.decoderEncoder = decoderEncoder;
+    }
+
+    public MorphologyImpl(int[][] separators, short[] rulesId, Heuristic[][] rules, String[] grammaInfo) {
+        this.separators = separators;
+        this.rulesId = rulesId;
+        this.rules = rules;
+        this.grammaInfo = grammaInfo;
+    }
+
+    public int[][] getSeparators() {
+        return separators;
+    }
+
+    public short[] getRulesId() {
+        return rulesId;
+    }
+
+    public Heuristic[][] getRules() {
+        return rules;
+    }
+
+    public String[] getGrammaInfo() {
+        return grammaInfo;
+    }
+
+    public List<String> getMorhInfo(String s) {
+        ArrayList<String> result = new ArrayList<String>();
+        int[] ints = decoderEncoder.encodeToArray(revertWord(s));
+        int ruleId = findRuleId(ints);
+        for (Heuristic h : rules[rulesId[ruleId]]) {
+            result.add(h.transofrmWord(s) + "|" + grammaInfo[h.getFormMorphInfo()]);
+        }
+        return result;
+    }
+
+    protected int findRuleId(int[] ints) {
+        int low = 0;
+        int high = separators.length - 1;
+        int mid = 0;
+        while (low <= high) {
+            mid = (low + high) >>> 1;
+            int[] midVal = separators[mid];
+
+            int comResult = compareToInts(ints, midVal);
+            if (comResult > 0)
+                low = mid + 1;
+            else if (comResult < 0)
+                high = mid - 1;
+            else
+                break;
+        }
+        if (compareToInts(ints, separators[mid]) >= 0) {
+            return mid;
+        } else {
+            return mid - 1;
+        }
+
+    }
+
+    private int compareToInts(int[] i1, int[] i2) {
+        int minLength = Math.min(i1.length, i2.length);
+        for (int i = 0; i < minLength; i++) {
+            int i3 = i1[i] < i2[i] ? -1 : (i1[i] == i2[i] ? 0 : 1);
+            if (i3 != 0) return i3;
+        }
+        return i1.length - i2.length;
+    }
+
+    public void writeToFile(String fileName) throws IOException {
+        OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(fileName), "UTF-8");
+        writer.write(separators.length + "\n");
+        for (int[] i : separators) {
+            writer.write(i.length + "\n");
+            for (int j : i) {
+                writer.write(j + "\n");
+            }
+        }
+        for (short i : rulesId) {
+            writer.write(i + "\n");
+        }
+        writer.write(rules.length + "\n");
+        for (Heuristic[] heuristics : rules) {
+            writer.write(heuristics.length + "\n");
+            for (Heuristic heuristic : heuristics) {
+                writer.write(heuristic.toString() + "\n");
+            }
+        }
+        writer.write(grammaInfo.length + "\n");
+        for (String s : grammaInfo) {
+            writer.write(s + "\n");
+        }
+        writer.close();
+    }
+
+    public void readFromFile(String fileName) throws IOException {
+        FileInputStream inputStream = new FileInputStream(fileName);
+        readFromInputStream(inputStream);
+    }
+
+    private void readFromInputStream(InputStream inputStream) throws IOException {
+        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"));
+        String s = bufferedReader.readLine();
+        Integer amount = Integer.valueOf(s);
+
+        readSeparators(bufferedReader, amount);
+
+        readRulesId(bufferedReader, amount);
+
+        readRules(bufferedReader);
+        readGrammaInfo(bufferedReader);
+        bufferedReader.close();
+    }
+
+    private void readGrammaInfo(BufferedReader bufferedReader) throws IOException {
+        String s;
+        Integer amount;
+        s = bufferedReader.readLine();
+        amount = Integer.valueOf(s);
+        grammaInfo = new String[amount];
+        for (int i = 0; i < amount; i++) {
+            grammaInfo[i] = bufferedReader.readLine();
+        }
+    }
+
+    protected void readRules(BufferedReader bufferedReader) throws IOException {
+        String s;
+        Integer amount;
+        s = bufferedReader.readLine();
+        amount = Integer.valueOf(s);
+        rules = new Heuristic[amount][];
+        for (int i = 0; i < amount; i++) {
+            String s1 = bufferedReader.readLine();
+            Integer ruleLenght = Integer.valueOf(s1);
+            rules[i] = new Heuristic[ruleLenght];
+            for (int j = 0; j < ruleLenght; j++) {
+                rules[i][j] = new Heuristic(bufferedReader.readLine());
+            }
+        }
+    }
+
+    private void readRulesId(BufferedReader bufferedReader, Integer amount) throws IOException {
+        rulesId = new short[amount];
+        for (int i = 0; i < amount; i++) {
+            String s1 = bufferedReader.readLine();
+            rulesId[i] = Short.valueOf(s1);
+        }
+    }
+
+    private void readSeparators(BufferedReader bufferedReader, Integer amount) throws IOException {
+        HashSet intetger = new HashSet<Integer>();
+        separators = new int[amount][];
+        for (int i = 0; i < amount; i++) {
+            String s1 = bufferedReader.readLine();
+            Integer wordLenght = Integer.valueOf(s1);
+            separators[i] = new int[wordLenght];
+            for (int j = 0; j < wordLenght; j++) {
+                separators[i][j] = Integer.valueOf(bufferedReader.readLine());
+            }
+            intetger.add(separators[i][0]);
+        }
+    }
+
+    protected String revertWord(String s) {
+        String result = "";
+        for (int i = 1; i <= s.length(); i++) {
+            result += s.charAt(s.length() - i);
+        }
+        return result;
+    }
+}
@@ -1,96 +0,0 @@
-/**
- * Copyright 2009 Alexander Kuznetsov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.morphology;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.BufferedReader;
-import java.io.InputStreamReader;
-import java.util.*;
-
-
-public class MorphologyWithPrefix extends Morphology {
-    private Map<String, PrefixRule> prefixRuleMap = new HashMap<String, PrefixRule>();
-
-    public MorphologyWithPrefix(String fileName, LetterDecoderEncoder decoderEncoder) throws IOException {
-        super(fileName, decoderEncoder);
-    }
-
-    public MorphologyWithPrefix(InputStream morphFormInputStream, LetterDecoderEncoder decoderEncoder) throws IOException {
-        super(morphFormInputStream, decoderEncoder);
-    }
-
-    public MorphologyWithPrefix(InputStream morphFormInputStream,InputStream prefixesInputStream, LetterDecoderEncoder decoderEncoder) throws IOException {
-        super(morphFormInputStream, decoderEncoder);
-        readPrefixes(prefixesInputStream);
-    }
-
-    private void readPrefixes(InputStream inputStream) throws IOException {
-        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"));
-        Integer prefixAmount = Integer.parseInt(bufferedReader.readLine());
-        for(int i = 0; i < prefixAmount;i++){
-            PrefixRule prefixRule = readPrefix(bufferedReader);
-            prefixRuleMap.put(prefixRule.getHashString(),prefixRule);
-        }
-        bufferedReader.close();
-    }
-
-    private PrefixRule readPrefix(BufferedReader bufferedReader) throws IOException {
-        PrefixRule prefixRule = new PrefixRule();
-        String s = bufferedReader.readLine();
-        prefixRule.setPrefix(s);
-        s = bufferedReader.readLine();
-        prefixRule.setLastLetter(s.charAt(0));
-        HashSet<Short> morph = new HashSet<Short>();
-        int formAmount = Integer.valueOf(bufferedReader.readLine());
-        for(int i = 0; i < formAmount; i++){
-            morph.add(Short.valueOf(bufferedReader.readLine()));
-        }
-        prefixRule.setForms(morph);
-        return prefixRule;
-    }
-
-    public MorphologyWithPrefix(int[][] separators, short[] rulesId, Heuristic[][] rules, String[] grammaInfo) {
-        super(separators, rulesId, rules, grammaInfo);
-    }
-
-    @Override
-    public List<String> getMorhInfo(String s) {
-        if (prefixRuleMap.size() == 0 || s.length() < 4) {
-            return super.getMorhInfo(s);
-        }
-        String ruleIndex = "" + s.charAt(0) + s.charAt(s.length() - 1);
-        PrefixRule prefixRule = prefixRuleMap.get(ruleIndex);
-        if (prefixRule == null) {
-            return super.getMorhInfo(s);
-        }
-        if (!s.startsWith(prefixRule.getPrefix())) {
-            return super.getMorhInfo(s);
-        }
-        String sWithoutPrefix = s.substring(prefixRule.getPrefix().length());
-
-        int[] ints = decoderEncoder.encodeToArray(revertWord(sWithoutPrefix));
-        int ruleId = findRuleId(ints);
-         ArrayList<String> result = new ArrayList<String>();
-        for (Heuristic h : rules[rulesId[ruleId]]) {
-            //String morphInfo = grammaInfo[];
-            if(prefixRule.getForms().contains(h.getFormMorphInfo())){
-                result.add(createForm(h.transofrmWord(sWithoutPrefix),"pr"));
-            }
-        }
-        return result.size() > 0 ? result : super.getMorhInfo(s);
-    }
-}
@@ -1,76 +0,0 @@
-/**
- * Copyright 2009 Alexander Kuznetsov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.morphology;
-
-import java.io.Serializable;
-import java.util.HashSet;
-
-
-public class PrefixRule implements Serializable {
-    private Character lastLetter;
-    private String prefix;
-    private HashSet<Short> forms;
-
-    public Character getLastLetter() {
-        return lastLetter;
-    }
-
-    public void setLastLetter(Character lastLetter) {
-        this.lastLetter = lastLetter;
-    }
-
-    public String getPrefix() {
-        return prefix;
-    }
-
-    public void setPrefix(String prefix) {
-        this.prefix = prefix;
-    }
-
-    public HashSet<Short> getForms() {
-        return forms;
-    }
-
-    public void setForms(HashSet<Short> forms) {
-        this.forms = forms;
-    }
-
-    public String getHashString() {
-        return "" + prefix.charAt(0) + lastLetter;
-    }
-
-    @Override
-    public boolean equals(Object o) {
-        if (this == o) return true;
-        if (o == null || getClass() != o.getClass()) return false;
-
-        PrefixRule that = (PrefixRule) o;
-
-        if (forms != null ? !forms.equals(that.forms) : that.forms != null) return false;
-        if (lastLetter != null ? !lastLetter.equals(that.lastLetter) : that.lastLetter != null) return false;
-        if (prefix != null ? !prefix.equals(that.prefix) : that.prefix != null) return false;
-
-        return true;
-    }
-
-    @Override
-    public int hashCode() {
-        int result = lastLetter != null ? lastLetter.hashCode() : 0;
-        result = 31 * result + (prefix != null ? prefix.hashCode() : 0);
-        result = 31 * result + (forms != null ? forms.hashCode() : 0);
-        return result;
-    }
-}
@@ -22,6 +22,6 @@ import java.io.IOException;
 public class RussianLuceneMorphology extends LuceneMorphology {

    public RussianLuceneMorphology() throws IOException {
-        super(RussianLuceneMorphology.class.getResourceAsStream("/org/apache/lucene/morphology/russian/morph.info"),RussianLuceneMorphology.class.getResourceAsStream("/org/apache/lucene/morphology/russian/prefixes.info"), new RussianLetterDecoderEncoder());
+        super(RussianLuceneMorphology.class.getResourceAsStream("/org/apache/lucene/morphology/russian/morph.info"), new RussianLetterDecoderEncoder());
    }
 }
@@ -15,11 +15,11 @@
 */
 package org.apache.lucene.morphology.russian;

-import org.apache.lucene.morphology.Morphology;
+import org.apache.lucene.morphology.MorphologyImpl;

 import java.io.IOException;

-public class RussianMorphology extends Morphology {
+public class RussianMorphology extends MorphologyImpl {

    public RussianMorphology() throws IOException {
        super(RussianMorphology.class.getResourceAsStream("/org/apache/lucene/morphology/russian/morph.info"), new RussianLetterDecoderEncoder());
@@ -1,60 +0,0 @@
-/**
- * Copyright 2009 Alexander Kuznetsov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.morphology.russian;
-
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Token;
-
-import java.io.IOException;
-import java.io.FileInputStream;
-import java.io.InputStreamReader;
-import java.util.HashSet;
-
-/**
- * Created by IntelliJ IDEA.
- * User: akuznetsov
- * Date: 31.10.2009
- * Time: 14:01:11
- * To change this template use File | Settings | File Templates.
- */
-public class TestSpeed {
-
-    public static void main(String[] args) throws IOException {
-        RussianAnalayzer russianAnalayzer = new RussianAnalayzer();
-        bookProccess(russianAnalayzer, "C:/tmp/_Aleksandr_Suhov_Tanets_na_raskalennyih_uglyah1.fb2");
-        Long stat = System.currentTimeMillis();
-        bookProccess(russianAnalayzer, "C:/tmp/_Aleksandr_Suhov_Tanets_na_raskalennyih_uglyah1.fb2");
-        System.out.println("Done in " + (System.currentTimeMillis() - stat));
-    }
-
-    private static void bookProccess(RussianAnalayzer russianAnalayzer, String bookName) throws IOException {
-        FileInputStream inputStream = new FileInputStream(bookName);
-        TokenStream tokenStream = russianAnalayzer.tokenStream(null,new InputStreamReader(inputStream,"UTF-8"));
-        final Token reusableToken = new Token();
-        long count = 0;
-        Token nextToken;
-        for (; ;) {
-            nextToken = tokenStream.next(reusableToken);
-           // System.out.println(" " + nextToken.term());
-            count++;
-            if (nextToken == null) {
-                break;
-            }
-
-        }
-        //System.out.println("Words " + count);
-    }
-}
@@ -1,96 +0,0 @@
-11
-наи
-е
-8
-258
-255
-289
-252
-292
-262
-296
-286
-наи
-и
-2
-263
-297
-наи
-ю
-4
-250
-249
-283
-284
-по
-й
-5
-250
-251
-248
-247
-269
-по
-е
-3
-255
-252
-269
-наи
-й
-12
-239
-273
-250
-251
-248
-277
-247
-282
-281
-243
-285
-284
-наи
-о
-6
-274
-253
-276
-287
-242
-240
-наи
-м
-10
-256
-290
-257
-291
-279
-278
-294
-260
-244
-245
-наи
-х
-6
-259
-293
-261
-295
-264
-298
-наи
-я
-2
-246
-280
-наи
-у
-4
-275
-254
-288
-241
@@ -33,7 +33,7 @@ public class RussianLuceneMorphTest {

    @Before
    public void setUp() throws IOException {
-        luceneMorph = new RussianLuceneMorphology();
+        luceneMorph = new LuceneMorphology(this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/morph.info"), new RussianLetterDecoderEncoder());
    }

    @Test
@@ -1,4 +1,3 @@
-наилучший хороший
 еду еда ехать
 тестов тест
 вина вино вина
@@ -18,7 +17,3 @@
 лучший хороший
 на на
 тест тест тесто
-спам спам
-спама спам
-наигранный наигранный
-наивный наивный