Roll back wrong version of morphology, add interface for morphology
git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@88 d817d54c-26ab-11de-abc9-2f7d1455ff7a
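The "interface for morphology" half of this change is visible in the StatiticsCollector hunks below, where the generator stops importing and constructing Morphology directly and builds MorphologyImpl instead. A minimal sketch of that split, assuming hypothetical member names (only writeToFile and the constructor arguments appear in the diff, and whether writeToFile sits on the interface or only on the implementation is not shown here):

package org.apache.lucene.morphology;

import java.io.IOException;

// Sketch of the extracted contract; MorphologyImpl, which StatiticsCollector
// now instantiates, is assumed to implement it. The method set is illustrative.
public interface Morphology {
    // persistence hook seen in the diff: morphology.writeToFile(fileName)
    void writeToFile(String fileName) throws IOException;
}

The dictionary generator presumably keeps the concrete MorphologyImpl type because it needs the serialization side, while analysis-time callers can depend on the interface alone.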
@@ -31,9 +31,9 @@ import java.util.*;
 public class DictonaryReader {
     private String fileName;
     private String fileEncoding = "windows-1251";
-    protected List<List<FlexiaModel>> wordsFlexias = new ArrayList<List<FlexiaModel>>();
-    protected List<List<String>> wordPrefixes = new ArrayList<List<String>>();
-    protected Set<String> ingnoredForm = new HashSet<String>();
+    private List<List<FlexiaModel>> wordsFlexias = new ArrayList<List<FlexiaModel>>();
+    private List<List<String>> wordPrefixes = new ArrayList<List<String>>();
+    private Set<String> ingnoredForm = new HashSet<String>();

     public DictonaryReader(String fileName, Set<String> ingnoredForm) {
         this.fileName = fileName;
@@ -57,7 +57,7 @@ public class DictonaryReader {
     }


-    protected void readWords(BufferedReader reader, WordProccessor wordProccessor) throws IOException {
+    private void readWords(BufferedReader reader, WordProccessor wordProccessor) throws IOException {
         String s = reader.readLine();
         int count = Integer.valueOf(s);
         for (int i = 0; i < count; i++) {
@@ -81,7 +81,7 @@ public class DictonaryReader {
     }


-    protected void sckipBlock(BufferedReader reader) throws IOException {
+    private void sckipBlock(BufferedReader reader) throws IOException {
         String s = reader.readLine();
         int count = Integer.valueOf(s);
         for (int i = 0; i < count; i++) {
@@ -90,7 +90,7 @@ public class DictonaryReader {
     }


-    protected void readPrefix(BufferedReader reader) throws IOException {
+    private void readPrefix(BufferedReader reader) throws IOException {
         String s = reader.readLine();
         int count = Integer.valueOf(s);
         for (int i = 0; i < count; i++) {
@@ -99,7 +99,7 @@ public class DictonaryReader {
         }
     }

-    protected void readFlexias(BufferedReader reader) throws IOException {
+    private void readFlexias(BufferedReader reader) throws IOException {
         String s = reader.readLine();
         int count = Integer.valueOf(s);
         for (int i = 0; i < count; i++) {
@@ -112,7 +112,7 @@ public class DictonaryReader {
         }
     }

-    protected void addFlexia(ArrayList<FlexiaModel> flexiaModelArrayList, String line) {
+    private void addFlexia(ArrayList<FlexiaModel> flexiaModelArrayList, String line) {
         String[] fl = line.split("\\*");
         // we inored all forms thats
         if (fl.length == 3) {
@@ -1,5 +1,5 @@
 /**
- * Copyright 2009 Alexander Kuznetsov
+ * Copyright 2009 Alexander Kuznetsov
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -60,28 +60,6 @@ public class FlexiaModel {

     @Override
     public String toString() {
-        return prefix + " " + suffix + " " + code;
-    }
-
-    @Override
-    public boolean equals(Object o) {
-        if (this == o) return true;
-        if (o == null || getClass() != o.getClass()) return false;
-
-        FlexiaModel that = (FlexiaModel) o;
-
-        if (code != null ? !code.equals(that.code) : that.code != null) return false;
-        if (prefix != null ? !prefix.equals(that.prefix) : that.prefix != null) return false;
-        if (suffix != null ? !suffix.equals(that.suffix) : that.suffix != null) return false;
-
-        return true;
-    }
-
-    @Override
-    public int hashCode() {
-        int result = code != null ? code.hashCode() : 0;
-        result = 31 * result + (suffix != null ? suffix.hashCode() : 0);
-        result = 31 * result + (prefix != null ? prefix.hashCode() : 0);
-        return result;
+        return prefix + " " + suffix;
     }
 }
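The equals/hashCode pair removed above appears to have been needed only while FlexiaModel served as a hash key: the PrefixesRulesBuilder deleted below keeps a Map<FlexiaModel, Set<FlexiaModel>> and HashSet<FlexiaModel> values, which rely on value equality. A small illustration of why that mattered (the FlexiaModel values are made up; the constructor order code, suffix, prefix follows the convert() calls in the deleted class):

import org.apache.lucene.morphology.dictionary.FlexiaModel;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class FlexiaModelKeyDemo {
    public static void main(String[] args) {
        // Two models built from the same dictionary entry; sample values only.
        FlexiaModel a = new FlexiaModel("pr", "а", "по");
        FlexiaModel b = new FlexiaModel("pr", "а", "по");

        Map<FlexiaModel, Set<FlexiaModel>> rules = new HashMap<FlexiaModel, Set<FlexiaModel>>();
        rules.put(a, new HashSet<FlexiaModel>());

        // true with value-based equals/hashCode, false with the Object defaults;
        // with the rules builder rolled back, the overrides can go as well.
        System.out.println(rules.containsKey(b));
    }
}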
@@ -1,139 +0,0 @@
-/**
- * Copyright 2009 Alexander Kuznetsov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.morphology.dictionary;
-
-import org.apache.lucene.morphology.PrefixRule;
-
-import java.util.*;
-import java.io.*;
-
-
-public class PrefixesRulesBuilder extends DictonaryReader {
-    private GrammaReader grammaInfo;
-
-    private Map<FlexiaModel,Set<FlexiaModel>> rules = new HashMap<FlexiaModel,Set<FlexiaModel>>();
-
-    public PrefixesRulesBuilder(String fileName, String fileEncoding, Set<String> ingnoredForm) throws IOException {
-        super(fileName, fileEncoding, ingnoredForm);
-        grammaInfo = new GrammaReader("dictonary/Dicts/Morph/rgramtab.tab");
-    }
-
-    @Override
-    public void proccess(WordProccessor wordProccessor) throws IOException {
-        super.proccess(wordProccessor);
-        System.out.println(rules.size());
-        System.out.println(rules);
-    }
-
-    public List<PrefixRule> getPrefixRules(){
-        List<PrefixRule> prefixRules = new ArrayList<PrefixRule>();
-        for(FlexiaModel key:rules.keySet()){
-            PrefixRule prefixRule = new PrefixRule();
-            prefixRule.setPrefix(key.getPrefix());
-            prefixRule.setLastLetter(key.getSuffix().charAt(0));
-            HashSet<Short> map = new HashSet<Short>();
-            for(FlexiaModel fm:rules.get(key)){
-                int gi = grammaInfo.getGrammInversIndex().get(fm.getCode());
-                map.add((short) gi);
-            }
-            prefixRule.setForms(map);
-            prefixRules.add(prefixRule);
-        }
-        return prefixRules;
-    }
-
-    @Override
-    protected void readWords(BufferedReader reader, WordProccessor wordProccessor) throws IOException {
-        sckipBlock(reader);
-    }
-
-
-
-    @Override
-    protected void readPrefix(BufferedReader reader) throws IOException {
-        sckipBlock(reader);
-    }
-
-    @Override
-    protected void readFlexias(BufferedReader reader) throws IOException {
-        super.readFlexias(reader);
-        //todo research flesias
-        for(List<FlexiaModel> fmList:wordsFlexias){
-            research(fmList);
-        }
-    }
-
-    private void research(List<FlexiaModel> models) {
-        for(FlexiaModel fm:models){
-            if(fm.getPrefix().length() > 0){
-                testFlexia(models, fm);
-            }
-        }
-    }
-
-    private void testFlexia(List<FlexiaModel> models, FlexiaModel fm) {
-        for(FlexiaModel com:models){
-            if(com.getSuffix().equals(fm.getSuffix()) && com.getPrefix().length() == 0){
-                Set<FlexiaModel> models1 = rules.get(convertForKey(fm));
-                if(models1 == null){
-                    models1 = new HashSet<FlexiaModel>();
-                    rules.put(convertForKey(fm),models1);
-                }
-                models1.add(convert(com));
-            }
-        }
-    }
-
-    private FlexiaModel convert(FlexiaModel fm){
-        String suf = fm.getSuffix();
-        //if(suf.length() == 1) System.out.println(fm);
-        return new FlexiaModel(fm.getCode(),""+ suf.charAt(suf.length()-1),fm.getPrefix());
-    }
-
-    private FlexiaModel convertForKey(FlexiaModel fm){
-        String suf = fm.getSuffix();
-        //if(suf.length() == 1) System.out.println(fm);
-        return new FlexiaModel("pr",""+ suf.charAt(suf.length()-1),fm.getPrefix());
-    }
-
-    protected void addFlexia(ArrayList<FlexiaModel> flexiaModelArrayList, String line) {
-        String[] fl = line.split("\\*");
-        if (fl.length == 3) {
-            flexiaModelArrayList.add(new FlexiaModel(fl[1], fl[0].toLowerCase(), fl[2].toLowerCase()));
-        }
-        if (fl.length == 2) flexiaModelArrayList.add(new FlexiaModel(fl[1], fl[0].toLowerCase(), ""));
-    }
-
-    public void savePrefixes(String fileName) throws IOException {
-        OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(fileName), "UTF-8");
-        List<PrefixRule> prefixRuleList = getPrefixRules();
-        writer.write(prefixRuleList.size()+"\n");
-        for(PrefixRule pr: prefixRuleList){
-            writePrefixRule(writer, pr);
-        }
-        writer.close();
-    }
-
-    private void writePrefixRule(OutputStreamWriter writer, PrefixRule pr) throws IOException {
-        writer.write(pr.getPrefix()+"\n");
-        writer.write(pr.getLastLetter()+"\n");
-        HashSet<Short> formInfo = pr.getForms();
-        writer.write(formInfo.size()+"\n");
-        for(Short s:formInfo){
-            writer.write(s+"\n");
-        }
-    }
-}
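For reference, the removed savePrefixes/writePrefixRule pair wrote a rule count followed by one block per rule: the prefix, the last letter, the number of forms, then one form id per line, UTF-8 encoded (the file that RussianPrefixesBuilder, also deleted below, saved as prefixes.info). A hypothetical reader for that layout, not part of this project, using only the PrefixRule setters visible in the deleted code:

import org.apache.lucene.morphology.PrefixRule;

import java.io.*;
import java.util.*;

// Counterpart sketch to savePrefixes/writePrefixRule above: reads <count>,
// then per rule <prefix>, <lastLetter>, <formCount> and that many form ids.
public class PrefixRulesReader {
    public static List<PrefixRule> readPrefixes(String fileName) throws IOException {
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(fileName), "UTF-8"));
        List<PrefixRule> rules = new ArrayList<PrefixRule>();
        int ruleCount = Integer.valueOf(reader.readLine());
        for (int i = 0; i < ruleCount; i++) {
            PrefixRule rule = new PrefixRule();
            rule.setPrefix(reader.readLine());
            rule.setLastLetter(reader.readLine().charAt(0));
            HashSet<Short> forms = new HashSet<Short>();
            int formCount = Integer.valueOf(reader.readLine());
            for (int j = 0; j < formCount; j++) {
                forms.add(Short.valueOf(reader.readLine()));
            }
            rule.setForms(forms);
            rules.add(rule);
        }
        reader.close();
        return rules;
    }
}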
@@ -19,7 +19,7 @@ package org.apache.lucene.morphology.dictionary;

 import org.apache.lucene.morphology.Heuristic;
 import org.apache.lucene.morphology.LetterDecoderEncoder;
-import org.apache.lucene.morphology.Morphology;
+import org.apache.lucene.morphology.MorphologyImpl;

 import java.io.IOException;
 import java.util.*;
@@ -119,7 +119,7 @@ public class StatiticsCollector implements WordProccessor {
                 prevSet = currentSet;
             }
         }
-        Morphology morphology = new Morphology(ints, rulesId, heuristics, grammaReader.getGrammaInfoAsArray());
+        MorphologyImpl morphology = new MorphologyImpl(ints, rulesId, heuristics, grammaReader.getGrammaInfoAsArray());
         morphology.writeToFile(fileName);
     }

@@ -1,39 +0,0 @@
-/**
- * Copyright 2009 Alexander Kuznetsov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.morphology.generator;
-
-import org.apache.lucene.morphology.dictionary.*;
-
-import java.io.IOException;
-import java.util.HashSet;
-
-
-public class RussianPrefixesBuilder {
-    public static void main(String[] args) throws IOException {
-
-        PrefixesRulesBuilder dictonaryReader = new PrefixesRulesBuilder("dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd", "windows-1251",new HashSet<String>());
-
-
-        dictonaryReader.proccess(new WordProccessor() {
-            public void proccess(WordCard wordCard) throws IOException {
-
-            }
-        });
-
-        dictonaryReader.savePrefixes("russian/src/main/resources/org/apache/lucene/morphology/russian/prefixes.info");
-    }
-}