first commit

git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@2 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
alexander.a.kuznetsov
2009-04-11 19:45:38 +00:00
parent b93b764296
commit 63705d7e3b
35 changed files with 200510 additions and 0 deletions

View File

@ -0,0 +1,13 @@
package org.apache.lucene;
/**
 * Minimal sample application (Maven archetype stub).
 * Prints a greeting to standard output and exits.
 */
public class App
{
    public static void main( String[] args )
    {
        // The archetype's canonical smoke-test output.
        System.out.println( "Hello World!" );
    }
}

View File

@ -0,0 +1,87 @@
package org.apache.lucene.russian.morphology;
import org.apache.lucene.russian.morphology.dictonary.DirtonaryReader;
import org.apache.lucene.russian.morphology.dictonary.WordProccessor;
import org.apache.lucene.russian.morphology.dictonary.WordCard;
import org.apache.lucene.russian.morphology.dictonary.IgnoredFormReader;
import org.apache.lucene.russian.morphology.evristics.StatiticsCollectors;
import org.apache.lucene.russian.morphology.evristics.SuffixCounter;
import org.apache.lucene.russian.morphology.evristics.Evristic;
import java.io.*;
import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
/**
 * Research driver: reads the morphological dictionary, collects statistics on
 * (word-form suffix, canonical-form suffix) pairs, builds a heuristic table
 * from the most frequent pairs, measures how often that heuristic recovers
 * the canonical form, and writes the table to disk.
 */
public class SuffixResearcher {
    /**
     * Runs the whole research pipeline.
     *
     * @param args ignored
     * @throws IOException if the dictionary, ignore-list or output files cannot be read/written
     */
    public static void main(String[] args) throws IOException {
        // Flexia codes listed in this file are excluded from statistics collection.
        IgnoredFormReader formReader = new IgnoredFormReader("igoredFrom.txt");
        Set<String> form = formReader.getIngnoredFroms();
        System.out.println(form);
        // NOTE(review): "morphs.mrd" is presumably an aot.ru-style *.mrd
        // morphology dictionary in windows-1251 — confirm format and location.
        DirtonaryReader dirtonaryReader = new DirtonaryReader("morphs.mrd", form);
        // First pass over the dictionary: count suffix pairs.
        StatiticsCollectors statiticsCollectors = new StatiticsCollectors();
        dirtonaryReader.proccess(statiticsCollectors);
        Collection<SuffixCounter> counterCollection = statiticsCollectors.getStatititics().values();
        Object[] objects = counterCollection.toArray();
        // SuffixCounter's compareTo orders by descending frequency,
        // so the most common pairs come first after sorting.
        Arrays.sort(objects);
        System.out.println("Length " + objects.length + " ingored words " + statiticsCollectors.getIgnoredCount());
        // Show the 10 most frequent suffix pairs for manual inspection.
        for(int i = 0; i < 10; i++){
            System.out.println(objects[i]);
        }
        // Build the heuristic from pairs in frequency order; Evristic keeps
        // only the first (i.e. most frequent) mapping per form suffix.
        final Evristic evristic = new Evristic();
        for(int i = 0; i < objects.length; i++){
            evristic.addEvristic(((SuffixCounter) objects[i]).getSuffixEvristic());
        }
        // Second pass: evaluate the heuristic against the dictionary itself.
        final AtomicInteger good = new AtomicInteger(0);
        final AtomicInteger bad = new AtomicInteger(0);
        final FileWriter writer = new FileWriter("incorret.txt");
        dirtonaryReader.proccess(new WordProccessor(){
            public void proccess(WordCard wordCard) throws IOException {
                for(String wordForm:wordCard.getWordsFroms()){
                    String cf = wordCard.getCanonicalFrom();
                    if (evristic.getNormalForm(wordForm).equals(cf)){
                        good.incrementAndGet();
                    } else{
                        // Log each miss: the form, the expected canonical form,
                        // and what the heuristic produced instead.
                        writer.write(wordForm + " c " + cf + " f " + evristic.getNormalForm(wordForm) + "\n");
                        bad.incrementAndGet();
                    }
                }
            }
        });
        writer.close();
        System.out.println("Good " + good + " Bad " + bad);
        // Persist the heuristic table for later use by ArrayEvristics/Evristic readers.
        evristic.writeToFile("evriticsb");
        // Earlier experiment (kept for reference): measured how many distinct
        // normal suffixes each form suffix maps to, i.e. how ambiguous the
        // suffix-replacement heuristic is.
//        Map<String, Set<String>> perehod = new HashMap<String,Set<String>>();
//        for(SuffixCounter suffixCounter:statiticsCollectors.getStatititics().values()){
//            String sf = suffixCounter.getSuffixEvristic().getFormSuffix();
//            Set<String> stringSet = perehod.get(sf);
//            if (stringSet == null){
//                stringSet = new HashSet<String>();
//                perehod.put(sf,stringSet);
//            }
//            stringSet.add(suffixCounter.getSuffixEvristic().getNormalSuffix());
//            //suffix.add(suffixCounter.getSuffixEvristic().getFormSuffix());
//            //System.out.println(suffixCounter.);
//        }
//        System.out.println("Diffirent suffix " + perehod.size());
//        int c = 0;
//        int max_size = 0;
//        int[] size_dist = new int[20];
//        for(int j = 0; j < size_dist.length; j++) size_dist[j] = 0;
//        for(Set<String> set:perehod.values()){
//            size_dist[set.size()] ++;
//            if (set.size() > 1){
//                c++;
//                //System.out.println(set);
//            }
//            if(set.size() > max_size) max_size = set.size();
//        }
//        System.out.println("max size of diffirent suffix " + max_size + " " + c);
//        for(int j = 0; j < size_dist.length; j++) System.out.println("" + j + " " + size_dist[j]);
    }
}

View File

@ -0,0 +1,30 @@
package org.apache.lucene.russian.morphology.analayzer;
import org.apache.lucene.russian.morphology.evristics.ArrayEvristics;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import java.io.IOException;
/**
 * Lucene token filter that replaces each token's text with the canonical
 * (normal) form produced by {@link ArrayEvristics#getCanonicalForm}.
 */
public class RussianMorphlogyFilter extends TokenFilter {
    // Suffix-replacement heuristic table used to normalize each term.
    private ArrayEvristics arrayEvristics;

    protected RussianMorphlogyFilter(TokenStream tokenStream, ArrayEvristics arrayEvristics) {
        super(tokenStream);
        this.arrayEvristics = arrayEvristics;
    }

    /**
     * Produces the next token with its term text replaced by the canonical form.
     *
     * NOTE(review): this does not call input.next(...), so it appears to operate
     * on the reusable token as-is rather than consuming the upstream stream —
     * confirm against the Lucene version in use (pre-2.9 Token API).
     */
    public Token next(final Token reusableToken) throws IOException {
        assert reusableToken != null;
        return createToken(arrayEvristics.getCanonicalForm(reusableToken.term()), reusableToken, reusableToken);
    }

    /**
     * Rewrites {@code reusableToken} in place to carry {@code synonym} as its text.
     * NOTE(review): position increment 0 normally marks a token at the SAME
     * position as the previous one (synonym injection); since the original
     * token is overwritten rather than kept, verify this is intended.
     */
    protected Token createToken(String synonym, Token current, final Token reusableToken) {
        reusableToken.reinit(current, synonym);
        reusableToken.setTermBuffer(synonym);
        reusableToken.setPositionIncrement(0);
        return reusableToken;
    }
}

View File

@ -0,0 +1,104 @@
package org.apache.lucene.russian.morphology.dictonary;
import org.apache.lucene.russian.morphology.dictonary.FlexiaModel;
import com.frielp.morph.automate.WordImpl;
import org.apache.lucene.russian.morphology.evristics.RussianSuffixDecoderEncoder;
import java.util.*;
import java.io.*;
/**
 * Streaming reader for an aot.ru-style *.mrd morphology dictionary.
 *
 * The file is read section by section in a fixed order (each section starts
 * with a line holding its entry count): flexia models, two skipped sections,
 * prefixes, then the word list. Every word is expanded into all its forms via
 * its flexia model and handed to a {@link WordProccessor} callback.
 *
 * NOTE(review): section order/format is assumed from this code only — confirm
 * against the aot.ru mrd format description.
 */
public class DirtonaryReader {
    // Path to the dictionary file.
    private String fileName;
    // Dictionary files default to the Windows Cyrillic encoding.
    private String fileEncoding = "windows-1251";
    // Flexia model lists, indexed by the model number referenced from the word section.
    private List<List<FlexiaModel>> wordsFlexias = new ArrayList<List<FlexiaModel>>();
    // Prefix lists from the prefix section (read but not used by readWords yet).
    private List<List<String>> wordPrefixes = new ArrayList<List<String>>();
    // Flexia codes whose words should be skipped entirely.
    private Set<String> ingnoredForm = new HashSet<String>();

    public DirtonaryReader(String fileName, Set<String> ingnoredForm) {
        this.fileName = fileName;
        this.ingnoredForm = ingnoredForm;
    }

    public DirtonaryReader(String fileName, String fileEncoding, Set<String> ingnoredForm) {
        this.fileName = fileName;
        this.fileEncoding = fileEncoding;
        this.ingnoredForm = ingnoredForm;
    }

    /**
     * Reads the whole dictionary, invoking {@code wordProccessor} once per word card.
     * The section order below mirrors the mrd file layout and must not change.
     *
     * @throws IOException if the file cannot be read
     */
    public void proccess(WordProccessor wordProccessor) throws IOException {
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), fileEncoding));
        readFlexias(bufferedReader);
        sckipBlock(bufferedReader);
        sckipBlock(bufferedReader);
        readPrefix(bufferedReader);
        readWords(bufferedReader,wordProccessor);
    }

    // Reads the word section: each line is "<base> <flexiaModelIndex> ...".
    // Expands each base via its flexia model into a WordCard (canonical form =
    // the model's first flexia) and forwards it to the callback.
    private void readWords(BufferedReader reader,WordProccessor wordProccessor) throws IOException {
        String s = reader.readLine();
        int count = Integer.valueOf(s);
        for (int i = 0; i < count; i++) {
            s = reader.readLine();
            // Progress indicator for the (large) word section.
            if (i % 10000 == 0) System.out.println("Proccess " + i + " word of " + count);
            String[] wd = s.split(" ");
            String word = wd[0].toLowerCase();
            // Entries starting with "-" are skipped (hyphenated/special entries).
            if (word.startsWith("-")) continue;
            // "#" denotes an empty base: all text comes from the flexia model.
            word = "#".equals(word) ? "" : word;
            List<FlexiaModel> models = wordsFlexias.get(Integer.valueOf(wd[1]));
            // Skip words whose leading flexia code is on the ignore list.
            if (models.size() > 0 && !ingnoredForm.contains(models.get(0).getCode())) {
                WordCard card = new WordCard(cleanString(models.get(0).create(word)));
                for (FlexiaModel fm : models) {
                    card.addFrom(cleanString(fm.create(word)));
                }
                wordProccessor.proccess(card);
            }
        }
    }

    // Maps one Cyrillic character onto another via fixed offsets from
    // RUSSIAN_SMALL_LETTER_OFFSET (1071): code 34 -> code 6.
    // NOTE(review): presumably folds one letter variant (likely "ё" handling)
    // into another — confirm the intended characters.
    private String cleanString(String s){
        return s.replace((char)(34 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET),(char)(6 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET));
    }

    // Skips one counted section: reads the count line, then that many lines.
    private void sckipBlock(BufferedReader reader) throws IOException {
        String s = reader.readLine();
        int count = Integer.valueOf(s);
        for (int i = 0; i < count; i++) {
            s = reader.readLine();
        }
    }

    // Reads the prefix section: one comma-separated, lower-cased list per line.
    private void readPrefix(BufferedReader reader) throws IOException {
        String s = reader.readLine();
        int count = Integer.valueOf(s);
        for (int i = 0; i < count; i++) {
            s = reader.readLine();
            wordPrefixes.add(Arrays.asList(s.toLowerCase().split(",")));
        }
    }

    // Reads the flexia-model section: each line holds "%"-separated flexia
    // descriptions forming one model (list of FlexiaModel).
    private void readFlexias(BufferedReader reader) throws IOException {
        String s = reader.readLine();
        int count = Integer.valueOf(s);
        for (int i = 0; i < count; i++) {
            s = reader.readLine();
            ArrayList<FlexiaModel> flexiaModelArrayList = new ArrayList<FlexiaModel>();
            wordsFlexias.add(flexiaModelArrayList);
            for (String line : s.split("%")) {
                addFlexia(flexiaModelArrayList, line);
            }
        }
    }

    // Parses one "*"-separated flexia description: "suffix*code[*prefix]".
    // Only the two-field form is kept; the three-field (prefixed) form is
    // currently disabled (see commented line).
    private void addFlexia(ArrayList<FlexiaModel> flexiaModelArrayList, String line) {
        String[] fl = line.split("\\*");
        //        if (fl.length == 3)
        //            flexiaModelArrayList.add(new FlexiaModel(fl[1], fl[0].toLowerCase(), fl[2].toLowerCase()));
        if (fl.length == 2) flexiaModelArrayList.add(new FlexiaModel(fl[1], fl[0].toLowerCase(), ""));
    }
}

View File

@ -0,0 +1,47 @@
package org.apache.lucene.russian.morphology.dictonary;
/**
 * One inflection rule of a flexia model: applying it to a word base yields
 * {@code prefix + base + suffix}. The code identifies the grammatical form.
 */
public class FlexiaModel {
    private String code;
    private String suffix;
    private String prefix;

    public FlexiaModel(String code, String suffix, String prefix) {
        this.code = code;
        this.suffix = suffix;
        this.prefix = prefix;
    }

    /** Builds the full word form for the given base. */
    public String create(String s) {
        return prefix + s + suffix;
    }

    public String getCode() {
        return code;
    }

    public void setCode(String code) {
        this.code = code;
    }

    public String getSuffix() {
        return suffix;
    }

    public void setSuffix(String suffix) {
        this.suffix = suffix;
    }

    public String getPrefix() {
        return prefix;
    }

    public void setPrefix(String prefix) {
        this.prefix = prefix;
    }

    @Override
    public String toString() {
        return prefix + " " + suffix;
    }
}

View File

@ -0,0 +1,38 @@
package org.apache.lucene.russian.morphology.dictonary;
import java.util.Set;
import java.util.HashSet;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.FileInputStream;
import java.io.IOException;
/**
 * Reads the list of flexia codes to be ignored during dictionary processing.
 *
 * File format: one entry per line; the first space-separated token of each
 * line is the code. Lines starting with "//" are comments and skipped.
 */
public class IgnoredFormReader {
    // Path to the ignore-list file.
    private String fileName;
    // Dictionary-related files default to the Windows Cyrillic encoding.
    private String fileEncoding = "windows-1251";

    public IgnoredFormReader(String fileName) {
        this.fileName = fileName;
    }

    public IgnoredFormReader(String fileName, String fileEncoding) {
        this.fileName = fileName;
        this.fileEncoding = fileEncoding;
    }

    /**
     * Parses the file and returns the set of ignored form codes.
     *
     * @return first token of every non-comment line, trimmed
     * @throws IOException if the file cannot be opened or read
     */
    public Set<String> getIngnoredFroms() throws IOException {
        BufferedReader bufferedReader = new BufferedReader(
                new InputStreamReader(
                        new FileInputStream(fileName), fileEncoding));
        // BUG FIX: the reader was previously never closed, leaking the file
        // handle on every call (and on any read error).
        try {
            HashSet<String> result = new HashSet<String>();
            String s = bufferedReader.readLine();
            while (s != null) {
                if (!s.startsWith("//")) {
                    result.add(s.trim().split(" ")[0]);
                }
                s = bufferedReader.readLine();
            }
            return result;
        } finally {
            bufferedReader.close();
        }
    }
}

View File

@ -0,0 +1,26 @@
package org.apache.lucene.russian.morphology.dictonary;
import java.util.List;
import java.util.ArrayList;
/**
 * A word together with all of its inflected forms: the canonical (dictionary)
 * form plus the list of word forms generated from the flexia model.
 */
public class WordCard {
    private String canonicalFrom;
    private List<String> wordsFroms = new ArrayList<String>();

    public WordCard(String canonicalFrom) {
        this.canonicalFrom = canonicalFrom;
    }

    /** Appends one inflected form to this card. */
    public void addFrom(String word){
        wordsFroms.add(word);
    }

    public String getCanonicalFrom() {
        return canonicalFrom;
    }

    public List<String> getWordsFroms() {
        return wordsFroms;
    }
}

View File

@ -0,0 +1,9 @@
package org.apache.lucene.russian.morphology.dictonary;
import java.io.IOException;
/**
 * Callback interface for streaming dictionary processing: DirtonaryReader
 * invokes it once for every word card parsed from the dictionary.
 */
public interface WordProccessor {
    /**
     * Handles one word card (canonical form plus all inflected forms).
     *
     * @param wordCard the parsed word and its forms
     * @throws IOException implementations may write results to disk
     */
    public void proccess(WordCard wordCard) throws IOException;
}

View File

@ -0,0 +1,37 @@
package org.apache.lucene.russian.morphology.evristics;
import java.io.FileReader;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.Arrays;
/**
 * Array-backed suffix-replacement heuristic: two parallel arrays map an
 * encoded word-form suffix (key) to the encoded suffix of its normal form
 * (value). Lookup uses binary search, so the key array must be sorted
 * ascending — which holds for files written by Evristic.writeToFile (TreeMap
 * key order).
 */
public class ArrayEvristics {
    // Sorted encoded form suffixes.
    private long[] keys;
    // values[i] is the encoded normal-form suffix for keys[i].
    private long[] values;

    /**
     * Loads the table produced by Evristic.writeToFile: first line is the
     * entry count, then one "key value" pair per line.
     *
     * @throws IOException if the file cannot be read
     */
    public void readFromFile(String fileName) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(fileName));
        // BUG FIX: the reader was previously never closed.
        try {
            int size = Integer.valueOf(reader.readLine());
            keys = new long[size];
            values = new long[size];
            for (int i = 0; i < size; i++) {
                String[] s = reader.readLine().split(" ");
                keys[i] = Long.valueOf(s[0]);
                values[i] = Long.valueOf(s[1]);
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Returns the canonical form of {@code form} by replacing its trailing
     * SUFFIX_LENGTH characters according to the table; returns the form
     * unchanged when its suffix is not in the table.
     */
    public String getCanonicalForm(String form) {
        int startSymbol = form.length() > RussianSuffixDecoderEncoder.SUFFIX_LENGTH ? form.length() - RussianSuffixDecoderEncoder.SUFFIX_LENGTH : 0;
        Long suffix = RussianSuffixDecoderEncoder.encodeLong(form.substring(startSymbol));
        int index = Arrays.binarySearch(keys, suffix);
        // BUG FIX: Arrays.binarySearch returns -(insertionPoint) - 1 for ANY
        // missing key, not just -1. The old "index == -1" check let other
        // negative indices through and crashed on values[index].
        if (index < 0) {
            return form;
        } else {
            String nSuffix = RussianSuffixDecoderEncoder.decodeLong(values[index]);
            return startSymbol > 0 ? form.substring(0, startSymbol) + nSuffix : nSuffix;
        }
    }
}

View File

@ -0,0 +1,54 @@
package org.apache.lucene.russian.morphology.evristics;
import java.util.*;
import java.io.*;
/**
 * TreeMap-backed suffix-replacement heuristic mapping an encoded word-form
 * suffix to the encoded suffix of its normal form. When pairs are added in
 * descending frequency order (see SuffixResearcher), each form suffix keeps
 * its most frequent normal suffix.
 */
public class Evristic {
    // Encoded form suffix -> encoded normal-form suffix; TreeMap keeps keys
    // sorted, which writeToFile relies on to emit a binary-searchable file.
    private TreeMap<Long, Long> encodedSuffixesPairs = new TreeMap<Long, Long>();

    /**
     * Registers one suffix pair; first registration for a form suffix wins,
     * later (less frequent) pairs for the same suffix are ignored.
     */
    public void addEvristic(SuffixEvristic suffixEvristic) {
        Long suffix = RussianSuffixDecoderEncoder.encodeLong(suffixEvristic.getFormSuffix());
        Long longs = encodedSuffixesPairs.get(suffix);
        if (longs == null) {
            encodedSuffixesPairs.put(suffix, RussianSuffixDecoderEncoder.encodeLong(suffixEvristic.getNormalSuffix()));
        }
    }

    /**
     * Returns the normal form of {@code form} by replacing its trailing
     * SUFFIX_LENGTH characters; returns the form unchanged when its suffix is
     * not in the table.
     */
    public String getNormalForm(String form) {
        int startSymbol = form.length() > RussianSuffixDecoderEncoder.SUFFIX_LENGTH ? form.length() - RussianSuffixDecoderEncoder.SUFFIX_LENGTH : 0;
        Long suffix = RussianSuffixDecoderEncoder.encodeLong(form.substring(startSymbol));
        Long normalSuffix = encodedSuffixesPairs.get(suffix);
        if (normalSuffix != null) {
            String nSuffix = RussianSuffixDecoderEncoder.decodeLong(normalSuffix);
            return startSymbol > 0 ? form.substring(0, startSymbol) + nSuffix : nSuffix;
        }
        return form;
    }

    /**
     * Loads pairs written by {@link #writeToFile}. The leading count line
     * splits into a single token and is skipped by the length check.
     *
     * @throws IOException if the file cannot be read
     */
    public void readFromFile(String file) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(file));
        try {
            String s = reader.readLine();
            while (s != null) {
                String[] sfns = s.split(" ");
                if (sfns.length == 2) {
                    // BUG FIX: the value was parsed from sfns[0] (the key)
                    // instead of sfns[1], so every loaded mapping sent a
                    // suffix to itself, making the heuristic a no-op.
                    encodedSuffixesPairs.put(Long.valueOf(sfns[0]), Long.valueOf(sfns[1]));
                }
                s = reader.readLine();
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Writes the table: first the entry count, then one "key value" pair per
     * line in ascending key order (TreeMap iteration order).
     *
     * @throws IOException if the file cannot be written
     */
    public void writeToFile(String file) throws IOException {
        FileWriter writer = new FileWriter(file);
        try {
            writer.write(encodedSuffixesPairs.size() + "\n");
            for (Long k : encodedSuffixesPairs.keySet()) {
                writer.write("" + k + " " + encodedSuffixesPairs.get(k) + "\n");
            }
        } finally {
            writer.close();
        }
    }
}

View File

@ -0,0 +1,6 @@
package org.apache.lucene.russian.morphology.evristics;
// NOTE(review): empty placeholder — presumably intended to hold lemma
// frequency statistics; no implementation yet.
public class LemmasFreq {
}

View File

@ -0,0 +1,60 @@
package org.apache.lucene.russian.morphology.evristics;
/**
 * Packs short Russian suffixes into integers using base-35 positional
 * encoding: each character is stored as its offset from
 * RUSSIAN_SMALL_LETTER_OFFSET. Characters below the offset (e.g. '-') encode
 * as 33; code 34 is folded to 6.
 *
 * The int variants hold up to 6 characters, the long variants up to 12.
 * NOTE(review): decode() lacks the 33 -> '-' mapping that decodeLong() has,
 * and the 6-char limit disagrees with SUFFIX_LENGTH = 7; behavior kept as-is.
 */
public class RussianSuffixDecoderEncoder {
    /** Code point just below the Russian lowercase range. */
    public static final int RUSSIAN_SMALL_LETTER_OFFSET = 1071;
    /** Number of trailing characters the heuristics operate on. */
    public static final int SUFFIX_LENGTH = 7;

    /** Encodes a suffix of at most 6 characters into a base-35 int. */
    static public Integer encode(String string) {
        if (string.length() > 6) throw new RuntimeException("suffix to long");
        int result = 0;
        for (char ch : string.toCharArray()) {
            int code = ch - RUSSIAN_SMALL_LETTER_OFFSET;
            if (code < 0) {
                code = 33;
            }
            if (code == 34) code = 6;
            result = result * 35 + code;
        }
        return result;
    }

    /** Decodes a base-35 int back into its suffix string. */
    static public String decode(Integer suffixN) {
        StringBuilder reversed = new StringBuilder();
        int rest = suffixN;
        while (rest > 35) {
            reversed.append((char) (rest % 35 + RUSSIAN_SMALL_LETTER_OFFSET));
            rest /= 35;
        }
        reversed.append((char) (rest + RUSSIAN_SMALL_LETTER_OFFSET));
        return reversed.reverse().toString();
    }

    /** Encodes a suffix of at most 12 characters into a base-35 long. */
    static public Long encodeLong(String string) {
        if (string.length() > 12) throw new RuntimeException("suffix to long");
        long result = 0L;
        for (char ch : string.toCharArray()) {
            int code = ch - RUSSIAN_SMALL_LETTER_OFFSET;
            if (code < 0) {
                code = 33;
            }
            if (code == 34) code = 6;
            result = result * 35L + code;
        }
        return result;
    }

    /** Decodes a base-35 long back into its suffix; code 33 maps to '-'. */
    static public String decodeLong(Long suffixN) {
        StringBuilder reversed = new StringBuilder();
        long rest = suffixN;
        while (rest > 35) {
            long code = rest % 35 + RUSSIAN_SMALL_LETTER_OFFSET;
            if (code == 33 + RUSSIAN_SMALL_LETTER_OFFSET) code = 45;
            reversed.append((char) code);
            rest /= 35;
        }
        long code = rest + RUSSIAN_SMALL_LETTER_OFFSET;
        if (code == 33 + RUSSIAN_SMALL_LETTER_OFFSET) code = 45;
        reversed.append((char) code);
        return reversed.reverse().toString();
    }
}

View File

@ -0,0 +1,51 @@
package org.apache.lucene.russian.morphology.evristics;
import org.apache.lucene.russian.morphology.dictonary.WordProccessor;
import org.apache.lucene.russian.morphology.dictonary.WordCard;
import java.util.Map;
import java.util.HashMap;
/**
 * WordProccessor that counts how often each (form suffix, normal suffix)
 * pair occurs across the dictionary. Word cards whose canonical form is
 * shorter than the suffix cut point are counted as ignored.
 */
public class StatiticsCollectors implements WordProccessor{
    // Frequency counter per observed suffix pair.
    Map<SuffixEvristic,SuffixCounter> statititics = new HashMap<SuffixEvristic,SuffixCounter>();
    // Number of forms skipped because no suffix pair could be built.
    private Integer ignoredCount = 0;

    /** Counts one suffix pair for every inflected form of the card. */
    public void proccess(WordCard wordCard) {
        String canonical = wordCard.getCanonicalFrom();
        for (String wordForm : wordCard.getWordsFroms()) {
            SuffixEvristic pair = createEvristic(canonical, wordForm);
            if (pair == null) {
                continue;
            }
            SuffixCounter counter = statititics.get(pair);
            if (counter == null) {
                counter = new SuffixCounter(pair);
                statititics.put(pair, counter);
            }
            counter.incrementAmount();
        }
    }

    public Map<SuffixEvristic, SuffixCounter> getStatititics() {
        return statititics;
    }

    // Builds the (form suffix, canonical suffix) pair for one form, or null
    // when the pair cannot be built (counts toward ignoredCount only in the
    // short-canonical-form case, matching the original behavior).
    private SuffixEvristic createEvristic(String word,String form){
        int cut = 0;
        if (form.length() > RussianSuffixDecoderEncoder.SUFFIX_LENGTH) {
            cut = form.length() - RussianSuffixDecoderEncoder.SUFFIX_LENGTH;
        }
        String formSuffix = form.substring(cut);
        if (word.length() < cut) {
            // Canonical form ends before the cut point: nothing comparable.
            ignoredCount++;
            return null;
        }
        String wordSuffix = word.length() > cut ? word.substring(cut) : "";
        if (wordSuffix.length() > 12) {
            // Too long to encode with encodeLong; log and skip.
            System.out.println(word + " " + form);
            return null;
        }
        return new SuffixEvristic(formSuffix, wordSuffix);
    }

    public Integer getIgnoredCount() {
        return ignoredCount;
    }
}

View File

@ -0,0 +1,41 @@
package org.apache.lucene.russian.morphology.evristics;
/**
 * Counts occurrences of one suffix pair. Natural ordering is by DESCENDING
 * count, so sorting an array of counters puts the most frequent pairs first.
 */
public class SuffixCounter implements Comparable{
    private SuffixEvristic suffixEvristic;
    // Occurrence count; kept as Double so compareTo can use Math.signum.
    private Double amount = 0.0;

    public SuffixCounter(SuffixEvristic suffixEvristic) {
        this.suffixEvristic = suffixEvristic;
    }

    /** Records one more occurrence of this suffix pair. */
    public void incrementAmount(){
        amount++;
    }

    public SuffixEvristic getSuffixEvristic() {
        return suffixEvristic;
    }

    public void setSuffixEvristic(SuffixEvristic suffixEvristic) {
        this.suffixEvristic = suffixEvristic;
    }

    public Double getAmnout() {
        return amount;
    }

    public void setAmnout(Double amnout) {
        this.amount = amnout;
    }

    /** Descending-count order; any non-SuffixCounter sorts after this. */
    public int compareTo(Object o) {
        if (!(o instanceof SuffixCounter)) {
            return -1;
        }
        double diff = ((SuffixCounter) o).amount - amount;
        return (int) Math.round(Math.signum(diff));
    }

    @Override
    public String toString() {
        return "" + amount + " " + suffixEvristic.toString();
    }
}

View File

@ -0,0 +1,56 @@
package org.apache.lucene.russian.morphology.evristics;
public class SuffixEvristic {
private String formSuffix;
private String normalSuffix;
public SuffixEvristic(String formSuffix, String normalSuffix) {
this.formSuffix = formSuffix;
this.normalSuffix = normalSuffix;
}
public String getFormSuffix() {
return formSuffix;
}
public void setFormSuffix(String formSuffix) {
this.formSuffix = formSuffix;
}
public String getNormalSuffix() {
return normalSuffix;
}
public void setNormalSuffix(String normalSuffix) {
this.normalSuffix = normalSuffix;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
SuffixEvristic that = (SuffixEvristic) o;
if (!formSuffix.equals(that.formSuffix)) return false;
if (!normalSuffix.equals(that.normalSuffix)) return false;
return true;
}
@Override
public int hashCode() {
int result = formSuffix.hashCode();
result = 31 * result + normalSuffix.hashCode();
return result;
}
@Override
public String toString() {
return "SuffixEvristic{" +
"formSuffix='" + formSuffix + '\'' +
", normalSuffix='" + normalSuffix + '\'' +
'}';
}
}

View File

@ -0,0 +1,38 @@
package org.apache.lucene;
import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
/**
 * Unit test for simple App (JUnit 3 archetype stub).
 */
public class AppTest
    extends TestCase
{
    /**
     * Create the test case
     *
     * @param testName name of the test case
     */
    public AppTest( String testName )
    {
        super( testName );
    }

    /**
     * @return the suite of tests being tested
     */
    public static Test suite()
    {
        return new TestSuite( AppTest.class );
    }

    /**
     * Placeholder test — always passes; no real assertions yet.
     */
    public void testApp()
    {
        assertTrue( true );
    }
}