fixing some spelling errors

git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@100 d817d54c-26ab-11de-abc9-2f7d1455ff7a
2010-10-08 12:18:18 +00:00
parent 76e68a11e0
commit 3de894404c
25 changed files with 55 additions and 342 deletions
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/DictionaryReader.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/DictionaryReader.java
@ -42,25 +42,18 @@ public class DictionaryReader {
        this.filters = filters;
    }
    public DictionaryReader(String fileName, String fileEncoding, Set<String> ignoredForm, List<WordFilter> filters) {
        this.fileName = fileName;
        this.fileEncoding = fileEncoding;
        this.ignoredForm = ignoredForm;
        this.filters = filters;
    }
-
+    public void process(WordProcessor wordProcessor) throws IOException {
-    public void proccess(WordProccessor wordProccessor) throws IOException {
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), fileEncoding));
        readFlexias(bufferedReader);
-        sckipBlock(bufferedReader);
+        skipBlock(bufferedReader);
-        sckipBlock(bufferedReader);
+        skipBlock(bufferedReader);
        readPrefix(bufferedReader);
-        readWords(bufferedReader, wordProccessor);
+        readWords(bufferedReader, wordProcessor);
    }
-    private void readWords(BufferedReader reader, WordProccessor wordProccessor) throws IOException {
+    private void readWords(BufferedReader reader, WordProcessor wordProcessor) throws IOException {
        String s = reader.readLine();
        int count = Integer.valueOf(s);
        int actual = 0;
@ -79,7 +72,7 @@ public class DictionaryReader {
                continue;
            }
-            wordProccessor.process(card);
+            wordProcessor.process(card);
            actual++;
        }
@ -106,11 +99,11 @@ public class DictionaryReader {
    }
-    private void sckipBlock(BufferedReader reader) throws IOException {
+    private void skipBlock(BufferedReader reader) throws IOException {
        String s = reader.readLine();
        int count = Integer.valueOf(s);
        for (int i = 0; i < count; i++) {
-            s = reader.readLine();
+            reader.readLine();
        }
    }
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/GrammarReader.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/GrammarReader.java
@ -25,19 +25,19 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-//todo spleet this class on two.
+
-public class GrammaReader {
+public class GrammarReader {
    private String fileName;
    private String fileEncoding = "windows-1251";
-    private List<String> grammaInfo = new ArrayList<String>();
+    private List<String> grammarInfo = new ArrayList<String>();
    private Map<String, Integer> inverseIndex = new HashMap<String, Integer>();
-    public GrammaReader(String fileName) throws IOException {
+    public GrammarReader(String fileName) throws IOException {
        this.fileName = fileName;
        setUp();
    }
-    public GrammaReader(String fileName, String fileEncoding) throws IOException {
+    public GrammarReader(String fileName, String fileEncoding) throws IOException {
        this.fileName = fileName;
        this.fileEncoding = fileEncoding;
        setUp();
@ -50,27 +50,23 @@ public class GrammaReader {
            line = line.trim();
            if (!line.startsWith("//") && line.length() > 0) {
                String[] strings = line.split(" ", 2);
-                Integer i = grammaInfo.size();
+                Integer i = grammarInfo.size();
                inverseIndex.put(strings[0], i);
-                grammaInfo.add(i, strings[1]);
+                grammarInfo.add(i, strings[1]);
            }
            line = bufferedReader.readLine();
        }
    }
-    public List<String> getGrammaInfo() {
+    public List<String> getGrammarInfo() {
-        return grammaInfo;
+        return grammarInfo;
    }
-    public String[] getGrammaInfoAsArray() {
+    public String[] getGrammarInfoAsArray() {
-        return grammaInfo.toArray(new String[grammaInfo.size()]);
+        return grammarInfo.toArray(new String[grammarInfo.size()]);
    }
-    public Map<String, Integer> getGrammInversIndex() {
+    public Map<String, Integer> getGrammarInverseIndex() {
        return inverseIndex;
    }
    public void setInverseIndex(Map<String, Integer> inverseIndex) {
        this.inverseIndex = inverseIndex;
    }
 }
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/StatisticsCollector.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/StatisticsCollector.java
@ -26,16 +26,16 @@ import java.util.*;
 //todo made refactoring this class
-public class StatisticsCollector implements WordProccessor {
+public class StatisticsCollector implements WordProcessor {
    private TreeMap<String, Set<Heuristic>> inverseIndex = new TreeMap<String, Set<Heuristic>>();
    private Map<Set<Heuristic>, Integer> ruleInverseIndex = new HashMap<Set<Heuristic>, Integer>();
    private List<Set<Heuristic>> rules = new ArrayList<Set<Heuristic>>();
-    private GrammaReader grammaReader;
+    private GrammarReader grammarReader;
    private LetterDecoderEncoder decoderEncoder;
-    public StatisticsCollector(GrammaReader grammaReader, LetterDecoderEncoder decoderEncoder) {
+    public StatisticsCollector(GrammarReader grammarReader, LetterDecoderEncoder decoderEncoder) {
-        this.grammaReader = grammaReader;
+        this.grammarReader = grammarReader;
        this.decoderEncoder = decoderEncoder;
    }
@ -115,7 +115,7 @@ public class StatisticsCollector implements WordProccessor {
                prevSet = currentSet;
            }
        }
-        MorphologyImpl morphology = new MorphologyImpl(ints, rulesId, heuristics, grammaReader.getGrammaInfoAsArray());
+        MorphologyImpl morphology = new MorphologyImpl(ints, rulesId, heuristics, grammarReader.getGrammarInfoAsArray());
        morphology.writeToFile(fileName);
    }
@ -134,8 +134,8 @@ public class StatisticsCollector implements WordProccessor {
        Integer length = getCommonLength(form, normalForm);
        Integer actualSuffixLengh = form.length() - length;
        String actualNormalSuffix = normalForm.substring(length);
-        Integer integer = grammaReader.getGrammInversIndex().get(fm.getCode());
+        Integer integer = grammarReader.getGrammarInverseIndex().get(fm.getCode());
-        Integer nf = grammaReader.getGrammInversIndex().get(normalSuffixForm);
+        Integer nf = grammarReader.getGrammarInverseIndex().get(normalSuffixForm);
        return new Heuristic((byte) actualSuffixLengh.intValue(), actualNormalSuffix, (short) integer.intValue(), (short) nf.intValue());
    }
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordProccessor.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordProccessor.java
@ -20,9 +20,8 @@ import java.io.IOException;
 /**
 * Interface allows get information from
 * {@org.apache.lucene.russian.morphology.dictonary.DirtonaryReader}.
 */
-public interface WordProccessor {
+public interface WordProcessor {
    public void process(WordCard wordCard) throws IOException;
 }
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/EnglishHeuristicBuilder.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/EnglishHeuristicBuilder.java
@ -28,14 +28,14 @@ import java.util.List;
 public class EnglishHeuristicBuilder {
    public static void main(String[] args) throws IOException {
-        GrammaReader grammaInfo = new GrammaReader("dictonary/Dicts/Morph/egramtab.tab");
+        GrammarReader grammarInfo = new GrammarReader("dictonary/Dicts/Morph/egramtab.tab");
        EnglishLetterDecoderEncoder decoderEncoder = new EnglishLetterDecoderEncoder();
        List<WordFilter> filters = Arrays.asList(new WordStringCleaner(decoderEncoder), new WordCleaner(decoderEncoder));
        DictionaryReader dictionaryReader = new DictionaryReader("dictonary/Dicts/SrcMorph/EngSrc/morphs.mrd", new HashSet<String>(), filters);
-        StatisticsCollector statisticsCollector = new StatisticsCollector(grammaInfo, decoderEncoder);
+        StatisticsCollector statisticsCollector = new StatisticsCollector(grammarInfo, decoderEncoder);
-        dictionaryReader.proccess(statisticsCollector);
+        dictionaryReader.process(statisticsCollector);
        statisticsCollector.saveHeuristic("english/src/main/resources/org/apache/lucene/morphology/english/morph.info");
    }
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/RussianHeuristicBuilder.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/RussianHeuristicBuilder.java
@ -27,14 +27,14 @@ import java.util.List;
 public class RussianHeuristicBuilder {
    public static void main(String[] args) throws IOException {
-        GrammaReader grammaInfo = new GrammaReader("dictonary/Dicts/Morph/rgramtab.tab");
+        GrammarReader grammarInfo = new GrammarReader("dictonary/Dicts/Morph/rgramtab.tab");
        RussianLetterDecoderEncoder decoderEncoder = new RussianLetterDecoderEncoder();
        List<WordFilter> filters = Arrays.asList(new WordStringCleaner(decoderEncoder), new WordCleaner(decoderEncoder));
        DictionaryReader dictionaryReader = new DictionaryReader("dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd", new HashSet<String>(), filters);
-        StatisticsCollector statisticsCollector = new StatisticsCollector(grammaInfo, decoderEncoder);
+        StatisticsCollector statisticsCollector = new StatisticsCollector(grammarInfo, decoderEncoder);
-        dictionaryReader.proccess(statisticsCollector);
+        dictionaryReader.process(statisticsCollector);
        statisticsCollector.saveHeuristic("russian/src/main/resources/org/apache/lucene/morphology/russian/morph.info");
    }
--- a/dictionary-reader/src/test/java/org/apache/lucene/TestAllWords.java
+++ b/dictionary-reader/src/test/java/org/apache/lucene/TestAllWords.java
@ -67,9 +67,9 @@ public class TestAllWords {
    }
    private void testFullGramma(final Morphology morphology, LetterDecoderEncoder decoderEncoder, String pathToGramma, String pathToDict) throws IOException {
-        GrammaReader grammaInfo = new GrammaReader(pathToGramma);
+        GrammarReader grammarInfo = new GrammarReader(pathToGramma);
-        final List<String> morphInfo = grammaInfo.getGrammaInfo();
+        final List<String> morphInfo = grammarInfo.getGrammarInfo();
-        final Map<String, Integer> inversIndex = grammaInfo.getGrammInversIndex();
+        final Map<String, Integer> inversIndex = grammarInfo.getGrammarInverseIndex();
        List<WordFilter> filters = Arrays.asList(new WordStringCleaner(decoderEncoder), new WordCleaner(decoderEncoder));
@ -79,7 +79,7 @@ public class TestAllWords {
        final AtomicLong wordCount = new AtomicLong(0);
        Long startTime = System.currentTimeMillis();
-        dictionaryReader.proccess(new WordProccessor() {
+        dictionaryReader.process(new WordProcessor() {
            public void process(WordCard wordCard) throws IOException {
                String word = wordCard.getBase() + wordCard.getCanonicalSuffix();
                for (FlexiaModel fm : wordCard.getWordsForms()) {
@ -125,7 +125,7 @@ public class TestAllWords {
        Long startTime = System.currentTimeMillis();
        DictionaryReader dictionaryReader = new DictionaryReader(pathToDic, new HashSet<String>(), filters);
-        dictionaryReader.proccess(new WordProccessor() {
+        dictionaryReader.process(new WordProcessor() {
            public void process(WordCard wordCard) throws IOException {
                String word = wordCard.getBase() + wordCard.getCanonicalSuffix();
                for (FlexiaModel fm : wordCard.getWordsForms()) {
--- a/dictionary-reader/src/test/java/org/apache/lucene/morphology/AnalayzersTest.java
+++ b/dictionary-reader/src/test/java/org/apache/lucene/morphology/AnalayzersTest.java
@ -32,13 +32,13 @@ import static org.hamcrest.Matchers.equalTo;
 import static org.junit.Assert.assertThat;
-public class AnalayzersTest {
+public class AnalyzersTest {
    @Test
    public void englishAnalyzerShouldGiveCorrectWords() throws IOException {
        Analyzer morphlogyAnalyzer = new EnglishAnalyzer();
-        String answerPath = "/english/englsih-analayzer-answer.txt";
+        String answerPath = "/english/english-analyzer-answer.txt";
-        String testPath = "/english/englsih-analayzer-data.txt";
+        String testPath = "/english/english-analyzer-data.txt";
        testAnalayzer(morphlogyAnalyzer, answerPath, testPath);
    }
@ -46,8 +46,8 @@ public class AnalayzersTest {
    @Test
    public void shoudGiveCorretWords() throws IOException {
        Analyzer morphlogyAnalyzer = new RussianAnalyzer();
-        String answerPath = "/russian/russian-analayzer-answer.txt";
+        String answerPath = "/russian/russian-analyzer-answer.txt";
-        String testPath = "/russian/russian-analayzer-data.txt";
+        String testPath = "/russian/russian-analyzer-data.txt";
        testAnalayzer(morphlogyAnalyzer, answerPath, testPath);
    }
--- a/dictionary-reader/src/test/resources/english/englsih-analayzer-answer.txt
+++ b/dictionary-reader/src/test/resources/english/englsih-analayzer-answer.txt
--- a/dictionary-reader/src/test/resources/english/englsih-analayzer-data.txt
+++ b/dictionary-reader/src/test/resources/english/englsih-analayzer-data.txt
--- a/dictionary-reader/src/test/resources/russian/russian-analayzer-answer.txt
+++ b/dictionary-reader/src/test/resources/russian/russian-analayzer-answer.txt
--- a/dictionary-reader/src/test/resources/russian/russian-analayzer-data.txt
+++ b/dictionary-reader/src/test/resources/russian/russian-analayzer-data.txt
--- a/english/src/test/java/org/apache/lucene/morphology/english/EnglishAnalayzerTest.java
+++ b/english/src/test/java/org/apache/lucene/morphology/english/EnglishAnalayzerTest.java
@ -1,58 +0,0 @@
 /**
 * Copyright 2009 Alexander Kuznetsov
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.lucene.morphology.english;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import static org.hamcrest.Matchers.equalTo;
 import static org.junit.Assert.assertThat;
 import org.junit.Test;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.util.Arrays;
 import java.util.HashSet;
 public class EnglishAnalayzerTest {
    @Test
    public void shouldGiveCorrectWords() throws IOException {
        InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/english/englsih-analayzer-answer.txt");
        BufferedReader breader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
        String[] strings = breader.readLine().replaceAll(" +", " ").trim().split(" ");
        HashSet<String> answer = new HashSet<String>(Arrays.asList(strings));
        stream.close();
        EnglishAnalyzer morphlogyAnalyzer = new EnglishAnalyzer();
        stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/english/englsih-analayzer-data.txt");
        InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
        TokenStream tokenStream = morphlogyAnalyzer.tokenStream(null, reader);
        HashSet<String> result = new HashSet<String>();
        while (tokenStream.incrementToken()) {
            TermAttribute attribute1 = tokenStream.getAttribute(TermAttribute.class);
            result.add(attribute1.term());
        }
        stream.close();
        assertThat(result, equalTo(answer));
    }
 }
--- a/english/src/test/java/org/apache/lucene/morphology/english/EnglishLuceneMorphTest.java
+++ b/english/src/test/java/org/apache/lucene/morphology/english/EnglishLuceneMorphTest.java
@ -1,55 +0,0 @@
 /**
 * Copyright 2009 Alexander Kuznetsov
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.lucene.morphology.english;
 import org.apache.lucene.morphology.LuceneMorphology;
 import static org.hamcrest.Matchers.equalTo;
 import static org.junit.Assert.assertThat;
 import org.junit.Before;
 import org.junit.Test;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.util.HashSet;
 import java.util.Set;
 public class EnglishLuceneMorphTest {
    private LuceneMorphology luceneMorph;
    @Before
    public void setUp() throws IOException {
        luceneMorph = new LuceneMorphology(this.getClass().getResourceAsStream("/org/apache/lucene/morphology/english/morph.info"), new EnglishLetterDecoderEncoder());
    }
    @Test
    public void shoudGetCorrentMorphInfo() throws IOException {
        InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/english/english-morphology-test.txt");
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
        String s = bufferedReader.readLine();
        while (s != null) {
            String[] qa = s.trim().split(" ");
            Set<String> result = new HashSet<String>();
            for (int i = 1; i < qa.length; i++) {
                result.add(qa[i]);
            }
            Set<String> stringList = new HashSet<String>(luceneMorph.getNormalForms(qa[0]));
            assertThat(stringList, equalTo(result));
            s = bufferedReader.readLine();
        }
    }
 }
--- a/english/src/test/resources/org/apache/lucene/morphology/english/english-morphology-test.txt
+++ b/english/src/test/resources/org/apache/lucene/morphology/english/english-morphology-test.txt
@ -1,8 +0,0 @@
 purchases purchase
 existing exist
 was be
 men man
 bore bore bear
 grown grow grown
 came come
 md md
--- a/english/src/test/resources/org/apache/lucene/morphology/english/englsih-analayzer-answer.txt
+++ b/english/src/test/resources/org/apache/lucene/morphology/english/englsih-analayzer-answer.txt
@ -1 +0,0 @@
 following follow the instruction exactly will be help ensure the best well good result
--- a/english/src/test/resources/org/apache/lucene/morphology/english/englsih-analayzer-data.txt
+++ b/english/src/test/resources/org/apache/lucene/morphology/english/englsih-analayzer-data.txt
@ -1 +0,0 @@
 Following the instructions exactly will help ensure the best results
--- a/morph/src/main/java/org/apache/lucene/morphology/MorphologyImpl.java
+++ b/morph/src/main/java/org/apache/lucene/morphology/MorphologyImpl.java
@ -18,7 +18,6 @@ package org.apache.lucene.morphology;
 import java.io.*;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
@ -26,7 +25,7 @@ public class MorphologyImpl implements Morphology {
    protected int[][] separators;
    protected short[] rulesId;
    protected Heuristic[][] rules;
-    protected String[] grammaInfo;
+    protected String[] grammarInfo;
    protected LetterDecoderEncoder decoderEncoder;
@ -40,27 +39,11 @@ public class MorphologyImpl implements Morphology {
        this.decoderEncoder = decoderEncoder;
    }
-    public MorphologyImpl(int[][] separators, short[] rulesId, Heuristic[][] rules, String[] grammaInfo) {
+    public MorphologyImpl(int[][] separators, short[] rulesId, Heuristic[][] rules, String[] grammarInfo) {
        this.separators = separators;
        this.rulesId = rulesId;
        this.rules = rules;
-        this.grammaInfo = grammaInfo;
+        this.grammarInfo = grammarInfo;
    }
-    public int[][] getSeparators() {
-        return separators;
-    }
-    public short[] getRulesId() {
-        return rulesId;
-    }
-    public Heuristic[][] getRules() {
-        return rules;
-    }
-    public String[] getGrammaInfo() {
-        return grammaInfo;
-    }
    public List<String> getNormalForms(String s) {
@ -78,7 +61,7 @@ public class MorphologyImpl implements Morphology {
        int[] ints = decoderEncoder.encodeToArray(revertWord(s));
        int ruleId = findRuleId(ints);
        for (Heuristic h : rules[rulesId[ruleId]]) {
-            result.add(h.transformWord(s) + "|" + grammaInfo[h.getFormMorphInfo()]);
+            result.add(h.transformWord(s) + "|" + grammarInfo[h.getFormMorphInfo()]);
        }
        return result;
    }
@ -135,8 +118,8 @@ public class MorphologyImpl implements Morphology {
                writer.write(heuristic.toString() + "\n");
            }
        }
-        writer.write(grammaInfo.length + "\n");
+        writer.write(grammarInfo.length + "\n");
-        for (String s : grammaInfo) {
+        for (String s : grammarInfo) {
            writer.write(s + "\n");
        }
        writer.close();
@ -166,9 +149,9 @@ public class MorphologyImpl implements Morphology {
        Integer amount;
        s = bufferedReader.readLine();
        amount = Integer.valueOf(s);
-        grammaInfo = new String[amount];
+        grammarInfo = new String[amount];
        for (int i = 0; i < amount; i++) {
-            grammaInfo[i] = bufferedReader.readLine();
+            grammarInfo[i] = bufferedReader.readLine();
        }
    }
--- a/morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphologyAnalyzer.java
+++ b/morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphologyAnalyzer.java
@ -48,6 +48,6 @@ public class MorphologyAnalyzer extends Analyzer {
        TokenStream result = new StandardTokenizer(Version.LUCENE_30, reader);
        result = new StandardFilter(result);
        result = new LowerCaseFilter(result);
-        return new MorphlogyFilter(result, luceneMorph);
+        return new MorphologyFilter(result, luceneMorph);
    }
 }
--- a/morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphologyFilter.java
+++ b/morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphologyFilter.java
@ -25,12 +25,12 @@ import java.io.IOException;
 import java.util.Iterator;
-public class MorphlogyFilter extends TokenFilter {
+public class MorphologyFilter extends TokenFilter {
    private LuceneMorphology luceneMorph;
    private Iterator<String> iterator;
    private TermAttribute termAtt;
-    public MorphlogyFilter(TokenStream tokenStream, LuceneMorphology luceneMorph) {
+    public MorphologyFilter(TokenStream tokenStream, LuceneMorphology luceneMorph) {
        super(tokenStream);
        this.luceneMorph = luceneMorph;
        termAtt = addAttribute(TermAttribute.class);
--- a/russian/src/test/java/org/apache/lucene/morphology/russian/RussianAnalayzerTest.java
+++ b/russian/src/test/java/org/apache/lucene/morphology/russian/RussianAnalayzerTest.java
@ -1,59 +0,0 @@
 /**
 * Copyright 2009 Alexander Kuznetsov
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.lucene.morphology.russian;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import static org.hamcrest.Matchers.equalTo;
 import static org.junit.Assert.assertThat;
 import org.junit.Test;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.util.Arrays;
 import java.util.HashSet;
 public class RussianAnalayzerTest {
    @Test
    public void shoudGiveCorretWords() throws IOException {
        InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/russian-analayzer-answer.txt");
        BufferedReader breader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
        String[] strings = breader.readLine().replaceAll(" +", " ").trim().split(" ");
        HashSet<String> answer = new HashSet<String>(Arrays.asList(strings));
        stream.close();
        RussianAnalyzer morphlogyAnalyzer = new RussianAnalyzer();
        stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/russian-analayzer-data.txt");
        InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
        TokenStream tokenStream = morphlogyAnalyzer.tokenStream(null, reader);
        HashSet<String> result = new HashSet<String>();
        while (tokenStream.incrementToken()) {
            TermAttribute attribute1 = tokenStream.getAttribute(TermAttribute.class);
            result.add(attribute1.term());
        }
        stream.close();
        assertThat(result, equalTo(answer));
    }
 }
--- a/russian/src/test/java/org/apache/lucene/morphology/russian/RussianLuceneMorphTest.java
+++ b/russian/src/test/java/org/apache/lucene/morphology/russian/RussianLuceneMorphTest.java
@ -1,55 +0,0 @@
 /**
 * Copyright 2009 Alexander Kuznetsov
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.lucene.morphology.russian;
 import org.apache.lucene.morphology.LuceneMorphology;
 import static org.hamcrest.Matchers.equalTo;
 import static org.junit.Assert.assertThat;
 import org.junit.Before;
 import org.junit.Test;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.util.HashSet;
 import java.util.Set;
 public class RussianLuceneMorphTest {
    private LuceneMorphology luceneMorph;
    @Before
    public void setUp() throws IOException {
        luceneMorph = new LuceneMorphology(this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/morph.info"), new RussianLetterDecoderEncoder());
    }
    @Test
    public void shoudGetCorrentMorphInfo() throws IOException {
        InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/russian-morphology-test.txt");
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
        String s = bufferedReader.readLine();
        while (s != null) {
            String[] qa = s.trim().split(" ");
            Set<String> result = new HashSet<String>();
            for (int i = 1; i < qa.length; i++) {
                result.add(qa[i]);
            }
            Set<String> stringList = new HashSet<String>(luceneMorph.getNormalForms(qa[0]));
            assertThat(stringList, equalTo(result));
            s = bufferedReader.readLine();
        }
    }
 }
--- a/russian/src/test/resources/org/apache/lucene/morphology/russian/russian-analayzer-answer.txt
+++ b/russian/src/test/resources/org/apache/lucene/morphology/russian/russian-analayzer-answer.txt
@ -1 +0,0 @@
 в результат крушение погибнуть командир отряд специальный назначение пря при переть гувд ростовский область полковник милиция михаил перов и предприниматель
--- a/russian/src/test/resources/org/apache/lucene/morphology/russian/russian-analayzer-data.txt
+++ b/russian/src/test/resources/org/apache/lucene/morphology/russian/russian-analayzer-data.txt
@ -1 +0,0 @@
 В результате крушения погибли командир отряда специального назначения при ГУВД Ростовской области полковник милиции Михаил Перов и предприниматель
--- a/russian/src/test/resources/org/apache/lucene/morphology/russian/russian-morphology-test.txt
+++ b/russian/src/test/resources/org/apache/lucene/morphology/russian/russian-morphology-test.txt
@ -1,19 +0,0 @@
 еду еда ехать
 тестов тест
 вина вино вина
 вино вино
 ехать ехать
 ананасов ананас ананасовый
 сухой сухой
 дураков дурак
 пушка пушка пушок
 пушок пушок
 пушек пушка
 козлов козлов козловый козел
 жуков жуков жук
 красив красить красивый
 красивая красивый
 тосклив тоскливый
 лучший хороший
 на на
 тест тест тесто
		`@ -1 +0,0 @@`
			`following follow the instruction exactly will be help ensure the best well good result`
		`@ -1 +0,0 @@`
			`Following the instructions exactly will help ensure the best results`
		`@ -1 +0,0 @@`
			`в результат крушение погибнуть командир отряд специальный назначение пря при переть гувд ростовский область полковник милиция михаил перов и предприниматель`
		`@ -1 +0,0 @@`
			`В результате крушения погибли командир отряда специального назначения при ГУВД Ростовской области полковник милиции Михаил Перов и предприниматель`