From 3de894404c41f93e1dce0f375c67b1360b580ad9 Mon Sep 17 00:00:00 2001 From: "Alexander.A.Kuznetsov" Date: Fri, 8 Oct 2010 12:18:18 +0000 Subject: [PATCH] fixing some spelling errors git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@100 d817d54c-26ab-11de-abc9-2f7d1455ff7a --- .../dictionary/DictionaryReader.java | 23 +++----- .../{GrammaReader.java => GrammarReader.java} | 28 ++++----- .../dictionary/StatisticsCollector.java | 14 ++--- ...WordProccessor.java => WordProcessor.java} | 3 +- .../generator/EnglishHeuristicBuilder.java | 6 +- .../generator/RussianHeuristicBuilder.java | 6 +- .../java/org/apache/lucene/TestAllWords.java | 10 ++-- ...AnalayzersTest.java => AnalyzersTest.java} | 10 ++-- ...answer.txt => english-analyzer-answer.txt} | 0 ...zer-data.txt => english-analyzer-data.txt} | 0 ...answer.txt => russian-analyzer-answer.txt} | 0 ...zer-data.txt => russian-analyzer-data.txt} | 0 .../english/EnglishAnalayzerTest.java | 58 ------------------ .../english/EnglishLuceneMorphTest.java | 55 ----------------- .../english/english-morphology-test.txt | 8 --- .../english/englsih-analayzer-answer.txt | 1 - .../english/englsih-analayzer-data.txt | 1 - .../lucene/morphology/MorphologyImpl.java | 33 +++-------- .../analyzer/MorphologyAnalyzer.java | 2 +- ...hlogyFilter.java => MorphologyFilter.java} | 4 +- .../russian/RussianAnalayzerTest.java | 59 ------------------- .../russian/RussianLuceneMorphTest.java | 55 ----------------- .../russian/russian-analayzer-answer.txt | 1 - .../russian/russian-analayzer-data.txt | 1 - .../russian/russian-morphology-test.txt | 19 ------ 25 files changed, 55 insertions(+), 342 deletions(-) rename dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/{GrammaReader.java => GrammarReader.java} (69%) rename dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/{WordProccessor.java => WordProcessor.java} (88%) rename dictionary-reader/src/test/java/org/apache/lucene/morphology/{AnalayzersTest.java => AnalyzersTest.java} (87%) rename dictionary-reader/src/test/resources/english/{englsih-analayzer-answer.txt => english-analyzer-answer.txt} (100%) rename dictionary-reader/src/test/resources/english/{englsih-analayzer-data.txt => english-analyzer-data.txt} (100%) rename dictionary-reader/src/test/resources/russian/{russian-analayzer-answer.txt => russian-analyzer-answer.txt} (100%) rename dictionary-reader/src/test/resources/russian/{russian-analayzer-data.txt => russian-analyzer-data.txt} (100%) delete mode 100644 english/src/test/java/org/apache/lucene/morphology/english/EnglishAnalayzerTest.java delete mode 100644 english/src/test/java/org/apache/lucene/morphology/english/EnglishLuceneMorphTest.java delete mode 100644 english/src/test/resources/org/apache/lucene/morphology/english/english-morphology-test.txt delete mode 100644 english/src/test/resources/org/apache/lucene/morphology/english/englsih-analayzer-answer.txt delete mode 100644 english/src/test/resources/org/apache/lucene/morphology/english/englsih-analayzer-data.txt rename morph/src/main/java/org/apache/lucene/morphology/analyzer/{MorphlogyFilter.java => MorphologyFilter.java} (89%) delete mode 100644 russian/src/test/java/org/apache/lucene/morphology/russian/RussianAnalayzerTest.java delete mode 100644 russian/src/test/java/org/apache/lucene/morphology/russian/RussianLuceneMorphTest.java delete mode 100644 russian/src/test/resources/org/apache/lucene/morphology/russian/russian-analayzer-answer.txt delete mode 100644 russian/src/test/resources/org/apache/lucene/morphology/russian/russian-analayzer-data.txt delete mode 100644 russian/src/test/resources/org/apache/lucene/morphology/russian/russian-morphology-test.txt diff --git a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/DictionaryReader.java b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/DictionaryReader.java index 280beda..9197ddf 100644 --- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/DictionaryReader.java +++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/DictionaryReader.java @@ -42,25 +42,18 @@ public class DictionaryReader { this.filters = filters; } - public DictionaryReader(String fileName, String fileEncoding, Set ignoredForm, List filters) { - this.fileName = fileName; - this.fileEncoding = fileEncoding; - this.ignoredForm = ignoredForm; - this.filters = filters; - } - - public void proccess(WordProccessor wordProccessor) throws IOException { + public void process(WordProcessor wordProcessor) throws IOException { BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), fileEncoding)); readFlexias(bufferedReader); - sckipBlock(bufferedReader); - sckipBlock(bufferedReader); + skipBlock(bufferedReader); + skipBlock(bufferedReader); readPrefix(bufferedReader); - readWords(bufferedReader, wordProccessor); + readWords(bufferedReader, wordProcessor); } - private void readWords(BufferedReader reader, WordProccessor wordProccessor) throws IOException { + private void readWords(BufferedReader reader, WordProcessor wordProcessor) throws IOException { String s = reader.readLine(); int count = Integer.valueOf(s); int actual = 0; @@ -79,7 +72,7 @@ public class DictionaryReader { continue; } - wordProccessor.process(card); + wordProcessor.process(card); actual++; } @@ -106,11 +99,11 @@ public class DictionaryReader { } - private void sckipBlock(BufferedReader reader) throws IOException { + private void skipBlock(BufferedReader reader) throws IOException { String s = reader.readLine(); int count = Integer.valueOf(s); for (int i = 0; i < count; i++) { - s = reader.readLine(); + reader.readLine(); } } diff --git a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/GrammaReader.java b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/GrammarReader.java similarity index 69% rename from dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/GrammaReader.java rename to dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/GrammarReader.java index 691b872..d56da6a 100644 --- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/GrammaReader.java +++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/GrammarReader.java @@ -25,19 +25,19 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -//todo spleet this class on two. -public class GrammaReader { + +public class GrammarReader { private String fileName; private String fileEncoding = "windows-1251"; - private List grammaInfo = new ArrayList(); + private List grammarInfo = new ArrayList(); private Map inverseIndex = new HashMap(); - public GrammaReader(String fileName) throws IOException { + public GrammarReader(String fileName) throws IOException { this.fileName = fileName; setUp(); } - public GrammaReader(String fileName, String fileEncoding) throws IOException { + public GrammarReader(String fileName, String fileEncoding) throws IOException { this.fileName = fileName; this.fileEncoding = fileEncoding; setUp(); @@ -50,27 +50,23 @@ public class GrammaReader { line = line.trim(); if (!line.startsWith("//") && line.length() > 0) { String[] strings = line.split(" ", 2); - Integer i = grammaInfo.size(); + Integer i = grammarInfo.size(); inverseIndex.put(strings[0], i); - grammaInfo.add(i, strings[1]); + grammarInfo.add(i, strings[1]); } line = bufferedReader.readLine(); } } - public List getGrammaInfo() { - return grammaInfo; + public List getGrammarInfo() { + return grammarInfo; } - public String[] getGrammaInfoAsArray() { - return grammaInfo.toArray(new String[grammaInfo.size()]); + public String[] getGrammarInfoAsArray() { + return grammarInfo.toArray(new String[grammarInfo.size()]); } - public Map getGrammInversIndex() { + public Map getGrammarInverseIndex() { return inverseIndex; } - - public void setInverseIndex(Map inverseIndex) { - this.inverseIndex = inverseIndex; - } } diff --git a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/StatisticsCollector.java b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/StatisticsCollector.java index a9fef93..be65edc 100644 --- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/StatisticsCollector.java +++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/StatisticsCollector.java @@ -26,16 +26,16 @@ import java.util.*; //todo made refactoring this class -public class StatisticsCollector implements WordProccessor { +public class StatisticsCollector implements WordProcessor { private TreeMap> inverseIndex = new TreeMap>(); private Map, Integer> ruleInverseIndex = new HashMap, Integer>(); private List> rules = new ArrayList>(); - private GrammaReader grammaReader; + private GrammarReader grammarReader; private LetterDecoderEncoder decoderEncoder; - public StatisticsCollector(GrammaReader grammaReader, LetterDecoderEncoder decoderEncoder) { - this.grammaReader = grammaReader; + public StatisticsCollector(GrammarReader grammarReader, LetterDecoderEncoder decoderEncoder) { + this.grammarReader = grammarReader; this.decoderEncoder = decoderEncoder; } @@ -115,7 +115,7 @@ public class StatisticsCollector implements WordProccessor { prevSet = currentSet; } } - MorphologyImpl morphology = new MorphologyImpl(ints, rulesId, heuristics, grammaReader.getGrammaInfoAsArray()); + MorphologyImpl morphology = new MorphologyImpl(ints, rulesId, heuristics, grammarReader.getGrammarInfoAsArray()); morphology.writeToFile(fileName); } @@ -134,8 +134,8 @@ public class StatisticsCollector implements WordProccessor { Integer length = getCommonLength(form, normalForm); Integer actualSuffixLengh = form.length() - length; String actualNormalSuffix = normalForm.substring(length); - Integer integer = grammaReader.getGrammInversIndex().get(fm.getCode()); - Integer nf = grammaReader.getGrammInversIndex().get(normalSuffixForm); + Integer integer = grammarReader.getGrammarInverseIndex().get(fm.getCode()); + Integer nf = grammarReader.getGrammarInverseIndex().get(normalSuffixForm); return new Heuristic((byte) actualSuffixLengh.intValue(), actualNormalSuffix, (short) integer.intValue(), (short) nf.intValue()); } diff --git a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordProccessor.java b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordProcessor.java similarity index 88% rename from dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordProccessor.java rename to dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordProcessor.java index 846cee0..709bc7a 100644 --- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordProccessor.java +++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordProcessor.java @@ -20,9 +20,8 @@ import java.io.IOException; /** * Interface allows get information from - * {@org.apache.lucene.russian.morphology.dictonary.DirtonaryReader}. */ -public interface WordProccessor { +public interface WordProcessor { public void process(WordCard wordCard) throws IOException; } diff --git a/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/EnglishHeuristicBuilder.java b/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/EnglishHeuristicBuilder.java index 6cd4b9b..49ceeb4 100644 --- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/EnglishHeuristicBuilder.java +++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/EnglishHeuristicBuilder.java @@ -28,14 +28,14 @@ import java.util.List; public class EnglishHeuristicBuilder { public static void main(String[] args) throws IOException { - GrammaReader grammaInfo = new GrammaReader("dictonary/Dicts/Morph/egramtab.tab"); + GrammarReader grammarInfo = new GrammarReader("dictonary/Dicts/Morph/egramtab.tab"); EnglishLetterDecoderEncoder decoderEncoder = new EnglishLetterDecoderEncoder(); List filters = Arrays.asList(new WordStringCleaner(decoderEncoder), new WordCleaner(decoderEncoder)); DictionaryReader dictionaryReader = new DictionaryReader("dictonary/Dicts/SrcMorph/EngSrc/morphs.mrd", new HashSet(), filters); - StatisticsCollector statisticsCollector = new StatisticsCollector(grammaInfo, decoderEncoder); - dictionaryReader.proccess(statisticsCollector); + StatisticsCollector statisticsCollector = new StatisticsCollector(grammarInfo, decoderEncoder); + dictionaryReader.process(statisticsCollector); statisticsCollector.saveHeuristic("english/src/main/resources/org/apache/lucene/morphology/english/morph.info"); } diff --git a/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/RussianHeuristicBuilder.java b/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/RussianHeuristicBuilder.java index aadbde2..2dfa8af 100644 --- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/RussianHeuristicBuilder.java +++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/RussianHeuristicBuilder.java @@ -27,14 +27,14 @@ import java.util.List; public class RussianHeuristicBuilder { public static void main(String[] args) throws IOException { - GrammaReader grammaInfo = new GrammaReader("dictonary/Dicts/Morph/rgramtab.tab"); + GrammarReader grammarInfo = new GrammarReader("dictonary/Dicts/Morph/rgramtab.tab"); RussianLetterDecoderEncoder decoderEncoder = new RussianLetterDecoderEncoder(); List filters = Arrays.asList(new WordStringCleaner(decoderEncoder), new WordCleaner(decoderEncoder)); DictionaryReader dictionaryReader = new DictionaryReader("dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd", new HashSet(), filters); - StatisticsCollector statisticsCollector = new StatisticsCollector(grammaInfo, decoderEncoder); - dictionaryReader.proccess(statisticsCollector); + StatisticsCollector statisticsCollector = new StatisticsCollector(grammarInfo, decoderEncoder); + dictionaryReader.process(statisticsCollector); statisticsCollector.saveHeuristic("russian/src/main/resources/org/apache/lucene/morphology/russian/morph.info"); } diff --git a/dictionary-reader/src/test/java/org/apache/lucene/TestAllWords.java b/dictionary-reader/src/test/java/org/apache/lucene/TestAllWords.java index 54c64fa..e5a02a5 100644 --- a/dictionary-reader/src/test/java/org/apache/lucene/TestAllWords.java +++ b/dictionary-reader/src/test/java/org/apache/lucene/TestAllWords.java @@ -67,9 +67,9 @@ public class TestAllWords { } private void testFullGramma(final Morphology morphology, LetterDecoderEncoder decoderEncoder, String pathToGramma, String pathToDict) throws IOException { - GrammaReader grammaInfo = new GrammaReader(pathToGramma); - final List morphInfo = grammaInfo.getGrammaInfo(); - final Map inversIndex = grammaInfo.getGrammInversIndex(); + GrammarReader grammarInfo = new GrammarReader(pathToGramma); + final List morphInfo = grammarInfo.getGrammarInfo(); + final Map inversIndex = grammarInfo.getGrammarInverseIndex(); List filters = Arrays.asList(new WordStringCleaner(decoderEncoder), new WordCleaner(decoderEncoder)); @@ -79,7 +79,7 @@ public class TestAllWords { final AtomicLong wordCount = new AtomicLong(0); Long startTime = System.currentTimeMillis(); - dictionaryReader.proccess(new WordProccessor() { + dictionaryReader.process(new WordProcessor() { public void process(WordCard wordCard) throws IOException { String word = wordCard.getBase() + wordCard.getCanonicalSuffix(); for (FlexiaModel fm : wordCard.getWordsForms()) { @@ -125,7 +125,7 @@ public class TestAllWords { Long startTime = System.currentTimeMillis(); DictionaryReader dictionaryReader = new DictionaryReader(pathToDic, new HashSet(), filters); - dictionaryReader.proccess(new WordProccessor() { + dictionaryReader.process(new WordProcessor() { public void process(WordCard wordCard) throws IOException { String word = wordCard.getBase() + wordCard.getCanonicalSuffix(); for (FlexiaModel fm : wordCard.getWordsForms()) { diff --git a/dictionary-reader/src/test/java/org/apache/lucene/morphology/AnalayzersTest.java b/dictionary-reader/src/test/java/org/apache/lucene/morphology/AnalyzersTest.java similarity index 87% rename from dictionary-reader/src/test/java/org/apache/lucene/morphology/AnalayzersTest.java rename to dictionary-reader/src/test/java/org/apache/lucene/morphology/AnalyzersTest.java index 766de9c..430e275 100644 --- a/dictionary-reader/src/test/java/org/apache/lucene/morphology/AnalayzersTest.java +++ b/dictionary-reader/src/test/java/org/apache/lucene/morphology/AnalyzersTest.java @@ -32,13 +32,13 @@ import static org.hamcrest.Matchers.equalTo; import static org.junit.Assert.assertThat; -public class AnalayzersTest { +public class AnalyzersTest { @Test public void englishAnalyzerShouldGiveCorrectWords() throws IOException { Analyzer morphlogyAnalyzer = new EnglishAnalyzer(); - String answerPath = "/english/englsih-analayzer-answer.txt"; - String testPath = "/english/englsih-analayzer-data.txt"; + String answerPath = "/english/english-analyzer-answer.txt"; + String testPath = "/english/english-analyzer-data.txt"; testAnalayzer(morphlogyAnalyzer, answerPath, testPath); } @@ -46,8 +46,8 @@ public class AnalayzersTest { @Test public void shoudGiveCorretWords() throws IOException { Analyzer morphlogyAnalyzer = new RussianAnalyzer(); - String answerPath = "/russian/russian-analayzer-answer.txt"; - String testPath = "/russian/russian-analayzer-data.txt"; + String answerPath = "/russian/russian-analyzer-answer.txt"; + String testPath = "/russian/russian-analyzer-data.txt"; testAnalayzer(morphlogyAnalyzer, answerPath, testPath); } diff --git a/dictionary-reader/src/test/resources/english/englsih-analayzer-answer.txt b/dictionary-reader/src/test/resources/english/english-analyzer-answer.txt similarity index 100% rename from dictionary-reader/src/test/resources/english/englsih-analayzer-answer.txt rename to dictionary-reader/src/test/resources/english/english-analyzer-answer.txt diff --git a/dictionary-reader/src/test/resources/english/englsih-analayzer-data.txt b/dictionary-reader/src/test/resources/english/english-analyzer-data.txt similarity index 100% rename from dictionary-reader/src/test/resources/english/englsih-analayzer-data.txt rename to dictionary-reader/src/test/resources/english/english-analyzer-data.txt diff --git a/dictionary-reader/src/test/resources/russian/russian-analayzer-answer.txt b/dictionary-reader/src/test/resources/russian/russian-analyzer-answer.txt similarity index 100% rename from dictionary-reader/src/test/resources/russian/russian-analayzer-answer.txt rename to dictionary-reader/src/test/resources/russian/russian-analyzer-answer.txt diff --git a/dictionary-reader/src/test/resources/russian/russian-analayzer-data.txt b/dictionary-reader/src/test/resources/russian/russian-analyzer-data.txt similarity index 100% rename from dictionary-reader/src/test/resources/russian/russian-analayzer-data.txt rename to dictionary-reader/src/test/resources/russian/russian-analyzer-data.txt diff --git a/english/src/test/java/org/apache/lucene/morphology/english/EnglishAnalayzerTest.java b/english/src/test/java/org/apache/lucene/morphology/english/EnglishAnalayzerTest.java deleted file mode 100644 index 5a31ae0..0000000 --- a/english/src/test/java/org/apache/lucene/morphology/english/EnglishAnalayzerTest.java +++ /dev/null @@ -1,58 +0,0 @@ -/** - * Copyright 2009 Alexander Kuznetsov - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.lucene.morphology.english; - -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; -import static org.hamcrest.Matchers.equalTo; -import static org.junit.Assert.assertThat; -import org.junit.Test; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.Arrays; -import java.util.HashSet; - - -public class EnglishAnalayzerTest { - - @Test - public void shouldGiveCorrectWords() throws IOException { - InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/english/englsih-analayzer-answer.txt"); - BufferedReader breader = new BufferedReader(new InputStreamReader(stream, "UTF-8")); - String[] strings = breader.readLine().replaceAll(" +", " ").trim().split(" "); - HashSet answer = new HashSet(Arrays.asList(strings)); - stream.close(); - - EnglishAnalyzer morphlogyAnalyzer = new EnglishAnalyzer(); - stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/english/englsih-analayzer-data.txt"); - - InputStreamReader reader = new InputStreamReader(stream, "UTF-8"); - - TokenStream tokenStream = morphlogyAnalyzer.tokenStream(null, reader); - HashSet result = new HashSet(); - while (tokenStream.incrementToken()) { - TermAttribute attribute1 = tokenStream.getAttribute(TermAttribute.class); - result.add(attribute1.term()); - } - - stream.close(); - - assertThat(result, equalTo(answer)); - } -} \ No newline at end of file diff --git a/english/src/test/java/org/apache/lucene/morphology/english/EnglishLuceneMorphTest.java b/english/src/test/java/org/apache/lucene/morphology/english/EnglishLuceneMorphTest.java deleted file mode 100644 index 0b6ddd3..0000000 --- a/english/src/test/java/org/apache/lucene/morphology/english/EnglishLuceneMorphTest.java +++ /dev/null @@ -1,55 +0,0 @@ -/** - * Copyright 2009 Alexander Kuznetsov - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.lucene.morphology.english; - -import org.apache.lucene.morphology.LuceneMorphology; -import static org.hamcrest.Matchers.equalTo; -import static org.junit.Assert.assertThat; -import org.junit.Before; -import org.junit.Test; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.HashSet; -import java.util.Set; - -public class EnglishLuceneMorphTest { - private LuceneMorphology luceneMorph; - - @Before - public void setUp() throws IOException { - luceneMorph = new LuceneMorphology(this.getClass().getResourceAsStream("/org/apache/lucene/morphology/english/morph.info"), new EnglishLetterDecoderEncoder()); - } - - @Test - public void shoudGetCorrentMorphInfo() throws IOException { - InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/english/english-morphology-test.txt"); - BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8")); - String s = bufferedReader.readLine(); - while (s != null) { - String[] qa = s.trim().split(" "); - Set result = new HashSet(); - for (int i = 1; i < qa.length; i++) { - result.add(qa[i]); - } - Set stringList = new HashSet(luceneMorph.getNormalForms(qa[0])); - assertThat(stringList, equalTo(result)); - s = bufferedReader.readLine(); - } - } -} \ No newline at end of file diff --git a/english/src/test/resources/org/apache/lucene/morphology/english/english-morphology-test.txt b/english/src/test/resources/org/apache/lucene/morphology/english/english-morphology-test.txt deleted file mode 100644 index c65e765..0000000 --- a/english/src/test/resources/org/apache/lucene/morphology/english/english-morphology-test.txt +++ /dev/null @@ -1,8 +0,0 @@ -purchases purchase -existing exist -was be -men man -bore bore bear -grown grow grown -came come -md md \ No newline at end of file diff --git a/english/src/test/resources/org/apache/lucene/morphology/english/englsih-analayzer-answer.txt b/english/src/test/resources/org/apache/lucene/morphology/english/englsih-analayzer-answer.txt deleted file mode 100644 index cffa6be..0000000 --- a/english/src/test/resources/org/apache/lucene/morphology/english/englsih-analayzer-answer.txt +++ /dev/null @@ -1 +0,0 @@ -following follow the instruction exactly will be help ensure the best well good result \ No newline at end of file diff --git a/english/src/test/resources/org/apache/lucene/morphology/english/englsih-analayzer-data.txt b/english/src/test/resources/org/apache/lucene/morphology/english/englsih-analayzer-data.txt deleted file mode 100644 index 5c203f8..0000000 --- a/english/src/test/resources/org/apache/lucene/morphology/english/englsih-analayzer-data.txt +++ /dev/null @@ -1 +0,0 @@ -Following the instructions exactly will help ensure the best results \ No newline at end of file diff --git a/morph/src/main/java/org/apache/lucene/morphology/MorphologyImpl.java b/morph/src/main/java/org/apache/lucene/morphology/MorphologyImpl.java index 5328328..ff966b6 100644 --- a/morph/src/main/java/org/apache/lucene/morphology/MorphologyImpl.java +++ b/morph/src/main/java/org/apache/lucene/morphology/MorphologyImpl.java @@ -18,7 +18,6 @@ package org.apache.lucene.morphology; import java.io.*; import java.util.ArrayList; -import java.util.HashSet; import java.util.List; @@ -26,7 +25,7 @@ public class MorphologyImpl implements Morphology { protected int[][] separators; protected short[] rulesId; protected Heuristic[][] rules; - protected String[] grammaInfo; + protected String[] grammarInfo; protected LetterDecoderEncoder decoderEncoder; @@ -40,27 +39,11 @@ public class MorphologyImpl implements Morphology { this.decoderEncoder = decoderEncoder; } - public MorphologyImpl(int[][] separators, short[] rulesId, Heuristic[][] rules, String[] grammaInfo) { + public MorphologyImpl(int[][] separators, short[] rulesId, Heuristic[][] rules, String[] grammarInfo) { this.separators = separators; this.rulesId = rulesId; this.rules = rules; - this.grammaInfo = grammaInfo; - } - - public int[][] getSeparators() { - return separators; - } - - public short[] getRulesId() { - return rulesId; - } - - public Heuristic[][] getRules() { - return rules; - } - - public String[] getGrammaInfo() { - return grammaInfo; + this.grammarInfo = grammarInfo; } public List getNormalForms(String s) { @@ -78,7 +61,7 @@ public class MorphologyImpl implements Morphology { int[] ints = decoderEncoder.encodeToArray(revertWord(s)); int ruleId = findRuleId(ints); for (Heuristic h : rules[rulesId[ruleId]]) { - result.add(h.transformWord(s) + "|" + grammaInfo[h.getFormMorphInfo()]); + result.add(h.transformWord(s) + "|" + grammarInfo[h.getFormMorphInfo()]); } return result; } @@ -135,8 +118,8 @@ public class MorphologyImpl implements Morphology { writer.write(heuristic.toString() + "\n"); } } - writer.write(grammaInfo.length + "\n"); - for (String s : grammaInfo) { + writer.write(grammarInfo.length + "\n"); + for (String s : grammarInfo) { writer.write(s + "\n"); } writer.close(); @@ -166,9 +149,9 @@ public class MorphologyImpl implements Morphology { Integer amount; s = bufferedReader.readLine(); amount = Integer.valueOf(s); - grammaInfo = new String[amount]; + grammarInfo = new String[amount]; for (int i = 0; i < amount; i++) { - grammaInfo[i] = bufferedReader.readLine(); + grammarInfo[i] = bufferedReader.readLine(); } } diff --git a/morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphologyAnalyzer.java b/morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphologyAnalyzer.java index 5e6f08e..0be508d 100644 --- a/morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphologyAnalyzer.java +++ b/morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphologyAnalyzer.java @@ -48,6 +48,6 @@ public class MorphologyAnalyzer extends Analyzer { TokenStream result = new StandardTokenizer(Version.LUCENE_30, reader); result = new StandardFilter(result); result = new LowerCaseFilter(result); - return new MorphlogyFilter(result, luceneMorph); + return new MorphologyFilter(result, luceneMorph); } } diff --git a/morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphlogyFilter.java b/morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphologyFilter.java similarity index 89% rename from morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphlogyFilter.java rename to morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphologyFilter.java index 8fe84c6..38bb9f6 100644 --- a/morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphlogyFilter.java +++ b/morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphologyFilter.java @@ -25,12 +25,12 @@ import java.io.IOException; import java.util.Iterator; -public class MorphlogyFilter extends TokenFilter { +public class MorphologyFilter extends TokenFilter { private LuceneMorphology luceneMorph; private Iterator iterator; private TermAttribute termAtt; - public MorphlogyFilter(TokenStream tokenStream, LuceneMorphology luceneMorph) { + public MorphologyFilter(TokenStream tokenStream, LuceneMorphology luceneMorph) { super(tokenStream); this.luceneMorph = luceneMorph; termAtt = addAttribute(TermAttribute.class); diff --git a/russian/src/test/java/org/apache/lucene/morphology/russian/RussianAnalayzerTest.java b/russian/src/test/java/org/apache/lucene/morphology/russian/RussianAnalayzerTest.java deleted file mode 100644 index a946bf5..0000000 --- a/russian/src/test/java/org/apache/lucene/morphology/russian/RussianAnalayzerTest.java +++ /dev/null @@ -1,59 +0,0 @@ -/** - * Copyright 2009 Alexander Kuznetsov - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.lucene.morphology.russian; - -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; -import static org.hamcrest.Matchers.equalTo; -import static org.junit.Assert.assertThat; -import org.junit.Test; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.Arrays; -import java.util.HashSet; - - -public class RussianAnalayzerTest { - - @Test - public void shoudGiveCorretWords() throws IOException { - InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/russian-analayzer-answer.txt"); - BufferedReader breader = new BufferedReader(new InputStreamReader(stream, "UTF-8")); - String[] strings = breader.readLine().replaceAll(" +", " ").trim().split(" "); - HashSet answer = new HashSet(Arrays.asList(strings)); - stream.close(); - - RussianAnalyzer morphlogyAnalyzer = new RussianAnalyzer(); - stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/russian-analayzer-data.txt"); - - InputStreamReader reader = new InputStreamReader(stream, "UTF-8"); - - TokenStream tokenStream = morphlogyAnalyzer.tokenStream(null, reader); - HashSet result = new HashSet(); - while (tokenStream.incrementToken()) { - TermAttribute attribute1 = tokenStream.getAttribute(TermAttribute.class); - result.add(attribute1.term()); - } - - stream.close(); - - assertThat(result, equalTo(answer)); - } -} - diff --git a/russian/src/test/java/org/apache/lucene/morphology/russian/RussianLuceneMorphTest.java b/russian/src/test/java/org/apache/lucene/morphology/russian/RussianLuceneMorphTest.java deleted file mode 100644 index 645a77c..0000000 --- a/russian/src/test/java/org/apache/lucene/morphology/russian/RussianLuceneMorphTest.java +++ /dev/null @@ -1,55 +0,0 @@ -/** - * Copyright 2009 Alexander Kuznetsov - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.lucene.morphology.russian; - -import org.apache.lucene.morphology.LuceneMorphology; -import static org.hamcrest.Matchers.equalTo; -import static org.junit.Assert.assertThat; -import org.junit.Before; -import org.junit.Test; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.HashSet; -import java.util.Set; - -public class RussianLuceneMorphTest { - private LuceneMorphology luceneMorph; - - @Before - public void setUp() throws IOException { - luceneMorph = new LuceneMorphology(this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/morph.info"), new RussianLetterDecoderEncoder()); - } - - @Test - public void shoudGetCorrentMorphInfo() throws IOException { - InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/russian-morphology-test.txt"); - BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8")); - String s = bufferedReader.readLine(); - while (s != null) { - String[] qa = s.trim().split(" "); - Set result = new HashSet(); - for (int i = 1; i < qa.length; i++) { - result.add(qa[i]); - } - Set stringList = new HashSet(luceneMorph.getNormalForms(qa[0])); - assertThat(stringList, equalTo(result)); - s = bufferedReader.readLine(); - } - } -} diff --git a/russian/src/test/resources/org/apache/lucene/morphology/russian/russian-analayzer-answer.txt b/russian/src/test/resources/org/apache/lucene/morphology/russian/russian-analayzer-answer.txt deleted file mode 100644 index 44b1843..0000000 --- a/russian/src/test/resources/org/apache/lucene/morphology/russian/russian-analayzer-answer.txt +++ /dev/null @@ -1 +0,0 @@ -в результат крушение погибнуть командир отряд специальный назначение пря при переть гувд ростовский область полковник милиция михаил перов и предприниматель \ No newline at end of file diff --git a/russian/src/test/resources/org/apache/lucene/morphology/russian/russian-analayzer-data.txt b/russian/src/test/resources/org/apache/lucene/morphology/russian/russian-analayzer-data.txt deleted file mode 100644 index c97b5e9..0000000 --- a/russian/src/test/resources/org/apache/lucene/morphology/russian/russian-analayzer-data.txt +++ /dev/null @@ -1 +0,0 @@ -В результате крушения погибли командир отряда специального назначения при ГУВД Ростовской области полковник милиции Михаил Перов и предприниматель \ No newline at end of file diff --git a/russian/src/test/resources/org/apache/lucene/morphology/russian/russian-morphology-test.txt b/russian/src/test/resources/org/apache/lucene/morphology/russian/russian-morphology-test.txt deleted file mode 100644 index c775e7d..0000000 --- a/russian/src/test/resources/org/apache/lucene/morphology/russian/russian-morphology-test.txt +++ /dev/null @@ -1,19 +0,0 @@ -еду еда ехать -тестов тест -вина вино вина -вино вино -ехать ехать -ананасов ананас ананасовый -сухой сухой -дураков дурак -пушка пушка пушок -пушок пушок -пушек пушка -козлов козлов козловый козел -жуков жуков жук -красив красить красивый -красивая красивый -тосклив тоскливый -лучший хороший -на на -тест тест тесто \ No newline at end of file