fixing some spelling errors

git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@100 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
Alexander.A.Kuznetsov
2010-10-08 12:18:18 +00:00
parent 76e68a11e0
commit 3de894404c
25 changed files with 55 additions and 342 deletions

View File

@ -1,59 +0,0 @@
/**
* Copyright 2009 Alexander Kuznetsov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.morphology.russian;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import static org.hamcrest.Matchers.equalTo;
import static org.junit.Assert.assertThat;
import org.junit.Test;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.HashSet;
public class RussianAnalayzerTest {
@Test
public void shoudGiveCorretWords() throws IOException {
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/russian-analayzer-answer.txt");
BufferedReader breader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
String[] strings = breader.readLine().replaceAll(" +", " ").trim().split(" ");
HashSet<String> answer = new HashSet<String>(Arrays.asList(strings));
stream.close();
RussianAnalyzer morphlogyAnalyzer = new RussianAnalyzer();
stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/russian-analayzer-data.txt");
InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
TokenStream tokenStream = morphlogyAnalyzer.tokenStream(null, reader);
HashSet<String> result = new HashSet<String>();
while (tokenStream.incrementToken()) {
TermAttribute attribute1 = tokenStream.getAttribute(TermAttribute.class);
result.add(attribute1.term());
}
stream.close();
assertThat(result, equalTo(answer));
}
}

View File

@ -1,55 +0,0 @@
/**
* Copyright 2009 Alexander Kuznetsov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.morphology.russian;
import org.apache.lucene.morphology.LuceneMorphology;
import static org.hamcrest.Matchers.equalTo;
import static org.junit.Assert.assertThat;
import org.junit.Before;
import org.junit.Test;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashSet;
import java.util.Set;
public class RussianLuceneMorphTest {
private LuceneMorphology luceneMorph;
@Before
public void setUp() throws IOException {
luceneMorph = new LuceneMorphology(this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/morph.info"), new RussianLetterDecoderEncoder());
}
@Test
public void shoudGetCorrentMorphInfo() throws IOException {
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/russian-morphology-test.txt");
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
String s = bufferedReader.readLine();
while (s != null) {
String[] qa = s.trim().split(" ");
Set<String> result = new HashSet<String>();
for (int i = 1; i < qa.length; i++) {
result.add(qa[i]);
}
Set<String> stringList = new HashSet<String>(luceneMorph.getNormalForms(qa[0]));
assertThat(stringList, equalTo(result));
s = bufferedReader.readLine();
}
}
}

View File

@ -1 +0,0 @@
в результат крушение погибнуть командир отряд специальный назначение пря при переть гувд ростовский область полковник милиция михаил перов и предприниматель

View File

@ -1 +0,0 @@
В результате крушения погибли командир отряда специального назначения при ГУВД Ростовской области полковник милиции Михаил Перов и предприниматель

View File

@ -1,19 +0,0 @@
еду еда ехать
тестов тест
вина вино вина
вино вино
ехать ехать
ананасов ананас ананасовый
сухой сухой
дураков дурак
пушка пушка пушок
пушок пушок
пушек пушка
козлов козлов козловый козел
жуков жуков жук
красив красить красивый
красивая красивый
тосклив тоскливый
лучший хороший
на на
тест тест тесто