adding licence
git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@15 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
parent
3f26888bde
commit
b6239ec906
@@ -18,6 +18,8 @@ package org.apache.lucene.russian.morphology.analayzer;
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import static org.hamcrest.core.IsEqual.equalTo;
|
||||
import static org.junit.Assert.assertThat;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
@@ -32,14 +34,17 @@ public class RussianMorphlogyAnalayzerTest {
|
||||
public void shouldCorrectProccessText() throws IOException {
|
||||
RussianMorphlogyAnalayzer morphlogyAnalayzer = new RussianMorphlogyAnalayzer();
|
||||
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/analayzer/russian-text.txt");
|
||||
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
|
||||
BufferedReader reader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
|
||||
|
||||
InputStream tokeStream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/analayzer/token-of-russian-text.txt");
|
||||
BufferedReader tokenReader = new BufferedReader(new InputStreamReader(tokeStream, "UTF-8"));
|
||||
|
||||
final Token reusableToken = new Token();
|
||||
|
||||
Token nextToken;
|
||||
|
||||
|
||||
TokenStream in = morphlogyAnalayzer.tokenStream(null, bufferedReader);
|
||||
TokenStream in = morphlogyAnalayzer.tokenStream(null, reader);
|
||||
for (; ;) {
|
||||
nextToken = in.next(reusableToken);
|
||||
|
||||
@@ -47,14 +52,8 @@ public class RussianMorphlogyAnalayzerTest {
|
||||
break;
|
||||
}
|
||||
|
||||
System.out.println(nextToken.term());
|
||||
// nextSampleToken = sample.next(reusableSampleToken);
|
||||
// assertEquals(
|
||||
// "Unicode",
|
||||
// nextToken.term(),
|
||||
// nextSampleToken == null
|
||||
// ? null
|
||||
// : nextSampleToken.term());
|
||||
assertThat(nextToken.term(), equalTo(tokenReader.readLine().trim()));
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -0,0 +1,33 @@
|
||||
в
|
||||
условие
|
||||
нарастать
|
||||
пурга
|
||||
быть
|
||||
сделать
|
||||
4
|
||||
успешный
|
||||
заход
|
||||
на
|
||||
посадка
|
||||
все
|
||||
нормальный
|
||||
быть
|
||||
рекомендовать
|
||||
система
|
||||
к
|
||||
внедрение
|
||||
рейс
|
||||
из
|
||||
кейптаун
|
||||
юар
|
||||
на
|
||||
станция
|
||||
новолазаревский
|
||||
антарктида
|
||||
совершаться
|
||||
примерный
|
||||
один
|
||||
раз
|
||||
в
|
||||
два
|
||||
неделя
|
Loading…
x
Reference in New Issue
Block a user