adding licence
git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@15 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
parent
3f26888bde
commit
b6239ec906
@ -18,6 +18,8 @@ package org.apache.lucene.russian.morphology.analayzer;
|
|||||||
|
|
||||||
import org.apache.lucene.analysis.Token;
|
import org.apache.lucene.analysis.Token;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import static org.hamcrest.core.IsEqual.equalTo;
|
||||||
|
import static org.junit.Assert.assertThat;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
@ -32,14 +34,17 @@ public class RussianMorphlogyAnalayzerTest {
|
|||||||
public void shouldCorrectProccessText() throws IOException {
|
public void shouldCorrectProccessText() throws IOException {
|
||||||
RussianMorphlogyAnalayzer morphlogyAnalayzer = new RussianMorphlogyAnalayzer();
|
RussianMorphlogyAnalayzer morphlogyAnalayzer = new RussianMorphlogyAnalayzer();
|
||||||
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/analayzer/russian-text.txt");
|
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/analayzer/russian-text.txt");
|
||||||
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
|
BufferedReader reader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
|
||||||
|
|
||||||
|
InputStream tokeStream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/analayzer/token-of-russian-text.txt");
|
||||||
|
BufferedReader tokenReader = new BufferedReader(new InputStreamReader(tokeStream, "UTF-8"));
|
||||||
|
|
||||||
final Token reusableToken = new Token();
|
final Token reusableToken = new Token();
|
||||||
|
|
||||||
Token nextToken;
|
Token nextToken;
|
||||||
|
|
||||||
|
|
||||||
TokenStream in = morphlogyAnalayzer.tokenStream(null, bufferedReader);
|
TokenStream in = morphlogyAnalayzer.tokenStream(null, reader);
|
||||||
for (; ;) {
|
for (; ;) {
|
||||||
nextToken = in.next(reusableToken);
|
nextToken = in.next(reusableToken);
|
||||||
|
|
||||||
@ -47,14 +52,8 @@ public class RussianMorphlogyAnalayzerTest {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
System.out.println(nextToken.term());
|
assertThat(nextToken.term(), equalTo(tokenReader.readLine().trim()));
|
||||||
// nextSampleToken = sample.next(reusableSampleToken);
|
|
||||||
// assertEquals(
|
|
||||||
// "Unicode",
|
|
||||||
// nextToken.term(),
|
|
||||||
// nextSampleToken == null
|
|
||||||
// ? null
|
|
||||||
// : nextSampleToken.term());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,33 @@
|
|||||||
|
в
|
||||||
|
условие
|
||||||
|
нарастать
|
||||||
|
пурга
|
||||||
|
быть
|
||||||
|
сделать
|
||||||
|
4
|
||||||
|
успешный
|
||||||
|
заход
|
||||||
|
на
|
||||||
|
посадка
|
||||||
|
все
|
||||||
|
нормальный
|
||||||
|
быть
|
||||||
|
рекомендовать
|
||||||
|
система
|
||||||
|
к
|
||||||
|
внедрение
|
||||||
|
рейс
|
||||||
|
из
|
||||||
|
кейптаун
|
||||||
|
юар
|
||||||
|
на
|
||||||
|
станция
|
||||||
|
новолазаревский
|
||||||
|
антарктида
|
||||||
|
совершаться
|
||||||
|
примерный
|
||||||
|
один
|
||||||
|
раз
|
||||||
|
в
|
||||||
|
два
|
||||||
|
неделя
|
Loading…
x
Reference in New Issue
Block a user