adding test for Lucene analyzer

fixed problem with string checking

git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@62 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
alexander.a.kuznetsov
2009-10-17 17:06:55 +00:00
parent 26ca704ec0
commit 8c833132a8
11 changed files with 196 additions and 35 deletions

View File

@ -1,26 +0,0 @@
/**
* Copyright 2009 Alexander Kuznetsov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.morphology.russian;
import org.junit.Test;
/**
 * Placeholder JUnit test class in the Russian morphology package.
 * Its single test method has an empty body, so it passes without
 * executing or asserting anything.
 */
public class AnalayzerTest {

    /**
     * Empty test: nothing is executed or asserted here.
     */
    @Test
    public void shoudGetCorrentTokens() {
    }
}

View File

@ -0,0 +1,68 @@
/**
* Copyright 2009 Alexander Kuznetsov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.morphology.russian;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import static org.hamcrest.Matchers.equalTo;
import static org.junit.Assert.assertThat;
import org.junit.Test;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.HashSet;
/**
 * Verifies that {@link RussianAnalayzer} produces the expected set of terms
 * for the text stored in the data resource.
 *
 * <p>The expected terms are read from the first line of the answer resource,
 * where they are separated by one or more spaces. Both sides are compared as
 * sets, so term order and duplicates do not matter.
 */
public class RussianAnalayzerTest {

    /** Classpath location of the space-separated expected terms. */
    private static final String ANSWER_RESOURCE =
            "/org/apache/lucene/morphology/russian/russian-analayzer-answer.txt";

    /** Classpath location of the text that is fed to the analyzer. */
    private static final String DATA_RESOURCE =
            "/org/apache/lucene/morphology/russian/russian-analayzer-data.txt";

    /** Character encoding used to decode both resources. */
    private static final String ENCODING = "UTF-8";

    /**
     * Runs the analyzer over the data resource and checks that the set of
     * emitted terms equals the set of expected terms.
     *
     * @throws IOException if a resource is missing, empty, or cannot be read
     */
    @Test
    public void shoudGiveCorretWords() throws IOException {
        HashSet<String> answer = readExpectedWords(ANSWER_RESOURCE);
        HashSet<String> result = collectTerms(new RussianAnalayzer(), DATA_RESOURCE);
        assertThat(result, equalTo(answer));
    }

    /**
     * Opens a classpath resource, failing with a descriptive error instead of
     * a later NullPointerException when the resource does not exist.
     *
     * @param resourcePath absolute classpath path of the resource
     * @return an open stream; the caller is responsible for closing it
     * @throws IOException if the resource cannot be found
     */
    private InputStream openResource(String resourcePath) throws IOException {
        InputStream stream = this.getClass().getResourceAsStream(resourcePath);
        if (stream == null) {
            throw new IOException("Test resource not found on classpath: " + resourcePath);
        }
        return stream;
    }

    /**
     * Reads the first line of the given resource and splits it into words.
     *
     * @param resourcePath absolute classpath path of the answer resource
     * @return the distinct words found on the first line
     * @throws IOException if the resource is missing, empty, or unreadable
     */
    private HashSet<String> readExpectedWords(String resourcePath) throws IOException {
        InputStream stream = openResource(resourcePath);
        try {
            BufferedReader lines = new BufferedReader(new InputStreamReader(stream, ENCODING));
            String firstLine = lines.readLine();
            if (firstLine == null) {
                throw new IOException("Test resource is empty: " + resourcePath);
            }
            // Collapse runs of spaces first so that split(" ") yields no empty words.
            String[] words = firstLine.replaceAll(" +", " ").trim().split(" ");
            return new HashSet<String>(Arrays.asList(words));
        } finally {
            // Close even when reading fails, so the stream is never leaked.
            stream.close();
        }
    }

    /**
     * Feeds the given resource through the analyzer and gathers every term it emits.
     *
     * @param analyzer     analyzer under test
     * @param resourcePath absolute classpath path of the input text
     * @return the distinct terms produced by the analyzer's token stream
     * @throws IOException if the resource is missing or tokenizing fails
     */
    private HashSet<String> collectTerms(RussianAnalayzer analyzer, String resourcePath) throws IOException {
        InputStream stream = openResource(resourcePath);
        try {
            InputStreamReader reader = new InputStreamReader(stream, ENCODING);
            TokenStream tokens = analyzer.tokenStream(null, reader);
            final Token reusableToken = new Token();
            HashSet<String> terms = new HashSet<String>();
            // next(...) returns null once the stream is exhausted.
            for (Token token = tokens.next(reusableToken);
                 token != null;
                 token = tokens.next(reusableToken)) {
                terms.add(token.term());
            }
            return terms;
        } finally {
            // Close even when tokenizing fails, so the stream is never leaked.
            stream.close();
        }
    }
}

View File

@ -0,0 +1 @@
в результат крушение погибнуть командир отряд специальный назначение пря при переть гувд ростовский область полковник милиция михаил перов и предприниматель

View File

@ -0,0 +1 @@
В результате крушения погибли командир отряда специального назначения при ГУВД Ростовской области полковник милиции Михаил Перов и предприниматель