Adding a test for the Lucene analyzer

Fixed a problem with string checking.

git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@62 d817d54c-26ab-11de-abc9-2f7d1455ff7a
2009-10-17 17:06:55 +00:00
parent 26ca704ec0
commit 8c833132a8
11 changed files with 196 additions and 35 deletions
@@ -1,26 +0,0 @@
-/**
- * Copyright 2009 Alexander Kuznetsov
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.morphology.russian;
-
-import org.junit.Test;
-
-public class AnalayzerTest {
-
-    @Test
-    public void shoudGetCorrentTokens() {
-
-    }
-}
@@ -0,0 +1,68 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.morphology.russian;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import static org.hamcrest.Matchers.equalTo;
+import static org.junit.Assert.assertThat;
+import org.junit.Test;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.Arrays;
+import java.util.HashSet;
+
+
+/**
+ * Checks the output of {@code RussianAnalayzer} against a reference word list.
+ *
+ * Both the input text and the expected words are loaded from classpath
+ * resources and decoded as UTF-8.
+ */
+public class RussianAnalayzerTest {
+
+    /** Classpath resource whose first line holds the expected terms, separated by spaces. */
+    private static final String ANSWER_RESOURCE = "/org/apache/lucene/morphology/russian/russian-analayzer-answer.txt";
+
+    /** Classpath resource containing the text that is passed to the analyzer. */
+    private static final String DATA_RESOURCE = "/org/apache/lucene/morphology/russian/russian-analayzer-data.txt";
+
+    /**
+     * Runs the analyzer over the data resource and asserts that the set of
+     * produced terms equals the set of expected terms.
+     *
+     * Both sides are compared as sets, so neither the order of the terms nor
+     * the number of times a term occurs affects the outcome.
+     *
+     * @throws IOException if a resource is missing, is empty, or cannot be read
+     */
+    @Test
+    public void shoudGiveCorretWords() throws IOException {
+        HashSet<String> answer = readExpectedWords();
+
+        RussianAnalayzer morphlogyAnalayzer = new RussianAnalayzer();
+        HashSet<String> result = collectTerms(morphlogyAnalayzer);
+
+        assertThat(result, equalTo(answer));
+    }
+
+    /**
+     * Reads the first line of the answer resource and splits it on spaces.
+     *
+     * @return the expected terms
+     * @throws IOException if the resource is missing, is empty, or cannot be read
+     */
+    private HashSet<String> readExpectedWords() throws IOException {
+        InputStream stream = openResource(ANSWER_RESOURCE);
+        try {
+            BufferedReader breader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
+            String line = breader.readLine();
+            if (line == null) {
+                // readLine() returns null at end of stream; report it instead of hitting an NPE below.
+                throw new IOException("Test resource is empty: " + ANSWER_RESOURCE);
+            }
+            // Collapse runs of spaces first so that split(" ") yields no empty entries between words.
+            String[] strings = line.replaceAll(" +", " ").trim().split(" ");
+            return new HashSet<String>(Arrays.asList(strings));
+        } finally {
+            // Close even when reading fails, so the stream is never leaked.
+            stream.close();
+        }
+    }
+
+    /**
+     * Tokenizes the data resource with the given analyzer and gathers the term
+     * text of every token that the stream returns.
+     *
+     * @param analayzer the analyzer under test
+     * @return the distinct terms produced for the data resource
+     * @throws IOException if the resource is missing or tokenizing fails
+     */
+    private HashSet<String> collectTerms(RussianAnalayzer analayzer) throws IOException {
+        InputStream stream = openResource(DATA_RESOURCE);
+        try {
+            InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
+            TokenStream in = analayzer.tokenStream(null, reader);
+            final Token reusableToken = new Token();
+            HashSet<String> result = new HashSet<String>();
+            // The loop stops at the first null token, which is treated as the end of the stream.
+            for (Token nextToken = in.next(reusableToken); nextToken != null; nextToken = in.next(reusableToken)) {
+                result.add(nextToken.term());
+            }
+            return result;
+        } finally {
+            // Close even when tokenizing fails, so the stream is never leaked.
+            stream.close();
+        }
+    }
+
+    /**
+     * Opens a classpath resource, failing with a descriptive error when it is absent.
+     *
+     * @param path the resource path handed to {@code getResourceAsStream}
+     * @return an open stream that the caller must close
+     * @throws IOException if no resource exists at {@code path}
+     */
+    private InputStream openResource(String path) throws IOException {
+        InputStream stream = this.getClass().getResourceAsStream(path);
+        if (stream == null) {
+            throw new IOException("Test resource not found on classpath: " + path);
+        }
+        return stream;
+    }
+}
+
@@ -0,0 +1 @@
+в результат крушение погибнуть командир отряд специальный назначение пря при переть гувд ростовский область полковник милиция михаил перов и предприниматель
@@ -0,0 +1 @@
+В результате крушения погибли командир отряда специального назначения при ГУВД Ростовской области полковник милиции Михаил Перов и предприниматель
				`@@ -0,0 +1 @@`
				`в результат крушение погибнуть командир отряд специальный назначение пря при переть гувд ростовский область полковник милиция михаил перов и предприниматель`
				`@@ -0,0 +1 @@`
				`В результате крушения погибли командир отряда специального назначения при ГУВД Ростовской области полковник милиции Михаил Перов и предприниматель`