From 80b853c9cdcc51ae94c9c0b09db6f9b51fd8561e Mon Sep 17 00:00:00 2001 From: "alexander.a.kuznetsov" Date: Mon, 5 Oct 2009 18:22:45 +0000 Subject: [PATCH] adding reading morphology fromresource git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@55 d817d54c-26ab-11de-abc9-2f7d1455ff7a --- .../analayzer/MorphlogyAnalayzer.java | 2 +- .../morphology/russian/RussianAnalayzer.java} | 53 +++++++++--------- .../lucene/morphology/russian/Test.java | 49 ----------------- russian/src/main/resources/for.test.txt | 22 -------- .../russian/RussianLuceneMorphTest.java | 55 +++++++++++++++++++ .../russian/russian-morphology-test.txt | 28 +++++++--- 6 files changed, 102 insertions(+), 107 deletions(-) rename russian/src/{test/java/org/apache/lucene/morphology/russian/RussianMorphTest.java => main/java/org/apache/lucene/morphology/russian/RussianAnalayzer.java} (63%) delete mode 100644 russian/src/main/java/org/apache/lucene/morphology/russian/Test.java delete mode 100644 russian/src/main/resources/for.test.txt create mode 100644 russian/src/test/java/org/apache/lucene/morphology/russian/RussianLuceneMorphTest.java diff --git a/morph/src/main/java/org/apache/lucene/morphology/analayzer/MorphlogyAnalayzer.java b/morph/src/main/java/org/apache/lucene/morphology/analayzer/MorphlogyAnalayzer.java index 6c36a68..e429b31 100644 --- a/morph/src/main/java/org/apache/lucene/morphology/analayzer/MorphlogyAnalayzer.java +++ b/morph/src/main/java/org/apache/lucene/morphology/analayzer/MorphlogyAnalayzer.java @@ -32,7 +32,7 @@ public class MorphlogyAnalayzer extends Analyzer { private LuceneMorph luceneMorph; public MorphlogyAnalayzer(String pathToMorph, LetterDecoderEncoder letterDecoderEncoder) throws IOException { - luceneMorph = new LuceneMorph("sep.txt", letterDecoderEncoder); + luceneMorph = new LuceneMorph(pathToMorph, letterDecoderEncoder); } public MorphlogyAnalayzer(InputStream inputStream, LetterDecoderEncoder letterDecoderEncoder) throws IOException { diff --git 
a/russian/src/test/java/org/apache/lucene/morphology/russian/RussianMorphTest.java b/russian/src/main/java/org/apache/lucene/morphology/russian/RussianAnalayzer.java similarity index 63% rename from russian/src/test/java/org/apache/lucene/morphology/russian/RussianMorphTest.java rename to russian/src/main/java/org/apache/lucene/morphology/russian/RussianAnalayzer.java index a1858d3..2abbce7 100644 --- a/russian/src/test/java/org/apache/lucene/morphology/russian/RussianMorphTest.java +++ b/russian/src/main/java/org/apache/lucene/morphology/russian/RussianAnalayzer.java @@ -1,26 +1,27 @@ -/** - * Copyright 2009 Alexander Kuznetsov - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.lucene.morphology.russian; - -import org.junit.Test; - -public class RussianMorphTest { - - @Test - public void shoudGetCorrentMorphInfo() { - - } -} +/** + * Copyright 2009 Alexander Kuznetsov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.morphology.russian; + +import org.apache.lucene.morphology.analayzer.MorphlogyAnalayzer; + +import java.io.IOException; + + +public class RussianAnalayzer extends MorphlogyAnalayzer { + public RussianAnalayzer() throws IOException { + super(RussianAnalayzer.class.getResourceAsStream("/org/apache/lucene/morphology/russian/morph.info"), new RussianLetterDecoderEncoder()); + } +} diff --git a/russian/src/main/java/org/apache/lucene/morphology/russian/Test.java b/russian/src/main/java/org/apache/lucene/morphology/russian/Test.java deleted file mode 100644 index 4e67b12..0000000 --- a/russian/src/main/java/org/apache/lucene/morphology/russian/Test.java +++ /dev/null @@ -1,49 +0,0 @@ -/** - * Copyright 2009 Alexander Kuznetsov - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.lucene.morphology.russian; - - -import org.apache.lucene.morphology.Morph; - -import java.io.BufferedReader; -import java.io.FileReader; -import java.io.IOException; - -/** - * Created by IntelliJ IDEA. - * User: akuznetsov - * Date: 15.08.2009 - * Time: 16:52:24 - * To change this template use File | Settings | File Templates. 
- */ -public class Test { - - public static void main(String[] args) throws IOException, ClassNotFoundException { - // - Morph splitter = new Morph("russian/src/main/resources/org/apache/lucene/morphology/russian/morph.info", new RussianLetterDecoderEncoder()); - FileReader fileReader = new FileReader("russian/src/main/resources/for.test.txt"); - BufferedReader bufferedReader = new BufferedReader(fileReader); - String s = bufferedReader.readLine(); - while (s != null) { - System.out.println(splitter.getMorhInfo(s)); - s = bufferedReader.readLine(); - } - - fileReader.close(); - System.gc(); - System.in.read(); - } -} diff --git a/russian/src/main/resources/for.test.txt b/russian/src/main/resources/for.test.txt deleted file mode 100644 index bdc902e..0000000 --- a/russian/src/main/resources/for.test.txt +++ /dev/null @@ -1,22 +0,0 @@ -тест -тестов -вина -вино -еду -ехать -ананасов -бухой -сухой -дураков -пушка -пушок -пушек -козлов -жуков -красив -красивая -тосклив -наилучший -лучший -наипрекрасный -на \ No newline at end of file diff --git a/russian/src/test/java/org/apache/lucene/morphology/russian/RussianLuceneMorphTest.java b/russian/src/test/java/org/apache/lucene/morphology/russian/RussianLuceneMorphTest.java new file mode 100644 index 0000000..8811d40 --- /dev/null +++ b/russian/src/test/java/org/apache/lucene/morphology/russian/RussianLuceneMorphTest.java @@ -0,0 +1,55 @@ +/** + * Copyright 2009 Alexander Kuznetsov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.morphology.russian; + +import org.apache.lucene.morphology.LuceneMorph; +import static org.hamcrest.core.IsEqual.equalTo; +import static org.junit.Assert.assertThat; +import org.junit.Before; +import org.junit.Test; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.HashSet; +import java.util.Set; + +public class RussianLuceneMorphTest { + private LuceneMorph luceneMorph; + + @Before + public void setUp() throws IOException { + luceneMorph = new LuceneMorph(this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/morph.info"), new RussianLetterDecoderEncoder()); + } + + @Test + public void shoudGetCorrentMorphInfo() throws IOException { + InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/russian-morphology-test.txt"); + BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8")); + String s = bufferedReader.readLine(); + while (s != null) { + String[] qa = s.trim().split(" "); + Set result = new HashSet(); + for (int i = 1; i < qa.length; i++) { + result.add(qa[i]); + } + Set stringList = new HashSet(luceneMorph.getMorhInfo(qa[0])); + assertThat(stringList, equalTo(result)); + s = bufferedReader.readLine(); + } + } +} diff --git a/russian/src/test/resources/org/apache/lucene/morphology/russian/russian-morphology-test.txt b/russian/src/test/resources/org/apache/lucene/morphology/russian/russian-morphology-test.txt index 150b8cc..6e83f18 100644 --- a/russian/src/test/resources/org/apache/lucene/morphology/russian/russian-morphology-test.txt +++ b/russian/src/test/resources/org/apache/lucene/morphology/russian/russian-morphology-test.txt @@ -1,9 +1,19 @@ -шел идти -турестических турестический -отзывы отзыв -победы победа -поэтическая поэтический 
-произошло произойти -test test -ананасов ананас -встовашего встовать \ No newline at end of file +тест тест тесто +тестов тест +вина вино вина +вино вино +еду еда ехать +ехать ехать +ананасов ананас ананасовый +сухой сухой +дураков дурак +пушка пушка пушок +пушок пушок +пушек пушка +козлов козлов козловый козел +жуков жуков жук +красив красить +красивая красивый +тосклив тоскливый +лучший хороший +на на \ No newline at end of file