adding reading morphology from resource

git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@55 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
alexander.a.kuznetsov 2009-10-05 18:22:45 +00:00
parent a6970bda31
commit 80b853c9cd
6 changed files with 102 additions and 107 deletions

View File

@ -32,7 +32,7 @@ public class MorphlogyAnalayzer extends Analyzer {
private LuceneMorph luceneMorph;
/**
 * Creates an analyzer whose morphology is loaded from the given location.
 *
 * @param pathToMorph          location passed to {@link LuceneMorph} to load the morphology from
 * @param letterDecoderEncoder decoder/encoder passed to {@link LuceneMorph}
 * @throws IOException propagated from the {@link LuceneMorph} constructor
 */
public MorphlogyAnalayzer(String pathToMorph, LetterDecoderEncoder letterDecoderEncoder) throws IOException {
    // Load only from the caller-supplied path. The previous extra load from the
    // hard-coded "sep.txt" was overwritten immediately, so it only cost I/O and
    // could fail on a file the caller never asked for.
    luceneMorph = new LuceneMorph(pathToMorph, letterDecoderEncoder);
}
public MorphlogyAnalayzer(InputStream inputStream, LetterDecoderEncoder letterDecoderEncoder) throws IOException {

View File

@ -15,12 +15,13 @@
*/
package org.apache.lucene.morphology.russian;
import org.junit.Test;
import org.apache.lucene.morphology.analayzer.MorphlogyAnalayzer;
public class RussianMorphTest {
import java.io.IOException;
@Test
public void shoudGetCorrentMorphInfo() {
/**
 * {@link MorphlogyAnalayzer} preconfigured for Russian: it passes the bundled
 * {@code morph.info} classpath resource and a {@link RussianLetterDecoderEncoder}
 * to the superclass constructor.
 */
public class RussianAnalayzer extends MorphlogyAnalayzer {
    /** Classpath location of the Russian morphology data. */
    private static final String MORPH_RESOURCE = "/org/apache/lucene/morphology/russian/morph.info";

    /**
     * Builds the analyzer from the bundled morphology resource.
     *
     * @throws IOException propagated from the superclass constructor
     */
    public RussianAnalayzer() throws IOException {
        super(
                RussianAnalayzer.class.getResourceAsStream(MORPH_RESOURCE),
                new RussianLetterDecoderEncoder());
    }
}

View File

@ -1,49 +0,0 @@
/**
* Copyright 2009 Alexander Kuznetsov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.morphology.russian;
import org.apache.lucene.morphology.Morph;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
/**
* Created by IntelliJ IDEA.
* User: akuznetsov
* Date: 15.08.2009
* Time: 16:52:24
* To change this template use File | Settings | File Templates.
*/
public class Test {
    /**
     * Manual smoke run: loads the Russian morphology, prints the result of
     * {@code getMorhInfo} for every line of {@code for.test.txt} to standard
     * output, then requests a GC and blocks on standard input.
     *
     * @param args ignored
     * @throws IOException            if the morphology or the word list cannot be read
     * @throws ClassNotFoundException declared for callers; not thrown by the code visible here
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException {
        Morph splitter = new Morph("russian/src/main/resources/org/apache/lucene/morphology/russian/morph.info", new RussianLetterDecoderEncoder());
        // Decode explicitly instead of relying on the platform default charset
        // (which FileReader uses); the word list is Cyrillic text.
        // NOTE(review): assumes for.test.txt is stored as UTF-8 — confirm.
        BufferedReader bufferedReader = new BufferedReader(
                new InputStreamReader(new FileInputStream("russian/src/main/resources/for.test.txt"), "UTF-8"));
        try {
            for (String s = bufferedReader.readLine(); s != null; s = bufferedReader.readLine()) {
                System.out.println(splitter.getMorhInfo(s));
            }
        } finally {
            // Release the file handle even when a lookup or read throws.
            bufferedReader.close();
        }
        // NOTE(review): presumably keeps the process alive after a GC so memory
        // use can be inspected from outside — confirm before removing.
        System.gc();
        System.in.read();
    }
}

View File

@ -1,22 +0,0 @@
тест
тестов
вина
вино
еду
ехать
ананасов
бухой
сухой
дураков
пушка
пушок
пушек
козлов
жуков
красив
красивая
тосклив
наилучший
лучший
наипрекрасный
на

View File

@ -0,0 +1,55 @@
/**
* Copyright 2009 Alexander Kuznetsov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.morphology.russian;
import org.apache.lucene.morphology.LuceneMorph;
import static org.hamcrest.core.IsEqual.equalTo;
import static org.junit.Assert.assertThat;
import org.junit.Before;
import org.junit.Test;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.HashSet;
import java.util.Set;
/**
 * Checks {@link LuceneMorph} against the expectations listed in the
 * {@code russian-morphology-test.txt} classpath resource.
 */
public class RussianLuceneMorphTest {
    private LuceneMorph luceneMorph;

    /**
     * Builds a fresh {@link LuceneMorph} from the bundled {@code morph.info} resource.
     *
     * @throws IOException propagated from the {@link LuceneMorph} constructor
     */
    @Before
    public void setUp() throws IOException {
        luceneMorph = new LuceneMorph(this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/morph.info"), new RussianLetterDecoderEncoder());
    }

    /**
     * Reads the expectation file line by line. On each non-blank line the first
     * whitespace-separated token is the word to look up and the remaining tokens
     * are the expected results; the comparison ignores order and duplicates.
     *
     * @throws IOException if the expectation file cannot be read
     */
    @Test
    public void shoudGetCorrentMorphInfo() throws IOException {
        InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/russian-morphology-test.txt");
        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
        try {
            for (String s = bufferedReader.readLine(); s != null; s = bufferedReader.readLine()) {
                String line = s.trim();
                if (line.length() == 0) {
                    // A blank (e.g. trailing) line carries no expectation; without this
                    // guard it would turn into a lookup of the empty word.
                    continue;
                }
                // Split on whitespace runs so doubled spaces or tabs do not add
                // empty strings to the expected set.
                String[] qa = line.split("\\s+");
                Set<String> result = new HashSet<String>();
                for (int i = 1; i < qa.length; i++) {
                    result.add(qa[i]);
                }
                Set<String> stringList = new HashSet<String>(luceneMorph.getMorhInfo(qa[0]));
                assertThat(stringList, equalTo(result));
            }
        } finally {
            // Closing the reader also closes the underlying resource stream,
            // including when an assertion fails mid-file.
            bufferedReader.close();
        }
    }
}

View File

@ -1,9 +1,19 @@
шел идти
турестических турестический
отзывы отзыв
победы победа
поэтическая поэтический
произошло произойти
test test
ананасов ананас
встовашего встовать
тест тест тесто
тестов тест
вина вино вина
вино вино
еду еда ехать
ехать ехать
ананасов ананас ананасовый
сухой сухой
дураков дурак
пушка пушка пушок
пушок пушок
пушек пушка
козлов козлов козловый козел
жуков жуков жук
красив красить
красивая красивый
тосклив тоскливый
лучший хороший
на на