adding reading morphology from resource
git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@55 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
parent
a6970bda31
commit
80b853c9cd
@ -32,7 +32,7 @@ public class MorphlogyAnalayzer extends Analyzer {
|
|||||||
private LuceneMorph luceneMorph;
|
private LuceneMorph luceneMorph;
|
||||||
|
|
||||||
public MorphlogyAnalayzer(String pathToMorph, LetterDecoderEncoder letterDecoderEncoder) throws IOException {
|
public MorphlogyAnalayzer(String pathToMorph, LetterDecoderEncoder letterDecoderEncoder) throws IOException {
|
||||||
luceneMorph = new LuceneMorph("sep.txt", letterDecoderEncoder);
|
luceneMorph = new LuceneMorph(pathToMorph, letterDecoderEncoder);
|
||||||
}
|
}
|
||||||
|
|
||||||
public MorphlogyAnalayzer(InputStream inputStream, LetterDecoderEncoder letterDecoderEncoder) throws IOException {
|
public MorphlogyAnalayzer(InputStream inputStream, LetterDecoderEncoder letterDecoderEncoder) throws IOException {
|
||||||
|
@ -15,12 +15,13 @@
|
|||||||
*/
|
*/
|
||||||
package org.apache.lucene.morphology.russian;
|
package org.apache.lucene.morphology.russian;
|
||||||
|
|
||||||
import org.junit.Test;
|
import org.apache.lucene.morphology.analayzer.MorphlogyAnalayzer;
|
||||||
|
|
||||||
public class RussianMorphTest {
|
import java.io.IOException;
|
||||||
|
|
||||||
@Test
|
|
||||||
public void shoudGetCorrentMorphInfo() {
|
|
||||||
|
|
||||||
|
public class RussianAnalayzer extends MorphlogyAnalayzer {
|
||||||
|
public RussianAnalayzer() throws IOException {
|
||||||
|
super(RussianAnalayzer.class.getResourceAsStream("/org/apache/lucene/morphology/russian/morph.info"), new RussianLetterDecoderEncoder());
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -1,49 +0,0 @@
|
|||||||
/**
|
|
||||||
* Copyright 2009 Alexander Kuznetsov
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
package org.apache.lucene.morphology.russian;
|
|
||||||
|
|
||||||
|
|
||||||
import org.apache.lucene.morphology.Morph;
|
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
|
||||||
import java.io.FileReader;
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: akuznetsov
|
|
||||||
* Date: 15.08.2009
|
|
||||||
* Time: 16:52:24
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
|
||||||
public class Test {
|
|
||||||
|
|
||||||
public static void main(String[] args) throws IOException, ClassNotFoundException {
|
|
||||||
//
|
|
||||||
Morph splitter = new Morph("russian/src/main/resources/org/apache/lucene/morphology/russian/morph.info", new RussianLetterDecoderEncoder());
|
|
||||||
FileReader fileReader = new FileReader("russian/src/main/resources/for.test.txt");
|
|
||||||
BufferedReader bufferedReader = new BufferedReader(fileReader);
|
|
||||||
String s = bufferedReader.readLine();
|
|
||||||
while (s != null) {
|
|
||||||
System.out.println(splitter.getMorhInfo(s));
|
|
||||||
s = bufferedReader.readLine();
|
|
||||||
}
|
|
||||||
|
|
||||||
fileReader.close();
|
|
||||||
System.gc();
|
|
||||||
System.in.read();
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,22 +0,0 @@
|
|||||||
тест
|
|
||||||
тестов
|
|
||||||
вина
|
|
||||||
вино
|
|
||||||
еду
|
|
||||||
ехать
|
|
||||||
ананасов
|
|
||||||
бухой
|
|
||||||
сухой
|
|
||||||
дураков
|
|
||||||
пушка
|
|
||||||
пушок
|
|
||||||
пушек
|
|
||||||
козлов
|
|
||||||
жуков
|
|
||||||
красив
|
|
||||||
красивая
|
|
||||||
тосклив
|
|
||||||
наилучший
|
|
||||||
лучший
|
|
||||||
наипрекрасный
|
|
||||||
на
|
|
@ -0,0 +1,55 @@
|
|||||||
|
/**
|
||||||
|
* Copyright 2009 Alexander Kuznetsov
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.lucene.morphology.russian;
|
||||||
|
|
||||||
|
import org.apache.lucene.morphology.LuceneMorph;
|
||||||
|
import static org.hamcrest.core.IsEqual.equalTo;
|
||||||
|
import static org.junit.Assert.assertThat;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
public class RussianLuceneMorphTest {
|
||||||
|
private LuceneMorph luceneMorph;
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setUp() throws IOException {
|
||||||
|
luceneMorph = new LuceneMorph(this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/morph.info"), new RussianLetterDecoderEncoder());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void shoudGetCorrentMorphInfo() throws IOException {
|
||||||
|
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/russian-morphology-test.txt");
|
||||||
|
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
|
||||||
|
String s = bufferedReader.readLine();
|
||||||
|
while (s != null) {
|
||||||
|
String[] qa = s.trim().split(" ");
|
||||||
|
Set<String> result = new HashSet<String>();
|
||||||
|
for (int i = 1; i < qa.length; i++) {
|
||||||
|
result.add(qa[i]);
|
||||||
|
}
|
||||||
|
Set<String> stringList = new HashSet<String>(luceneMorph.getMorhInfo(qa[0]));
|
||||||
|
assertThat(stringList, equalTo(result));
|
||||||
|
s = bufferedReader.readLine();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -1,9 +1,19 @@
|
|||||||
шел идти
|
тест тест тесто
|
||||||
турестических турестический
|
тестов тест
|
||||||
отзывы отзыв
|
вина вино вина
|
||||||
победы победа
|
вино вино
|
||||||
поэтическая поэтический
|
еду еда ехать
|
||||||
произошло произойти
|
ехать ехать
|
||||||
test test
|
ананасов ананас ананасовый
|
||||||
ананасов ананас
|
сухой сухой
|
||||||
встовашего встовать
|
дураков дурак
|
||||||
|
пушка пушка пушок
|
||||||
|
пушок пушок
|
||||||
|
пушек пушка
|
||||||
|
козлов козлов козловый козел
|
||||||
|
жуков жуков жук
|
||||||
|
красив красить
|
||||||
|
красивая красивый
|
||||||
|
тосклив тоскливый
|
||||||
|
лучший хороший
|
||||||
|
на на
|
Loading…
x
Reference in New Issue
Block a user