commit
71af7f6f9d
@ -1,6 +1,6 @@
|
|||||||
# Russian Morphology for Apache Lucene
|
# Russian Morphology for Apache Lucene
|
||||||
|
|
||||||
Russian and English morphology for Java and the [Apache Lucene](http://lucene.apache.org) 8.7 framework, based on the open-source dictionary from the [АОТ](http://aot.ru) site. It uses dictionary-based morphology with some heuristics for unknown words. It supports homonyms: for example, for the Russian word "вина" it gives two variants, "вино" and "вина".
|
Russian and English morphology for Java and the [Apache Lucene](http://lucene.apache.org) 9.3 framework, based on the open-source dictionary from the [АОТ](http://aot.ru) site. It uses dictionary-based morphology with some heuristics for unknown words. It supports homonyms: for example, for the Russian word "вина" it gives two variants, "вино" and "вина".
|
||||||
|
|
||||||
|
|
||||||
### How to use
|
### How to use
|
||||||
|
@ -16,7 +16,7 @@
|
|||||||
package org.apache.lucene.morphology;
|
package org.apache.lucene.morphology;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
|
import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
|
||||||
import org.apache.lucene.analysis.CharArraySet;
|
import org.apache.lucene.analysis.CharArraySet;
|
||||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
@ -41,7 +41,7 @@ import java.util.*;
|
|||||||
import static org.hamcrest.Matchers.equalTo;
|
import static org.hamcrest.Matchers.equalTo;
|
||||||
|
|
||||||
|
|
||||||
public class AnalyzersTest extends BaseTokenStreamTestCase {
|
public class TestAnalyzers extends BaseTokenStreamTestCase {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldGiveCorrectWordsForEnglish() throws IOException {
|
public void shouldGiveCorrectWordsForEnglish() throws IOException {
|
@ -33,7 +33,7 @@ import java.util.Set;
|
|||||||
import static org.hamcrest.CoreMatchers.equalTo;
|
import static org.hamcrest.CoreMatchers.equalTo;
|
||||||
|
|
||||||
|
|
||||||
public class LuceneMorphTest {
|
public class TestLuceneMorph {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void englishMorphologyShouldGetCorrectNormalForm() throws IOException {
|
public void englishMorphologyShouldGetCorrectNormalForm() throws IOException {
|
4
pom.xml
4
pom.xml
@ -16,7 +16,7 @@
|
|||||||
</scm>
|
</scm>
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
<lucene.version>8.11.0</lucene.version>
|
<lucene.version>9.3.0</lucene.version>
|
||||||
<morphology.version>1.5</morphology.version>
|
<morphology.version>1.5</morphology.version>
|
||||||
<junit.version>4.13</junit.version>
|
<junit.version>4.13</junit.version>
|
||||||
</properties>
|
</properties>
|
||||||
@ -56,7 +56,7 @@
|
|||||||
</dependency>
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.lucene</groupId>
|
<groupId>org.apache.lucene</groupId>
|
||||||
<artifactId>lucene-analyzers-common</artifactId>
|
<artifactId>lucene-analysis-common</artifactId>
|
||||||
<version>${lucene.version}</version>
|
<version>${lucene.version}</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
@ -15,12 +15,13 @@
|
|||||||
*/
|
*/
|
||||||
package org.apache.lucene.analysis.morphology;
|
package org.apache.lucene.analysis.morphology;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.TokenFilterFactory;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
|
||||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
|
||||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
|
||||||
import org.apache.lucene.morphology.LuceneMorphology;
|
import org.apache.lucene.morphology.LuceneMorphology;
|
||||||
import org.apache.lucene.morphology.analyzer.MorphologyFilter;
|
import org.apache.lucene.morphology.analyzer.MorphologyFilter;
|
||||||
|
import org.apache.lucene.util.ResourceLoader;
|
||||||
|
import org.apache.lucene.util.ResourceLoaderAware;
|
||||||
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
@ -37,7 +38,7 @@ import java.util.Map;
|
|||||||
* </analyzer>
|
* </analyzer>
|
||||||
* </fieldType></pre>
|
* </fieldType></pre>
|
||||||
*/
|
*/
|
||||||
public class MorphologyFilterFactory extends TokenFilterFactory implements ResourceLoaderAware{
|
public class MorphologyFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||||
|
|
||||||
private static final String LANGUAGE_KEY = "language";
|
private static final String LANGUAGE_KEY = "language";
|
||||||
|
|
||||||
|
@ -15,11 +15,11 @@
|
|||||||
*/
|
*/
|
||||||
package org.apache.lucene.analysis.morphology;
|
package org.apache.lucene.analysis.morphology;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
|
|
||||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
|
||||||
import org.apache.lucene.morphology.LuceneMorphology;
|
import org.apache.lucene.morphology.LuceneMorphology;
|
||||||
import org.apache.lucene.morphology.english.EnglishLuceneMorphology;
|
import org.apache.lucene.morphology.english.EnglishLuceneMorphology;
|
||||||
import org.apache.lucene.morphology.russian.RussianLuceneMorphology;
|
import org.apache.lucene.morphology.russian.RussianLuceneMorphology;
|
||||||
|
import org.apache.lucene.util.ClasspathResourceLoader;
|
||||||
|
import org.apache.lucene.util.ResourceLoader;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
@ -34,12 +34,12 @@ public class MorphologyFilterFactoryTest {
|
|||||||
private Map<String, String> args;
|
private Map<String, String> args;
|
||||||
|
|
||||||
@Before
|
@Before
|
||||||
public void setUp(){
|
public void setUp() {
|
||||||
args = new HashMap<>();
|
args = new HashMap<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void if_RussianLanguageKey_then_CreateRussianMorphologyFilter(){
|
public void if_RussianLanguageKey_then_CreateRussianMorphologyFilter() {
|
||||||
|
|
||||||
args.put(LANGUAGE_KEY, "Russian");
|
args.put(LANGUAGE_KEY, "Russian");
|
||||||
MorphologyFilterFactory morphologyFilterFactory = new MorphologyFilterFactory(args);
|
MorphologyFilterFactory morphologyFilterFactory = new MorphologyFilterFactory(args);
|
||||||
@ -51,7 +51,7 @@ public class MorphologyFilterFactoryTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void if_EnglishLanguageKey_then_CreateEnglishMorphologyFilter(){
|
public void if_EnglishLanguageKey_then_CreateEnglishMorphologyFilter() {
|
||||||
|
|
||||||
args.put(LANGUAGE_KEY, "English");
|
args.put(LANGUAGE_KEY, "English");
|
||||||
MorphologyFilterFactory morphologyFilterFactory = new MorphologyFilterFactory(args);
|
MorphologyFilterFactory morphologyFilterFactory = new MorphologyFilterFactory(args);
|
||||||
@ -63,7 +63,7 @@ public class MorphologyFilterFactoryTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void if_NoLanguageKey_then_CreateEnglishMorphologyFilter(){
|
public void if_NoLanguageKey_then_CreateEnglishMorphologyFilter() {
|
||||||
|
|
||||||
MorphologyFilterFactory morphologyFilterFactory = new MorphologyFilterFactory(args);
|
MorphologyFilterFactory morphologyFilterFactory = new MorphologyFilterFactory(args);
|
||||||
morphologyFilterFactory.inform(loader);
|
morphologyFilterFactory.inform(loader);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user