diff --git a/pom.xml b/pom.xml index 7368125..a7dc816 100644 --- a/pom.xml +++ b/pom.xml @@ -17,6 +17,8 @@ <properties> <lucene.version>6.2.0</lucene.version> + <morphology.version>1.3-SNAPSHOT</morphology.version> + <junit.version>4.8.2</junit.version> </properties> <distributionManagement> @@ -168,5 +170,6 @@ <module>dictionary-reader</module> <module>russian</module> <module>english</module> + <module>solr-morphology-analysis</module> </modules> </project> \ No newline at end of file diff --git a/solr-morphology-analysis/pom.xml b/solr-morphology-analysis/pom.xml new file mode 100644 index 0000000..b60f6af --- /dev/null +++ b/solr-morphology-analysis/pom.xml @@ -0,0 +1,40 @@ +<?xml version="1.0" encoding="UTF-8"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <parent> + <artifactId>morphology</artifactId> + <groupId>org.apache.lucene.morphology</groupId> + <version>1.3-SNAPSHOT</version> + </parent> + <modelVersion>4.0.0</modelVersion> + + <groupId>org.apache.lucene.analysis</groupId> + <artifactId>morphology</artifactId> + <name>solr-morphology-analysis</name> + <version>${morphology.version}</version> + <url>http://maven.apache.org</url> + + <dependencies> + + <dependency> + <groupId>org.apache.lucene.morphology</groupId> + <artifactId>russian</artifactId> + <version>${morphology.version}</version> + </dependency> + <dependency> + <groupId>org.apache.lucene.morphology</groupId> + <artifactId>english</artifactId> + <version>${morphology.version}</version> + </dependency> + + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <version>${junit.version}</version> + <scope>test</scope> + </dependency> + + </dependencies> + +</project> \ No newline at end of file diff --git a/solr-morphology-analysis/src/main/java/org/apache/lucene/analysis/morphology/MorphologyFilterFactory.java 
b/solr-morphology-analysis/src/main/java/org/apache/lucene/analysis/morphology/MorphologyFilterFactory.java
new file mode 100644
index 0000000..51321e9
--- /dev/null
+++ b/solr-morphology-analysis/src/main/java/org/apache/lucene/analysis/morphology/MorphologyFilterFactory.java
@@ -0,0 +1,94 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.morphology;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoaderAware;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.morphology.LuceneMorphology;
+import org.apache.lucene.morphology.analyzer.MorphologyFilter;
+
+import java.util.Locale;
+import java.util.Map;
+
+/**
+ * Factory for {@link MorphologyFilter}, with configurable language.
+ * <p>
+ * <b>Note:</b> Two languages are available now: English (default value) and Russian.
+ * The language value is used verbatim as the prefix of the morphology class name
+ * (for example {@code Russian} selects {@code RussianLuceneMorphology}), so it is case-sensitive.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="content" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
+ *     &lt;filter class="solr.MorphologyFilterFactory" language="English"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ */
+public class MorphologyFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+
+    private static final String LANGUAGE_KEY = "language";
+
+    /** Language name read from the {@code language} argument; defaults to {@code "English"}. */
+    private final String language;
+
+    /** Morphology created by {@link #inform(ResourceLoader)}; not assigned before that call. */
+    private LuceneMorphology luceneMorphology;
+
+    /**
+     * Creates the factory from its configuration arguments.
+     *
+     * @param args factory arguments; {@code language} is optional and defaults to {@code "English"}
+     * @throws IllegalArgumentException if {@code args} is not empty after the known arguments were read
+     */
+    public MorphologyFilterFactory(Map<String, String> args) {
+        super(args);
+
+        language = get(args, LANGUAGE_KEY, "English");
+        if (!args.isEmpty()) {
+            throw new IllegalArgumentException("Unknown parameters: " + args);
+        }
+    }
+
+    /**
+     * Wraps {@code input} in a {@link MorphologyFilter} that uses the morphology loaded by
+     * {@link #inform(ResourceLoader)}.
+     */
+    @Override
+    public TokenStream create(TokenStream input) {
+        return new MorphologyFilter(input, luceneMorphology);
+    }
+
+    /**
+     * Instantiates the morphology class for the configured language through {@code loader}.
+     * The class name is {@code org.apache.lucene.morphology.<language in lower case>.<language>LuceneMorphology}.
+     */
+    @Override
+    public void inform(ResourceLoader loader) {
+        // Locale.ROOT keeps the package name stable regardless of the JVM default locale
+        // (a default toLowerCase() maps 'I' to a dotless i under a Turkish locale).
+        String packageName = language.toLowerCase(Locale.ROOT);
+        String className = "org.apache.lucene.morphology." + packageName + "." + language + "LuceneMorphology";
+        luceneMorphology = loader.newInstance(className, LuceneMorphology.class);
+    }
+
+    /** Returns the morphology created by {@link #inform(ResourceLoader)}. */
+    public LuceneMorphology getLuceneMorphology() {
+        return luceneMorphology;
+    }
+}
diff --git a/solr-morphology-analysis/src/test/java/org/apache/lucene/analysis/morphology/MorphologyFilterFactoryTest.java b/solr-morphology-analysis/src/test/java/org/apache/lucene/analysis/morphology/MorphologyFilterFactoryTest.java
new file mode 100644
index 0000000..7cb225a
--- /dev/null
+++ b/solr-morphology-analysis/src/test/java/org/apache/lucene/analysis/morphology/MorphologyFilterFactoryTest.java
@@ -0,0 +1,75 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.morphology;
+
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.morphology.LuceneMorphology;
+import org.apache.lucene.morphology.english.EnglishLuceneMorphology;
+import org.apache.lucene.morphology.russian.RussianLuceneMorphology;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/** Checks which {@link LuceneMorphology} the {@link MorphologyFilterFactory} creates per language argument. */
+public class MorphologyFilterFactoryTest {
+
+    private static final String LANGUAGE_KEY = "language";
+    private ResourceLoader loader = new ClasspathResourceLoader();
+    private Map<String, String> args;
+
+    @Before
+    public void setUp(){
+        args = new HashMap<>();
+    }
+
+    /** Builds a factory from the current {@code args}, informs it with {@code loader} and returns its morphology. */
+    private LuceneMorphology loadMorphology() {
+        MorphologyFilterFactory factory = new MorphologyFilterFactory(args);
+        factory.inform(loader);
+        return factory.getLuceneMorphology();
+    }
+
+    @Test
+    public void if_RussianLanguageKey_then_CreateRussianMorphologyFilter(){
+        args.put(LANGUAGE_KEY, "Russian");
+
+        LuceneMorphology morphology = loadMorphology();
+
+        Assert.assertTrue("Creation the MorphologyFilterFactory with a Russian language key", morphology instanceof RussianLuceneMorphology);
+    }
+
+    @Test
+    public void if_EnglishLanguageKey_then_CreateEnglishMorphologyFilter(){
+        args.put(LANGUAGE_KEY, "English");
+
+        LuceneMorphology morphology = loadMorphology();
+
+        Assert.assertTrue("Creation the MorphologyFilterFactory with a English language key", morphology instanceof EnglishLuceneMorphology);
+    }
+
+    @Test
+    public void if_NoLanguageKey_then_CreateEnglishMorphologyFilter(){
+        // No language argument is put into args here,
+        // so the factory falls back to its default language.
+        LuceneMorphology morphology = loadMorphology();
+
+        Assert.assertTrue("Creation the MorphologyFilterFactory without any language keys", morphology instanceof EnglishLuceneMorphology);
+    }
+}