From 6fc7e109cb23c88cfb44313275df44117b8b97f7 Mon Sep 17 00:00:00 2001 From: Pavel Artanov Date: Fri, 20 Jan 2017 18:36:19 +0400 Subject: [PATCH 1/2] Add an implementation of the TokenFilterFactory for the MorphologyFilter. Now you can use the MorphologyFilterFactory in a schema.xml: --- pom.xml | 3 + solr-morphology-analysis/pom.xml | 40 ++++++++++ .../morphology/MorphologyFilterFactory.java | 69 +++++++++++++++++ .../MorphologyFilterFactoryTest.java | 75 +++++++++++++++++++ 4 files changed, 187 insertions(+) create mode 100644 solr-morphology-analysis/pom.xml create mode 100644 solr-morphology-analysis/src/main/java/org/apache/lucene/analysis/morphology/MorphologyFilterFactory.java create mode 100644 solr-morphology-analysis/src/test/java/org/apache/lucene/analysis/morphology/MorphologyFilterFactoryTest.java diff --git a/pom.xml b/pom.xml index 7368125..a7dc816 100644 --- a/pom.xml +++ b/pom.xml @@ -17,6 +17,8 @@ 6.2.0 + 1.3-SNAPSHOT + 4.8.2 @@ -168,5 +170,6 @@ dictionary-reader russian english + solr-morphology-analysis \ No newline at end of file diff --git a/solr-morphology-analysis/pom.xml b/solr-morphology-analysis/pom.xml new file mode 100644 index 0000000..b60f6af --- /dev/null +++ b/solr-morphology-analysis/pom.xml @@ -0,0 +1,40 @@ + + + + morphology + org.apache.lucene.morphology + 1.3-SNAPSHOT + + 4.0.0 + + org.apache.lucene.analysis + morphology + solr-morphology-analysis + ${morphology.version} + http://maven.apache.org + + + + + org.apache.lucene.morphology + russian + ${morphology.version} + + + org.apache.lucene.morphology + english + ${morphology.version} + + + + junit + junit + ${junit.version} + test + + + + + \ No newline at end of file diff --git a/solr-morphology-analysis/src/main/java/org/apache/lucene/analysis/morphology/MorphologyFilterFactory.java b/solr-morphology-analysis/src/main/java/org/apache/lucene/analysis/morphology/MorphologyFilterFactory.java new file mode 100644 index 0000000..51321e9 --- /dev/null +++ 
b/solr-morphology-analysis/src/main/java/org/apache/lucene/analysis/morphology/MorphologyFilterFactory.java @@ -0,0 +1,69 @@ +/** + * Copyright 2009 Alexander Kuznetsov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.analysis.morphology; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.util.ResourceLoader; +import org.apache.lucene.analysis.util.ResourceLoaderAware; +import org.apache.lucene.analysis.util.TokenFilterFactory; +import org.apache.lucene.morphology.LuceneMorphology; +import org.apache.lucene.morphology.analyzer.MorphologyFilter; + +import java.util.Map; + +/** + * Factory for {@link MorphologyFilter}, with configurable language + *

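+ * Note: Two languages are currently available: English (the default) and Russian. The value is case-sensitive, because the name of the morphology class to load is derived from it. + *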

+ * <fieldType name="content" class="solr.TextField" positionIncrementGap="100">
+ *   <analyzer>
+ *     <tokenizer class="solr.StandardTokenizerFactory"/>
+ *     <filter class="solr.LowerCaseFilterFactory"/>
+ *     <filter class="org.apache.lucene.analysis.morphology.MorphologyFilterFactory" language="English"/>
+ *   </analyzer>
+ * </fieldType>
+ */ +public class MorphologyFilterFactory extends TokenFilterFactory implements ResourceLoaderAware{ + + private static final String LANGUAGE_KEY = "language"; + + private String language; + private LuceneMorphology luceneMorphology; + + public MorphologyFilterFactory(Map args) { + super(args); + + language = get(args, LANGUAGE_KEY, "English"); + if (!args.isEmpty()) { + throw new IllegalArgumentException("Unknown parameters: " + args); + } + } + + public TokenStream create(TokenStream input) { + return new MorphologyFilter(input, luceneMorphology); + } + + public void inform(ResourceLoader loader) { + + String className = "org.apache.lucene.morphology." + language.toLowerCase() + "." + language + "LuceneMorphology"; + luceneMorphology = loader.newInstance(className, LuceneMorphology.class); + } + + public LuceneMorphology getLuceneMorphology() { + return luceneMorphology; + } +} diff --git a/solr-morphology-analysis/src/test/java/org/apache/lucene/analysis/morphology/MorphologyFilterFactoryTest.java b/solr-morphology-analysis/src/test/java/org/apache/lucene/analysis/morphology/MorphologyFilterFactoryTest.java new file mode 100644 index 0000000..7cb225a --- /dev/null +++ b/solr-morphology-analysis/src/test/java/org/apache/lucene/analysis/morphology/MorphologyFilterFactoryTest.java @@ -0,0 +1,75 @@ +/** + * Copyright 2009 Alexander Kuznetsov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.analysis.morphology; + +import org.apache.lucene.analysis.util.ClasspathResourceLoader; +import org.apache.lucene.analysis.util.ResourceLoader; +import org.apache.lucene.morphology.LuceneMorphology; +import org.apache.lucene.morphology.english.EnglishLuceneMorphology; +import org.apache.lucene.morphology.russian.RussianLuceneMorphology; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.util.HashMap; +import java.util.Map; + +public class MorphologyFilterFactoryTest { + + private static final String LANGUAGE_KEY = "language"; + private ResourceLoader loader = new ClasspathResourceLoader(); + private Map args; + + @Before + public void setUp(){ + args = new HashMap<>(); + } + + @Test + public void if_RussianLanguageKey_then_CreateRussianMorphologyFilter(){ + + args.put(LANGUAGE_KEY, "Russian"); + MorphologyFilterFactory morphologyFilterFactory = new MorphologyFilterFactory(args); + morphologyFilterFactory.inform(loader); + + LuceneMorphology luceneMorphology = morphologyFilterFactory.getLuceneMorphology(); + + Assert.assertTrue("Creation the MorphologyFilterFactory with a Russian language key", luceneMorphology instanceof RussianLuceneMorphology); + } + + @Test + public void if_EnglishLanguageKey_then_CreateEnglishMorphologyFilter(){ + + args.put(LANGUAGE_KEY, "English"); + MorphologyFilterFactory morphologyFilterFactory = new MorphologyFilterFactory(args); + morphologyFilterFactory.inform(loader); + + LuceneMorphology luceneMorphology = morphologyFilterFactory.getLuceneMorphology(); + + Assert.assertTrue("Creation the MorphologyFilterFactory with a English language key", luceneMorphology instanceof EnglishLuceneMorphology); + } + + @Test + public void if_NoLanguageKey_then_CreateEnglishMorphologyFilter(){ + + MorphologyFilterFactory morphologyFilterFactory = new MorphologyFilterFactory(args); + morphologyFilterFactory.inform(loader); + + LuceneMorphology luceneMorphology = 
morphologyFilterFactory.getLuceneMorphology(); + + Assert.assertTrue("Creation the MorphologyFilterFactory without any language keys", luceneMorphology instanceof EnglishLuceneMorphology); + } +} From b918dad855f83a4a10b129614f11b37c8c565a7f Mon Sep 17 00:00:00 2001 From: Pavel Artanov Date: Tue, 24 Jan 2017 13:41:57 +0400 Subject: [PATCH 2/2] Add a description how to use the MorphologyFilter in a SOLR schema. --- README.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/README.md b/README.md index 7c15b2e..be4a5a6 100644 --- a/README.md +++ b/README.md @@ -62,6 +62,21 @@ Also if you need get a list of base forms of word, you can use following example LuceneMorphology luceneMorph = new EnglishLuceneMorphology(); List wordBaseForms = luceneMorph.getMorphInfo(word); +### Solr + +You can use the LuceneMorphology as morphology filter in a Solr _schema.xml_ using a **MorphologyFilterFactory:** + +```xml + + + + + + + +``` + +Just add _morphology-1.3.jar_ in your Solr lib-directories ### Restrictions