Merge pull request #10 from Zanzarchik/master
Add an implementation of the TokenFilterFactory for the MorphologyFilter
This commit is contained in:
commit
b180862572
15
README.md
15
README.md
@ -62,6 +62,21 @@ Also if you need get a list of base forms of word, you can use following example
|
||||
LuceneMorphology luceneMorph = new EnglishLuceneMorphology();
|
||||
List<String> wordBaseForms = luceneMorph.getMorphInfo(word);
|
||||
|
||||
### Solr
|
||||
|
||||
You can use LuceneMorphology as a morphology filter in a Solr _schema.xml_ by means of the **MorphologyFilterFactory**:
|
||||
|
||||
```xml
|
||||
<fieldType name="content" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="org.apache.lucene.analysis.morphology.MorphologyFilterFactory" language="Russian"/>
|
||||
<filter class="org.apache.lucene.analysis.morphology.MorphologyFilterFactory" language="English"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
```
|
||||
|
||||
Just add _morphology-1.3.jar_ to your Solr lib directories.
|
||||
|
||||
### Restrictions
|
||||
|
||||
|
3
pom.xml
3
pom.xml
@ -17,6 +17,8 @@
|
||||
|
||||
<properties>
|
||||
<lucene.version>6.2.0</lucene.version>
|
||||
<morphology.version>1.3-SNAPSHOT</morphology.version>
|
||||
<junit.version>4.8.2</junit.version>
|
||||
</properties>
|
||||
|
||||
<distributionManagement>
|
||||
@ -168,5 +170,6 @@
|
||||
<module>dictionary-reader</module>
|
||||
<module>russian</module>
|
||||
<module>english</module>
|
||||
<module>solr-morphology-analysis</module>
|
||||
</modules>
|
||||
</project>
|
40
solr-morphology-analysis/pom.xml
Normal file
40
solr-morphology-analysis/pom.xml
Normal file
@ -0,0 +1,40 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<parent>
|
||||
<artifactId>morphology</artifactId>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<version>1.3-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<groupId>org.apache.lucene.analysis</groupId>
|
||||
<artifactId>morphology</artifactId>
|
||||
<name>solr-morphology-analysis</name>
|
||||
<version>${morphology.version}</version>
|
||||
<url>http://maven.apache.org</url>
|
||||
|
||||
<dependencies>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>russian</artifactId>
|
||||
<version>${morphology.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>english</artifactId>
|
||||
<version>${morphology.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>${junit.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
||||
</project>
|
@ -0,0 +1,69 @@
|
||||
/**
|
||||
* Copyright 2009 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.morphology;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoaderAware;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
import org.apache.lucene.morphology.LuceneMorphology;
|
||||
import org.apache.lucene.morphology.analyzer.MorphologyFilter;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Factory for {@link MorphologyFilter}, with configurable language
|
||||
* <p>
|
||||
* <b>Note:</b> Two languages are available now: English (default value) and Russian.
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="content" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.MorphologyFilterFactory" language="English"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class MorphologyFilterFactory extends TokenFilterFactory implements ResourceLoaderAware{
|
||||
|
||||
private static final String LANGUAGE_KEY = "language";
|
||||
|
||||
private String language;
|
||||
private LuceneMorphology luceneMorphology;
|
||||
|
||||
public MorphologyFilterFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
|
||||
language = get(args, LANGUAGE_KEY, "English");
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new MorphologyFilter(input, luceneMorphology);
|
||||
}
|
||||
|
||||
public void inform(ResourceLoader loader) {
|
||||
|
||||
String className = "org.apache.lucene.morphology." + language.toLowerCase() + "." + language + "LuceneMorphology";
|
||||
luceneMorphology = loader.newInstance(className, LuceneMorphology.class);
|
||||
}
|
||||
|
||||
public LuceneMorphology getLuceneMorphology() {
|
||||
return luceneMorphology;
|
||||
}
|
||||
}
|
@ -0,0 +1,75 @@
|
||||
/**
|
||||
* Copyright 2009 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.morphology;
|
||||
|
||||
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
|
||||
import org.apache.lucene.analysis.util.ResourceLoader;
|
||||
import org.apache.lucene.morphology.LuceneMorphology;
|
||||
import org.apache.lucene.morphology.english.EnglishLuceneMorphology;
|
||||
import org.apache.lucene.morphology.russian.RussianLuceneMorphology;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
public class MorphologyFilterFactoryTest {
|
||||
|
||||
private static final String LANGUAGE_KEY = "language";
|
||||
private ResourceLoader loader = new ClasspathResourceLoader();
|
||||
private Map<String, String> args;
|
||||
|
||||
@Before
|
||||
public void setUp(){
|
||||
args = new HashMap<>();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void if_RussianLanguageKey_then_CreateRussianMorphologyFilter(){
|
||||
|
||||
args.put(LANGUAGE_KEY, "Russian");
|
||||
MorphologyFilterFactory morphologyFilterFactory = new MorphologyFilterFactory(args);
|
||||
morphologyFilterFactory.inform(loader);
|
||||
|
||||
LuceneMorphology luceneMorphology = morphologyFilterFactory.getLuceneMorphology();
|
||||
|
||||
Assert.assertTrue("Creation the MorphologyFilterFactory with a Russian language key", luceneMorphology instanceof RussianLuceneMorphology);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void if_EnglishLanguageKey_then_CreateEnglishMorphologyFilter(){
|
||||
|
||||
args.put(LANGUAGE_KEY, "English");
|
||||
MorphologyFilterFactory morphologyFilterFactory = new MorphologyFilterFactory(args);
|
||||
morphologyFilterFactory.inform(loader);
|
||||
|
||||
LuceneMorphology luceneMorphology = morphologyFilterFactory.getLuceneMorphology();
|
||||
|
||||
Assert.assertTrue("Creation the MorphologyFilterFactory with a English language key", luceneMorphology instanceof EnglishLuceneMorphology);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void if_NoLanguageKey_then_CreateEnglishMorphologyFilter(){
|
||||
|
||||
MorphologyFilterFactory morphologyFilterFactory = new MorphologyFilterFactory(args);
|
||||
morphologyFilterFactory.inform(loader);
|
||||
|
||||
LuceneMorphology luceneMorphology = morphologyFilterFactory.getLuceneMorphology();
|
||||
|
||||
Assert.assertTrue("Creation the MorphologyFilterFactory without any language keys", luceneMorphology instanceof EnglishLuceneMorphology);
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user