- removed deprecated StandardFilter
- version 1.5 - lucene version 8.0 - new syntax for TokenStreamComponents
This commit is contained in:
parent
48b1ceeb3a
commit
3fe076556f
@ -5,7 +5,7 @@ Russian and English morphology for Java and [Apache Lucene](http://lucene.apache
|
|||||||
|
|
||||||
### How to use
|
### How to use
|
||||||
|
|
||||||
Build the project by running `mvn clean package`; this produces the latest version of the artifacts (1.4). Add them to your classpath. You can choose which one to use - Russian or English.
|
Build the project by running `mvn clean package`; this produces the latest version of the artifacts (1.5). Add them to your classpath. You can choose which one to use - Russian or English.
|
||||||
|
|
||||||
Now you can create a Lucene Analyzer:
|
Now you can create a Lucene Analyzer:
|
||||||
|
|
||||||
@ -39,7 +39,7 @@ You can use the LuceneMorphology as morphology filter in a Solr _schema.xml_ usi
|
|||||||
</fieldType>
|
</fieldType>
|
||||||
```
|
```
|
||||||
|
|
||||||
Just add _morphology-1.4.jar_ to your Solr lib directories
|
Just add _morphology-1.5.jar_ to your Solr lib directories
|
||||||
|
|
||||||
### Restrictions
|
### Restrictions
|
||||||
|
|
||||||
|
@ -3,27 +3,27 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<artifactId>morphology</artifactId>
|
<artifactId>morphology</artifactId>
|
||||||
<groupId>org.apache.lucene.morphology</groupId>
|
<groupId>org.apache.lucene.morphology</groupId>
|
||||||
<version>1.4</version>
|
<version>1.5</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
<groupId>org.apache.lucene.morphology</groupId>
|
<groupId>org.apache.lucene.morphology</groupId>
|
||||||
<artifactId>dictionary-reader</artifactId>
|
<artifactId>dictionary-reader</artifactId>
|
||||||
<name>dictionary-reader</name>
|
<name>dictionary-reader</name>
|
||||||
<version>1.4</version>
|
<version>1.5</version>
|
||||||
<url>http://maven.apache.org</url>
|
<url>http://maven.apache.org</url>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.lucene.morphology</groupId>
|
<groupId>org.apache.lucene.morphology</groupId>
|
||||||
<artifactId>russian</artifactId>
|
<artifactId>russian</artifactId>
|
||||||
<version>1.4</version>
|
<version>1.5</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.lucene.morphology</groupId>
|
<groupId>org.apache.lucene.morphology</groupId>
|
||||||
<artifactId>english</artifactId>
|
<artifactId>english</artifactId>
|
||||||
<version>1.4</version>
|
<version>1.5</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
|
@ -21,7 +21,6 @@ import org.apache.lucene.analysis.CharArraySet;
|
|||||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
|
||||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
@ -149,10 +148,9 @@ public class AnalyzersTest extends BaseTokenStreamTestCase {
|
|||||||
@Override
|
@Override
|
||||||
protected TokenStreamComponents createComponents(String s) {
|
protected TokenStreamComponents createComponents(String s) {
|
||||||
StandardTokenizer src = new StandardTokenizer();
|
StandardTokenizer src = new StandardTokenizer();
|
||||||
TokenFilter filter = new StandardFilter(src);
|
|
||||||
CharArraySet dontStem = new CharArraySet(1, false);
|
CharArraySet dontStem = new CharArraySet(1, false);
|
||||||
dontStem.add("Tests");
|
dontStem.add("Tests");
|
||||||
filter = new SetKeywordMarkerFilter(filter, dontStem);
|
TokenFilter filter = new SetKeywordMarkerFilter(src, dontStem);
|
||||||
filter = new LowerCaseFilter(filter);
|
filter = new LowerCaseFilter(filter);
|
||||||
try {
|
try {
|
||||||
filter = new MorphologyFilter(filter, new EnglishLuceneMorphology());
|
filter = new MorphologyFilter(filter, new EnglishLuceneMorphology());
|
||||||
|
@ -3,20 +3,20 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<artifactId>morphology</artifactId>
|
<artifactId>morphology</artifactId>
|
||||||
<groupId>org.apache.lucene.morphology</groupId>
|
<groupId>org.apache.lucene.morphology</groupId>
|
||||||
<version>1.4</version>
|
<version>1.5</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
<groupId>org.apache.lucene.morphology</groupId>
|
<groupId>org.apache.lucene.morphology</groupId>
|
||||||
<artifactId>english</artifactId>
|
<artifactId>english</artifactId>
|
||||||
<name>english</name>
|
<name>english</name>
|
||||||
<version>1.4</version>
|
<version>1.5</version>
|
||||||
<url>http://maven.apache.org</url>
|
<url>http://maven.apache.org</url>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.lucene.morphology</groupId>
|
<groupId>org.apache.lucene.morphology</groupId>
|
||||||
<artifactId>morph</artifactId>
|
<artifactId>morph</artifactId>
|
||||||
<version>1.4</version>
|
<version>1.5</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
@ -3,13 +3,13 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<artifactId>morphology</artifactId>
|
<artifactId>morphology</artifactId>
|
||||||
<groupId>org.apache.lucene.morphology</groupId>
|
<groupId>org.apache.lucene.morphology</groupId>
|
||||||
<version>1.4</version>
|
<version>1.5</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
<groupId>org.apache.lucene.morphology</groupId>
|
<groupId>org.apache.lucene.morphology</groupId>
|
||||||
<artifactId>morph</artifactId>
|
<artifactId>morph</artifactId>
|
||||||
<name>morph</name>
|
<name>morph</name>
|
||||||
<version>1.4</version>
|
<version>1.5</version>
|
||||||
<url>http://maven.apache.org</url>
|
<url>http://maven.apache.org</url>
|
||||||
|
|
||||||
</project>
|
</project>
|
||||||
|
@ -21,7 +21,6 @@ import org.apache.lucene.analysis.LowerCaseFilter;
|
|||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.payloads.PayloadEncoder;
|
import org.apache.lucene.analysis.payloads.PayloadEncoder;
|
||||||
import org.apache.lucene.analysis.payloads.PayloadHelper;
|
import org.apache.lucene.analysis.payloads.PayloadHelper;
|
||||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
|
||||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||||
import org.apache.lucene.morphology.LetterDecoderEncoder;
|
import org.apache.lucene.morphology.LetterDecoderEncoder;
|
||||||
import org.apache.lucene.morphology.LuceneMorphology;
|
import org.apache.lucene.morphology.LuceneMorphology;
|
||||||
@ -29,7 +28,7 @@ import org.apache.lucene.util.BytesRef;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.Reader;
|
|
||||||
|
|
||||||
public class MorphologyAnalyzer extends Analyzer {
|
public class MorphologyAnalyzer extends Analyzer {
|
||||||
private LuceneMorphology luceneMorph;
|
private LuceneMorphology luceneMorph;
|
||||||
@ -70,17 +69,10 @@ public class MorphologyAnalyzer extends Analyzer {
|
|||||||
return new BytesRef(bytes, 0, bytes.length);
|
return new BytesRef(bytes, 0, bytes.length);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
TokenFilter filter = new StandardFilter(src);
|
|
||||||
filter = new LowerCaseFilter(filter);
|
TokenFilter filter = new LowerCaseFilter(src);
|
||||||
filter = new MorphologyFilter(filter, luceneMorph);
|
filter = new MorphologyFilter(filter, luceneMorph);
|
||||||
|
|
||||||
return new TokenStreamComponents(src, filter) {
|
return new TokenStreamComponents(r -> src.setReader(r), filter);
|
||||||
@Override
|
|
||||||
protected void setReader(final Reader reader) {
|
|
||||||
super.setReader(reader);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
6
pom.xml
6
pom.xml
@ -4,7 +4,7 @@
|
|||||||
<groupId>org.apache.lucene.morphology</groupId>
|
<groupId>org.apache.lucene.morphology</groupId>
|
||||||
<artifactId>morphology</artifactId>
|
<artifactId>morphology</artifactId>
|
||||||
<packaging>pom</packaging>
|
<packaging>pom</packaging>
|
||||||
<version>1.4</version>
|
<version>1.5</version>
|
||||||
<name>morphology</name>
|
<name>morphology</name>
|
||||||
<url>http://maven.apache.org</url>
|
<url>http://maven.apache.org</url>
|
||||||
|
|
||||||
@ -16,8 +16,8 @@
|
|||||||
</scm>
|
</scm>
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
<lucene.version>7.2.1</lucene.version>
|
<lucene.version>8.0.0</lucene.version>
|
||||||
<morphology.version>1.4</morphology.version>
|
<morphology.version>1.5</morphology.version>
|
||||||
<junit.version>4.12</junit.version>
|
<junit.version>4.12</junit.version>
|
||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
|
@ -3,13 +3,13 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<artifactId>morphology</artifactId>
|
<artifactId>morphology</artifactId>
|
||||||
<groupId>org.apache.lucene.morphology</groupId>
|
<groupId>org.apache.lucene.morphology</groupId>
|
||||||
<version>1.4</version>
|
<version>1.5</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
<groupId>org.apache.lucene.morphology</groupId>
|
<groupId>org.apache.lucene.morphology</groupId>
|
||||||
<artifactId>russian</artifactId>
|
<artifactId>russian</artifactId>
|
||||||
<name>russian</name>
|
<name>russian</name>
|
||||||
<version>1.4</version>
|
<version>1.5</version>
|
||||||
<url>http://maven.apache.org</url>
|
<url>http://maven.apache.org</url>
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|
||||||
@ -17,7 +17,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.lucene.morphology</groupId>
|
<groupId>org.apache.lucene.morphology</groupId>
|
||||||
<artifactId>morph</artifactId>
|
<artifactId>morph</artifactId>
|
||||||
<version>1.4</version>
|
<version>1.5</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
<parent>
|
<parent>
|
||||||
<artifactId>morphology</artifactId>
|
<artifactId>morphology</artifactId>
|
||||||
<groupId>org.apache.lucene.morphology</groupId>
|
<groupId>org.apache.lucene.morphology</groupId>
|
||||||
<version>1.4</version>
|
<version>1.5</version>
|
||||||
</parent>
|
</parent>
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user