Add support for Lucene 5.0.0

This commit is contained in:
Alexander Kuznetsov 2015-03-21 00:38:55 +03:00
parent 2b5509c825
commit d300938502
6 changed files with 54 additions and 13 deletions

3
.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
target
.idea
*.iml

View File

@ -19,7 +19,6 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.morphology.english.EnglishAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.morphology.russian.RussianAnalyzer;
import org.junit.Test;
@ -66,6 +65,7 @@ public class AnalyzersTest {
InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
TokenStream tokenStream = morphlogyAnalyzer.tokenStream(null, reader);
tokenStream.reset();
HashSet<String> result = new HashSet<String>();
while (tokenStream.incrementToken()) {
CharTermAttribute attribute1 = tokenStream.getAttribute(CharTermAttribute.class);

View File

@ -25,6 +25,7 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import static org.hamcrest.CoreMatchers.equalTo;
@ -43,6 +44,8 @@ public class LuceneMorphTest {
@Test
public void russianMorphologyShouldGetCorrectNormalForm() throws IOException {
    // Runs the shared testMorphology check for the Russian dictionary against the
    // bundled test-data resource. The former ad-hoc getMorphInfo(...) lookup that was
    // only printed to stdout (and never asserted on) has been dropped as debug residue.
    LuceneMorphology luceneMorph = new RussianLuceneMorphology();
    String pathToTestData = "/russian/russian-morphology-test.txt";
    testMorphology(luceneMorph, pathToTestData);
}

View File

@ -17,12 +17,17 @@
package org.apache.lucene.morphology.analyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilter;
import org.apache.lucene.analysis.payloads.PayloadEncoder;
import org.apache.lucene.analysis.payloads.PayloadHelper;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.morphology.LetterDecoderEncoder;
import org.apache.lucene.morphology.LuceneMorphology;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
import java.io.IOException;
@ -44,15 +49,39 @@ public class MorphologyAnalyzer extends Analyzer {
luceneMorph = new LuceneMorphology(inputStream, letterDecoderEncoder);
}
/**
 * Builds the token stream for the given reader: standard tokenization, then the
 * standard filter, lower-casing, and finally the morphology filter backed by
 * {@code luceneMorph}. Every Lucene component is created with {@code Version.LUCENE_35}.
 *
 * @param fieldName name of the field being analyzed; not used when building the chain
 * @param reader    source of the text to tokenize
 * @return the morphology filter wrapping the tokenizer/filter chain
 */
public final TokenStream tokenStream(String fieldName, Reader reader) {
    final TokenStream tokenized = new StandardTokenizer(Version.LUCENE_35, reader);
    final TokenStream standardized = new StandardFilter(Version.LUCENE_35, tokenized);
    final TokenStream lowerCased = new LowerCaseFilter(Version.LUCENE_35, standardized);
    return new MorphologyFilter(lowerCased, luceneMorph);
}
@Override
final public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
return super.reusableTokenStream(fieldName, reader);
// Builds the analysis chain: StandardTokenizer -> StandardFilter -> LowerCaseFilter
// -> MorphologyFilter (backed by luceneMorph). The field name argument is not
// consulted, so the same chain is produced for every field.
//
// Removed as dead code: an anonymous PayloadEncoder that was constructed but never
// passed to any filter (and carried debug System.out.println calls), and an anonymous
// TokenStreamComponents subclass whose setReader override only delegated to super.
protected TokenStreamComponents createComponents(String s) {
    StandardTokenizer src = new StandardTokenizer();
    TokenFilter filter = new StandardFilter(src);
    filter = new LowerCaseFilter(filter);
    filter = new MorphologyFilter(filter, luceneMorph);
    return new TokenStreamComponents(src, filter);
}
}

View File

@ -19,7 +19,6 @@ package org.apache.lucene.morphology.analyzer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.morphology.LuceneMorphology;
import java.io.IOException;

View File

@ -49,8 +49,15 @@
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>3.5.0</version>
<version>5.0.0</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>5.0.0</version>
</dependency>
</dependencies>
<repositories>