This commit is contained in:
parent
3e69baa332
commit
1323228212
@ -18,9 +18,13 @@ package org.apache.lucene.morphology;
|
|||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||||
|
import org.apache.lucene.morphology.analyzer.MorphologyAnalyzer;
|
||||||
|
import org.apache.lucene.morphology.analyzer.MorphologyFilter;
|
||||||
import org.apache.lucene.morphology.english.EnglishAnalyzer;
|
import org.apache.lucene.morphology.english.EnglishAnalyzer;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.morphology.english.EnglishLuceneMorphology;
|
||||||
import org.apache.lucene.morphology.russian.RussianAnalyzer;
|
import org.apache.lucene.morphology.russian.RussianAnalyzer;
|
||||||
|
import org.apache.lucene.morphology.russian.RussianLuceneMorphology;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
@ -50,6 +54,22 @@ public class AnalyzersTest {
|
|||||||
testAnalayzer(morphlogyAnalyzer, answerPath, testPath);
|
testAnalayzer(morphlogyAnalyzer, answerPath, testPath);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void emptyStringTest() throws IOException {
|
||||||
|
LuceneMorphology russianLuceneMorphology = new RussianLuceneMorphology();
|
||||||
|
LuceneMorphology englishLuceneMorphology = new EnglishLuceneMorphology();
|
||||||
|
|
||||||
|
MorphologyAnalyzer russianAnalyzer = new MorphologyAnalyzer(russianLuceneMorphology);
|
||||||
|
InputStreamReader reader = new InputStreamReader(new ByteArrayInputStream("тест пм тест".getBytes()), "UTF-8");
|
||||||
|
TokenStream stream = russianAnalyzer.tokenStream(null, reader);
|
||||||
|
MorphologyFilter englishFilter = new MorphologyFilter(stream, englishLuceneMorphology);
|
||||||
|
|
||||||
|
englishFilter.reset();
|
||||||
|
while (englishFilter.incrementToken()) {
|
||||||
|
System.out.println(englishFilter.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void shouldProvideCorrectIndentForWordWithMelitaForm() throws IOException {
|
public void shouldProvideCorrectIndentForWordWithMelitaForm() throws IOException {
|
||||||
Analyzer morphlogyAnalyzer = new RussianAnalyzer();
|
Analyzer morphlogyAnalyzer = new RussianAnalyzer();
|
||||||
|
@ -50,8 +50,15 @@ public class MorphologyImpl implements Morphology {
|
|||||||
ArrayList<String> result = new ArrayList<String>();
|
ArrayList<String> result = new ArrayList<String>();
|
||||||
int[] ints = decoderEncoder.encodeToArray(revertWord(s));
|
int[] ints = decoderEncoder.encodeToArray(revertWord(s));
|
||||||
int ruleId = findRuleId(ints);
|
int ruleId = findRuleId(ints);
|
||||||
|
boolean notSeenEmptyString = true;
|
||||||
for (Heuristic h : rules[rulesId[ruleId]]) {
|
for (Heuristic h : rules[rulesId[ruleId]]) {
|
||||||
result.add(h.transformWord(s).toString());
|
String e = h.transformWord(s).toString();
|
||||||
|
if (e.length() > 0) {
|
||||||
|
result.add(e);
|
||||||
|
} else if (notSeenEmptyString) {
|
||||||
|
result.add(s);
|
||||||
|
notSeenEmptyString = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -19,8 +19,6 @@ package org.apache.lucene.morphology.analyzer;
|
|||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||||
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
|
|
||||||
import org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilter;
|
|
||||||
import org.apache.lucene.analysis.payloads.PayloadEncoder;
|
import org.apache.lucene.analysis.payloads.PayloadEncoder;
|
||||||
import org.apache.lucene.analysis.payloads.PayloadHelper;
|
import org.apache.lucene.analysis.payloads.PayloadHelper;
|
||||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||||
@ -28,7 +26,6 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
|
|||||||
import org.apache.lucene.morphology.LetterDecoderEncoder;
|
import org.apache.lucene.morphology.LetterDecoderEncoder;
|
||||||
import org.apache.lucene.morphology.LuceneMorphology;
|
import org.apache.lucene.morphology.LuceneMorphology;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.Version;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
@ -84,4 +81,6 @@ public class MorphologyAnalyzer extends Analyzer {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user