Add support for Lucene 3.5 and start work on the English stemmer

git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@123 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
Alexander.A.Kuznetsov@gmail.com
2011-11-28 16:44:15 +00:00
parent d46651f2ba
commit 466de768ca
6 changed files with 75 additions and 11 deletions

View File

@ -0,0 +1,23 @@
package org.apache.lucene.morphology.english.stemmer;
import org.apache.lucene.morphology.english.EnglishLuceneMorphology;
import java.util.ArrayList;
import java.util.List;
/**
 * Picks a single stem for a word out of the normal forms reported by an
 * {@link EnglishLuceneMorphology}.
 */
public class EnglishStemmer {
    private EnglishLuceneMorphology englishLuceneMorphology;

    /**
     * Creates a stemmer without a morphology.
     *
     * <p>Retained so that existing {@code new EnglishStemmer()} call sites keep
     * compiling. An instance created this way cannot stem anything until a
     * morphology is supplied; prefer
     * {@link #EnglishStemmer(EnglishLuceneMorphology)}.
     */
    public EnglishStemmer() {
    }

    /**
     * Creates a stemmer backed by the given morphology.
     *
     * @param englishLuceneMorphology source of normal forms for each word
     */
    public EnglishStemmer(EnglishLuceneMorphology englishLuceneMorphology) {
        this.englishLuceneMorphology = englishLuceneMorphology;
    }

    /**
     * Returns the stem of {@code word}.
     *
     * <p>If the morphology reports exactly one normal form, that form is the
     * stem. Otherwise one occurrence of the word itself is discarded from the
     * candidates, and if exactly one candidate remains it is the stem. In every
     * other case the word is returned unchanged.
     *
     * @param word the word to stem
     * @return the chosen normal form, or {@code word} itself when the choice is
     *         ambiguous or no normal form is reported
     * @throws IllegalStateException if this stemmer has no morphology
     */
    public String getStemmedWord(String word) {
        if (englishLuceneMorphology == null) {
            throw new IllegalStateException(
                    "EnglishStemmer has no EnglishLuceneMorphology; "
                            + "construct it with EnglishStemmer(EnglishLuceneMorphology)");
        }

        // Work on a private copy: the list handed back by the morphology may be
        // shared or unmodifiable, and it is not ours to edit.
        List<String> normalForms =
                new ArrayList<String>(englishLuceneMorphology.getNormalForms(word));

        if (normalForms.size() == 1) {
            return normalForms.get(0);
        }

        // The word is often listed as one of its own normal forms; dropping it
        // may leave a single, unambiguous alternative.
        normalForms.remove(word);
        if (normalForms.size() == 1) {
            return normalForms.get(0);
        }

        return word;
    }
}

View File

@ -0,0 +1,34 @@
package org.apache.lucene.morphology.english.stemmer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.morphology.LuceneMorphology;
import java.io.IOException;
import java.util.Iterator;
public class EnglishStemmerFilter extends TokenFilter {
private EnglishStemmer englishStemmer;
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
public EnglishStemmerFilter(TokenStream input, EnglishStemmer englishStemmer) {
super(input);
this.englishStemmer = englishStemmer;
}
final public boolean incrementToken() throws IOException {
boolean b = input.incrementToken();
if (!b) {
return false;
}
String s = new String(termAtt.buffer(), 0, termAtt.length());
termAtt.setEmpty();
termAtt.append(s);
return true;
}
}