adding test for english stemmer

git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@124 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
Alexander.A.Kuznetsov@gmail.com
2011-11-29 05:41:36 +00:00
parent 466de768ca
commit 77ce4252ad
7 changed files with 47 additions and 9 deletions

View File

@ -8,7 +8,14 @@ import java.util.List;
public class EnglishStemmer {
private EnglishLuceneMorphology englishLuceneMorphology;
public EnglishStemmer(EnglishLuceneMorphology englishLuceneMorphology) {
this.englishLuceneMorphology = englishLuceneMorphology;
}
public String getStemmedWord(String word){
if(!englishLuceneMorphology.checkString(word)){
return word;
}
List<String> normalForms = englishLuceneMorphology.getNormalForms(word);
if(normalForms.size() == 1){
return normalForms.get(0);

View File

@ -0,0 +1,34 @@
package org.apache.lucene.morphology.english.stemmer;
import org.apache.lucene.morphology.english.EnglishLuceneMorphology;
import org.junit.Test;
import static org.hamcrest.core.IsEqual.equalTo;
import static org.junit.Assert.assertThat;
public class EnglishStemmerTest {
@Test
public void testGetStemmedWord() throws Exception {
EnglishLuceneMorphology englishLuceneMorphology = new EnglishLuceneMorphology();
EnglishStemmer englishStemmer = new EnglishStemmer(englishLuceneMorphology);
assertThat(englishStemmer.getStemmedWord("running"),equalTo("run"));
assertThat(englishStemmer.getStemmedWord("run"),equalTo("run"));
assertThat(englishStemmer.getStemmedWord("killed"),equalTo("kill"));
assertThat(englishStemmer.getStemmedWord("kill"),equalTo("kill"));
assertThat(englishStemmer.getStemmedWord("networking"),equalTo("network"));
assertThat(englishStemmer.getStemmedWord("network"),equalTo("network"));
assertThat(englishStemmer.getStemmedWord("statistics"),equalTo("statistic"));
assertThat(englishStemmer.getStemmedWord("statistic"),equalTo("statistic"));
assertThat(englishStemmer.getStemmedWord("stats"),equalTo("stat"));
assertThat(englishStemmer.getStemmedWord("stat"),equalTo("stat"));
assertThat(englishStemmer.getStemmedWord("countries"),equalTo("country"));
assertThat(englishStemmer.getStemmedWord("country"),equalTo("country"));
assertThat(englishStemmer.getStemmedWord("delete"),equalTo("delete"));
assertThat(englishStemmer.getStemmedWord("ended"),equalTo("end"));
assertThat(englishStemmer.getStemmedWord("end"),equalTo("end"));
assertThat(englishStemmer.getStemmedWord("ends"),equalTo("end"));
assertThat(englishStemmer.getStemmedWord("given"),equalTo("give"));
assertThat(englishStemmer.getStemmedWord("give"),equalTo("give"));
assertThat(englishStemmer.getStemmedWord("log4j"),equalTo("log4j"));
}
}