Adding test for English stemmer

git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@124 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
Alexander.A.Kuznetsov@gmail.com 2011-11-29 05:41:36 +00:00
parent 466de768ca
commit 77ce4252ad
7 changed files with 47 additions and 9 deletions

View File

@ -16,6 +16,7 @@
package org.apache.lucene.morphology;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.morphology.english.EnglishAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
@ -67,8 +68,8 @@ public class AnalyzersTest {
TokenStream tokenStream = morphlogyAnalyzer.tokenStream(null, reader);
HashSet<String> result = new HashSet<String>();
while (tokenStream.incrementToken()) {
TermAttribute attribute1 = tokenStream.getAttribute(TermAttribute.class);
result.add(attribute1.term());
CharTermAttribute attribute1 = tokenStream.getAttribute(CharTermAttribute.class);
result.add(attribute1.toString());
}
stream.close();

View File

@ -19,11 +19,10 @@
<version>1.1-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.4</version>
<version>4.8.2</version>
<scope>test</scope>
</dependency>
</dependencies>

View File

@ -8,7 +8,14 @@ import java.util.List;
public class EnglishStemmer {
private EnglishLuceneMorphology englishLuceneMorphology;
/**
 * Creates a stemmer backed by the given English morphology.
 *
 * @param englishLuceneMorphology morphology stored on this instance; {@code getStemmedWord}
 *                                calls its {@code checkString} and {@code getNormalForms}
 */
public EnglishStemmer(EnglishLuceneMorphology englishLuceneMorphology) {
this.englishLuceneMorphology = englishLuceneMorphology;
}
public String getStemmedWord(String word){
if(!englishLuceneMorphology.checkString(word)){
return word;
}
List<String> normalForms = englishLuceneMorphology.getNormalForms(word);
if(normalForms.size() == 1){
return normalForms.get(0);

View File

@ -0,0 +1,34 @@
package org.apache.lucene.morphology.english.stemmer;
import org.apache.lucene.morphology.english.EnglishLuceneMorphology;
import org.junit.Test;
import static org.hamcrest.core.IsEqual.equalTo;
import static org.junit.Assert.assertThat;
/**
 * Unit test for {@code EnglishStemmer.getStemmedWord(String)}.
 */
public class EnglishStemmerTest {

    /**
     * Pairs of {input word, expected stem}, verified in the order listed.
     * Covers inflected forms, words that are already their own stem, and the
     * alphanumeric token "log4j", which is expected to come back unchanged.
     */
    private static final String[][] WORD_AND_EXPECTED_STEM = {
        {"running", "run"},
        {"run", "run"},
        {"killed", "kill"},
        {"kill", "kill"},
        {"networking", "network"},
        {"network", "network"},
        {"statistics", "statistic"},
        {"statistic", "statistic"},
        {"stats", "stat"},
        {"stat", "stat"},
        {"countries", "country"},
        {"country", "country"},
        {"delete", "delete"},
        {"ended", "end"},
        {"end", "end"},
        {"ends", "end"},
        {"given", "give"},
        {"give", "give"},
        {"log4j", "log4j"},
    };

    /**
     * Builds a stemmer over a fresh {@code EnglishLuceneMorphology} and asserts that every
     * word in {@link #WORD_AND_EXPECTED_STEM} is stemmed to its expected value. The first
     * mismatch fails the test.
     *
     * @throws Exception if constructing the morphology or stemming a word throws
     */
    @Test
    public void testGetStemmedWord() throws Exception {
        EnglishStemmer stemmer = new EnglishStemmer(new EnglishLuceneMorphology());
        for (String[] pair : WORD_AND_EXPECTED_STEM) {
            String word = pair[0];
            String expectedStem = pair[1];
            assertThat(stemmer.getStemmedWord(word), equalTo(expectedStem));
        }
    }
}

View File

@ -12,7 +12,4 @@
<version>1.1-SNAPSHOT</version>
<url>http://maven.apache.org</url>
<dependencies>
</dependencies>
</project>

View File

@ -35,7 +35,7 @@
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.4</version>
<version>4.8.2</version>
<scope>test</scope>
</dependency>

View File

@ -23,7 +23,7 @@
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.4</version>
<version>4.8.2</version>
<scope>test</scope>
</dependency>