Adding test for English stemmer
git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@124 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
parent
466de768ca
commit
77ce4252ad
@ -16,6 +16,7 @@
|
||||
package org.apache.lucene.morphology;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.morphology.english.EnglishAnalyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
@ -67,8 +68,8 @@ public class AnalyzersTest {
|
||||
TokenStream tokenStream = morphlogyAnalyzer.tokenStream(null, reader);
|
||||
HashSet<String> result = new HashSet<String>();
|
||||
while (tokenStream.incrementToken()) {
|
||||
TermAttribute attribute1 = tokenStream.getAttribute(TermAttribute.class);
|
||||
result.add(attribute1.term());
|
||||
CharTermAttribute attribute1 = tokenStream.getAttribute(CharTermAttribute.class);
|
||||
result.add(attribute1.toString());
|
||||
}
|
||||
|
||||
stream.close();
|
||||
|
@ -19,11 +19,10 @@
|
||||
<version>1.1-SNAPSHOT</version>
|
||||
</dependency>
|
||||
|
||||
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>4.4</version>
|
||||
<version>4.8.2</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
@ -8,7 +8,14 @@ import java.util.List;
|
||||
public class EnglishStemmer {
|
||||
private EnglishLuceneMorphology englishLuceneMorphology;
|
||||
|
||||
public EnglishStemmer(EnglishLuceneMorphology englishLuceneMorphology) {
|
||||
this.englishLuceneMorphology = englishLuceneMorphology;
|
||||
}
|
||||
|
||||
public String getStemmedWord(String word){
|
||||
if(!englishLuceneMorphology.checkString(word)){
|
||||
return word;
|
||||
}
|
||||
List<String> normalForms = englishLuceneMorphology.getNormalForms(word);
|
||||
if(normalForms.size() == 1){
|
||||
return normalForms.get(0);
|
||||
|
@ -0,0 +1,34 @@
|
||||
package org.apache.lucene.morphology.english.stemmer;
|
||||
|
||||
import org.apache.lucene.morphology.english.EnglishLuceneMorphology;
|
||||
import org.junit.Test;
|
||||
import static org.hamcrest.core.IsEqual.equalTo;
|
||||
import static org.junit.Assert.assertThat;
|
||||
|
||||
|
||||
public class EnglishStemmerTest {
|
||||
@Test
|
||||
public void testGetStemmedWord() throws Exception {
|
||||
EnglishLuceneMorphology englishLuceneMorphology = new EnglishLuceneMorphology();
|
||||
EnglishStemmer englishStemmer = new EnglishStemmer(englishLuceneMorphology);
|
||||
assertThat(englishStemmer.getStemmedWord("running"),equalTo("run"));
|
||||
assertThat(englishStemmer.getStemmedWord("run"),equalTo("run"));
|
||||
assertThat(englishStemmer.getStemmedWord("killed"),equalTo("kill"));
|
||||
assertThat(englishStemmer.getStemmedWord("kill"),equalTo("kill"));
|
||||
assertThat(englishStemmer.getStemmedWord("networking"),equalTo("network"));
|
||||
assertThat(englishStemmer.getStemmedWord("network"),equalTo("network"));
|
||||
assertThat(englishStemmer.getStemmedWord("statistics"),equalTo("statistic"));
|
||||
assertThat(englishStemmer.getStemmedWord("statistic"),equalTo("statistic"));
|
||||
assertThat(englishStemmer.getStemmedWord("stats"),equalTo("stat"));
|
||||
assertThat(englishStemmer.getStemmedWord("stat"),equalTo("stat"));
|
||||
assertThat(englishStemmer.getStemmedWord("countries"),equalTo("country"));
|
||||
assertThat(englishStemmer.getStemmedWord("country"),equalTo("country"));
|
||||
assertThat(englishStemmer.getStemmedWord("delete"),equalTo("delete"));
|
||||
assertThat(englishStemmer.getStemmedWord("ended"),equalTo("end"));
|
||||
assertThat(englishStemmer.getStemmedWord("end"),equalTo("end"));
|
||||
assertThat(englishStemmer.getStemmedWord("ends"),equalTo("end"));
|
||||
assertThat(englishStemmer.getStemmedWord("given"),equalTo("give"));
|
||||
assertThat(englishStemmer.getStemmedWord("give"),equalTo("give"));
|
||||
assertThat(englishStemmer.getStemmedWord("log4j"),equalTo("log4j"));
|
||||
}
|
||||
}
|
@ -12,7 +12,4 @@
|
||||
<version>1.1-SNAPSHOT</version>
|
||||
<url>http://maven.apache.org</url>
|
||||
|
||||
<dependencies>
|
||||
|
||||
</dependencies>
|
||||
</project>
|
||||
|
2
pom.xml
2
pom.xml
@ -35,7 +35,7 @@
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>4.4</version>
|
||||
<version>4.8.2</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
|
@ -23,7 +23,7 @@
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>4.4</version>
|
||||
<version>4.8.2</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user