Adding test for English stemmer
git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@124 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
parent
466de768ca
commit
77ce4252ad
@ -16,6 +16,7 @@
|
|||||||
package org.apache.lucene.morphology;
|
package org.apache.lucene.morphology;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
import org.apache.lucene.morphology.english.EnglishAnalyzer;
|
import org.apache.lucene.morphology.english.EnglishAnalyzer;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||||
@ -67,8 +68,8 @@ public class AnalyzersTest {
|
|||||||
TokenStream tokenStream = morphlogyAnalyzer.tokenStream(null, reader);
|
TokenStream tokenStream = morphlogyAnalyzer.tokenStream(null, reader);
|
||||||
HashSet<String> result = new HashSet<String>();
|
HashSet<String> result = new HashSet<String>();
|
||||||
while (tokenStream.incrementToken()) {
|
while (tokenStream.incrementToken()) {
|
||||||
TermAttribute attribute1 = tokenStream.getAttribute(TermAttribute.class);
|
CharTermAttribute attribute1 = tokenStream.getAttribute(CharTermAttribute.class);
|
||||||
result.add(attribute1.term());
|
result.add(attribute1.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
stream.close();
|
stream.close();
|
||||||
|
@ -19,11 +19,10 @@
|
|||||||
<version>1.1-SNAPSHOT</version>
|
<version>1.1-SNAPSHOT</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>junit</groupId>
|
<groupId>junit</groupId>
|
||||||
<artifactId>junit</artifactId>
|
<artifactId>junit</artifactId>
|
||||||
<version>4.4</version>
|
<version>4.8.2</version>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
@ -8,7 +8,14 @@ import java.util.List;
|
|||||||
public class EnglishStemmer {
|
public class EnglishStemmer {
|
||||||
private EnglishLuceneMorphology englishLuceneMorphology;
|
private EnglishLuceneMorphology englishLuceneMorphology;
|
||||||
|
|
||||||
|
/**
 * Creates a stemmer backed by the given morphology.
 *
 * @param englishLuceneMorphology morphology stored in the field of the same name;
 *        getStemmedWord calls checkString and getNormalForms on it
 */
public EnglishStemmer(EnglishLuceneMorphology englishLuceneMorphology) {
|
||||||
|
this.englishLuceneMorphology = englishLuceneMorphology;
|
||||||
|
}
|
||||||
|
|
||||||
public String getStemmedWord(String word){
|
public String getStemmedWord(String word){
|
||||||
|
if(!englishLuceneMorphology.checkString(word)){
|
||||||
|
return word;
|
||||||
|
}
|
||||||
List<String> normalForms = englishLuceneMorphology.getNormalForms(word);
|
List<String> normalForms = englishLuceneMorphology.getNormalForms(word);
|
||||||
if(normalForms.size() == 1){
|
if(normalForms.size() == 1){
|
||||||
return normalForms.get(0);
|
return normalForms.get(0);
|
||||||
|
@ -0,0 +1,34 @@
|
|||||||
|
package org.apache.lucene.morphology.english.stemmer;
|
||||||
|
|
||||||
|
import org.apache.lucene.morphology.english.EnglishLuceneMorphology;
|
||||||
|
import org.junit.Test;
|
||||||
|
import static org.hamcrest.core.IsEqual.equalTo;
|
||||||
|
import static org.junit.Assert.assertThat;
|
||||||
|
|
||||||
|
|
||||||
|
public class EnglishStemmerTest {
|
||||||
|
@Test
|
||||||
|
public void testGetStemmedWord() throws Exception {
|
||||||
|
EnglishLuceneMorphology englishLuceneMorphology = new EnglishLuceneMorphology();
|
||||||
|
EnglishStemmer englishStemmer = new EnglishStemmer(englishLuceneMorphology);
|
||||||
|
assertThat(englishStemmer.getStemmedWord("running"),equalTo("run"));
|
||||||
|
assertThat(englishStemmer.getStemmedWord("run"),equalTo("run"));
|
||||||
|
assertThat(englishStemmer.getStemmedWord("killed"),equalTo("kill"));
|
||||||
|
assertThat(englishStemmer.getStemmedWord("kill"),equalTo("kill"));
|
||||||
|
assertThat(englishStemmer.getStemmedWord("networking"),equalTo("network"));
|
||||||
|
assertThat(englishStemmer.getStemmedWord("network"),equalTo("network"));
|
||||||
|
assertThat(englishStemmer.getStemmedWord("statistics"),equalTo("statistic"));
|
||||||
|
assertThat(englishStemmer.getStemmedWord("statistic"),equalTo("statistic"));
|
||||||
|
assertThat(englishStemmer.getStemmedWord("stats"),equalTo("stat"));
|
||||||
|
assertThat(englishStemmer.getStemmedWord("stat"),equalTo("stat"));
|
||||||
|
assertThat(englishStemmer.getStemmedWord("countries"),equalTo("country"));
|
||||||
|
assertThat(englishStemmer.getStemmedWord("country"),equalTo("country"));
|
||||||
|
assertThat(englishStemmer.getStemmedWord("delete"),equalTo("delete"));
|
||||||
|
assertThat(englishStemmer.getStemmedWord("ended"),equalTo("end"));
|
||||||
|
assertThat(englishStemmer.getStemmedWord("end"),equalTo("end"));
|
||||||
|
assertThat(englishStemmer.getStemmedWord("ends"),equalTo("end"));
|
||||||
|
assertThat(englishStemmer.getStemmedWord("given"),equalTo("give"));
|
||||||
|
assertThat(englishStemmer.getStemmedWord("give"),equalTo("give"));
|
||||||
|
assertThat(englishStemmer.getStemmedWord("log4j"),equalTo("log4j"));
|
||||||
|
}
|
||||||
|
}
|
@ -12,7 +12,4 @@
|
|||||||
<version>1.1-SNAPSHOT</version>
|
<version>1.1-SNAPSHOT</version>
|
||||||
<url>http://maven.apache.org</url>
|
<url>http://maven.apache.org</url>
|
||||||
|
|
||||||
<dependencies>
|
|
||||||
|
|
||||||
</dependencies>
|
|
||||||
</project>
|
</project>
|
||||||
|
2
pom.xml
2
pom.xml
@ -35,7 +35,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>junit</groupId>
|
<groupId>junit</groupId>
|
||||||
<artifactId>junit</artifactId>
|
<artifactId>junit</artifactId>
|
||||||
<version>4.4</version>
|
<version>4.8.2</version>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
@ -23,7 +23,7 @@
|
|||||||
<dependency>
|
<dependency>
|
||||||
<groupId>junit</groupId>
|
<groupId>junit</groupId>
|
||||||
<artifactId>junit</artifactId>
|
<artifactId>junit</artifactId>
|
||||||
<version>4.4</version>
|
<version>4.8.2</version>
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user