diff --git a/dictionary-reader/src/test/java/org/apache/lucene/morphology/AnalyzersTest.java b/dictionary-reader/src/test/java/org/apache/lucene/morphology/AnalyzersTest.java index 3cb2794..a37bfa7 100644 --- a/dictionary-reader/src/test/java/org/apache/lucene/morphology/AnalyzersTest.java +++ b/dictionary-reader/src/test/java/org/apache/lucene/morphology/AnalyzersTest.java @@ -16,6 +16,7 @@ package org.apache.lucene.morphology; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.morphology.english.EnglishAnalyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.TermAttribute; @@ -67,8 +68,8 @@ public class AnalyzersTest { TokenStream tokenStream = morphlogyAnalyzer.tokenStream(null, reader); HashSet result = new HashSet(); while (tokenStream.incrementToken()) { - TermAttribute attribute1 = tokenStream.getAttribute(TermAttribute.class); - result.add(attribute1.term()); + CharTermAttribute attribute1 = tokenStream.getAttribute(CharTermAttribute.class); + result.add(attribute1.toString()); } stream.close(); diff --git a/english/pom.xml b/english/pom.xml index 0efdc20..2348a02 100644 --- a/english/pom.xml +++ b/english/pom.xml @@ -19,11 +19,10 @@ 1.1-SNAPSHOT - junit junit - 4.4 + 4.8.2 test diff --git a/english/src/main/java/org/apache/lucene/morphology/english/stemmer/EnglishStemmer.java b/english/src/main/java/org/apache/lucene/morphology/english/stemmer/EnglishStemmer.java index 346ea45..a0ca968 100644 --- a/english/src/main/java/org/apache/lucene/morphology/english/stemmer/EnglishStemmer.java +++ b/english/src/main/java/org/apache/lucene/morphology/english/stemmer/EnglishStemmer.java @@ -8,7 +8,14 @@ import java.util.List; public class EnglishStemmer { private EnglishLuceneMorphology englishLuceneMorphology; + public EnglishStemmer(EnglishLuceneMorphology englishLuceneMorphology) { + this.englishLuceneMorphology = englishLuceneMorphology; + } + public String getStemmedWord(String word){ + if(!englishLuceneMorphology.checkString(word)){ + return word; + } List normalForms = englishLuceneMorphology.getNormalForms(word); if(normalForms.size() == 1){ return normalForms.get(0); diff --git a/english/src/test/java/org/apache/lucene/morphology/english/stemmer/EnglishStemmerTest.java b/english/src/test/java/org/apache/lucene/morphology/english/stemmer/EnglishStemmerTest.java new file mode 100644 index 0000000..e892172 --- /dev/null +++ b/english/src/test/java/org/apache/lucene/morphology/english/stemmer/EnglishStemmerTest.java @@ -0,0 +1,34 @@ +package org.apache.lucene.morphology.english.stemmer; + +import org.apache.lucene.morphology.english.EnglishLuceneMorphology; +import org.junit.Test; +import static org.hamcrest.core.IsEqual.equalTo; +import static org.junit.Assert.assertThat; + + +public class EnglishStemmerTest { + @Test + public void testGetStemmedWord() throws Exception { + EnglishLuceneMorphology englishLuceneMorphology = new EnglishLuceneMorphology(); + EnglishStemmer englishStemmer = new EnglishStemmer(englishLuceneMorphology); + assertThat(englishStemmer.getStemmedWord("running"),equalTo("run")); + assertThat(englishStemmer.getStemmedWord("run"),equalTo("run")); + assertThat(englishStemmer.getStemmedWord("killed"),equalTo("kill")); + assertThat(englishStemmer.getStemmedWord("kill"),equalTo("kill")); + assertThat(englishStemmer.getStemmedWord("networking"),equalTo("network")); + assertThat(englishStemmer.getStemmedWord("network"),equalTo("network")); + assertThat(englishStemmer.getStemmedWord("statistics"),equalTo("statistic")); + assertThat(englishStemmer.getStemmedWord("statistic"),equalTo("statistic")); + assertThat(englishStemmer.getStemmedWord("stats"),equalTo("stat")); + assertThat(englishStemmer.getStemmedWord("stat"),equalTo("stat")); + assertThat(englishStemmer.getStemmedWord("countries"),equalTo("country")); + assertThat(englishStemmer.getStemmedWord("country"),equalTo("country")); + assertThat(englishStemmer.getStemmedWord("delete"),equalTo("delete")); + assertThat(englishStemmer.getStemmedWord("ended"),equalTo("end")); + assertThat(englishStemmer.getStemmedWord("end"),equalTo("end")); + assertThat(englishStemmer.getStemmedWord("ends"),equalTo("end")); + assertThat(englishStemmer.getStemmedWord("given"),equalTo("give")); + assertThat(englishStemmer.getStemmedWord("give"),equalTo("give")); + assertThat(englishStemmer.getStemmedWord("log4j"),equalTo("log4j")); + } +} diff --git a/morph/pom.xml b/morph/pom.xml index afb2d2c..03e1367 100644 --- a/morph/pom.xml +++ b/morph/pom.xml @@ -12,7 +12,4 @@ 1.1-SNAPSHOT http://maven.apache.org - - - diff --git a/pom.xml b/pom.xml index 9bca627..cb265f7 100644 --- a/pom.xml +++ b/pom.xml @@ -35,7 +35,7 @@ junit junit - 4.4 + 4.8.2 test diff --git a/russian/pom.xml b/russian/pom.xml index ac296a0..0b6e435 100644 --- a/russian/pom.xml +++ b/russian/pom.xml @@ -23,7 +23,7 @@ junit junit - 4.4 + 4.8.2 test