test for suffix heuristics (SuffixEvristics)

git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@12 d817d54c-26ab-11de-abc9-2f7d1455ff7a
2009-04-13 14:11:45 +00:00
parent 48ae7d4cb6
commit 57b600d629
2 changed files with 9 additions and 3 deletions
@@ -2,6 +2,9 @@ package org.apache.lucene.russian.morphology.analayzer;

 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.standard.StandardFilter;

 import java.io.Reader;
 import java.io.IOException;
@@ -14,6 +17,9 @@ public class RussianMorphlogyAnalayzer  extends Analyzer {
    }

    public TokenStream tokenStream(String fieldName, Reader reader) {
-        return null;  //To change body of implemented methods use File | Settings | File Templates.
+        TokenStream result = new StandardTokenizer(reader);
+        result = new StandardFilter(result);
+        result = new LowerCaseFilter(result);
+        return new RussianMorphlogyFilter(result,suffixEvristics);
    }
 }
@@ -10,7 +10,7 @@ import java.io.IOException;
 public class RussianMorphlogyFilter extends TokenFilter {
    private SuffixEvristics suffixEvristics;

-    public RussianMorphlogyFilter(TokenStream tokenStream, SuffixEvristics suffixEvristics) throws IOException {
+    public RussianMorphlogyFilter(TokenStream tokenStream, SuffixEvristics suffixEvristics) {
        super(tokenStream);
        this.suffixEvristics = suffixEvristics;
    }
@@ -18,7 +18,7 @@ public class RussianMorphlogyFilter extends TokenFilter {
    public Token next(final Token reusableToken) throws IOException {
        Token nextToken = input.next(reusableToken);
        if(nextToken == null || nextToken.term().length() == 0) return nextToken;
-        String word = nextToken.term().toLowerCase();
+        String word = nextToken.term();
        Character testC = word.charAt(0);
        if (Character.UnicodeBlock.of(testC) != Character.UnicodeBlock.CYRILLIC){
            return  nextToken;