From 57b600d62972121be446cb05edded18fe5cf4949 Mon Sep 17 00:00:00 2001 From: "alexander.a.kuznetsov" Date: Mon, 13 Apr 2009 14:11:45 +0000 Subject: [PATCH] test for suffix evristics git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@12 d817d54c-26ab-11de-abc9-2f7d1455ff7a --- .../morphology/analayzer/RussianMorphlogyAnalayzer.java | 8 +++++++- .../morphology/analayzer/RussianMorphlogyFilter.java | 4 ++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/apache/lucene/russian/morphology/analayzer/RussianMorphlogyAnalayzer.java b/src/main/java/org/apache/lucene/russian/morphology/analayzer/RussianMorphlogyAnalayzer.java index 4287104..3337239 100644 --- a/src/main/java/org/apache/lucene/russian/morphology/analayzer/RussianMorphlogyAnalayzer.java +++ b/src/main/java/org/apache/lucene/russian/morphology/analayzer/RussianMorphlogyAnalayzer.java @@ -2,6 +2,9 @@ package org.apache.lucene.russian.morphology.analayzer; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.LowerCaseFilter; +import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.analysis.standard.StandardFilter; import java.io.Reader; import java.io.IOException; @@ -14,6 +17,9 @@ public class RussianMorphlogyAnalayzer extends Analyzer { } public TokenStream tokenStream(String fieldName, Reader reader) { - return null; //To change body of implemented methods use File | Settings | File Templates. + TokenStream result = new StandardTokenizer(reader); + result = new StandardFilter(result); + result = new LowerCaseFilter(result); + return new RussianMorphlogyFilter(result,suffixEvristics); } } diff --git a/src/main/java/org/apache/lucene/russian/morphology/analayzer/RussianMorphlogyFilter.java b/src/main/java/org/apache/lucene/russian/morphology/analayzer/RussianMorphlogyFilter.java index 08bd435..b324064 100644 --- a/src/main/java/org/apache/lucene/russian/morphology/analayzer/RussianMorphlogyFilter.java +++ b/src/main/java/org/apache/lucene/russian/morphology/analayzer/RussianMorphlogyFilter.java @@ -10,7 +10,7 @@ import java.io.IOException; public class RussianMorphlogyFilter extends TokenFilter { private SuffixEvristics suffixEvristics; - public RussianMorphlogyFilter(TokenStream tokenStream, SuffixEvristics suffixEvristics) throws IOException { + public RussianMorphlogyFilter(TokenStream tokenStream, SuffixEvristics suffixEvristics) { super(tokenStream); this.suffixEvristics = suffixEvristics; } @@ -18,7 +18,7 @@ public class RussianMorphlogyFilter extends TokenFilter { public Token next(final Token reusableToken) throws IOException { Token nextToken = input.next(reusableToken); if(nextToken == null || nextToken.term().length() == 0) return nextToken; - String word = nextToken.term().toLowerCase(); + String word = nextToken.term(); Character testC = word.charAt(0); if (Character.UnicodeBlock.of(testC) != Character.UnicodeBlock.CYRILLIC){ return nextToken;