Test for suffix heuristics

git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@12 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
alexander.a.kuznetsov 2009-04-13 14:11:45 +00:00
parent 48ae7d4cb6
commit 57b600d629
2 changed files with 9 additions and 3 deletions

View File

@@ -2,6 +2,9 @@ package org.apache.lucene.russian.morphology.analayzer;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.standard.StandardFilter;
import java.io.Reader; import java.io.Reader;
import java.io.IOException; import java.io.IOException;
@@ -14,6 +17,9 @@ public class RussianMorphlogyAnalayzer extends Analyzer {
} }
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStream tokenStream(String fieldName, Reader reader) {
return null; //To change body of implemented methods use File | Settings | File Templates. TokenStream result = new StandardTokenizer(reader);
result = new StandardFilter(result);
result = new LowerCaseFilter(result);
return new RussianMorphlogyFilter(result,suffixEvristics);
} }
} }

View File

@@ -10,7 +10,7 @@ import java.io.IOException;
public class RussianMorphlogyFilter extends TokenFilter { public class RussianMorphlogyFilter extends TokenFilter {
private SuffixEvristics suffixEvristics; private SuffixEvristics suffixEvristics;
public RussianMorphlogyFilter(TokenStream tokenStream, SuffixEvristics suffixEvristics) throws IOException { public RussianMorphlogyFilter(TokenStream tokenStream, SuffixEvristics suffixEvristics) {
super(tokenStream); super(tokenStream);
this.suffixEvristics = suffixEvristics; this.suffixEvristics = suffixEvristics;
} }
@@ -18,7 +18,7 @@ public class RussianMorphlogyFilter extends TokenFilter {
public Token next(final Token reusableToken) throws IOException { public Token next(final Token reusableToken) throws IOException {
Token nextToken = input.next(reusableToken); Token nextToken = input.next(reusableToken);
if(nextToken == null || nextToken.term().length() == 0) return nextToken; if(nextToken == null || nextToken.term().length() == 0) return nextToken;
String word = nextToken.term().toLowerCase(); String word = nextToken.term();
Character testC = word.charAt(0); Character testC = word.charAt(0);
if (Character.UnicodeBlock.of(testC) != Character.UnicodeBlock.CYRILLIC){ if (Character.UnicodeBlock.of(testC) != Character.UnicodeBlock.CYRILLIC){
return nextToken; return nextToken;