fixed filter

git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@5 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
alexander.a.kuznetsov 2009-04-11 21:03:18 +00:00
parent 5214a0b7f4
commit 595b25ab65

View File

@ -10,14 +10,20 @@ import java.io.IOException;
public class RussianMorphlogyFilter extends TokenFilter { public class RussianMorphlogyFilter extends TokenFilter {
private ArrayEvristics arrayEvristics; private ArrayEvristics arrayEvristics;
/**
 * Constructor, shown as a side-by-side diff: every line below carries the
 * pre-commit text followed by the post-commit text.
 *
 * Pre-commit the constructor is {@code protected} with no throws clause;
 * post-commit it is {@code public} and declares {@code IOException}.
 * Both versions hand {@code tokenStream} to the superclass constructor and
 * store {@code arrayEvristics} in the field of the same name.
 *
 * NOTE(review): nothing in the visible body throws IOException; unless the
 * superclass constructor does, the throws clause looks unnecessary - confirm.
 *
 * @param tokenStream    token stream passed to {@code super(...)}
 * @param arrayEvristics stored for later use; {@code next} calls its
 *                       {@code getCanonicalForm}
 */
protected RussianMorphlogyFilter(TokenStream tokenStream, ArrayEvristics arrayEvristics) { public RussianMorphlogyFilter(TokenStream tokenStream, ArrayEvristics arrayEvristics) throws IOException {
super(tokenStream); super(tokenStream);
this.arrayEvristics = arrayEvristics; this.arrayEvristics = arrayEvristics;
} }
/**
 * Produces the next token, shown as a side-by-side diff: each line carries
 * the pre-commit text followed by the post-commit text; lines with a single
 * statement exist only in the post-commit version.
 *
 * Pre-commit: asserts {@code reusableToken} is non-null and passes the
 * canonical form of {@code reusableToken.term()} to {@code createToken},
 * without reading from {@code input}.
 *
 * Post-commit: reads a token via {@code input.next(reusableToken)} and
 * returns it unchanged when it is null, has an empty term, or its first
 * character is outside the CYRILLIC Unicode block. Otherwise it passes the
 * canonical form of the term, a clone of the token, and
 * {@code reusableToken} to {@code createToken}.
 *
 * @param reusableToken token instance forwarded to {@code input.next} and to
 *                      {@code createToken}
 * @return the token from {@code input} (possibly null) or the result of
 *         {@code createToken}
 * @throws IOException declared by the signature; presumably propagated from
 *                     {@code input.next} - confirm
 */
public Token next(final Token reusableToken) throws IOException { public Token next(final Token reusableToken) throws IOException {
// Pre-commit: null assertion on reusableToken. Post-commit: fetch the next token from input.
assert reusableToken != null; Token nextToken = input.next(reusableToken);
// Pre-commit: direct createToken call. Post-commit: null or empty-term tokens are returned as-is.
return createToken(arrayEvristics.getCanonicalForm(reusableToken.term()), reusableToken, reusableToken); if(nextToken == null || nextToken.term().length() == 0) return nextToken;
// Post-commit only: only the first character of the term is inspected.
// NOTE(review): declared as Character, so the char from charAt is boxed and then
// unboxed for UnicodeBlock.of; a plain char would avoid that.
Character testC = nextToken.term().charAt(0);
// Post-commit only: tokens not starting with a CYRILLIC-block character pass through unchanged.
if (Character.UnicodeBlock.of(testC) != Character.UnicodeBlock.CYRILLIC){
return nextToken;
}
// Post-commit only: a clone is passed as `current`; presumably because nextToken
// may be the same object as reusableToken - confirm against createToken.
Token current = (Token) nextToken.clone();
return createToken(arrayEvristics.getCanonicalForm(nextToken.term()), current, reusableToken);
} }
protected Token createToken(String synonym, Token current, final Token reusableToken) { protected Token createToken(String synonym, Token current, final Token reusableToken) {