fixed filter
git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@5 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
parent
5214a0b7f4
commit
595b25ab65
@ -10,14 +10,20 @@ import java.io.IOException;
|
|||||||
public class RussianMorphlogyFilter extends TokenFilter {
|
public class RussianMorphlogyFilter extends TokenFilter {
|
||||||
private ArrayEvristics arrayEvristics;
|
private ArrayEvristics arrayEvristics;
|
||||||
|
|
||||||
protected RussianMorphlogyFilter(TokenStream tokenStream, ArrayEvristics arrayEvristics) {
|
public RussianMorphlogyFilter(TokenStream tokenStream, ArrayEvristics arrayEvristics) throws IOException {
|
||||||
super(tokenStream);
|
super(tokenStream);
|
||||||
this.arrayEvristics = arrayEvristics;
|
this.arrayEvristics = arrayEvristics;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Token next(final Token reusableToken) throws IOException {
|
public Token next(final Token reusableToken) throws IOException {
|
||||||
assert reusableToken != null;
|
Token nextToken = input.next(reusableToken);
|
||||||
return createToken(arrayEvristics.getCanonicalForm(reusableToken.term()), reusableToken, reusableToken);
|
if(nextToken == null || nextToken.term().length() == 0) return nextToken;
|
||||||
|
Character testC = nextToken.term().charAt(0);
|
||||||
|
if (Character.UnicodeBlock.of(testC) != Character.UnicodeBlock.CYRILLIC){
|
||||||
|
return nextToken;
|
||||||
|
}
|
||||||
|
Token current = (Token) nextToken.clone();
|
||||||
|
return createToken(arrayEvristics.getCanonicalForm(nextToken.term()), current, reusableToken);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Token createToken(String synonym, Token current, final Token reusableToken) {
|
protected Token createToken(String synonym, Token current, final Token reusableToken) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user