adding test for lucene analyzer

fixed problem with string checking

git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@62 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
alexander.a.kuznetsov
2009-10-17 17:06:55 +00:00
parent 26ca704ec0
commit 8c833132a8
11 changed files with 196 additions and 35 deletions

View File

@ -75,4 +75,8 @@ public class LuceneMorph extends Morph {
}
return result.toArray(new Heuristic[result.size()]);
}
/**
 * Checks the given string by delegating to {@code decoderEncoder}.
 *
 * @param s the string to check
 * @return whatever {@code decoderEncoder.checkString(s)} returns
 */
public boolean checkString(String s) {
    final boolean accepted = decoderEncoder.checkString(s);
    return accepted;
}
}

View File

@ -53,9 +53,7 @@ public class MorphlogyFilter extends TokenFilter {
Token nextToken = input.next(reusableToken);
if (nextToken == null) return null; // EOS; iterator exhausted
Character testC = nextToken.term().charAt(0);
//todo check here for decoder endocoder
if (Character.UnicodeBlock.of(testC) != Character.UnicodeBlock.CYRILLIC) {
if (!luceneMorph.checkString(nextToken.term())) {
return nextToken;
}
stack = luceneMorph.getMorhInfo(nextToken.term());