working on analayzer and test
git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@9 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
parent
5576a22a3a
commit
5c7c629746
@ -25,7 +25,7 @@ public class ArrayEvristics {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public void readFromResource() throws IOException {
|
public void readFromResource() throws IOException {
|
||||||
InputStream stream = this.getClass().getResourceAsStream("/russianSuffixesEvristics.txt");
|
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/russianSuffixesEvristics.txt");
|
||||||
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream));
|
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream));
|
||||||
readFromBufferedRreader(bufferedReader);
|
readFromBufferedRreader(bufferedReader);
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,19 @@
|
|||||||
|
package org.apache.lucene.russian.morphology.analayzer;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
|
||||||
|
import java.io.Reader;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
public class RussianMorphlogyAnalayzer extends Analyzer {
|
||||||
|
private ArrayEvristics arrayEvristics;
|
||||||
|
|
||||||
|
public RussianMorphlogyAnalayzer() throws IOException {
|
||||||
|
arrayEvristics = new ArrayEvristics();
|
||||||
|
}
|
||||||
|
|
||||||
|
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||||
|
return null; //To change body of implemented methods use File | Settings | File Templates.
|
||||||
|
}
|
||||||
|
}
|
@ -18,12 +18,13 @@ public class RussianMorphlogyFilter extends TokenFilter {
|
|||||||
public Token next(final Token reusableToken) throws IOException {
|
public Token next(final Token reusableToken) throws IOException {
|
||||||
Token nextToken = input.next(reusableToken);
|
Token nextToken = input.next(reusableToken);
|
||||||
if(nextToken == null || nextToken.term().length() == 0) return nextToken;
|
if(nextToken == null || nextToken.term().length() == 0) return nextToken;
|
||||||
Character testC = nextToken.term().charAt(0);
|
String word = nextToken.term().toLowerCase();
|
||||||
|
Character testC = word.charAt(0);
|
||||||
if (Character.UnicodeBlock.of(testC) != Character.UnicodeBlock.CYRILLIC){
|
if (Character.UnicodeBlock.of(testC) != Character.UnicodeBlock.CYRILLIC){
|
||||||
return nextToken;
|
return nextToken;
|
||||||
}
|
}
|
||||||
Token current = (Token) nextToken.clone();
|
Token current = (Token) nextToken.clone();
|
||||||
return createToken(arrayEvristics.getCanonicalForm(nextToken.term()), current, reusableToken);
|
return createToken(arrayEvristics.getCanonicalForm(word), current, reusableToken);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Token createToken(String synonym, Token current, final Token reusableToken) {
|
protected Token createToken(String synonym, Token current, final Token reusableToken) {
|
||||||
|
@ -15,7 +15,7 @@ public class RussianSuffixDecoderEncoderTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testShouldCorretDecodeEncode() throws IOException {
|
public void testShouldCorretDecodeEncode() throws IOException {
|
||||||
InputStream stream = this.getClass().getResourceAsStream("/decoder-test-data.txt");
|
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/analayzer/decoder-test-data.txt");
|
||||||
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream));
|
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream));
|
||||||
String s = bufferedReader.readLine();
|
String s = bufferedReader.readLine();
|
||||||
while(s != null){
|
while(s != null){
|
||||||
|
@ -0,0 +1,12 @@
|
|||||||
|
package org.apache.lucene.russian.morphology.analayzer;
|
||||||
|
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
|
||||||
|
public class ArrayEvristicsTest {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testShouldDefineCorretCononicalWordForm(){
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user