working on analyzer and test
git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@10 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
parent
5c7c629746
commit
585d43877a
@ -1,8 +1,6 @@
|
|||||||
package org.apache.lucene.russian.morphology;
|
package org.apache.lucene.russian.morphology;
|
||||||
|
|
||||||
import org.apache.lucene.russian.morphology.dictonary.DictonaryReader;
|
import org.apache.lucene.russian.morphology.dictonary.DictonaryReader;
|
||||||
import org.apache.lucene.russian.morphology.dictonary.WordProccessor;
|
|
||||||
import org.apache.lucene.russian.morphology.dictonary.WordCard;
|
|
||||||
import org.apache.lucene.russian.morphology.dictonary.IgnoredFormReader;
|
import org.apache.lucene.russian.morphology.dictonary.IgnoredFormReader;
|
||||||
import org.apache.lucene.russian.morphology.evristics.StatiticsCollectors;
|
import org.apache.lucene.russian.morphology.evristics.StatiticsCollectors;
|
||||||
import org.apache.lucene.russian.morphology.evristics.SuffixCounter;
|
import org.apache.lucene.russian.morphology.evristics.SuffixCounter;
|
||||||
@ -34,6 +32,6 @@ public class EvristicBuilder {
|
|||||||
evristic.addEvristic(((SuffixCounter) objects[i]).getSuffixEvristic());
|
evristic.addEvristic(((SuffixCounter) objects[i]).getSuffixEvristic());
|
||||||
}
|
}
|
||||||
|
|
||||||
evristic.writeToFile("src/main/resources/russianSuffixesEvristics.txt");
|
evristic.writeToFile("src/main/resources/org/apache/lucene/russian/morpholgy/russianSuffixesEvristics.txt");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -7,10 +7,10 @@ import java.io.Reader;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
public class RussianMorphlogyAnalayzer extends Analyzer {
|
public class RussianMorphlogyAnalayzer extends Analyzer {
|
||||||
private ArrayEvristics arrayEvristics;
|
private SuffixEvristics suffixEvristics;
|
||||||
|
|
||||||
public RussianMorphlogyAnalayzer() throws IOException {
|
public RussianMorphlogyAnalayzer() throws IOException {
|
||||||
arrayEvristics = new ArrayEvristics();
|
suffixEvristics = new SuffixEvristics();
|
||||||
}
|
}
|
||||||
|
|
||||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||||
|
@ -8,11 +8,11 @@ import java.io.IOException;
|
|||||||
|
|
||||||
|
|
||||||
public class RussianMorphlogyFilter extends TokenFilter {
|
public class RussianMorphlogyFilter extends TokenFilter {
|
||||||
private ArrayEvristics arrayEvristics;
|
private SuffixEvristics suffixEvristics;
|
||||||
|
|
||||||
public RussianMorphlogyFilter(TokenStream tokenStream, ArrayEvristics arrayEvristics) throws IOException {
|
public RussianMorphlogyFilter(TokenStream tokenStream, SuffixEvristics suffixEvristics) throws IOException {
|
||||||
super(tokenStream);
|
super(tokenStream);
|
||||||
this.arrayEvristics = arrayEvristics;
|
this.suffixEvristics = suffixEvristics;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Token next(final Token reusableToken) throws IOException {
|
public Token next(final Token reusableToken) throws IOException {
|
||||||
@ -24,7 +24,7 @@ public class RussianMorphlogyFilter extends TokenFilter {
|
|||||||
return nextToken;
|
return nextToken;
|
||||||
}
|
}
|
||||||
Token current = (Token) nextToken.clone();
|
Token current = (Token) nextToken.clone();
|
||||||
return createToken(arrayEvristics.getCanonicalForm(word), current, reusableToken);
|
return createToken(suffixEvristics.getCanonicalForm(word), current, reusableToken);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected Token createToken(String synonym, Token current, final Token reusableToken) {
|
protected Token createToken(String synonym, Token current, final Token reusableToken) {
|
||||||
|
@ -6,7 +6,7 @@ import java.io.*;
|
|||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
|
||||||
public class ArrayEvristics {
|
public class SuffixEvristics {
|
||||||
private long[] keys;
|
private long[] keys;
|
||||||
private long[] values;
|
private long[] values;
|
||||||
|
|
||||||
@ -16,11 +16,11 @@ public class ArrayEvristics {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public ArrayEvristics() throws IOException {
|
public SuffixEvristics() throws IOException {
|
||||||
readFromResource();
|
readFromResource();
|
||||||
}
|
}
|
||||||
|
|
||||||
public ArrayEvristics(String fileName) throws IOException {
|
public SuffixEvristics(String fileName) throws IOException {
|
||||||
readFromFile(fileName);
|
readFromFile(fileName);
|
||||||
}
|
}
|
||||||
|
|
@ -15,7 +15,7 @@ public class RussianSuffixDecoderEncoderTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testShouldCorretDecodeEncode() throws IOException {
|
public void testShouldCorretDecodeEncode() throws IOException {
|
||||||
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/analayzer/decoder-test-data.txt");
|
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/decoder-test-data.txt");
|
||||||
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream));
|
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream));
|
||||||
String s = bufferedReader.readLine();
|
String s = bufferedReader.readLine();
|
||||||
while(s != null){
|
while(s != null){
|
||||||
|
Loading…
x
Reference in New Issue
Block a user