working on analyzer and test

git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@10 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
alexander.a.kuznetsov 2009-04-12 19:25:23 +00:00
parent 5c7c629746
commit 585d43877a
6 changed files with 11 additions and 13 deletions

View File

@ -1,8 +1,6 @@
package org.apache.lucene.russian.morphology; package org.apache.lucene.russian.morphology;
import org.apache.lucene.russian.morphology.dictonary.DictonaryReader; import org.apache.lucene.russian.morphology.dictonary.DictonaryReader;
import org.apache.lucene.russian.morphology.dictonary.WordProccessor;
import org.apache.lucene.russian.morphology.dictonary.WordCard;
import org.apache.lucene.russian.morphology.dictonary.IgnoredFormReader; import org.apache.lucene.russian.morphology.dictonary.IgnoredFormReader;
import org.apache.lucene.russian.morphology.evristics.StatiticsCollectors; import org.apache.lucene.russian.morphology.evristics.StatiticsCollectors;
import org.apache.lucene.russian.morphology.evristics.SuffixCounter; import org.apache.lucene.russian.morphology.evristics.SuffixCounter;
@ -34,6 +32,6 @@ public class EvristicBuilder {
evristic.addEvristic(((SuffixCounter) objects[i]).getSuffixEvristic()); evristic.addEvristic(((SuffixCounter) objects[i]).getSuffixEvristic());
} }
evristic.writeToFile("src/main/resources/russianSuffixesEvristics.txt"); evristic.writeToFile("src/main/resources/org/apache/lucene/russian/morpholgy/russianSuffixesEvristics.txt");
} }
} }

View File

@ -7,10 +7,10 @@ import java.io.Reader;
import java.io.IOException; import java.io.IOException;
public class RussianMorphlogyAnalayzer extends Analyzer { public class RussianMorphlogyAnalayzer extends Analyzer {
private ArrayEvristics arrayEvristics; private SuffixEvristics suffixEvristics;
public RussianMorphlogyAnalayzer() throws IOException { public RussianMorphlogyAnalayzer() throws IOException {
arrayEvristics = new ArrayEvristics(); suffixEvristics = new SuffixEvristics();
} }
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStream tokenStream(String fieldName, Reader reader) {

View File

@ -8,11 +8,11 @@ import java.io.IOException;
public class RussianMorphlogyFilter extends TokenFilter { public class RussianMorphlogyFilter extends TokenFilter {
private ArrayEvristics arrayEvristics; private SuffixEvristics suffixEvristics;
public RussianMorphlogyFilter(TokenStream tokenStream, ArrayEvristics arrayEvristics) throws IOException { public RussianMorphlogyFilter(TokenStream tokenStream, SuffixEvristics suffixEvristics) throws IOException {
super(tokenStream); super(tokenStream);
this.arrayEvristics = arrayEvristics; this.suffixEvristics = suffixEvristics;
} }
public Token next(final Token reusableToken) throws IOException { public Token next(final Token reusableToken) throws IOException {
@ -24,7 +24,7 @@ public class RussianMorphlogyFilter extends TokenFilter {
return nextToken; return nextToken;
} }
Token current = (Token) nextToken.clone(); Token current = (Token) nextToken.clone();
return createToken(arrayEvristics.getCanonicalForm(word), current, reusableToken); return createToken(suffixEvristics.getCanonicalForm(word), current, reusableToken);
} }
protected Token createToken(String synonym, Token current, final Token reusableToken) { protected Token createToken(String synonym, Token current, final Token reusableToken) {

View File

@ -6,7 +6,7 @@ import java.io.*;
import java.util.Arrays; import java.util.Arrays;
public class ArrayEvristics { public class SuffixEvristics {
private long[] keys; private long[] keys;
private long[] values; private long[] values;
@ -16,11 +16,11 @@ public class ArrayEvristics {
} }
public ArrayEvristics() throws IOException { public SuffixEvristics() throws IOException {
readFromResource(); readFromResource();
} }
public ArrayEvristics(String fileName) throws IOException { public SuffixEvristics(String fileName) throws IOException {
readFromFile(fileName); readFromFile(fileName);
} }

View File

@ -15,7 +15,7 @@ public class RussianSuffixDecoderEncoderTest {
@Test @Test
public void testShouldCorretDecodeEncode() throws IOException { public void testShouldCorretDecodeEncode() throws IOException {
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/analayzer/decoder-test-data.txt"); InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/decoder-test-data.txt");
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream)); BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream));
String s = bufferedReader.readLine(); String s = bufferedReader.readLine();
while(s != null){ while(s != null){