From 585d43877a7e2b83c406e3078f6760a22bc04aaa Mon Sep 17 00:00:00 2001 From: "alexander.a.kuznetsov" Date: Sun, 12 Apr 2009 19:25:23 +0000 Subject: [PATCH] working on analyzer and test git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@10 d817d54c-26ab-11de-abc9-2f7d1455ff7a --- .../apache/lucene/russian/morphology/EvristicBuilder.java | 4 +--- .../morphology/analayzer/RussianMorphlogyAnalayzer.java | 4 ++-- .../morphology/analayzer/RussianMorphlogyFilter.java | 8 ++++---- .../{ArrayEvristics.java => SuffixEvristics.java} | 6 +++--- .../morphology/RussianSuffixDecoderEncoderTest.java | 2 +- .../morphology/{analayzer => }/decoder-test-data.txt | 0 6 files changed, 11 insertions(+), 13 deletions(-) rename src/main/java/org/apache/lucene/russian/morphology/analayzer/{ArrayEvristics.java => SuffixEvristics.java} (90%) rename src/test/resources/org/apache/lucene/russian/morphology/{analayzer => }/decoder-test-data.txt (100%) diff --git a/src/main/java/org/apache/lucene/russian/morphology/EvristicBuilder.java b/src/main/java/org/apache/lucene/russian/morphology/EvristicBuilder.java index c922b59..3633708 100644 --- a/src/main/java/org/apache/lucene/russian/morphology/EvristicBuilder.java +++ b/src/main/java/org/apache/lucene/russian/morphology/EvristicBuilder.java @@ -1,8 +1,6 @@ package org.apache.lucene.russian.morphology; import org.apache.lucene.russian.morphology.dictonary.DictonaryReader; -import org.apache.lucene.russian.morphology.dictonary.WordProccessor; -import org.apache.lucene.russian.morphology.dictonary.WordCard; import org.apache.lucene.russian.morphology.dictonary.IgnoredFormReader; import org.apache.lucene.russian.morphology.evristics.StatiticsCollectors; import org.apache.lucene.russian.morphology.evristics.SuffixCounter; @@ -34,6 +32,6 @@ public class EvristicBuilder { evristic.addEvristic(((SuffixCounter) objects[i]).getSuffixEvristic()); } - evristic.writeToFile("src/main/resources/russianSuffixesEvristics.txt"); + 
evristic.writeToFile("src/main/resources/org/apache/lucene/russian/morpholgy/russianSuffixesEvristics.txt"); } } diff --git a/src/main/java/org/apache/lucene/russian/morphology/analayzer/RussianMorphlogyAnalayzer.java b/src/main/java/org/apache/lucene/russian/morphology/analayzer/RussianMorphlogyAnalayzer.java index c5f3e15..4287104 100644 --- a/src/main/java/org/apache/lucene/russian/morphology/analayzer/RussianMorphlogyAnalayzer.java +++ b/src/main/java/org/apache/lucene/russian/morphology/analayzer/RussianMorphlogyAnalayzer.java @@ -7,10 +7,10 @@ import java.io.Reader; import java.io.IOException; public class RussianMorphlogyAnalayzer extends Analyzer { - private ArrayEvristics arrayEvristics; + private SuffixEvristics suffixEvristics; public RussianMorphlogyAnalayzer() throws IOException { - arrayEvristics = new ArrayEvristics(); + suffixEvristics = new SuffixEvristics(); } public TokenStream tokenStream(String fieldName, Reader reader) { diff --git a/src/main/java/org/apache/lucene/russian/morphology/analayzer/RussianMorphlogyFilter.java b/src/main/java/org/apache/lucene/russian/morphology/analayzer/RussianMorphlogyFilter.java index fd63b0e..08bd435 100644 --- a/src/main/java/org/apache/lucene/russian/morphology/analayzer/RussianMorphlogyFilter.java +++ b/src/main/java/org/apache/lucene/russian/morphology/analayzer/RussianMorphlogyFilter.java @@ -8,11 +8,11 @@ import java.io.IOException; public class RussianMorphlogyFilter extends TokenFilter { - private ArrayEvristics arrayEvristics; + private SuffixEvristics suffixEvristics; - public RussianMorphlogyFilter(TokenStream tokenStream, ArrayEvristics arrayEvristics) throws IOException { + public RussianMorphlogyFilter(TokenStream tokenStream, SuffixEvristics suffixEvristics) throws IOException { super(tokenStream); - this.arrayEvristics = arrayEvristics; + this.suffixEvristics = suffixEvristics; } public Token next(final Token reusableToken) throws IOException { @@ -24,7 +24,7 @@ public class 
RussianMorphlogyFilter extends TokenFilter { return nextToken; } Token current = (Token) nextToken.clone(); - return createToken(arrayEvristics.getCanonicalForm(word), current, reusableToken); + return createToken(suffixEvristics.getCanonicalForm(word), current, reusableToken); } protected Token createToken(String synonym, Token current, final Token reusableToken) { diff --git a/src/main/java/org/apache/lucene/russian/morphology/analayzer/ArrayEvristics.java b/src/main/java/org/apache/lucene/russian/morphology/analayzer/SuffixEvristics.java similarity index 90% rename from src/main/java/org/apache/lucene/russian/morphology/analayzer/ArrayEvristics.java rename to src/main/java/org/apache/lucene/russian/morphology/analayzer/SuffixEvristics.java index 565607a..a4de266 100644 --- a/src/main/java/org/apache/lucene/russian/morphology/analayzer/ArrayEvristics.java +++ b/src/main/java/org/apache/lucene/russian/morphology/analayzer/SuffixEvristics.java @@ -6,7 +6,7 @@ import java.io.*; import java.util.Arrays; -public class ArrayEvristics { +public class SuffixEvristics { private long[] keys; private long[] values; @@ -16,11 +16,11 @@ public class ArrayEvristics { } - public ArrayEvristics() throws IOException { + public SuffixEvristics() throws IOException { readFromResource(); } - public ArrayEvristics(String fileName) throws IOException { + public SuffixEvristics(String fileName) throws IOException { readFromFile(fileName); } diff --git a/src/test/java/org/apache/lucene/russian/morphology/RussianSuffixDecoderEncoderTest.java b/src/test/java/org/apache/lucene/russian/morphology/RussianSuffixDecoderEncoderTest.java index ce7aca4..6cfb985 100644 --- a/src/test/java/org/apache/lucene/russian/morphology/RussianSuffixDecoderEncoderTest.java +++ b/src/test/java/org/apache/lucene/russian/morphology/RussianSuffixDecoderEncoderTest.java @@ -15,7 +15,7 @@ public class RussianSuffixDecoderEncoderTest { @Test public void testShouldCorretDecodeEncode() throws IOException { - 
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/analayzer/decoder-test-data.txt"); + InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/decoder-test-data.txt"); BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream)); String s = bufferedReader.readLine(); while(s != null){ diff --git a/src/test/resources/org/apache/lucene/russian/morphology/analayzer/decoder-test-data.txt b/src/test/resources/org/apache/lucene/russian/morphology/decoder-test-data.txt similarity index 100% rename from src/test/resources/org/apache/lucene/russian/morphology/analayzer/decoder-test-data.txt rename to src/test/resources/org/apache/lucene/russian/morphology/decoder-test-data.txt