diff --git a/src/main/java/org/apache/lucene/russian/morphology/EvristicBuilder.java b/src/main/java/org/apache/lucene/russian/morphology/HeuristicBuilder.java similarity index 77% rename from src/main/java/org/apache/lucene/russian/morphology/EvristicBuilder.java rename to src/main/java/org/apache/lucene/russian/morphology/HeuristicBuilder.java index 0f08828..0ac6f29 100644 --- a/src/main/java/org/apache/lucene/russian/morphology/EvristicBuilder.java +++ b/src/main/java/org/apache/lucene/russian/morphology/HeuristicBuilder.java @@ -18,9 +18,9 @@ package org.apache.lucene.russian.morphology; import org.apache.lucene.russian.morphology.dictonary.DictonaryReader; import org.apache.lucene.russian.morphology.dictonary.IgnoredFormReader; -import org.apache.lucene.russian.morphology.evristics.Evristic; -import org.apache.lucene.russian.morphology.evristics.StatiticsCollectors; -import org.apache.lucene.russian.morphology.evristics.SuffixCounter; +import org.apache.lucene.russian.morphology.heuristic.Heuristic; +import org.apache.lucene.russian.morphology.heuristic.StatiticsCollectors; +import org.apache.lucene.russian.morphology.heuristic.SuffixCounter; import java.io.IOException; import java.util.Arrays; @@ -28,7 +28,7 @@ import java.util.Collection; import java.util.Set; -public class EvristicBuilder { +public class HeuristicBuilder { public static void main(String[] args) throws IOException { IgnoredFormReader formReader = new IgnoredFormReader("data/igoredFrom.txt"); Set form = formReader.getIngnoredFroms(); @@ -44,11 +44,11 @@ public class EvristicBuilder { System.out.println(objects[i]); } - final Evristic evristic = new Evristic(); + final Heuristic heuristic = new Heuristic(); for (int i = 0; i < objects.length; i++) { - evristic.addEvristic(((SuffixCounter) objects[i]).getSuffixEvristic()); + heuristic.addEvristic(((SuffixCounter) objects[i]).getSuffixEvristic()); } - evristic.writeToFile("src/main/resources/org/apache/lucene/russian/morpholgy/russianSuffixesEvristics.txt"); + heuristic.writeToFile("src/main/resources/org/apache/lucene/russian/morpholgy/russianSuffixesEvristics.txt"); } } diff --git a/src/main/java/org/apache/lucene/russian/morphology/analayzer/RussianMorphlogyAnalayzer.java b/src/main/java/org/apache/lucene/russian/morphology/analayzer/RussianMorphlogyAnalayzer.java index d073ca5..9859fe1 100644 --- a/src/main/java/org/apache/lucene/russian/morphology/analayzer/RussianMorphlogyAnalayzer.java +++ b/src/main/java/org/apache/lucene/russian/morphology/analayzer/RussianMorphlogyAnalayzer.java @@ -26,16 +26,16 @@ import java.io.IOException; import java.io.Reader; public class RussianMorphlogyAnalayzer extends Analyzer { - private SuffixEvristics suffixEvristics; + private SuffixHeuristic suffixHeuristic; public RussianMorphlogyAnalayzer() throws IOException { - suffixEvristics = new SuffixEvristics(); + suffixHeuristic = new SuffixHeuristic(); } public TokenStream tokenStream(String fieldName, Reader reader) { TokenStream result = new StandardTokenizer(reader); result = new StandardFilter(result); result = new LowerCaseFilter(result); - return new RussianMorphlogyFilter(result, suffixEvristics); + return new RussianMorphlogyFilter(result, suffixHeuristic); } } diff --git a/src/main/java/org/apache/lucene/russian/morphology/analayzer/RussianMorphlogyFilter.java b/src/main/java/org/apache/lucene/russian/morphology/analayzer/RussianMorphlogyFilter.java index 28172c6..7772757 100644 --- a/src/main/java/org/apache/lucene/russian/morphology/analayzer/RussianMorphlogyFilter.java +++ b/src/main/java/org/apache/lucene/russian/morphology/analayzer/RussianMorphlogyFilter.java @@ -24,11 +24,11 @@ import java.io.IOException; public class RussianMorphlogyFilter extends TokenFilter { - private SuffixEvristics suffixEvristics; + private SuffixHeuristic suffixHeuristic; - public RussianMorphlogyFilter(TokenStream tokenStream, SuffixEvristics suffixEvristics) { + public RussianMorphlogyFilter(TokenStream tokenStream, SuffixHeuristic suffixHeuristic) { super(tokenStream); - this.suffixEvristics = suffixEvristics; + this.suffixHeuristic = suffixHeuristic; } public Token next(final Token reusableToken) throws IOException { @@ -40,7 +40,7 @@ public class RussianMorphlogyFilter extends TokenFilter { return nextToken; } Token current = (Token) nextToken.clone(); - return createToken(suffixEvristics.getCanonicalForm(word), current, reusableToken); + return createToken(suffixHeuristic.getCanonicalForm(word), current, reusableToken); } protected Token createToken(String synonym, Token current, final Token reusableToken) { diff --git a/src/main/java/org/apache/lucene/russian/morphology/analayzer/SuffixEvristics.java b/src/main/java/org/apache/lucene/russian/morphology/analayzer/SuffixHeuristic.java similarity index 92% rename from src/main/java/org/apache/lucene/russian/morphology/analayzer/SuffixEvristics.java rename to src/main/java/org/apache/lucene/russian/morphology/analayzer/SuffixHeuristic.java index 90c8c46..8795913 100644 --- a/src/main/java/org/apache/lucene/russian/morphology/analayzer/SuffixEvristics.java +++ b/src/main/java/org/apache/lucene/russian/morphology/analayzer/SuffixHeuristic.java @@ -22,7 +22,7 @@ import java.io.*; import java.util.Arrays; -public class SuffixEvristics { +public class SuffixHeuristic { private long[] keys; private long[] values; @@ -32,11 +32,11 @@ public class SuffixEvristics { } - public SuffixEvristics() throws IOException { + public SuffixHeuristic() throws IOException { readFromResource(); } - public SuffixEvristics(String fileName) throws IOException { + public SuffixHeuristic(String fileName) throws IOException { readFromFile(fileName); } diff --git a/src/main/java/org/apache/lucene/russian/morphology/evristics/Evristic.java b/src/main/java/org/apache/lucene/russian/morphology/heuristic/Heuristic.java similarity index 87% rename from src/main/java/org/apache/lucene/russian/morphology/evristics/Evristic.java rename to src/main/java/org/apache/lucene/russian/morphology/heuristic/Heuristic.java index 2ef8a15..48a18db 100644 --- a/src/main/java/org/apache/lucene/russian/morphology/evristics/Evristic.java +++ b/src/main/java/org/apache/lucene/russian/morphology/heuristic/Heuristic.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package org.apache.lucene.russian.morphology.evristics; +package org.apache.lucene.russian.morphology.heuristic; import org.apache.lucene.russian.morphology.RussianSuffixDecoderEncoder; @@ -25,14 +25,14 @@ import java.io.IOException; import java.util.TreeMap; -public class Evristic { +public class Heuristic { private TreeMap encodedSuffixesPairs = new TreeMap(); - public void addEvristic(SuffixEvristic suffixEvristic) { - Long suffix = RussianSuffixDecoderEncoder.encode(suffixEvristic.getFormSuffix()); + public void addEvristic(SuffixHeuristic suffixHeuristic) { + Long suffix = RussianSuffixDecoderEncoder.encode(suffixHeuristic.getFormSuffix()); Long longs = encodedSuffixesPairs.get(suffix); if (longs == null) { - encodedSuffixesPairs.put(suffix, RussianSuffixDecoderEncoder.encode(suffixEvristic.getNormalSuffix())); + encodedSuffixesPairs.put(suffix, RussianSuffixDecoderEncoder.encode(suffixHeuristic.getNormalSuffix())); } } diff --git a/src/main/java/org/apache/lucene/russian/morphology/evristics/StatiticsCollectors.java b/src/main/java/org/apache/lucene/russian/morphology/heuristic/StatiticsCollectors.java similarity index 72% rename from src/main/java/org/apache/lucene/russian/morphology/evristics/StatiticsCollectors.java rename to src/main/java/org/apache/lucene/russian/morphology/heuristic/StatiticsCollectors.java index 55f6cae..e5eb258 100644 --- a/src/main/java/org/apache/lucene/russian/morphology/evristics/StatiticsCollectors.java +++ b/src/main/java/org/apache/lucene/russian/morphology/heuristic/StatiticsCollectors.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package org.apache.lucene.russian.morphology.evristics; +package org.apache.lucene.russian.morphology.heuristic; import org.apache.lucene.russian.morphology.RussianSuffixDecoderEncoder; import org.apache.lucene.russian.morphology.dictonary.WordCard; @@ -25,28 +25,28 @@ import java.util.Map; public class StatiticsCollectors implements WordProccessor { - Map statititics = new HashMap(); + Map statititics = new HashMap(); private Integer ignoredCount = 0; public void proccess(WordCard wordCard) { for (String form : wordCard.getWordsFroms()) { - SuffixEvristic suffixEvristic = createEvristic(wordCard.getCanonicalFrom(), form); - if (suffixEvristic == null) continue; - SuffixCounter suffixCounter = statititics.get(suffixEvristic); + SuffixHeuristic suffixHeuristic = createEvristic(wordCard.getCanonicalFrom(), form); + if (suffixHeuristic == null) continue; + SuffixCounter suffixCounter = statititics.get(suffixHeuristic); if (suffixCounter == null) { - suffixCounter = new SuffixCounter(suffixEvristic); - statititics.put(suffixEvristic, suffixCounter); + suffixCounter = new SuffixCounter(suffixHeuristic); + statititics.put(suffixHeuristic, suffixCounter); } suffixCounter.incrementAmount(); } } - public Map getStatititics() { + public Map getStatititics() { return statititics; } - private SuffixEvristic createEvristic(String word, String form) { + private SuffixHeuristic createEvristic(String word, String form) { int startSymbol = form.length() > RussianSuffixDecoderEncoder.SUFFIX_LENGTH ? form.length() - RussianSuffixDecoderEncoder.SUFFIX_LENGTH : 0; String formSuffix = form.substring(startSymbol); if (word.length() < startSymbol) { @@ -58,7 +58,7 @@ public class StatiticsCollectors implements WordProccessor { System.out.println(word + " " + form); return null; } - return new SuffixEvristic(formSuffix, wordSuffix); + return new SuffixHeuristic(formSuffix, wordSuffix); } diff --git a/src/main/java/org/apache/lucene/russian/morphology/evristics/SuffixCounter.java b/src/main/java/org/apache/lucene/russian/morphology/heuristic/SuffixCounter.java similarity index 69% rename from src/main/java/org/apache/lucene/russian/morphology/evristics/SuffixCounter.java rename to src/main/java/org/apache/lucene/russian/morphology/heuristic/SuffixCounter.java index 11401a8..b858615 100644 --- a/src/main/java/org/apache/lucene/russian/morphology/evristics/SuffixCounter.java +++ b/src/main/java/org/apache/lucene/russian/morphology/heuristic/SuffixCounter.java @@ -14,30 +14,30 @@ * limitations under the License. */ -package org.apache.lucene.russian.morphology.evristics; +package org.apache.lucene.russian.morphology.heuristic; /** * Conains information of freqency of suffix evristic * in dictionary. */ public class SuffixCounter implements Comparable { - private SuffixEvristic suffixEvristic; + private SuffixHeuristic suffixHeuristic; private Double amnout = 0.0; - public SuffixCounter(SuffixEvristic suffixEvristic) { - this.suffixEvristic = suffixEvristic; + public SuffixCounter(SuffixHeuristic suffixHeuristic) { + this.suffixHeuristic = suffixHeuristic; } public void incrementAmount() { amnout++; } - public SuffixEvristic getSuffixEvristic() { - return suffixEvristic; + public SuffixHeuristic getSuffixEvristic() { + return suffixHeuristic; } - public void setSuffixEvristic(SuffixEvristic suffixEvristic) { - this.suffixEvristic = suffixEvristic; + public void setSuffixEvristic(SuffixHeuristic suffixHeuristic) { + this.suffixHeuristic = suffixHeuristic; } public Double getAmnout() { @@ -55,6 +55,6 @@ public class SuffixCounter implements Comparable { @Override public String toString() { - return "" + amnout + " " + suffixEvristic.toString(); + return "" + amnout + " " + suffixHeuristic.toString(); } } diff --git a/src/main/java/org/apache/lucene/russian/morphology/evristics/SuffixEvristic.java b/src/main/java/org/apache/lucene/russian/morphology/heuristic/SuffixHeuristic.java similarity index 85% rename from src/main/java/org/apache/lucene/russian/morphology/evristics/SuffixEvristic.java rename to src/main/java/org/apache/lucene/russian/morphology/heuristic/SuffixHeuristic.java index cc4621d..df84bc0 100644 --- a/src/main/java/org/apache/lucene/russian/morphology/evristics/SuffixEvristic.java +++ b/src/main/java/org/apache/lucene/russian/morphology/heuristic/SuffixHeuristic.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package org.apache.lucene.russian.morphology.evristics; +package org.apache.lucene.russian.morphology.heuristic; /** * Represent evristic that assume that @@ -22,11 +22,11 @@ package org.apache.lucene.russian.morphology.evristics; * It contains to suffixes from given position of * canonical word form and for form. */ -public class SuffixEvristic { +public class SuffixHeuristic { private String formSuffix; private String normalSuffix; - public SuffixEvristic(String formSuffix, String normalSuffix) { + public SuffixHeuristic(String formSuffix, String normalSuffix) { this.formSuffix = formSuffix; this.normalSuffix = normalSuffix; } @@ -52,7 +52,7 @@ public class SuffixEvristic { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; - SuffixEvristic that = (SuffixEvristic) o; + SuffixHeuristic that = (SuffixHeuristic) o; if (!formSuffix.equals(that.formSuffix)) return false; if (!normalSuffix.equals(that.normalSuffix)) return false; @@ -69,7 +69,7 @@ public class SuffixEvristic { @Override public String toString() { - return "SuffixEvristic{" + + return "SuffixHeuristic{" + "formSuffix='" + formSuffix + '\'' + ", normalSuffix='" + normalSuffix + '\'' + '}'; diff --git a/src/test/java/org/apache/lucene/russian/morphology/analayzer/SuffixEvristicsTest.java b/src/test/java/org/apache/lucene/russian/morphology/analayzer/SuffixEvristicsTest.java index 7191853..bf98827 100644 --- a/src/test/java/org/apache/lucene/russian/morphology/analayzer/SuffixEvristicsTest.java +++ b/src/test/java/org/apache/lucene/russian/morphology/analayzer/SuffixEvristicsTest.java @@ -30,13 +30,13 @@ public class SuffixEvristicsTest { @Test public void testShouldDefineCorretCononicalWordForm() throws IOException { - SuffixEvristics suffixEvristics = new SuffixEvristics(); + SuffixHeuristic suffixHeuristic = new SuffixHeuristic(); InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/analayzer/suffix-evristics-test-data.txt"); BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8")); String s = bufferedReader.readLine(); while (s != null) { String[] qa = s.trim().split(" "); - assertThat(suffixEvristics.getCanonicalForm(qa[0]), equalTo(qa[1])); + assertThat(suffixHeuristic.getCanonicalForm(qa[0]), equalTo(qa[1])); s = bufferedReader.readLine(); } }