some spell checking fixes

git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@24 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
alexander.a.kuznetsov 2009-04-14 09:25:39 +00:00
parent 65670d4c9b
commit c68fbb0827
9 changed files with 48 additions and 48 deletions

View File

@ -18,9 +18,9 @@ package org.apache.lucene.russian.morphology;
import org.apache.lucene.russian.morphology.dictonary.DictonaryReader; import org.apache.lucene.russian.morphology.dictonary.DictonaryReader;
import org.apache.lucene.russian.morphology.dictonary.IgnoredFormReader; import org.apache.lucene.russian.morphology.dictonary.IgnoredFormReader;
import org.apache.lucene.russian.morphology.evristics.Evristic; import org.apache.lucene.russian.morphology.heuristic.Heuristic;
import org.apache.lucene.russian.morphology.evristics.StatiticsCollectors; import org.apache.lucene.russian.morphology.heuristic.StatiticsCollectors;
import org.apache.lucene.russian.morphology.evristics.SuffixCounter; import org.apache.lucene.russian.morphology.heuristic.SuffixCounter;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
@ -28,7 +28,7 @@ import java.util.Collection;
import java.util.Set; import java.util.Set;
public class EvristicBuilder { public class HeuristicBuilder {
public static void main(String[] args) throws IOException { public static void main(String[] args) throws IOException {
IgnoredFormReader formReader = new IgnoredFormReader("data/igoredFrom.txt"); IgnoredFormReader formReader = new IgnoredFormReader("data/igoredFrom.txt");
Set<String> form = formReader.getIngnoredFroms(); Set<String> form = formReader.getIngnoredFroms();
@ -44,11 +44,11 @@ public class EvristicBuilder {
System.out.println(objects[i]); System.out.println(objects[i]);
} }
final Evristic evristic = new Evristic(); final Heuristic heuristic = new Heuristic();
for (int i = 0; i < objects.length; i++) { for (int i = 0; i < objects.length; i++) {
evristic.addEvristic(((SuffixCounter) objects[i]).getSuffixEvristic()); heuristic.addEvristic(((SuffixCounter) objects[i]).getSuffixEvristic());
} }
evristic.writeToFile("src/main/resources/org/apache/lucene/russian/morpholgy/russianSuffixesEvristics.txt"); heuristic.writeToFile("src/main/resources/org/apache/lucene/russian/morpholgy/russianSuffixesEvristics.txt");
} }
} }

View File

@ -26,16 +26,16 @@ import java.io.IOException;
import java.io.Reader; import java.io.Reader;
public class RussianMorphlogyAnalayzer extends Analyzer { public class RussianMorphlogyAnalayzer extends Analyzer {
private SuffixEvristics suffixEvristics; private SuffixHeuristic suffixHeuristic;
public RussianMorphlogyAnalayzer() throws IOException { public RussianMorphlogyAnalayzer() throws IOException {
suffixEvristics = new SuffixEvristics(); suffixHeuristic = new SuffixHeuristic();
} }
public TokenStream tokenStream(String fieldName, Reader reader) { public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream result = new StandardTokenizer(reader); TokenStream result = new StandardTokenizer(reader);
result = new StandardFilter(result); result = new StandardFilter(result);
result = new LowerCaseFilter(result); result = new LowerCaseFilter(result);
return new RussianMorphlogyFilter(result, suffixEvristics); return new RussianMorphlogyFilter(result, suffixHeuristic);
} }
} }

View File

@ -24,11 +24,11 @@ import java.io.IOException;
public class RussianMorphlogyFilter extends TokenFilter { public class RussianMorphlogyFilter extends TokenFilter {
private SuffixEvristics suffixEvristics; private SuffixHeuristic suffixHeuristic;
public RussianMorphlogyFilter(TokenStream tokenStream, SuffixEvristics suffixEvristics) { public RussianMorphlogyFilter(TokenStream tokenStream, SuffixHeuristic suffixHeuristic) {
super(tokenStream); super(tokenStream);
this.suffixEvristics = suffixEvristics; this.suffixHeuristic = suffixHeuristic;
} }
public Token next(final Token reusableToken) throws IOException { public Token next(final Token reusableToken) throws IOException {
@ -40,7 +40,7 @@ public class RussianMorphlogyFilter extends TokenFilter {
return nextToken; return nextToken;
} }
Token current = (Token) nextToken.clone(); Token current = (Token) nextToken.clone();
return createToken(suffixEvristics.getCanonicalForm(word), current, reusableToken); return createToken(suffixHeuristic.getCanonicalForm(word), current, reusableToken);
} }
protected Token createToken(String synonym, Token current, final Token reusableToken) { protected Token createToken(String synonym, Token current, final Token reusableToken) {

View File

@ -22,7 +22,7 @@ import java.io.*;
import java.util.Arrays; import java.util.Arrays;
public class SuffixEvristics { public class SuffixHeuristic {
private long[] keys; private long[] keys;
private long[] values; private long[] values;
@ -32,11 +32,11 @@ public class SuffixEvristics {
} }
public SuffixEvristics() throws IOException { public SuffixHeuristic() throws IOException {
readFromResource(); readFromResource();
} }
public SuffixEvristics(String fileName) throws IOException { public SuffixHeuristic(String fileName) throws IOException {
readFromFile(fileName); readFromFile(fileName);
} }

View File

@ -14,7 +14,7 @@
* limitations under the License. * limitations under the License.
*/ */
package org.apache.lucene.russian.morphology.evristics; package org.apache.lucene.russian.morphology.heuristic;
import org.apache.lucene.russian.morphology.RussianSuffixDecoderEncoder; import org.apache.lucene.russian.morphology.RussianSuffixDecoderEncoder;
@ -25,14 +25,14 @@ import java.io.IOException;
import java.util.TreeMap; import java.util.TreeMap;
public class Evristic { public class Heuristic {
private TreeMap<Long, Long> encodedSuffixesPairs = new TreeMap<Long, Long>(); private TreeMap<Long, Long> encodedSuffixesPairs = new TreeMap<Long, Long>();
public void addEvristic(SuffixEvristic suffixEvristic) { public void addEvristic(SuffixHeuristic suffixHeuristic) {
Long suffix = RussianSuffixDecoderEncoder.encode(suffixEvristic.getFormSuffix()); Long suffix = RussianSuffixDecoderEncoder.encode(suffixHeuristic.getFormSuffix());
Long longs = encodedSuffixesPairs.get(suffix); Long longs = encodedSuffixesPairs.get(suffix);
if (longs == null) { if (longs == null) {
encodedSuffixesPairs.put(suffix, RussianSuffixDecoderEncoder.encode(suffixEvristic.getNormalSuffix())); encodedSuffixesPairs.put(suffix, RussianSuffixDecoderEncoder.encode(suffixHeuristic.getNormalSuffix()));
} }
} }

View File

@ -14,7 +14,7 @@
* limitations under the License. * limitations under the License.
*/ */
package org.apache.lucene.russian.morphology.evristics; package org.apache.lucene.russian.morphology.heuristic;
import org.apache.lucene.russian.morphology.RussianSuffixDecoderEncoder; import org.apache.lucene.russian.morphology.RussianSuffixDecoderEncoder;
import org.apache.lucene.russian.morphology.dictonary.WordCard; import org.apache.lucene.russian.morphology.dictonary.WordCard;
@ -25,28 +25,28 @@ import java.util.Map;
public class StatiticsCollectors implements WordProccessor { public class StatiticsCollectors implements WordProccessor {
Map<SuffixEvristic, SuffixCounter> statititics = new HashMap<SuffixEvristic, SuffixCounter>(); Map<SuffixHeuristic, SuffixCounter> statititics = new HashMap<SuffixHeuristic, SuffixCounter>();
private Integer ignoredCount = 0; private Integer ignoredCount = 0;
public void proccess(WordCard wordCard) { public void proccess(WordCard wordCard) {
for (String form : wordCard.getWordsFroms()) { for (String form : wordCard.getWordsFroms()) {
SuffixEvristic suffixEvristic = createEvristic(wordCard.getCanonicalFrom(), form); SuffixHeuristic suffixHeuristic = createEvristic(wordCard.getCanonicalFrom(), form);
if (suffixEvristic == null) continue; if (suffixHeuristic == null) continue;
SuffixCounter suffixCounter = statititics.get(suffixEvristic); SuffixCounter suffixCounter = statititics.get(suffixHeuristic);
if (suffixCounter == null) { if (suffixCounter == null) {
suffixCounter = new SuffixCounter(suffixEvristic); suffixCounter = new SuffixCounter(suffixHeuristic);
statititics.put(suffixEvristic, suffixCounter); statititics.put(suffixHeuristic, suffixCounter);
} }
suffixCounter.incrementAmount(); suffixCounter.incrementAmount();
} }
} }
public Map<SuffixEvristic, SuffixCounter> getStatititics() { public Map<SuffixHeuristic, SuffixCounter> getStatititics() {
return statititics; return statititics;
} }
private SuffixEvristic createEvristic(String word, String form) { private SuffixHeuristic createEvristic(String word, String form) {
int startSymbol = form.length() > RussianSuffixDecoderEncoder.SUFFIX_LENGTH ? form.length() - RussianSuffixDecoderEncoder.SUFFIX_LENGTH : 0; int startSymbol = form.length() > RussianSuffixDecoderEncoder.SUFFIX_LENGTH ? form.length() - RussianSuffixDecoderEncoder.SUFFIX_LENGTH : 0;
String formSuffix = form.substring(startSymbol); String formSuffix = form.substring(startSymbol);
if (word.length() < startSymbol) { if (word.length() < startSymbol) {
@ -58,7 +58,7 @@ public class StatiticsCollectors implements WordProccessor {
System.out.println(word + " " + form); System.out.println(word + " " + form);
return null; return null;
} }
return new SuffixEvristic(formSuffix, wordSuffix); return new SuffixHeuristic(formSuffix, wordSuffix);
} }

View File

@ -14,30 +14,30 @@
* limitations under the License. * limitations under the License.
*/ */
package org.apache.lucene.russian.morphology.evristics; package org.apache.lucene.russian.morphology.heuristic;
/** /**
* Conains information of freqency of suffix evristic * Conains information of freqency of suffix evristic
* in dictionary. * in dictionary.
*/ */
public class SuffixCounter implements Comparable { public class SuffixCounter implements Comparable {
private SuffixEvristic suffixEvristic; private SuffixHeuristic suffixHeuristic;
private Double amnout = 0.0; private Double amnout = 0.0;
public SuffixCounter(SuffixEvristic suffixEvristic) { public SuffixCounter(SuffixHeuristic suffixHeuristic) {
this.suffixEvristic = suffixEvristic; this.suffixHeuristic = suffixHeuristic;
} }
public void incrementAmount() { public void incrementAmount() {
amnout++; amnout++;
} }
public SuffixEvristic getSuffixEvristic() { public SuffixHeuristic getSuffixEvristic() {
return suffixEvristic; return suffixHeuristic;
} }
public void setSuffixEvristic(SuffixEvristic suffixEvristic) { public void setSuffixEvristic(SuffixHeuristic suffixHeuristic) {
this.suffixEvristic = suffixEvristic; this.suffixHeuristic = suffixHeuristic;
} }
public Double getAmnout() { public Double getAmnout() {
@ -55,6 +55,6 @@ public class SuffixCounter implements Comparable {
@Override @Override
public String toString() { public String toString() {
return "" + amnout + " " + suffixEvristic.toString(); return "" + amnout + " " + suffixHeuristic.toString();
} }
} }

View File

@ -14,7 +14,7 @@
* limitations under the License. * limitations under the License.
*/ */
package org.apache.lucene.russian.morphology.evristics; package org.apache.lucene.russian.morphology.heuristic;
/** /**
* Represent evristic that assume that * Represent evristic that assume that
@ -22,11 +22,11 @@ package org.apache.lucene.russian.morphology.evristics;
* It contains to suffixes from given position of * It contains to suffixes from given position of
* canonical word form and for form. * canonical word form and for form.
*/ */
public class SuffixEvristic { public class SuffixHeuristic {
private String formSuffix; private String formSuffix;
private String normalSuffix; private String normalSuffix;
public SuffixEvristic(String formSuffix, String normalSuffix) { public SuffixHeuristic(String formSuffix, String normalSuffix) {
this.formSuffix = formSuffix; this.formSuffix = formSuffix;
this.normalSuffix = normalSuffix; this.normalSuffix = normalSuffix;
} }
@ -52,7 +52,7 @@ public class SuffixEvristic {
if (this == o) return true; if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false; if (o == null || getClass() != o.getClass()) return false;
SuffixEvristic that = (SuffixEvristic) o; SuffixHeuristic that = (SuffixHeuristic) o;
if (!formSuffix.equals(that.formSuffix)) return false; if (!formSuffix.equals(that.formSuffix)) return false;
if (!normalSuffix.equals(that.normalSuffix)) return false; if (!normalSuffix.equals(that.normalSuffix)) return false;
@ -69,7 +69,7 @@ public class SuffixEvristic {
@Override @Override
public String toString() { public String toString() {
return "SuffixEvristic{" + return "SuffixHeuristic{" +
"formSuffix='" + formSuffix + '\'' + "formSuffix='" + formSuffix + '\'' +
", normalSuffix='" + normalSuffix + '\'' + ", normalSuffix='" + normalSuffix + '\'' +
'}'; '}';

View File

@ -30,13 +30,13 @@ public class SuffixEvristicsTest {
@Test @Test
public void testShouldDefineCorretCononicalWordForm() throws IOException { public void testShouldDefineCorretCononicalWordForm() throws IOException {
SuffixEvristics suffixEvristics = new SuffixEvristics(); SuffixHeuristic suffixHeuristic = new SuffixHeuristic();
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/analayzer/suffix-evristics-test-data.txt"); InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/analayzer/suffix-evristics-test-data.txt");
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8")); BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
String s = bufferedReader.readLine(); String s = bufferedReader.readLine();
while (s != null) { while (s != null) {
String[] qa = s.trim().split(" "); String[] qa = s.trim().split(" ");
assertThat(suffixEvristics.getCanonicalForm(qa[0]), equalTo(qa[1])); assertThat(suffixHeuristic.getCanonicalForm(qa[0]), equalTo(qa[1]));
s = bufferedReader.readLine(); s = bufferedReader.readLine();
} }
} }