some spell checking fixes

git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@24 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
alexander.a.kuznetsov 2009-04-14 09:25:39 +00:00
parent 65670d4c9b
commit c68fbb0827
9 changed files with 48 additions and 48 deletions

View File

@ -18,9 +18,9 @@ package org.apache.lucene.russian.morphology;
import org.apache.lucene.russian.morphology.dictonary.DictonaryReader;
import org.apache.lucene.russian.morphology.dictonary.IgnoredFormReader;
import org.apache.lucene.russian.morphology.evristics.Evristic;
import org.apache.lucene.russian.morphology.evristics.StatiticsCollectors;
import org.apache.lucene.russian.morphology.evristics.SuffixCounter;
import org.apache.lucene.russian.morphology.heuristic.Heuristic;
import org.apache.lucene.russian.morphology.heuristic.StatiticsCollectors;
import org.apache.lucene.russian.morphology.heuristic.SuffixCounter;
import java.io.IOException;
import java.util.Arrays;
@ -28,7 +28,7 @@ import java.util.Collection;
import java.util.Set;
public class EvristicBuilder {
public class HeuristicBuilder {
public static void main(String[] args) throws IOException {
IgnoredFormReader formReader = new IgnoredFormReader("data/igoredFrom.txt");
Set<String> form = formReader.getIngnoredFroms();
@ -44,11 +44,11 @@ public class EvristicBuilder {
System.out.println(objects[i]);
}
final Evristic evristic = new Evristic();
final Heuristic heuristic = new Heuristic();
for (int i = 0; i < objects.length; i++) {
evristic.addEvristic(((SuffixCounter) objects[i]).getSuffixEvristic());
heuristic.addEvristic(((SuffixCounter) objects[i]).getSuffixEvristic());
}
evristic.writeToFile("src/main/resources/org/apache/lucene/russian/morpholgy/russianSuffixesEvristics.txt");
heuristic.writeToFile("src/main/resources/org/apache/lucene/russian/morpholgy/russianSuffixesEvristics.txt");
}
}

View File

@ -26,16 +26,16 @@ import java.io.IOException;
import java.io.Reader;
public class RussianMorphlogyAnalayzer extends Analyzer {
private SuffixEvristics suffixEvristics;
private SuffixHeuristic suffixHeuristic;
public RussianMorphlogyAnalayzer() throws IOException {
suffixEvristics = new SuffixEvristics();
suffixHeuristic = new SuffixHeuristic();
}
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream result = new StandardTokenizer(reader);
result = new StandardFilter(result);
result = new LowerCaseFilter(result);
return new RussianMorphlogyFilter(result, suffixEvristics);
return new RussianMorphlogyFilter(result, suffixHeuristic);
}
}

View File

@ -24,11 +24,11 @@ import java.io.IOException;
public class RussianMorphlogyFilter extends TokenFilter {
private SuffixEvristics suffixEvristics;
private SuffixHeuristic suffixHeuristic;
public RussianMorphlogyFilter(TokenStream tokenStream, SuffixEvristics suffixEvristics) {
public RussianMorphlogyFilter(TokenStream tokenStream, SuffixHeuristic suffixHeuristic) {
super(tokenStream);
this.suffixEvristics = suffixEvristics;
this.suffixHeuristic = suffixHeuristic;
}
public Token next(final Token reusableToken) throws IOException {
@ -40,7 +40,7 @@ public class RussianMorphlogyFilter extends TokenFilter {
return nextToken;
}
Token current = (Token) nextToken.clone();
return createToken(suffixEvristics.getCanonicalForm(word), current, reusableToken);
return createToken(suffixHeuristic.getCanonicalForm(word), current, reusableToken);
}
protected Token createToken(String synonym, Token current, final Token reusableToken) {

View File

@ -22,7 +22,7 @@ import java.io.*;
import java.util.Arrays;
public class SuffixEvristics {
public class SuffixHeuristic {
private long[] keys;
private long[] values;
@ -32,11 +32,11 @@ public class SuffixEvristics {
}
public SuffixEvristics() throws IOException {
public SuffixHeuristic() throws IOException {
readFromResource();
}
public SuffixEvristics(String fileName) throws IOException {
public SuffixHeuristic(String fileName) throws IOException {
readFromFile(fileName);
}

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
package org.apache.lucene.russian.morphology.evristics;
package org.apache.lucene.russian.morphology.heuristic;
import org.apache.lucene.russian.morphology.RussianSuffixDecoderEncoder;
@ -25,14 +25,14 @@ import java.io.IOException;
import java.util.TreeMap;
public class Evristic {
public class Heuristic {
private TreeMap<Long, Long> encodedSuffixesPairs = new TreeMap<Long, Long>();
public void addEvristic(SuffixEvristic suffixEvristic) {
Long suffix = RussianSuffixDecoderEncoder.encode(suffixEvristic.getFormSuffix());
public void addEvristic(SuffixHeuristic suffixHeuristic) {
Long suffix = RussianSuffixDecoderEncoder.encode(suffixHeuristic.getFormSuffix());
Long longs = encodedSuffixesPairs.get(suffix);
if (longs == null) {
encodedSuffixesPairs.put(suffix, RussianSuffixDecoderEncoder.encode(suffixEvristic.getNormalSuffix()));
encodedSuffixesPairs.put(suffix, RussianSuffixDecoderEncoder.encode(suffixHeuristic.getNormalSuffix()));
}
}

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
package org.apache.lucene.russian.morphology.evristics;
package org.apache.lucene.russian.morphology.heuristic;
import org.apache.lucene.russian.morphology.RussianSuffixDecoderEncoder;
import org.apache.lucene.russian.morphology.dictonary.WordCard;
@ -25,28 +25,28 @@ import java.util.Map;
public class StatiticsCollectors implements WordProccessor {
Map<SuffixEvristic, SuffixCounter> statititics = new HashMap<SuffixEvristic, SuffixCounter>();
Map<SuffixHeuristic, SuffixCounter> statititics = new HashMap<SuffixHeuristic, SuffixCounter>();
private Integer ignoredCount = 0;
public void proccess(WordCard wordCard) {
for (String form : wordCard.getWordsFroms()) {
SuffixEvristic suffixEvristic = createEvristic(wordCard.getCanonicalFrom(), form);
if (suffixEvristic == null) continue;
SuffixCounter suffixCounter = statititics.get(suffixEvristic);
SuffixHeuristic suffixHeuristic = createEvristic(wordCard.getCanonicalFrom(), form);
if (suffixHeuristic == null) continue;
SuffixCounter suffixCounter = statititics.get(suffixHeuristic);
if (suffixCounter == null) {
suffixCounter = new SuffixCounter(suffixEvristic);
statititics.put(suffixEvristic, suffixCounter);
suffixCounter = new SuffixCounter(suffixHeuristic);
statititics.put(suffixHeuristic, suffixCounter);
}
suffixCounter.incrementAmount();
}
}
public Map<SuffixEvristic, SuffixCounter> getStatititics() {
public Map<SuffixHeuristic, SuffixCounter> getStatititics() {
return statititics;
}
private SuffixEvristic createEvristic(String word, String form) {
private SuffixHeuristic createEvristic(String word, String form) {
int startSymbol = form.length() > RussianSuffixDecoderEncoder.SUFFIX_LENGTH ? form.length() - RussianSuffixDecoderEncoder.SUFFIX_LENGTH : 0;
String formSuffix = form.substring(startSymbol);
if (word.length() < startSymbol) {
@ -58,7 +58,7 @@ public class StatiticsCollectors implements WordProccessor {
System.out.println(word + " " + form);
return null;
}
return new SuffixEvristic(formSuffix, wordSuffix);
return new SuffixHeuristic(formSuffix, wordSuffix);
}

View File

@ -14,30 +14,30 @@
* limitations under the License.
*/
package org.apache.lucene.russian.morphology.evristics;
package org.apache.lucene.russian.morphology.heuristic;
/**
* Contains information about the frequency of a suffix heuristic
* in the dictionary.
*/
public class SuffixCounter implements Comparable {
private SuffixEvristic suffixEvristic;
private SuffixHeuristic suffixHeuristic;
private Double amnout = 0.0;
public SuffixCounter(SuffixEvristic suffixEvristic) {
this.suffixEvristic = suffixEvristic;
public SuffixCounter(SuffixHeuristic suffixHeuristic) {
this.suffixHeuristic = suffixHeuristic;
}
public void incrementAmount() {
amnout++;
}
public SuffixEvristic getSuffixEvristic() {
return suffixEvristic;
public SuffixHeuristic getSuffixEvristic() {
return suffixHeuristic;
}
public void setSuffixEvristic(SuffixEvristic suffixEvristic) {
this.suffixEvristic = suffixEvristic;
public void setSuffixEvristic(SuffixHeuristic suffixHeuristic) {
this.suffixHeuristic = suffixHeuristic;
}
public Double getAmnout() {
@ -55,6 +55,6 @@ public class SuffixCounter implements Comparable {
@Override
public String toString() {
return "" + amnout + " " + suffixEvristic.toString();
return "" + amnout + " " + suffixHeuristic.toString();
}
}

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
package org.apache.lucene.russian.morphology.evristics;
package org.apache.lucene.russian.morphology.heuristic;
/**
* Represents a heuristic that assumes that
@ -22,11 +22,11 @@ package org.apache.lucene.russian.morphology.evristics;
* It contains two suffixes, taken from a given position, of the
* canonical word form and of the word form.
*/
public class SuffixEvristic {
public class SuffixHeuristic {
private String formSuffix;
private String normalSuffix;
public SuffixEvristic(String formSuffix, String normalSuffix) {
public SuffixHeuristic(String formSuffix, String normalSuffix) {
this.formSuffix = formSuffix;
this.normalSuffix = normalSuffix;
}
@ -52,7 +52,7 @@ public class SuffixEvristic {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
SuffixEvristic that = (SuffixEvristic) o;
SuffixHeuristic that = (SuffixHeuristic) o;
if (!formSuffix.equals(that.formSuffix)) return false;
if (!normalSuffix.equals(that.normalSuffix)) return false;
@ -69,7 +69,7 @@ public class SuffixEvristic {
@Override
public String toString() {
return "SuffixEvristic{" +
return "SuffixHeuristic{" +
"formSuffix='" + formSuffix + '\'' +
", normalSuffix='" + normalSuffix + '\'' +
'}';

View File

@ -30,13 +30,13 @@ public class SuffixEvristicsTest {
@Test
public void testShouldDefineCorretCononicalWordForm() throws IOException {
SuffixEvristics suffixEvristics = new SuffixEvristics();
SuffixHeuristic suffixHeuristic = new SuffixHeuristic();
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/analayzer/suffix-evristics-test-data.txt");
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
String s = bufferedReader.readLine();
while (s != null) {
String[] qa = s.trim().split(" ");
assertThat(suffixEvristics.getCanonicalForm(qa[0]), equalTo(qa[1]));
assertThat(suffixHeuristic.getCanonicalForm(qa[0]), equalTo(qa[1]));
s = bufferedReader.readLine();
}
}