fixing some spelling errors

git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@100 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
Alexander.A.Kuznetsov
2010-10-08 12:18:18 +00:00
parent 76e68a11e0
commit 3de894404c
25 changed files with 55 additions and 342 deletions

View File

@ -42,25 +42,18 @@ public class DictionaryReader {
this.filters = filters;
}
public DictionaryReader(String fileName, String fileEncoding, Set<String> ignoredForm, List<WordFilter> filters) {
this.fileName = fileName;
this.fileEncoding = fileEncoding;
this.ignoredForm = ignoredForm;
this.filters = filters;
}
public void proccess(WordProccessor wordProccessor) throws IOException {
public void process(WordProcessor wordProcessor) throws IOException {
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), fileEncoding));
readFlexias(bufferedReader);
sckipBlock(bufferedReader);
sckipBlock(bufferedReader);
skipBlock(bufferedReader);
skipBlock(bufferedReader);
readPrefix(bufferedReader);
readWords(bufferedReader, wordProccessor);
readWords(bufferedReader, wordProcessor);
}
private void readWords(BufferedReader reader, WordProccessor wordProccessor) throws IOException {
private void readWords(BufferedReader reader, WordProcessor wordProcessor) throws IOException {
String s = reader.readLine();
int count = Integer.valueOf(s);
int actual = 0;
@ -79,7 +72,7 @@ public class DictionaryReader {
continue;
}
wordProccessor.process(card);
wordProcessor.process(card);
actual++;
}
@ -106,11 +99,11 @@ public class DictionaryReader {
}
private void sckipBlock(BufferedReader reader) throws IOException {
private void skipBlock(BufferedReader reader) throws IOException {
String s = reader.readLine();
int count = Integer.valueOf(s);
for (int i = 0; i < count; i++) {
s = reader.readLine();
reader.readLine();
}
}

View File

@ -25,19 +25,19 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
//todo split this class in two.
public class GrammaReader {
public class GrammarReader {
private String fileName;
private String fileEncoding = "windows-1251";
private List<String> grammaInfo = new ArrayList<String>();
private List<String> grammarInfo = new ArrayList<String>();
private Map<String, Integer> inverseIndex = new HashMap<String, Integer>();
public GrammaReader(String fileName) throws IOException {
public GrammarReader(String fileName) throws IOException {
this.fileName = fileName;
setUp();
}
public GrammaReader(String fileName, String fileEncoding) throws IOException {
public GrammarReader(String fileName, String fileEncoding) throws IOException {
this.fileName = fileName;
this.fileEncoding = fileEncoding;
setUp();
@ -50,27 +50,23 @@ public class GrammaReader {
line = line.trim();
if (!line.startsWith("//") && line.length() > 0) {
String[] strings = line.split(" ", 2);
Integer i = grammaInfo.size();
Integer i = grammarInfo.size();
inverseIndex.put(strings[0], i);
grammaInfo.add(i, strings[1]);
grammarInfo.add(i, strings[1]);
}
line = bufferedReader.readLine();
}
}
public List<String> getGrammaInfo() {
return grammaInfo;
public List<String> getGrammarInfo() {
return grammarInfo;
}
public String[] getGrammaInfoAsArray() {
return grammaInfo.toArray(new String[grammaInfo.size()]);
public String[] getGrammarInfoAsArray() {
return grammarInfo.toArray(new String[grammarInfo.size()]);
}
public Map<String, Integer> getGrammInversIndex() {
public Map<String, Integer> getGrammarInverseIndex() {
return inverseIndex;
}
public void setInverseIndex(Map<String, Integer> inverseIndex) {
this.inverseIndex = inverseIndex;
}
}

View File

@ -26,16 +26,16 @@ import java.util.*;
//todo refactor this class
public class StatisticsCollector implements WordProccessor {
public class StatisticsCollector implements WordProcessor {
private TreeMap<String, Set<Heuristic>> inverseIndex = new TreeMap<String, Set<Heuristic>>();
private Map<Set<Heuristic>, Integer> ruleInverseIndex = new HashMap<Set<Heuristic>, Integer>();
private List<Set<Heuristic>> rules = new ArrayList<Set<Heuristic>>();
private GrammaReader grammaReader;
private GrammarReader grammarReader;
private LetterDecoderEncoder decoderEncoder;
public StatisticsCollector(GrammaReader grammaReader, LetterDecoderEncoder decoderEncoder) {
this.grammaReader = grammaReader;
public StatisticsCollector(GrammarReader grammarReader, LetterDecoderEncoder decoderEncoder) {
this.grammarReader = grammarReader;
this.decoderEncoder = decoderEncoder;
}
@ -115,7 +115,7 @@ public class StatisticsCollector implements WordProccessor {
prevSet = currentSet;
}
}
MorphologyImpl morphology = new MorphologyImpl(ints, rulesId, heuristics, grammaReader.getGrammaInfoAsArray());
MorphologyImpl morphology = new MorphologyImpl(ints, rulesId, heuristics, grammarReader.getGrammarInfoAsArray());
morphology.writeToFile(fileName);
}
@ -134,8 +134,8 @@ public class StatisticsCollector implements WordProccessor {
Integer length = getCommonLength(form, normalForm);
Integer actualSuffixLengh = form.length() - length;
String actualNormalSuffix = normalForm.substring(length);
Integer integer = grammaReader.getGrammInversIndex().get(fm.getCode());
Integer nf = grammaReader.getGrammInversIndex().get(normalSuffixForm);
Integer integer = grammarReader.getGrammarInverseIndex().get(fm.getCode());
Integer nf = grammarReader.getGrammarInverseIndex().get(normalSuffixForm);
return new Heuristic((byte) actualSuffixLengh.intValue(), actualNormalSuffix, (short) integer.intValue(), (short) nf.intValue());
}

View File

@ -20,9 +20,8 @@ import java.io.IOException;
/**
* Interface that allows receiving information from
* {@link org.apache.lucene.russian.morphology.dictonary.DictionaryReader}.
*/
public interface WordProccessor {
public interface WordProcessor {
public void process(WordCard wordCard) throws IOException;
}

View File

@ -28,14 +28,14 @@ import java.util.List;
public class EnglishHeuristicBuilder {
public static void main(String[] args) throws IOException {
GrammaReader grammaInfo = new GrammaReader("dictonary/Dicts/Morph/egramtab.tab");
GrammarReader grammarInfo = new GrammarReader("dictonary/Dicts/Morph/egramtab.tab");
EnglishLetterDecoderEncoder decoderEncoder = new EnglishLetterDecoderEncoder();
List<WordFilter> filters = Arrays.asList(new WordStringCleaner(decoderEncoder), new WordCleaner(decoderEncoder));
DictionaryReader dictionaryReader = new DictionaryReader("dictonary/Dicts/SrcMorph/EngSrc/morphs.mrd", new HashSet<String>(), filters);
StatisticsCollector statisticsCollector = new StatisticsCollector(grammaInfo, decoderEncoder);
dictionaryReader.proccess(statisticsCollector);
StatisticsCollector statisticsCollector = new StatisticsCollector(grammarInfo, decoderEncoder);
dictionaryReader.process(statisticsCollector);
statisticsCollector.saveHeuristic("english/src/main/resources/org/apache/lucene/morphology/english/morph.info");
}

View File

@ -27,14 +27,14 @@ import java.util.List;
public class RussianHeuristicBuilder {
public static void main(String[] args) throws IOException {
GrammaReader grammaInfo = new GrammaReader("dictonary/Dicts/Morph/rgramtab.tab");
GrammarReader grammarInfo = new GrammarReader("dictonary/Dicts/Morph/rgramtab.tab");
RussianLetterDecoderEncoder decoderEncoder = new RussianLetterDecoderEncoder();
List<WordFilter> filters = Arrays.asList(new WordStringCleaner(decoderEncoder), new WordCleaner(decoderEncoder));
DictionaryReader dictionaryReader = new DictionaryReader("dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd", new HashSet<String>(), filters);
StatisticsCollector statisticsCollector = new StatisticsCollector(grammaInfo, decoderEncoder);
dictionaryReader.proccess(statisticsCollector);
StatisticsCollector statisticsCollector = new StatisticsCollector(grammarInfo, decoderEncoder);
dictionaryReader.process(statisticsCollector);
statisticsCollector.saveHeuristic("russian/src/main/resources/org/apache/lucene/morphology/russian/morph.info");
}

View File

@ -67,9 +67,9 @@ public class TestAllWords {
}
private void testFullGramma(final Morphology morphology, LetterDecoderEncoder decoderEncoder, String pathToGramma, String pathToDict) throws IOException {
GrammaReader grammaInfo = new GrammaReader(pathToGramma);
final List<String> morphInfo = grammaInfo.getGrammaInfo();
final Map<String, Integer> inversIndex = grammaInfo.getGrammInversIndex();
GrammarReader grammarInfo = new GrammarReader(pathToGramma);
final List<String> morphInfo = grammarInfo.getGrammarInfo();
final Map<String, Integer> inversIndex = grammarInfo.getGrammarInverseIndex();
List<WordFilter> filters = Arrays.asList(new WordStringCleaner(decoderEncoder), new WordCleaner(decoderEncoder));
@ -79,7 +79,7 @@ public class TestAllWords {
final AtomicLong wordCount = new AtomicLong(0);
Long startTime = System.currentTimeMillis();
dictionaryReader.proccess(new WordProccessor() {
dictionaryReader.process(new WordProcessor() {
public void process(WordCard wordCard) throws IOException {
String word = wordCard.getBase() + wordCard.getCanonicalSuffix();
for (FlexiaModel fm : wordCard.getWordsForms()) {
@ -125,7 +125,7 @@ public class TestAllWords {
Long startTime = System.currentTimeMillis();
DictionaryReader dictionaryReader = new DictionaryReader(pathToDic, new HashSet<String>(), filters);
dictionaryReader.proccess(new WordProccessor() {
dictionaryReader.process(new WordProcessor() {
public void process(WordCard wordCard) throws IOException {
String word = wordCard.getBase() + wordCard.getCanonicalSuffix();
for (FlexiaModel fm : wordCard.getWordsForms()) {

View File

@ -32,13 +32,13 @@ import static org.hamcrest.Matchers.equalTo;
import static org.junit.Assert.assertThat;
public class AnalayzersTest {
public class AnalyzersTest {
@Test
public void englishAnalyzerShouldGiveCorrectWords() throws IOException {
Analyzer morphlogyAnalyzer = new EnglishAnalyzer();
String answerPath = "/english/englsih-analayzer-answer.txt";
String testPath = "/english/englsih-analayzer-data.txt";
String answerPath = "/english/english-analyzer-answer.txt";
String testPath = "/english/english-analyzer-data.txt";
testAnalayzer(morphlogyAnalyzer, answerPath, testPath);
}
@ -46,8 +46,8 @@ public class AnalayzersTest {
@Test
public void shoudGiveCorretWords() throws IOException {
Analyzer morphlogyAnalyzer = new RussianAnalyzer();
String answerPath = "/russian/russian-analayzer-answer.txt";
String testPath = "/russian/russian-analayzer-data.txt";
String answerPath = "/russian/russian-analyzer-answer.txt";
String testPath = "/russian/russian-analyzer-data.txt";
testAnalayzer(morphlogyAnalyzer, answerPath, testPath);
}