fixing typo

git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@98 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
alexander.a.kuznetsov 2010-05-30 06:18:58 +00:00
parent 7bf8ef7d6f
commit e8399999c3
14 changed files with 55 additions and 55 deletions

View File

@ -28,22 +28,22 @@ import java.util.*;
* This class contains the logic for reading a
* dictionary and producing each word with all of its forms.
*/
public class DictonaryReader {
public class DictionaryReader {
private String fileName;
private String fileEncoding = "windows-1251";
private List<List<FlexiaModel>> wordsFlexias = new ArrayList<List<FlexiaModel>>();
private List<List<String>> wordPrefixes = new ArrayList<List<String>>();
private Set<String> ingnoredForm = new HashSet<String>();
private Set<String> ignoredForm = new HashSet<String>();
public DictonaryReader(String fileName, Set<String> ingnoredForm) {
public DictionaryReader(String fileName, Set<String> ignoredForm) {
this.fileName = fileName;
this.ingnoredForm = ingnoredForm;
this.ignoredForm = ignoredForm;
}
public DictonaryReader(String fileName, String fileEncoding, Set<String> ingnoredForm) {
public DictionaryReader(String fileName, String fileEncoding, Set<String> ignoredForm) {
this.fileName = fileName;
this.fileEncoding = fileEncoding;
this.ingnoredForm = ingnoredForm;
this.ignoredForm = ignoredForm;
}
@ -70,7 +70,7 @@ public class DictonaryReader {
wordBase = "#".equals(wordBase) ? "" : wordBase;
List<FlexiaModel> models = wordsFlexias.get(Integer.valueOf(wd[1]));
FlexiaModel flexiaModel = models.get(0);
if (models.size() > 0 && !ingnoredForm.contains(flexiaModel.getCode())) {
if (models.size() > 0 && !ignoredForm.contains(flexiaModel.getCode())) {
WordCard card = new WordCard(flexiaModel.create(wordBase), wordBase, flexiaModel.getSuffix());
for (FlexiaModel fm : models) {

View File

@ -30,7 +30,7 @@ public class GrammaReader {
private String fileName;
private String fileEncoding = "windows-1251";
private List<String> grammaInfo = new ArrayList<String>();
private Map<String, Integer> inversIndex = new HashMap<String, Integer>();
private Map<String, Integer> inverseIndex = new HashMap<String, Integer>();
public GrammaReader(String fileName) throws IOException {
this.fileName = fileName;
@ -51,7 +51,7 @@ public class GrammaReader {
if (!line.startsWith("//") && line.length() > 0) {
String[] strings = line.split(" ", 2);
Integer i = grammaInfo.size();
inversIndex.put(strings[0], i);
inverseIndex.put(strings[0], i);
grammaInfo.add(i, strings[1]);
}
line = bufferedReader.readLine();
@ -67,10 +67,10 @@ public class GrammaReader {
}
public Map<String, Integer> getGrammInversIndex() {
return inversIndex;
return inverseIndex;
}
public void setInversIndex(Map<String, Integer> inversIndex) {
this.inversIndex = inversIndex;
public void setInverseIndex(Map<String, Integer> inverseIndex) {
this.inverseIndex = inverseIndex;
}
}

View File

@ -41,12 +41,12 @@ public class StatisticsCollector implements WordProccessor {
public void process(WordCard wordCard) throws IOException {
cleanWordCard(wordCard);
String normalStringMorph = wordCard.getWordsFroms().get(0).getCode();
String normalStringMorph = wordCard.getWordsForms().get(0).getCode();
String word = wordCard.getBase() + wordCard.getCanonicalSuffix();
if (word.contains("-")) return;
if (!decoderEncoder.checkString(word)) return;
for (FlexiaModel fm : wordCard.getWordsFroms()) {
for (FlexiaModel fm : wordCard.getWordsForms()) {
if (!decoderEncoder.checkString(fm.create(wordCard.getBase())) || fm.create(wordCard.getBase()).contains("-")) continue;
Heuristic heuristic = createEvristic(wordCard.getBase(), wordCard.getCanonicalSuffix(), fm, normalStringMorph);
String form = revertWord(fm.create(wordCard.getBase()));
@ -61,9 +61,9 @@ public class StatisticsCollector implements WordProccessor {
private void cleanWordCard(WordCard wordCard) {
wordCard.setBase(cleanString(wordCard.getBase()));
wordCard.setCanonicalFrom(cleanString(wordCard.getCanonicalFrom()));
wordCard.setCanonicalForm(cleanString(wordCard.getCanonicalForm()));
wordCard.setCanonicalSuffix(cleanString(wordCard.getCanonicalSuffix()));
List<FlexiaModel> models = wordCard.getWordsFroms();
List<FlexiaModel> models = wordCard.getWordsForms();
for (FlexiaModel m : models) {
m.setSuffix(cleanString(m.getSuffix()));
m.setPrefix(cleanString(m.getPrefix()));

View File

@ -23,23 +23,23 @@ import java.util.List;
* Represents a word and all of its forms.
*/
public class WordCard {
private String canonicalFrom;
private String canonicalForm;
private String base;
private String canonicalSuffix;
private List<FlexiaModel> wordsFroms = new ArrayList<FlexiaModel>();
private List<FlexiaModel> wordsForms = new ArrayList<FlexiaModel>();
public WordCard(String canonicalFrom, String base, String canonicalSuffix) {
this.canonicalFrom = canonicalFrom;
public WordCard(String canonicalForm, String base, String canonicalSuffix) {
this.canonicalForm = canonicalForm;
this.canonicalSuffix = canonicalSuffix;
this.base = base;
}
public void addFlexia(FlexiaModel flexiaModel) {
wordsFroms.add(flexiaModel);
wordsForms.add(flexiaModel);
}
public String getCanonicalFrom() {
return canonicalFrom;
public String getCanonicalForm() {
return canonicalForm;
}
public String getCanonicalSuffix() {
@ -50,12 +50,12 @@ public class WordCard {
return base;
}
public List<FlexiaModel> getWordsFroms() {
return wordsFroms;
public List<FlexiaModel> getWordsForms() {
return wordsForms;
}
public void setCanonicalFrom(String canonicalFrom) {
this.canonicalFrom = canonicalFrom;
public void setCanonicalForm(String canonicalForm) {
this.canonicalForm = canonicalForm;
}
public void setBase(String base) {
@ -66,17 +66,17 @@ public class WordCard {
this.canonicalSuffix = canonicalSuffix;
}
public void setWordsFroms(List<FlexiaModel> wordsFroms) {
this.wordsFroms = wordsFroms;
public void setWordsForms(List<FlexiaModel> wordsForms) {
this.wordsForms = wordsForms;
}
@Override
public String toString() {
return "WordCard{" +
"canonicalFrom='" + canonicalFrom + '\'' +
"canonicalForm='" + canonicalForm + '\'' +
", base='" + base + '\'' +
", canonicalSuffix='" + canonicalSuffix + '\'' +
", wordsFroms=" + wordsFroms +
", wordsForms=" + wordsForms +
'}';
}
}

View File

@ -16,7 +16,7 @@
package org.apache.lucene.morphology.generator;
import org.apache.lucene.morphology.dictionary.DictonaryReader;
import org.apache.lucene.morphology.dictionary.DictionaryReader;
import org.apache.lucene.morphology.dictionary.GrammaReader;
import org.apache.lucene.morphology.dictionary.StatisticsCollector;
import org.apache.lucene.morphology.english.EnglishLetterDecoderEncoder;
@ -29,11 +29,11 @@ public class EnglishHeuristicBuilder {
public static void main(String[] args) throws IOException {
GrammaReader grammaInfo = new GrammaReader("dictonary/Dicts/Morph/egramtab.tab");
DictonaryReader dictonaryReader = new DictonaryReader("dictonary/Dicts/SrcMorph/EngSrc/morphs.mrd", new HashSet<String>());
DictionaryReader dictionaryReader = new DictionaryReader("dictonary/Dicts/SrcMorph/EngSrc/morphs.mrd", new HashSet<String>());
EnglishLetterDecoderEncoder decoderEncoder = new EnglishLetterDecoderEncoder();
StatisticsCollector statisticsCollector = new StatisticsCollector(grammaInfo, decoderEncoder);
dictonaryReader.proccess(statisticsCollector);
dictionaryReader.proccess(statisticsCollector);
statisticsCollector.saveHeuristic("english/src/main/resources/org/apache/lucene/morphology/english/morph.info");
}

View File

@ -16,7 +16,7 @@
package org.apache.lucene.morphology.generator;
import org.apache.lucene.morphology.dictionary.DictonaryReader;
import org.apache.lucene.morphology.dictionary.DictionaryReader;
import org.apache.lucene.morphology.dictionary.GrammaReader;
import org.apache.lucene.morphology.dictionary.StatisticsCollector;
import org.apache.lucene.morphology.russian.RussianLetterDecoderEncoder;
@ -28,11 +28,11 @@ import java.util.HashSet;
public class RussianHeuristicBuilder {
public static void main(String[] args) throws IOException {
GrammaReader grammaInfo = new GrammaReader("dictonary/Dicts/Morph/rgramtab.tab");
DictonaryReader dictonaryReader = new DictonaryReader("dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd", new HashSet<String>());
DictionaryReader dictionaryReader = new DictionaryReader("dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd", new HashSet<String>());
RussianLetterDecoderEncoder decoderEncoder = new RussianLetterDecoderEncoder();
StatisticsCollector statisticsCollector = new StatisticsCollector(grammaInfo, decoderEncoder);
dictonaryReader.proccess(statisticsCollector);
dictionaryReader.proccess(statisticsCollector);
statisticsCollector.saveHeuristic("russian/src/main/resources/org/apache/lucene/morphology/russian/morph.info");
}

View File

@ -15,14 +15,14 @@
*/
package org.apache.lucene.morphology.english;
import org.apache.lucene.morphology.analayzer.MorphlogyAnalayzer;
import org.apache.lucene.morphology.analyzer.MorphologyAnalyzer;
import java.io.IOException;
public class EnglishAnalayzer extends MorphlogyAnalayzer {
public class EnglishAnalyzer extends MorphologyAnalyzer {
public EnglishAnalayzer() throws IOException {
public EnglishAnalyzer() throws IOException {
super(new EnglishLuceneMorphology());
}

View File

@ -39,12 +39,12 @@ public class EnglishAnalayzerTest {
HashSet<String> answer = new HashSet<String>(Arrays.asList(strings));
stream.close();
EnglishAnalayzer morphlogyAnalayzer = new EnglishAnalayzer();
EnglishAnalyzer morphlogyAnalyzer = new EnglishAnalyzer();
stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/english/englsih-analayzer-data.txt");
InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
TokenStream tokenStream = morphlogyAnalayzer.tokenStream(null, reader);
TokenStream tokenStream = morphlogyAnalyzer.tokenStream(null, reader);
HashSet<String> result = new HashSet<String>();
while (tokenStream.incrementToken()) {
TermAttribute attribute1 = tokenStream.getAttribute(TermAttribute.class);

View File

@ -22,6 +22,6 @@ public interface Morphology {
List<String> getNormalForms(String s);
List<String> getMorfInfo(String s);
List<String> getMorphInfo(String s);
}

View File

@ -73,7 +73,7 @@ public class MorphologyImpl implements Morphology {
return result;
}
public List<String> getMorfInfo(String s) {
public List<String> getMorphInfo(String s) {
ArrayList<String> result = new ArrayList<String>();
int[] ints = decoderEncoder.encodeToArray(revertWord(s));
int ruleId = findRuleId(ints);

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
package org.apache.lucene.morphology.analayzer;
package org.apache.lucene.morphology.analyzer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
package org.apache.lucene.morphology.analayzer;
package org.apache.lucene.morphology.analyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
@ -29,18 +29,18 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
public class MorphlogyAnalayzer extends Analyzer {
public class MorphologyAnalyzer extends Analyzer {
private LuceneMorphology luceneMorph;
public MorphlogyAnalayzer(LuceneMorphology luceneMorph) {
public MorphologyAnalyzer(LuceneMorphology luceneMorph) {
this.luceneMorph = luceneMorph;
}
public MorphlogyAnalayzer(String pathToMorph, LetterDecoderEncoder letterDecoderEncoder) throws IOException {
public MorphologyAnalyzer(String pathToMorph, LetterDecoderEncoder letterDecoderEncoder) throws IOException {
luceneMorph = new LuceneMorphology(pathToMorph, letterDecoderEncoder);
}
public MorphlogyAnalayzer(InputStream inputStream, LetterDecoderEncoder letterDecoderEncoder) throws IOException {
public MorphologyAnalyzer(InputStream inputStream, LetterDecoderEncoder letterDecoderEncoder) throws IOException {
luceneMorph = new LuceneMorphology(inputStream, letterDecoderEncoder);
}

View File

@ -15,13 +15,13 @@
*/
package org.apache.lucene.morphology.russian;
import org.apache.lucene.morphology.analayzer.MorphlogyAnalayzer;
import org.apache.lucene.morphology.analyzer.MorphologyAnalyzer;
import java.io.IOException;
public class RussianAnalayzer extends MorphlogyAnalayzer {
public RussianAnalayzer() throws IOException {
public class RussianAnalyzer extends MorphologyAnalyzer {
public RussianAnalyzer() throws IOException {
super(new RussianLuceneMorphology());
}
}

View File

@ -39,12 +39,12 @@ public class RussianAnalayzerTest {
HashSet<String> answer = new HashSet<String>(Arrays.asList(strings));
stream.close();
RussianAnalayzer morphlogyAnalayzer = new RussianAnalayzer();
RussianAnalyzer morphlogyAnalyzer = new RussianAnalyzer();
stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/russian-analayzer-data.txt");
InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
TokenStream tokenStream = morphlogyAnalayzer.tokenStream(null, reader);
TokenStream tokenStream = morphlogyAnalyzer.tokenStream(null, reader);
HashSet<String> result = new HashSet<String>();
while (tokenStream.incrementToken()) {
TermAttribute attribute1 = tokenStream.getAttribute(TermAttribute.class);