diff --git a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/DictonaryReader.java b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/DictionaryReader.java similarity index 91% rename from dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/DictonaryReader.java rename to dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/DictionaryReader.java index 0ec7956..b22528b 100644 --- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/DictonaryReader.java +++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/DictionaryReader.java @@ -28,22 +28,22 @@ import java.util.*; * This class contain logic how read * dictonary and produce word with it all forms. */ -public class DictonaryReader { +public class DictionaryReader { private String fileName; private String fileEncoding = "windows-1251"; private List> wordsFlexias = new ArrayList>(); private List> wordPrefixes = new ArrayList>(); - private Set ingnoredForm = new HashSet(); + private Set ignoredForm = new HashSet(); - public DictonaryReader(String fileName, Set ingnoredForm) { + public DictionaryReader(String fileName, Set ignoredForm) { this.fileName = fileName; - this.ingnoredForm = ingnoredForm; + this.ignoredForm = ignoredForm; } - public DictonaryReader(String fileName, String fileEncoding, Set ingnoredForm) { + public DictionaryReader(String fileName, String fileEncoding, Set ignoredForm) { this.fileName = fileName; this.fileEncoding = fileEncoding; - this.ingnoredForm = ingnoredForm; + this.ignoredForm = ignoredForm; } @@ -70,7 +70,7 @@ public class DictonaryReader { wordBase = "#".equals(wordBase) ? "" : wordBase; List models = wordsFlexias.get(Integer.valueOf(wd[1])); FlexiaModel flexiaModel = models.get(0); - if (models.size() > 0 && !ingnoredForm.contains(flexiaModel.getCode())) { + if (models.size() > 0 && !ignoredForm.contains(flexiaModel.getCode())) { WordCard card = new WordCard(flexiaModel.create(wordBase), wordBase, flexiaModel.getSuffix()); for (FlexiaModel fm : models) { diff --git a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/GrammaReader.java b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/GrammaReader.java index 7d32727..691b872 100644 --- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/GrammaReader.java +++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/GrammaReader.java @@ -30,7 +30,7 @@ public class GrammaReader { private String fileName; private String fileEncoding = "windows-1251"; private List grammaInfo = new ArrayList(); - private Map inversIndex = new HashMap(); + private Map inverseIndex = new HashMap(); public GrammaReader(String fileName) throws IOException { this.fileName = fileName; @@ -51,7 +51,7 @@ public class GrammaReader { if (!line.startsWith("//") && line.length() > 0) { String[] strings = line.split(" ", 2); Integer i = grammaInfo.size(); - inversIndex.put(strings[0], i); + inverseIndex.put(strings[0], i); grammaInfo.add(i, strings[1]); } line = bufferedReader.readLine(); @@ -67,10 +67,10 @@ public class GrammaReader { } public Map getGrammInversIndex() { - return inversIndex; + return inverseIndex; } - public void setInversIndex(Map inversIndex) { - this.inversIndex = inversIndex; + public void setInverseIndex(Map inverseIndex) { + this.inverseIndex = inverseIndex; } } diff --git a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/StatisticsCollector.java b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/StatisticsCollector.java index 94eaace..a803d2c 100644 --- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/StatisticsCollector.java +++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/StatisticsCollector.java @@ -41,12 +41,12 @@ public class StatisticsCollector implements WordProccessor { public void process(WordCard wordCard) throws IOException { cleanWordCard(wordCard); - String normalStringMorph = wordCard.getWordsFroms().get(0).getCode(); + String normalStringMorph = wordCard.getWordsForms().get(0).getCode(); String word = wordCard.getBase() + wordCard.getCanonicalSuffix(); if (word.contains("-")) return; if (!decoderEncoder.checkString(word)) return; - for (FlexiaModel fm : wordCard.getWordsFroms()) { + for (FlexiaModel fm : wordCard.getWordsForms()) { if (!decoderEncoder.checkString(fm.create(wordCard.getBase())) || fm.create(wordCard.getBase()).contains("-")) continue; Heuristic heuristic = createEvristic(wordCard.getBase(), wordCard.getCanonicalSuffix(), fm, normalStringMorph); String form = revertWord(fm.create(wordCard.getBase())); @@ -61,9 +61,9 @@ public class StatisticsCollector implements WordProccessor { private void cleanWordCard(WordCard wordCard) { wordCard.setBase(cleanString(wordCard.getBase())); - wordCard.setCanonicalFrom(cleanString(wordCard.getCanonicalFrom())); + wordCard.setCanonicalForm(cleanString(wordCard.getCanonicalForm())); wordCard.setCanonicalSuffix(cleanString(wordCard.getCanonicalSuffix())); - List models = wordCard.getWordsFroms(); + List models = wordCard.getWordsForms(); for (FlexiaModel m : models) { m.setSuffix(cleanString(m.getSuffix())); m.setPrefix(cleanString(m.getPrefix())); diff --git a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordCard.java b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordCard.java index 6f18ccf..0906acd 100644 --- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordCard.java +++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordCard.java @@ -23,23 +23,23 @@ import java.util.List; * Represent word and all it forms. */ public class WordCard { - private String canonicalFrom; + private String canonicalForm; private String base; private String canonicalSuffix; - private List wordsFroms = new ArrayList(); + private List wordsForms = new ArrayList(); - public WordCard(String canonicalFrom, String base, String canonicalSuffix) { - this.canonicalFrom = canonicalFrom; + public WordCard(String canonicalForm, String base, String canonicalSuffix) { + this.canonicalForm = canonicalForm; this.canonicalSuffix = canonicalSuffix; this.base = base; } public void addFlexia(FlexiaModel flexiaModel) { - wordsFroms.add(flexiaModel); + wordsForms.add(flexiaModel); } - public String getCanonicalFrom() { - return canonicalFrom; + public String getCanonicalForm() { + return canonicalForm; } public String getCanonicalSuffix() { @@ -50,12 +50,12 @@ public class WordCard { return base; } - public List getWordsFroms() { - return wordsFroms; + public List getWordsForms() { + return wordsForms; } - public void setCanonicalFrom(String canonicalFrom) { - this.canonicalFrom = canonicalFrom; + public void setCanonicalForm(String canonicalForm) { + this.canonicalForm = canonicalForm; } public void setBase(String base) { @@ -66,17 +66,17 @@ public class WordCard { this.canonicalSuffix = canonicalSuffix; } - public void setWordsFroms(List wordsFroms) { - this.wordsFroms = wordsFroms; + public void setWordsForms(List wordsForms) { + this.wordsForms = wordsForms; } @Override public String toString() { return "WordCard{" + - "canonicalFrom='" + canonicalFrom + '\'' + + "canonicalForm='" + canonicalForm + '\'' + ", base='" + base + '\'' + ", canonicalSuffix='" + canonicalSuffix + '\'' + - ", wordsFroms=" + wordsFroms + + ", wordsForms=" + wordsForms + '}'; } } diff --git a/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/EnglishHeuristicBuilder.java b/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/EnglishHeuristicBuilder.java index 864b3d2..63c60f2 100644 --- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/EnglishHeuristicBuilder.java +++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/EnglishHeuristicBuilder.java @@ -16,7 +16,7 @@ package org.apache.lucene.morphology.generator; -import org.apache.lucene.morphology.dictionary.DictonaryReader; +import org.apache.lucene.morphology.dictionary.DictionaryReader; import org.apache.lucene.morphology.dictionary.GrammaReader; import org.apache.lucene.morphology.dictionary.StatisticsCollector; import org.apache.lucene.morphology.english.EnglishLetterDecoderEncoder; @@ -29,11 +29,11 @@ public class EnglishHeuristicBuilder { public static void main(String[] args) throws IOException { GrammaReader grammaInfo = new GrammaReader("dictonary/Dicts/Morph/egramtab.tab"); - DictonaryReader dictonaryReader = new DictonaryReader("dictonary/Dicts/SrcMorph/EngSrc/morphs.mrd", new HashSet()); + DictionaryReader dictionaryReader = new DictionaryReader("dictonary/Dicts/SrcMorph/EngSrc/morphs.mrd", new HashSet()); EnglishLetterDecoderEncoder decoderEncoder = new EnglishLetterDecoderEncoder(); StatisticsCollector statisticsCollector = new StatisticsCollector(grammaInfo, decoderEncoder); - dictonaryReader.proccess(statisticsCollector); + dictionaryReader.proccess(statisticsCollector); statisticsCollector.saveHeuristic("english/src/main/resources/org/apache/lucene/morphology/english/morph.info"); } diff --git a/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/RussianHeuristicBuilder.java b/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/RussianHeuristicBuilder.java index c0ae11d..2b896c9 100644 --- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/RussianHeuristicBuilder.java +++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/RussianHeuristicBuilder.java @@ -16,7 +16,7 @@ package org.apache.lucene.morphology.generator; -import org.apache.lucene.morphology.dictionary.DictonaryReader; +import org.apache.lucene.morphology.dictionary.DictionaryReader; import org.apache.lucene.morphology.dictionary.GrammaReader; import org.apache.lucene.morphology.dictionary.StatisticsCollector; import org.apache.lucene.morphology.russian.RussianLetterDecoderEncoder; @@ -28,11 +28,11 @@ import java.util.HashSet; public class RussianHeuristicBuilder { public static void main(String[] args) throws IOException { GrammaReader grammaInfo = new GrammaReader("dictonary/Dicts/Morph/rgramtab.tab"); - DictonaryReader dictonaryReader = new DictonaryReader("dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd", new HashSet()); + DictionaryReader dictionaryReader = new DictionaryReader("dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd", new HashSet()); RussianLetterDecoderEncoder decoderEncoder = new RussianLetterDecoderEncoder(); StatisticsCollector statisticsCollector = new StatisticsCollector(grammaInfo, decoderEncoder); - dictonaryReader.proccess(statisticsCollector); + dictionaryReader.proccess(statisticsCollector); statisticsCollector.saveHeuristic("russian/src/main/resources/org/apache/lucene/morphology/russian/morph.info"); } diff --git a/english/src/main/java/org/apache/lucene/morphology/english/EnglishAnalayzer.java b/english/src/main/java/org/apache/lucene/morphology/english/EnglishAnalyzer.java similarity index 78% rename from english/src/main/java/org/apache/lucene/morphology/english/EnglishAnalayzer.java rename to english/src/main/java/org/apache/lucene/morphology/english/EnglishAnalyzer.java index 8f6098c..648d3de 100644 --- a/english/src/main/java/org/apache/lucene/morphology/english/EnglishAnalayzer.java +++ b/english/src/main/java/org/apache/lucene/morphology/english/EnglishAnalyzer.java @@ -15,14 +15,14 @@ */ package org.apache.lucene.morphology.english; -import org.apache.lucene.morphology.analayzer.MorphlogyAnalayzer; +import org.apache.lucene.morphology.analyzer.MorphologyAnalyzer; import java.io.IOException; -public class EnglishAnalayzer extends MorphlogyAnalayzer { +public class EnglishAnalyzer extends MorphologyAnalyzer { - public EnglishAnalayzer() throws IOException { + public EnglishAnalyzer() throws IOException { super(new EnglishLuceneMorphology()); } diff --git a/english/src/test/java/org/apache/lucene/morphology/english/EnglishAnalayzerTest.java b/english/src/test/java/org/apache/lucene/morphology/english/EnglishAnalayzerTest.java index 2ad4b43..5a31ae0 100644 --- a/english/src/test/java/org/apache/lucene/morphology/english/EnglishAnalayzerTest.java +++ b/english/src/test/java/org/apache/lucene/morphology/english/EnglishAnalayzerTest.java @@ -39,12 +39,12 @@ public class EnglishAnalayzerTest { HashSet answer = new HashSet(Arrays.asList(strings)); stream.close(); - EnglishAnalayzer morphlogyAnalayzer = new EnglishAnalayzer(); + EnglishAnalyzer morphlogyAnalyzer = new EnglishAnalyzer(); stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/english/englsih-analayzer-data.txt"); InputStreamReader reader = new InputStreamReader(stream, "UTF-8"); - TokenStream tokenStream = morphlogyAnalayzer.tokenStream(null, reader); + TokenStream tokenStream = morphlogyAnalyzer.tokenStream(null, reader); HashSet result = new HashSet(); while (tokenStream.incrementToken()) { TermAttribute attribute1 = tokenStream.getAttribute(TermAttribute.class); diff --git a/morph/src/main/java/org/apache/lucene/morphology/Morphology.java b/morph/src/main/java/org/apache/lucene/morphology/Morphology.java index b35fb30..c2fc7a0 100644 --- a/morph/src/main/java/org/apache/lucene/morphology/Morphology.java +++ b/morph/src/main/java/org/apache/lucene/morphology/Morphology.java @@ -22,6 +22,6 @@ public interface Morphology { List getNormalForms(String s); - List getMorfInfo(String s); + List getMorphInfo(String s); } diff --git a/morph/src/main/java/org/apache/lucene/morphology/MorphologyImpl.java b/morph/src/main/java/org/apache/lucene/morphology/MorphologyImpl.java index 5efca38..5328328 100644 --- a/morph/src/main/java/org/apache/lucene/morphology/MorphologyImpl.java +++ b/morph/src/main/java/org/apache/lucene/morphology/MorphologyImpl.java @@ -73,7 +73,7 @@ public class MorphologyImpl implements Morphology { return result; } - public List getMorfInfo(String s) { + public List getMorphInfo(String s) { ArrayList result = new ArrayList(); int[] ints = decoderEncoder.encodeToArray(revertWord(s)); int ruleId = findRuleId(ints); diff --git a/morph/src/main/java/org/apache/lucene/morphology/analayzer/MorphlogyFilter.java b/morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphlogyFilter.java similarity index 94% rename from morph/src/main/java/org/apache/lucene/morphology/analayzer/MorphlogyFilter.java rename to morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphlogyFilter.java index 641bc9b..8fe84c6 100644 --- a/morph/src/main/java/org/apache/lucene/morphology/analayzer/MorphlogyFilter.java +++ b/morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphlogyFilter.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package org.apache.lucene.morphology.analayzer; +package org.apache.lucene.morphology.analyzer; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; diff --git a/morph/src/main/java/org/apache/lucene/morphology/analayzer/MorphlogyAnalayzer.java b/morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphologyAnalyzer.java similarity index 83% rename from morph/src/main/java/org/apache/lucene/morphology/analayzer/MorphlogyAnalayzer.java rename to morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphologyAnalyzer.java index 47ef528..5e6f08e 100644 --- a/morph/src/main/java/org/apache/lucene/morphology/analayzer/MorphlogyAnalayzer.java +++ b/morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphologyAnalyzer.java @@ -14,7 +14,7 @@ * limitations under the License. */ -package org.apache.lucene.morphology.analayzer; +package org.apache.lucene.morphology.analyzer; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.LowerCaseFilter; @@ -29,18 +29,18 @@ import java.io.IOException; import java.io.InputStream; import java.io.Reader; -public class MorphlogyAnalayzer extends Analyzer { +public class MorphologyAnalyzer extends Analyzer { private LuceneMorphology luceneMorph; - public MorphlogyAnalayzer(LuceneMorphology luceneMorph) { + public MorphologyAnalyzer(LuceneMorphology luceneMorph) { this.luceneMorph = luceneMorph; } - public MorphlogyAnalayzer(String pathToMorph, LetterDecoderEncoder letterDecoderEncoder) throws IOException { + public MorphologyAnalyzer(String pathToMorph, LetterDecoderEncoder letterDecoderEncoder) throws IOException { luceneMorph = new LuceneMorphology(pathToMorph, letterDecoderEncoder); } - public MorphlogyAnalayzer(InputStream inputStream, LetterDecoderEncoder letterDecoderEncoder) throws IOException { + public MorphologyAnalyzer(InputStream inputStream, LetterDecoderEncoder letterDecoderEncoder) throws IOException { luceneMorph = new LuceneMorphology(inputStream, letterDecoderEncoder); } diff --git a/russian/src/main/java/org/apache/lucene/morphology/russian/RussianAnalayzer.java b/russian/src/main/java/org/apache/lucene/morphology/russian/RussianAnalyzer.java similarity index 78% rename from russian/src/main/java/org/apache/lucene/morphology/russian/RussianAnalayzer.java rename to russian/src/main/java/org/apache/lucene/morphology/russian/RussianAnalyzer.java index 2c68328..98d26f3 100644 --- a/russian/src/main/java/org/apache/lucene/morphology/russian/RussianAnalayzer.java +++ b/russian/src/main/java/org/apache/lucene/morphology/russian/RussianAnalyzer.java @@ -15,13 +15,13 @@ */ package org.apache.lucene.morphology.russian; -import org.apache.lucene.morphology.analayzer.MorphlogyAnalayzer; +import org.apache.lucene.morphology.analyzer.MorphologyAnalyzer; import java.io.IOException; -public class RussianAnalayzer extends MorphlogyAnalayzer { - public RussianAnalayzer() throws IOException { +public class RussianAnalyzer extends MorphologyAnalyzer { + public RussianAnalyzer() throws IOException { super(new RussianLuceneMorphology()); } } diff --git a/russian/src/test/java/org/apache/lucene/morphology/russian/RussianAnalayzerTest.java b/russian/src/test/java/org/apache/lucene/morphology/russian/RussianAnalayzerTest.java index bec1462..a946bf5 100644 --- a/russian/src/test/java/org/apache/lucene/morphology/russian/RussianAnalayzerTest.java +++ b/russian/src/test/java/org/apache/lucene/morphology/russian/RussianAnalayzerTest.java @@ -39,12 +39,12 @@ public class RussianAnalayzerTest { HashSet answer = new HashSet(Arrays.asList(strings)); stream.close(); - RussianAnalayzer morphlogyAnalayzer = new RussianAnalayzer(); + RussianAnalyzer morphlogyAnalyzer = new RussianAnalyzer(); stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/russian-analayzer-data.txt"); InputStreamReader reader = new InputStreamReader(stream, "UTF-8"); - TokenStream tokenStream = morphlogyAnalayzer.tokenStream(null, reader); + TokenStream tokenStream = morphlogyAnalyzer.tokenStream(null, reader); HashSet result = new HashSet(); while (tokenStream.incrementToken()) { TermAttribute attribute1 = tokenStream.getAttribute(TermAttribute.class);