Some spell-checking fixes
git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@24 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
		| @@ -18,9 +18,9 @@ package org.apache.lucene.russian.morphology; | ||||
| 
 | ||||
| import org.apache.lucene.russian.morphology.dictonary.DictonaryReader; | ||||
| import org.apache.lucene.russian.morphology.dictonary.IgnoredFormReader; | ||||
| import org.apache.lucene.russian.morphology.evristics.Evristic; | ||||
| import org.apache.lucene.russian.morphology.evristics.StatiticsCollectors; | ||||
| import org.apache.lucene.russian.morphology.evristics.SuffixCounter; | ||||
| import org.apache.lucene.russian.morphology.heuristic.Heuristic; | ||||
| import org.apache.lucene.russian.morphology.heuristic.StatiticsCollectors; | ||||
| import org.apache.lucene.russian.morphology.heuristic.SuffixCounter; | ||||
| 
 | ||||
| import java.io.IOException; | ||||
| import java.util.Arrays; | ||||
| @@ -28,7 +28,7 @@ import java.util.Collection; | ||||
| import java.util.Set; | ||||
| 
 | ||||
| 
 | ||||
| public class EvristicBuilder { | ||||
| public class HeuristicBuilder { | ||||
|     public static void main(String[] args) throws IOException { | ||||
|         IgnoredFormReader formReader = new IgnoredFormReader("data/igoredFrom.txt"); | ||||
|         Set<String> form = formReader.getIngnoredFroms(); | ||||
| @@ -44,11 +44,11 @@ public class EvristicBuilder { | ||||
|             System.out.println(objects[i]); | ||||
|         } | ||||
| 
 | ||||
|         final Evristic evristic = new Evristic(); | ||||
|         final Heuristic heuristic = new Heuristic(); | ||||
|         for (int i = 0; i < objects.length; i++) { | ||||
|             evristic.addEvristic(((SuffixCounter) objects[i]).getSuffixEvristic()); | ||||
|             heuristic.addEvristic(((SuffixCounter) objects[i]).getSuffixEvristic()); | ||||
|         } | ||||
| 
 | ||||
|         evristic.writeToFile("src/main/resources/org/apache/lucene/russian/morpholgy/russianSuffixesEvristics.txt"); | ||||
|         heuristic.writeToFile("src/main/resources/org/apache/lucene/russian/morpholgy/russianSuffixesEvristics.txt"); | ||||
|     } | ||||
| } | ||||
| @@ -26,16 +26,16 @@ import java.io.IOException; | ||||
| import java.io.Reader; | ||||
|  | ||||
| public class RussianMorphlogyAnalayzer extends Analyzer { | ||||
|     private SuffixEvristics suffixEvristics; | ||||
|     private SuffixHeuristic suffixHeuristic; | ||||
|  | ||||
|     public RussianMorphlogyAnalayzer() throws IOException { | ||||
|         suffixEvristics = new SuffixEvristics(); | ||||
|         suffixHeuristic = new SuffixHeuristic(); | ||||
|     } | ||||
|  | ||||
|     public TokenStream tokenStream(String fieldName, Reader reader) { | ||||
|         TokenStream result = new StandardTokenizer(reader); | ||||
|         result = new StandardFilter(result); | ||||
|         result = new LowerCaseFilter(result); | ||||
|         return new RussianMorphlogyFilter(result, suffixEvristics); | ||||
|         return new RussianMorphlogyFilter(result, suffixHeuristic); | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -24,11 +24,11 @@ import java.io.IOException; | ||||
|  | ||||
|  | ||||
| public class RussianMorphlogyFilter extends TokenFilter { | ||||
|     private SuffixEvristics suffixEvristics; | ||||
|     private SuffixHeuristic suffixHeuristic; | ||||
|  | ||||
|     public RussianMorphlogyFilter(TokenStream tokenStream, SuffixEvristics suffixEvristics) { | ||||
|     public RussianMorphlogyFilter(TokenStream tokenStream, SuffixHeuristic suffixHeuristic) { | ||||
|         super(tokenStream); | ||||
|         this.suffixEvristics = suffixEvristics; | ||||
|         this.suffixHeuristic = suffixHeuristic; | ||||
|     } | ||||
|  | ||||
|     public Token next(final Token reusableToken) throws IOException { | ||||
| @@ -40,7 +40,7 @@ public class RussianMorphlogyFilter extends TokenFilter { | ||||
|             return nextToken; | ||||
|         } | ||||
|         Token current = (Token) nextToken.clone(); | ||||
|         return createToken(suffixEvristics.getCanonicalForm(word), current, reusableToken); | ||||
|         return createToken(suffixHeuristic.getCanonicalForm(word), current, reusableToken); | ||||
|     } | ||||
|  | ||||
|     protected Token createToken(String synonym, Token current, final Token reusableToken) { | ||||
|   | ||||
| @@ -22,7 +22,7 @@ import java.io.*; | ||||
| import java.util.Arrays; | ||||
| 
 | ||||
| 
 | ||||
| public class SuffixEvristics { | ||||
| public class SuffixHeuristic { | ||||
|     private long[] keys; | ||||
|     private long[] values; | ||||
| 
 | ||||
| @@ -32,11 +32,11 @@ public class SuffixEvristics { | ||||
|     } | ||||
| 
 | ||||
| 
 | ||||
|     public SuffixEvristics() throws IOException { | ||||
|     public SuffixHeuristic() throws IOException { | ||||
|         readFromResource(); | ||||
|     } | ||||
| 
 | ||||
|     public SuffixEvristics(String fileName) throws IOException { | ||||
|     public SuffixHeuristic(String fileName) throws IOException { | ||||
|         readFromFile(fileName); | ||||
|     } | ||||
| 
 | ||||
| @@ -14,7 +14,7 @@ | ||||
|  * limitations under the License. | ||||
|  */ | ||||
| 
 | ||||
| package org.apache.lucene.russian.morphology.evristics; | ||||
| package org.apache.lucene.russian.morphology.heuristic; | ||||
| 
 | ||||
| import org.apache.lucene.russian.morphology.RussianSuffixDecoderEncoder; | ||||
| 
 | ||||
| @@ -25,14 +25,14 @@ import java.io.IOException; | ||||
| import java.util.TreeMap; | ||||
| 
 | ||||
| 
 | ||||
| public class Evristic { | ||||
| public class Heuristic { | ||||
|     private TreeMap<Long, Long> encodedSuffixesPairs = new TreeMap<Long, Long>(); | ||||
| 
 | ||||
|     public void addEvristic(SuffixEvristic suffixEvristic) { | ||||
|         Long suffix = RussianSuffixDecoderEncoder.encode(suffixEvristic.getFormSuffix()); | ||||
|     public void addEvristic(SuffixHeuristic suffixHeuristic) { | ||||
|         Long suffix = RussianSuffixDecoderEncoder.encode(suffixHeuristic.getFormSuffix()); | ||||
|         Long longs = encodedSuffixesPairs.get(suffix); | ||||
|         if (longs == null) { | ||||
|             encodedSuffixesPairs.put(suffix, RussianSuffixDecoderEncoder.encode(suffixEvristic.getNormalSuffix())); | ||||
|             encodedSuffixesPairs.put(suffix, RussianSuffixDecoderEncoder.encode(suffixHeuristic.getNormalSuffix())); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
| @@ -14,7 +14,7 @@ | ||||
|  * limitations under the License. | ||||
|  */ | ||||
| 
 | ||||
| package org.apache.lucene.russian.morphology.evristics; | ||||
| package org.apache.lucene.russian.morphology.heuristic; | ||||
| 
 | ||||
| import org.apache.lucene.russian.morphology.RussianSuffixDecoderEncoder; | ||||
| import org.apache.lucene.russian.morphology.dictonary.WordCard; | ||||
| @@ -25,28 +25,28 @@ import java.util.Map; | ||||
| 
 | ||||
| 
 | ||||
| public class StatiticsCollectors implements WordProccessor { | ||||
|     Map<SuffixEvristic, SuffixCounter> statititics = new HashMap<SuffixEvristic, SuffixCounter>(); | ||||
|     Map<SuffixHeuristic, SuffixCounter> statititics = new HashMap<SuffixHeuristic, SuffixCounter>(); | ||||
| 
 | ||||
|     private Integer ignoredCount = 0; | ||||
| 
 | ||||
|     public void proccess(WordCard wordCard) { | ||||
|         for (String form : wordCard.getWordsFroms()) { | ||||
|             SuffixEvristic suffixEvristic = createEvristic(wordCard.getCanonicalFrom(), form); | ||||
|             if (suffixEvristic == null) continue; | ||||
|             SuffixCounter suffixCounter = statititics.get(suffixEvristic); | ||||
|             SuffixHeuristic suffixHeuristic = createEvristic(wordCard.getCanonicalFrom(), form); | ||||
|             if (suffixHeuristic == null) continue; | ||||
|             SuffixCounter suffixCounter = statititics.get(suffixHeuristic); | ||||
|             if (suffixCounter == null) { | ||||
|                 suffixCounter = new SuffixCounter(suffixEvristic); | ||||
|                 statititics.put(suffixEvristic, suffixCounter); | ||||
|                 suffixCounter = new SuffixCounter(suffixHeuristic); | ||||
|                 statititics.put(suffixHeuristic, suffixCounter); | ||||
|             } | ||||
|             suffixCounter.incrementAmount(); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     public Map<SuffixEvristic, SuffixCounter> getStatititics() { | ||||
|     public Map<SuffixHeuristic, SuffixCounter> getStatititics() { | ||||
|         return statititics; | ||||
|     } | ||||
| 
 | ||||
|     private SuffixEvristic createEvristic(String word, String form) { | ||||
|     private SuffixHeuristic createEvristic(String word, String form) { | ||||
|         int startSymbol = form.length() > RussianSuffixDecoderEncoder.SUFFIX_LENGTH ? form.length() - RussianSuffixDecoderEncoder.SUFFIX_LENGTH : 0; | ||||
|         String formSuffix = form.substring(startSymbol); | ||||
|         if (word.length() < startSymbol) { | ||||
| @@ -58,7 +58,7 @@ public class StatiticsCollectors implements WordProccessor { | ||||
|             System.out.println(word + " " + form); | ||||
|             return null; | ||||
|         } | ||||
|         return new SuffixEvristic(formSuffix, wordSuffix); | ||||
|         return new SuffixHeuristic(formSuffix, wordSuffix); | ||||
|     } | ||||
| 
 | ||||
| 
 | ||||
| @@ -14,30 +14,30 @@ | ||||
|  * limitations under the License. | ||||
|  */ | ||||
| 
 | ||||
| package org.apache.lucene.russian.morphology.evristics; | ||||
| package org.apache.lucene.russian.morphology.heuristic; | ||||
| 
 | ||||
| /** | ||||
|  * Contains information on the frequency of a suffix heuristic | ||||
|  * in the dictionary. | ||||
|  */ | ||||
| public class SuffixCounter implements Comparable { | ||||
|     private SuffixEvristic suffixEvristic; | ||||
|     private SuffixHeuristic suffixHeuristic; | ||||
|     private Double amnout = 0.0; | ||||
| 
 | ||||
|     public SuffixCounter(SuffixEvristic suffixEvristic) { | ||||
|         this.suffixEvristic = suffixEvristic; | ||||
|     public SuffixCounter(SuffixHeuristic suffixHeuristic) { | ||||
|         this.suffixHeuristic = suffixHeuristic; | ||||
|     } | ||||
| 
 | ||||
|     public void incrementAmount() { | ||||
|         amnout++; | ||||
|     } | ||||
| 
 | ||||
|     public SuffixEvristic getSuffixEvristic() { | ||||
|         return suffixEvristic; | ||||
|     public SuffixHeuristic getSuffixEvristic() { | ||||
|         return suffixHeuristic; | ||||
|     } | ||||
| 
 | ||||
|     public void setSuffixEvristic(SuffixEvristic suffixEvristic) { | ||||
|         this.suffixEvristic = suffixEvristic; | ||||
|     public void setSuffixEvristic(SuffixHeuristic suffixHeuristic) { | ||||
|         this.suffixHeuristic = suffixHeuristic; | ||||
|     } | ||||
| 
 | ||||
|     public Double getAmnout() { | ||||
| @@ -55,6 +55,6 @@ public class SuffixCounter implements Comparable { | ||||
| 
 | ||||
|     @Override | ||||
|     public String toString() { | ||||
|         return "" + amnout + " " + suffixEvristic.toString(); | ||||
|         return "" + amnout + " " + suffixHeuristic.toString(); | ||||
|     } | ||||
| } | ||||
| @@ -14,7 +14,7 @@ | ||||
|  * limitations under the License. | ||||
|  */ | ||||
| 
 | ||||
| package org.apache.lucene.russian.morphology.evristics; | ||||
| package org.apache.lucene.russian.morphology.heuristic; | ||||
| 
 | ||||
| /** | ||||
|  * Represent evristic that assume that | ||||
| @@ -22,11 +22,11 @@ package org.apache.lucene.russian.morphology.evristics; | ||||
|  * It contains to suffixes from given position of | ||||
|  * canonical word form and for form. | ||||
|  */ | ||||
| public class SuffixEvristic { | ||||
| public class SuffixHeuristic { | ||||
|     private String formSuffix; | ||||
|     private String normalSuffix; | ||||
| 
 | ||||
|     public SuffixEvristic(String formSuffix, String normalSuffix) { | ||||
|     public SuffixHeuristic(String formSuffix, String normalSuffix) { | ||||
|         this.formSuffix = formSuffix; | ||||
|         this.normalSuffix = normalSuffix; | ||||
|     } | ||||
| @@ -52,7 +52,7 @@ public class SuffixEvristic { | ||||
|         if (this == o) return true; | ||||
|         if (o == null || getClass() != o.getClass()) return false; | ||||
| 
 | ||||
|         SuffixEvristic that = (SuffixEvristic) o; | ||||
|         SuffixHeuristic that = (SuffixHeuristic) o; | ||||
| 
 | ||||
|         if (!formSuffix.equals(that.formSuffix)) return false; | ||||
|         if (!normalSuffix.equals(that.normalSuffix)) return false; | ||||
| @@ -69,7 +69,7 @@ public class SuffixEvristic { | ||||
| 
 | ||||
|     @Override | ||||
|     public String toString() { | ||||
|         return "SuffixEvristic{" + | ||||
|         return "SuffixHeuristic{" + | ||||
|                 "formSuffix='" + formSuffix + '\'' + | ||||
|                 ", normalSuffix='" + normalSuffix + '\'' + | ||||
|                 '}'; | ||||
| @@ -30,13 +30,13 @@ public class SuffixEvristicsTest { | ||||
|  | ||||
|     @Test | ||||
|     public void testShouldDefineCorretCononicalWordForm() throws IOException { | ||||
|         SuffixEvristics suffixEvristics = new SuffixEvristics(); | ||||
|         SuffixHeuristic suffixHeuristic = new SuffixHeuristic(); | ||||
|         InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/analayzer/suffix-evristics-test-data.txt"); | ||||
|         BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8")); | ||||
|         String s = bufferedReader.readLine(); | ||||
|         while (s != null) { | ||||
|             String[] qa = s.trim().split(" "); | ||||
|             assertThat(suffixEvristics.getCanonicalForm(qa[0]), equalTo(qa[1])); | ||||
|             assertThat(suffixHeuristic.getCanonicalForm(qa[0]), equalTo(qa[1])); | ||||
|             s = bufferedReader.readLine(); | ||||
|         } | ||||
|     } | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 alexander.a.kuznetsov
					alexander.a.kuznetsov