fixed bug with statistics
git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@39 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
		| @@ -18,12 +18,15 @@ package org.apache.lucene.russian.morphology; | ||||
|  | ||||
| import org.apache.lucene.russian.morphology.dictonary.*; | ||||
| import org.apache.lucene.russian.morphology.heuristic.HeuristicBySuffixLegth; | ||||
| import org.apache.lucene.russian.morphology.heuristic.SimpleSuffixHeuristic; | ||||
| import org.apache.lucene.russian.morphology.heuristic.StatiticsCollectors; | ||||
| import org.apache.lucene.russian.morphology.heuristic.SuffixCounter; | ||||
| import org.apache.lucene.russian.morphology.heuristic.SimpleSuffixHeuristic; | ||||
|  | ||||
| import java.io.IOException; | ||||
| import java.util.*; | ||||
| import java.util.Arrays; | ||||
| import java.util.Collection; | ||||
| import java.util.Map; | ||||
| import java.util.Set; | ||||
| import java.util.concurrent.atomic.AtomicLong; | ||||
|  | ||||
|  | ||||
| @@ -52,13 +55,25 @@ public class HeuristicBuilder { | ||||
|             heuristic.addHeuristic(((SuffixCounter) objects[i]).getSuffixHeuristic()); | ||||
|         } | ||||
|  | ||||
|         final Map<Long,Set<SimpleSuffixHeuristic>> map = heuristic.getUnkowns(); | ||||
|         System.out.println("Single suffix " + heuristic.getSingleSuffixes().size()); | ||||
|         System.out.println("diffiren morgh " + heuristic.getWordWithMorphology().size()); | ||||
|         System.out.println("Ononims " + heuristic.getOnonyms().size()); | ||||
|         final Map<Long, Set<SimpleSuffixHeuristic>> map = heuristic.getUnkowns(); | ||||
|         System.out.println("Unknow suffix " + map.size()); | ||||
|         int cont = 0; | ||||
|         for (Set<SimpleSuffixHeuristic> st : map.values()) { | ||||
|  | ||||
|             if (cont > 20) break; | ||||
|             if (st.size() < 3) { | ||||
|                 System.out.println(st); | ||||
|                 cont++; | ||||
|             } | ||||
|         } | ||||
|         //final RussianSuffixDecoderEncoder decoderEncoder = new RussianSuffixDecoderEncoder(6); | ||||
|         final AtomicLong c = new AtomicLong(0L); | ||||
|         final AtomicLong all  = new AtomicLong(0L); | ||||
|         final AtomicLong all = new AtomicLong(0L); | ||||
|         dictonaryReader.proccess( | ||||
|                 new WordProccessor(){ | ||||
|                 new WordProccessor() { | ||||
|                     public void proccess(WordCard wordCard) throws IOException { | ||||
|                         for (FlexiaModel fm : wordCard.getWordsFroms()) { | ||||
|                             String form = fm.create(wordCard.getBase()); | ||||
| @@ -66,7 +81,7 @@ public class HeuristicBuilder { | ||||
|                             String formSuffix = form.substring(startSymbol); | ||||
|                             Long aLong = RussianSuffixDecoderEncoder.encode(formSuffix); | ||||
|                             all.incrementAndGet(); | ||||
|                             if(map.containsKey(aLong)) c.incrementAndGet(); | ||||
|                             if (map.containsKey(aLong)) c.incrementAndGet(); | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|   | ||||
| @@ -25,47 +25,47 @@ public class HeuristicBySuffixLegth { | ||||
|         return heuristics; | ||||
|     } | ||||
|  | ||||
|     public Map<Long,SimpleSuffixHeuristic> getSingleSuffixes(){ | ||||
|     public Map<Long, SimpleSuffixHeuristic> getSingleSuffixes() { | ||||
|         HashMap<Long, SimpleSuffixHeuristic> result = new HashMap<Long, SimpleSuffixHeuristic>(); | ||||
|         for(Long st:heuristics.keySet()){ | ||||
|             if(heuristics.get(st).size() == 1){ | ||||
|                 result.put(st,heuristics.get(st).iterator().next()); | ||||
|         for (Long st : heuristics.keySet()) { | ||||
|             if (heuristics.get(st).size() == 1) { | ||||
|                 result.put(st, heuristics.get(st).iterator().next()); | ||||
|             } | ||||
|         } | ||||
|         return result; | ||||
|     } | ||||
|  | ||||
|  | ||||
|     public Map<Long,Set<SimpleSuffixHeuristic>> getWordWithMorphology(){ | ||||
|     public Map<Long, Set<SimpleSuffixHeuristic>> getWordWithMorphology() { | ||||
|         HashMap<Long, Set<SimpleSuffixHeuristic>> result = new HashMap<Long, Set<SimpleSuffixHeuristic>>(); | ||||
|         for(Long st:heuristics.keySet()){ | ||||
|             if(heuristics.get(st).size() == 1) continue; | ||||
|             if(checkSetOnSuffix(heuristics.get(st))) { | ||||
|                 result.put(st,heuristics.get(st)); | ||||
|         for (Long st : heuristics.keySet()) { | ||||
|             if (heuristics.get(st).size() == 1) continue; | ||||
|             if (checkSetOnSuffix(heuristics.get(st))) { | ||||
|                 result.put(st, heuristics.get(st)); | ||||
|             } | ||||
|         } | ||||
|         return result; | ||||
|     } | ||||
|  | ||||
|     public Map<Long,Set<SimpleSuffixHeuristic>> getOnonyms(){ | ||||
|     public Map<Long, Set<SimpleSuffixHeuristic>> getOnonyms() { | ||||
|         HashMap<Long, Set<SimpleSuffixHeuristic>> result = new HashMap<Long, Set<SimpleSuffixHeuristic>>(); | ||||
|         for(Long st:heuristics.keySet()){ | ||||
|             if(heuristics.get(st).size() == 1) continue; | ||||
|             if(checkSetOnSuffix(heuristics.get(st))) continue; | ||||
|             if(heuristics.get(st).iterator().next().getFormSuffix().length() < 6){ | ||||
|                 result.put(st,heuristics.get(st)); | ||||
|         for (Long st : heuristics.keySet()) { | ||||
|             if (heuristics.get(st).size() == 1) continue; | ||||
|             if (checkSetOnSuffix(heuristics.get(st))) continue; | ||||
|             if (heuristics.get(st).iterator().next().getFormSuffix().length() < 6) { | ||||
|                 result.put(st, heuristics.get(st)); | ||||
|             } | ||||
|         } | ||||
|         return result; | ||||
|     } | ||||
|  | ||||
|     public Map<Long,Set<SimpleSuffixHeuristic>> getUnkowns(){ | ||||
|     public Map<Long, Set<SimpleSuffixHeuristic>> getUnkowns() { | ||||
|         HashMap<Long, Set<SimpleSuffixHeuristic>> result = new HashMap<Long, Set<SimpleSuffixHeuristic>>(); | ||||
|         for(Long st:heuristics.keySet()){ | ||||
|             if(heuristics.get(st).size() == 1) continue; | ||||
|             if(checkSetOnSuffix(heuristics.get(st))) continue; | ||||
|             if(heuristics.get(st).iterator().next().getFormSuffix().length() >= 6){ | ||||
|                 result.put(st,heuristics.get(st)); | ||||
|         for (Long st : heuristics.keySet()) { | ||||
|             if (heuristics.get(st).size() == 1) continue; | ||||
|             if (checkSetOnSuffix(heuristics.get(st))) continue; | ||||
|             if (heuristics.get(st).iterator().next().getFormSuffix().length() >= 6) { | ||||
|                 result.put(st, heuristics.get(st)); | ||||
|             } | ||||
|         } | ||||
|         return result; | ||||
| @@ -73,10 +73,10 @@ public class HeuristicBySuffixLegth { | ||||
|  | ||||
|     private Boolean checkSetOnSuffix(Set<SimpleSuffixHeuristic> sshs) { | ||||
|         SimpleSuffixHeuristic heuristic = sshs.iterator().next(); | ||||
|         String normalSuffix = heuristic.getFormSuffix(); | ||||
|         String normalSuffix = heuristic.getNormalSuffix(); | ||||
|         Integer suffixLenght = heuristic.getActualSuffixLength(); | ||||
|         Boolean result = true; | ||||
|         for(SimpleSuffixHeuristic ssh:sshs){ | ||||
|         for (SimpleSuffixHeuristic ssh : sshs) { | ||||
|             result = result && ssh.getActualSuffixLength().equals(suffixLenght) && ssh.getNormalSuffix().endsWith(normalSuffix); | ||||
|         } | ||||
|         return result; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 alexander.a.kuznetsov
					alexander.a.kuznetsov