fixed bug with statistics
git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@39 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
		@@ -18,12 +18,15 @@ package org.apache.lucene.russian.morphology;
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
import org.apache.lucene.russian.morphology.dictonary.*;
 | 
					import org.apache.lucene.russian.morphology.dictonary.*;
 | 
				
			||||||
import org.apache.lucene.russian.morphology.heuristic.HeuristicBySuffixLegth;
 | 
					import org.apache.lucene.russian.morphology.heuristic.HeuristicBySuffixLegth;
 | 
				
			||||||
 | 
					import org.apache.lucene.russian.morphology.heuristic.SimpleSuffixHeuristic;
 | 
				
			||||||
import org.apache.lucene.russian.morphology.heuristic.StatiticsCollectors;
 | 
					import org.apache.lucene.russian.morphology.heuristic.StatiticsCollectors;
 | 
				
			||||||
import org.apache.lucene.russian.morphology.heuristic.SuffixCounter;
 | 
					import org.apache.lucene.russian.morphology.heuristic.SuffixCounter;
 | 
				
			||||||
import org.apache.lucene.russian.morphology.heuristic.SimpleSuffixHeuristic;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
import java.io.IOException;
 | 
					import java.io.IOException;
 | 
				
			||||||
import java.util.*;
 | 
					import java.util.Arrays;
 | 
				
			||||||
 | 
					import java.util.Collection;
 | 
				
			||||||
 | 
					import java.util.Map;
 | 
				
			||||||
 | 
					import java.util.Set;
 | 
				
			||||||
import java.util.concurrent.atomic.AtomicLong;
 | 
					import java.util.concurrent.atomic.AtomicLong;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -52,13 +55,25 @@ public class HeuristicBuilder {
 | 
				
			|||||||
            heuristic.addHeuristic(((SuffixCounter) objects[i]).getSuffixHeuristic());
 | 
					            heuristic.addHeuristic(((SuffixCounter) objects[i]).getSuffixHeuristic());
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        final Map<Long,Set<SimpleSuffixHeuristic>> map = heuristic.getUnkowns();
 | 
					        System.out.println("Single suffix " + heuristic.getSingleSuffixes().size());
 | 
				
			||||||
 | 
					        System.out.println("diffiren morgh " + heuristic.getWordWithMorphology().size());
 | 
				
			||||||
 | 
					        System.out.println("Ononims " + heuristic.getOnonyms().size());
 | 
				
			||||||
 | 
					        final Map<Long, Set<SimpleSuffixHeuristic>> map = heuristic.getUnkowns();
 | 
				
			||||||
 | 
					        System.out.println("Unknow suffix " + map.size());
 | 
				
			||||||
 | 
					        int cont = 0;
 | 
				
			||||||
 | 
					        for (Set<SimpleSuffixHeuristic> st : map.values()) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            if (cont > 20) break;
 | 
				
			||||||
 | 
					            if (st.size() < 3) {
 | 
				
			||||||
 | 
					                System.out.println(st);
 | 
				
			||||||
 | 
					                cont++;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
        //final RussianSuffixDecoderEncoder decoderEncoder = new RussianSuffixDecoderEncoder(6);
 | 
					        //final RussianSuffixDecoderEncoder decoderEncoder = new RussianSuffixDecoderEncoder(6);
 | 
				
			||||||
        final AtomicLong c = new AtomicLong(0L);
 | 
					        final AtomicLong c = new AtomicLong(0L);
 | 
				
			||||||
        final AtomicLong all  = new AtomicLong(0L);
 | 
					        final AtomicLong all = new AtomicLong(0L);
 | 
				
			||||||
        dictonaryReader.proccess(
 | 
					        dictonaryReader.proccess(
 | 
				
			||||||
                new WordProccessor(){
 | 
					                new WordProccessor() {
 | 
				
			||||||
                    public void proccess(WordCard wordCard) throws IOException {
 | 
					                    public void proccess(WordCard wordCard) throws IOException {
 | 
				
			||||||
                        for (FlexiaModel fm : wordCard.getWordsFroms()) {
 | 
					                        for (FlexiaModel fm : wordCard.getWordsFroms()) {
 | 
				
			||||||
                            String form = fm.create(wordCard.getBase());
 | 
					                            String form = fm.create(wordCard.getBase());
 | 
				
			||||||
@@ -66,7 +81,7 @@ public class HeuristicBuilder {
 | 
				
			|||||||
                            String formSuffix = form.substring(startSymbol);
 | 
					                            String formSuffix = form.substring(startSymbol);
 | 
				
			||||||
                            Long aLong = RussianSuffixDecoderEncoder.encode(formSuffix);
 | 
					                            Long aLong = RussianSuffixDecoderEncoder.encode(formSuffix);
 | 
				
			||||||
                            all.incrementAndGet();
 | 
					                            all.incrementAndGet();
 | 
				
			||||||
                            if(map.containsKey(aLong)) c.incrementAndGet();
 | 
					                            if (map.containsKey(aLong)) c.incrementAndGet();
 | 
				
			||||||
                        }
 | 
					                        }
 | 
				
			||||||
                    }
 | 
					                    }
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -25,47 +25,47 @@ public class HeuristicBySuffixLegth {
 | 
				
			|||||||
        return heuristics;
 | 
					        return heuristics;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    public Map<Long,SimpleSuffixHeuristic> getSingleSuffixes(){
 | 
					    public Map<Long, SimpleSuffixHeuristic> getSingleSuffixes() {
 | 
				
			||||||
        HashMap<Long, SimpleSuffixHeuristic> result = new HashMap<Long, SimpleSuffixHeuristic>();
 | 
					        HashMap<Long, SimpleSuffixHeuristic> result = new HashMap<Long, SimpleSuffixHeuristic>();
 | 
				
			||||||
        for(Long st:heuristics.keySet()){
 | 
					        for (Long st : heuristics.keySet()) {
 | 
				
			||||||
            if(heuristics.get(st).size() == 1){
 | 
					            if (heuristics.get(st).size() == 1) {
 | 
				
			||||||
                result.put(st,heuristics.get(st).iterator().next());
 | 
					                result.put(st, heuristics.get(st).iterator().next());
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        return result;
 | 
					        return result;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    public Map<Long,Set<SimpleSuffixHeuristic>> getWordWithMorphology(){
 | 
					    public Map<Long, Set<SimpleSuffixHeuristic>> getWordWithMorphology() {
 | 
				
			||||||
        HashMap<Long, Set<SimpleSuffixHeuristic>> result = new HashMap<Long, Set<SimpleSuffixHeuristic>>();
 | 
					        HashMap<Long, Set<SimpleSuffixHeuristic>> result = new HashMap<Long, Set<SimpleSuffixHeuristic>>();
 | 
				
			||||||
        for(Long st:heuristics.keySet()){
 | 
					        for (Long st : heuristics.keySet()) {
 | 
				
			||||||
            if(heuristics.get(st).size() == 1) continue;
 | 
					            if (heuristics.get(st).size() == 1) continue;
 | 
				
			||||||
            if(checkSetOnSuffix(heuristics.get(st))) {
 | 
					            if (checkSetOnSuffix(heuristics.get(st))) {
 | 
				
			||||||
                result.put(st,heuristics.get(st));
 | 
					                result.put(st, heuristics.get(st));
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        return result;
 | 
					        return result;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    public Map<Long,Set<SimpleSuffixHeuristic>> getOnonyms(){
 | 
					    public Map<Long, Set<SimpleSuffixHeuristic>> getOnonyms() {
 | 
				
			||||||
        HashMap<Long, Set<SimpleSuffixHeuristic>> result = new HashMap<Long, Set<SimpleSuffixHeuristic>>();
 | 
					        HashMap<Long, Set<SimpleSuffixHeuristic>> result = new HashMap<Long, Set<SimpleSuffixHeuristic>>();
 | 
				
			||||||
        for(Long st:heuristics.keySet()){
 | 
					        for (Long st : heuristics.keySet()) {
 | 
				
			||||||
            if(heuristics.get(st).size() == 1) continue;
 | 
					            if (heuristics.get(st).size() == 1) continue;
 | 
				
			||||||
            if(checkSetOnSuffix(heuristics.get(st))) continue;
 | 
					            if (checkSetOnSuffix(heuristics.get(st))) continue;
 | 
				
			||||||
            if(heuristics.get(st).iterator().next().getFormSuffix().length() < 6){
 | 
					            if (heuristics.get(st).iterator().next().getFormSuffix().length() < 6) {
 | 
				
			||||||
                result.put(st,heuristics.get(st));
 | 
					                result.put(st, heuristics.get(st));
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        return result;
 | 
					        return result;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    public Map<Long,Set<SimpleSuffixHeuristic>> getUnkowns(){
 | 
					    public Map<Long, Set<SimpleSuffixHeuristic>> getUnkowns() {
 | 
				
			||||||
        HashMap<Long, Set<SimpleSuffixHeuristic>> result = new HashMap<Long, Set<SimpleSuffixHeuristic>>();
 | 
					        HashMap<Long, Set<SimpleSuffixHeuristic>> result = new HashMap<Long, Set<SimpleSuffixHeuristic>>();
 | 
				
			||||||
        for(Long st:heuristics.keySet()){
 | 
					        for (Long st : heuristics.keySet()) {
 | 
				
			||||||
            if(heuristics.get(st).size() == 1) continue;
 | 
					            if (heuristics.get(st).size() == 1) continue;
 | 
				
			||||||
            if(checkSetOnSuffix(heuristics.get(st))) continue;
 | 
					            if (checkSetOnSuffix(heuristics.get(st))) continue;
 | 
				
			||||||
            if(heuristics.get(st).iterator().next().getFormSuffix().length() >= 6){
 | 
					            if (heuristics.get(st).iterator().next().getFormSuffix().length() >= 6) {
 | 
				
			||||||
                result.put(st,heuristics.get(st));
 | 
					                result.put(st, heuristics.get(st));
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        return result;
 | 
					        return result;
 | 
				
			||||||
@@ -73,10 +73,10 @@ public class HeuristicBySuffixLegth {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    private Boolean checkSetOnSuffix(Set<SimpleSuffixHeuristic> sshs) {
 | 
					    private Boolean checkSetOnSuffix(Set<SimpleSuffixHeuristic> sshs) {
 | 
				
			||||||
        SimpleSuffixHeuristic heuristic = sshs.iterator().next();
 | 
					        SimpleSuffixHeuristic heuristic = sshs.iterator().next();
 | 
				
			||||||
        String normalSuffix = heuristic.getFormSuffix();
 | 
					        String normalSuffix = heuristic.getNormalSuffix();
 | 
				
			||||||
        Integer suffixLenght = heuristic.getActualSuffixLength();
 | 
					        Integer suffixLenght = heuristic.getActualSuffixLength();
 | 
				
			||||||
        Boolean result = true;
 | 
					        Boolean result = true;
 | 
				
			||||||
        for(SimpleSuffixHeuristic ssh:sshs){
 | 
					        for (SimpleSuffixHeuristic ssh : sshs) {
 | 
				
			||||||
            result = result && ssh.getActualSuffixLength().equals(suffixLenght) && ssh.getNormalSuffix().endsWith(normalSuffix);
 | 
					            result = result && ssh.getActualSuffixLength().equals(suffixLenght) && ssh.getNormalSuffix().endsWith(normalSuffix);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
        return result;
 | 
					        return result;
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user