fixed bug with statistics

git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@39 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
alexander.a.kuznetsov 2009-08-11 21:16:02 +00:00
parent 70842ecfb7
commit 422f5c3e44
2 changed files with 44 additions and 29 deletions

View File

@ -18,12 +18,15 @@ package org.apache.lucene.russian.morphology;
import org.apache.lucene.russian.morphology.dictonary.*; import org.apache.lucene.russian.morphology.dictonary.*;
import org.apache.lucene.russian.morphology.heuristic.HeuristicBySuffixLegth; import org.apache.lucene.russian.morphology.heuristic.HeuristicBySuffixLegth;
import org.apache.lucene.russian.morphology.heuristic.SimpleSuffixHeuristic;
import org.apache.lucene.russian.morphology.heuristic.StatiticsCollectors; import org.apache.lucene.russian.morphology.heuristic.StatiticsCollectors;
import org.apache.lucene.russian.morphology.heuristic.SuffixCounter; import org.apache.lucene.russian.morphology.heuristic.SuffixCounter;
import org.apache.lucene.russian.morphology.heuristic.SimpleSuffixHeuristic;
import java.io.IOException; import java.io.IOException;
import java.util.*; import java.util.Arrays;
import java.util.Collection;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicLong;
@ -52,8 +55,20 @@ public class HeuristicBuilder {
heuristic.addHeuristic(((SuffixCounter) objects[i]).getSuffixHeuristic()); heuristic.addHeuristic(((SuffixCounter) objects[i]).getSuffixHeuristic());
} }
System.out.println("Single suffix " + heuristic.getSingleSuffixes().size());
System.out.println("diffiren morgh " + heuristic.getWordWithMorphology().size());
System.out.println("Ononims " + heuristic.getOnonyms().size());
final Map<Long, Set<SimpleSuffixHeuristic>> map = heuristic.getUnkowns(); final Map<Long, Set<SimpleSuffixHeuristic>> map = heuristic.getUnkowns();
System.out.println("Unknow suffix " + map.size());
int cont = 0;
for (Set<SimpleSuffixHeuristic> st : map.values()) {
if (cont > 20) break;
if (st.size() < 3) {
System.out.println(st);
cont++;
}
}
//final RussianSuffixDecoderEncoder decoderEncoder = new RussianSuffixDecoderEncoder(6); //final RussianSuffixDecoderEncoder decoderEncoder = new RussianSuffixDecoderEncoder(6);
final AtomicLong c = new AtomicLong(0L); final AtomicLong c = new AtomicLong(0L);
final AtomicLong all = new AtomicLong(0L); final AtomicLong all = new AtomicLong(0L);

View File

@ -73,7 +73,7 @@ public class HeuristicBySuffixLegth {
private Boolean checkSetOnSuffix(Set<SimpleSuffixHeuristic> sshs) { private Boolean checkSetOnSuffix(Set<SimpleSuffixHeuristic> sshs) {
SimpleSuffixHeuristic heuristic = sshs.iterator().next(); SimpleSuffixHeuristic heuristic = sshs.iterator().next();
String normalSuffix = heuristic.getFormSuffix(); String normalSuffix = heuristic.getNormalSuffix();
Integer suffixLenght = heuristic.getActualSuffixLength(); Integer suffixLenght = heuristic.getActualSuffixLength();
Boolean result = true; Boolean result = true;
for (SimpleSuffixHeuristic ssh : sshs) { for (SimpleSuffixHeuristic ssh : sshs) {