fixed bug with statistics
git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@39 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
parent
70842ecfb7
commit
422f5c3e44
@ -18,12 +18,15 @@ package org.apache.lucene.russian.morphology;
|
|||||||
|
|
||||||
import org.apache.lucene.russian.morphology.dictonary.*;
|
import org.apache.lucene.russian.morphology.dictonary.*;
|
||||||
import org.apache.lucene.russian.morphology.heuristic.HeuristicBySuffixLegth;
|
import org.apache.lucene.russian.morphology.heuristic.HeuristicBySuffixLegth;
|
||||||
|
import org.apache.lucene.russian.morphology.heuristic.SimpleSuffixHeuristic;
|
||||||
import org.apache.lucene.russian.morphology.heuristic.StatiticsCollectors;
|
import org.apache.lucene.russian.morphology.heuristic.StatiticsCollectors;
|
||||||
import org.apache.lucene.russian.morphology.heuristic.SuffixCounter;
|
import org.apache.lucene.russian.morphology.heuristic.SuffixCounter;
|
||||||
import org.apache.lucene.russian.morphology.heuristic.SimpleSuffixHeuristic;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.*;
|
import java.util.Arrays;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
import java.util.concurrent.atomic.AtomicLong;
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
|
|
||||||
|
|
||||||
@ -52,8 +55,20 @@ public class HeuristicBuilder {
|
|||||||
heuristic.addHeuristic(((SuffixCounter) objects[i]).getSuffixHeuristic());
|
heuristic.addHeuristic(((SuffixCounter) objects[i]).getSuffixHeuristic());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
System.out.println("Single suffix " + heuristic.getSingleSuffixes().size());
|
||||||
|
System.out.println("diffiren morgh " + heuristic.getWordWithMorphology().size());
|
||||||
|
System.out.println("Ononims " + heuristic.getOnonyms().size());
|
||||||
final Map<Long, Set<SimpleSuffixHeuristic>> map = heuristic.getUnkowns();
|
final Map<Long, Set<SimpleSuffixHeuristic>> map = heuristic.getUnkowns();
|
||||||
|
System.out.println("Unknow suffix " + map.size());
|
||||||
|
int cont = 0;
|
||||||
|
for (Set<SimpleSuffixHeuristic> st : map.values()) {
|
||||||
|
|
||||||
|
if (cont > 20) break;
|
||||||
|
if (st.size() < 3) {
|
||||||
|
System.out.println(st);
|
||||||
|
cont++;
|
||||||
|
}
|
||||||
|
}
|
||||||
//final RussianSuffixDecoderEncoder decoderEncoder = new RussianSuffixDecoderEncoder(6);
|
//final RussianSuffixDecoderEncoder decoderEncoder = new RussianSuffixDecoderEncoder(6);
|
||||||
final AtomicLong c = new AtomicLong(0L);
|
final AtomicLong c = new AtomicLong(0L);
|
||||||
final AtomicLong all = new AtomicLong(0L);
|
final AtomicLong all = new AtomicLong(0L);
|
||||||
|
@ -73,7 +73,7 @@ public class HeuristicBySuffixLegth {
|
|||||||
|
|
||||||
private Boolean checkSetOnSuffix(Set<SimpleSuffixHeuristic> sshs) {
|
private Boolean checkSetOnSuffix(Set<SimpleSuffixHeuristic> sshs) {
|
||||||
SimpleSuffixHeuristic heuristic = sshs.iterator().next();
|
SimpleSuffixHeuristic heuristic = sshs.iterator().next();
|
||||||
String normalSuffix = heuristic.getFormSuffix();
|
String normalSuffix = heuristic.getNormalSuffix();
|
||||||
Integer suffixLenght = heuristic.getActualSuffixLength();
|
Integer suffixLenght = heuristic.getActualSuffixLength();
|
||||||
Boolean result = true;
|
Boolean result = true;
|
||||||
for (SimpleSuffixHeuristic ssh : sshs) {
|
for (SimpleSuffixHeuristic ssh : sshs) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user