fixed bug with statistics
git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@39 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
parent
70842ecfb7
commit
422f5c3e44
@ -18,12 +18,15 @@ package org.apache.lucene.russian.morphology;
|
|||||||
|
|
||||||
import org.apache.lucene.russian.morphology.dictonary.*;
|
import org.apache.lucene.russian.morphology.dictonary.*;
|
||||||
import org.apache.lucene.russian.morphology.heuristic.HeuristicBySuffixLegth;
|
import org.apache.lucene.russian.morphology.heuristic.HeuristicBySuffixLegth;
|
||||||
|
import org.apache.lucene.russian.morphology.heuristic.SimpleSuffixHeuristic;
|
||||||
import org.apache.lucene.russian.morphology.heuristic.StatiticsCollectors;
|
import org.apache.lucene.russian.morphology.heuristic.StatiticsCollectors;
|
||||||
import org.apache.lucene.russian.morphology.heuristic.SuffixCounter;
|
import org.apache.lucene.russian.morphology.heuristic.SuffixCounter;
|
||||||
import org.apache.lucene.russian.morphology.heuristic.SimpleSuffixHeuristic;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.*;
|
import java.util.Arrays;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
import java.util.concurrent.atomic.AtomicLong;
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
|
|
||||||
|
|
||||||
@ -52,13 +55,25 @@ public class HeuristicBuilder {
|
|||||||
heuristic.addHeuristic(((SuffixCounter) objects[i]).getSuffixHeuristic());
|
heuristic.addHeuristic(((SuffixCounter) objects[i]).getSuffixHeuristic());
|
||||||
}
|
}
|
||||||
|
|
||||||
final Map<Long,Set<SimpleSuffixHeuristic>> map = heuristic.getUnkowns();
|
System.out.println("Single suffix " + heuristic.getSingleSuffixes().size());
|
||||||
|
System.out.println("diffiren morgh " + heuristic.getWordWithMorphology().size());
|
||||||
|
System.out.println("Ononims " + heuristic.getOnonyms().size());
|
||||||
|
final Map<Long, Set<SimpleSuffixHeuristic>> map = heuristic.getUnkowns();
|
||||||
|
System.out.println("Unknow suffix " + map.size());
|
||||||
|
int cont = 0;
|
||||||
|
for (Set<SimpleSuffixHeuristic> st : map.values()) {
|
||||||
|
|
||||||
|
if (cont > 20) break;
|
||||||
|
if (st.size() < 3) {
|
||||||
|
System.out.println(st);
|
||||||
|
cont++;
|
||||||
|
}
|
||||||
|
}
|
||||||
//final RussianSuffixDecoderEncoder decoderEncoder = new RussianSuffixDecoderEncoder(6);
|
//final RussianSuffixDecoderEncoder decoderEncoder = new RussianSuffixDecoderEncoder(6);
|
||||||
final AtomicLong c = new AtomicLong(0L);
|
final AtomicLong c = new AtomicLong(0L);
|
||||||
final AtomicLong all = new AtomicLong(0L);
|
final AtomicLong all = new AtomicLong(0L);
|
||||||
dictonaryReader.proccess(
|
dictonaryReader.proccess(
|
||||||
new WordProccessor(){
|
new WordProccessor() {
|
||||||
public void proccess(WordCard wordCard) throws IOException {
|
public void proccess(WordCard wordCard) throws IOException {
|
||||||
for (FlexiaModel fm : wordCard.getWordsFroms()) {
|
for (FlexiaModel fm : wordCard.getWordsFroms()) {
|
||||||
String form = fm.create(wordCard.getBase());
|
String form = fm.create(wordCard.getBase());
|
||||||
@ -66,7 +81,7 @@ public class HeuristicBuilder {
|
|||||||
String formSuffix = form.substring(startSymbol);
|
String formSuffix = form.substring(startSymbol);
|
||||||
Long aLong = RussianSuffixDecoderEncoder.encode(formSuffix);
|
Long aLong = RussianSuffixDecoderEncoder.encode(formSuffix);
|
||||||
all.incrementAndGet();
|
all.incrementAndGet();
|
||||||
if(map.containsKey(aLong)) c.incrementAndGet();
|
if (map.containsKey(aLong)) c.incrementAndGet();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -25,47 +25,47 @@ public class HeuristicBySuffixLegth {
|
|||||||
return heuristics;
|
return heuristics;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Map<Long,SimpleSuffixHeuristic> getSingleSuffixes(){
|
public Map<Long, SimpleSuffixHeuristic> getSingleSuffixes() {
|
||||||
HashMap<Long, SimpleSuffixHeuristic> result = new HashMap<Long, SimpleSuffixHeuristic>();
|
HashMap<Long, SimpleSuffixHeuristic> result = new HashMap<Long, SimpleSuffixHeuristic>();
|
||||||
for(Long st:heuristics.keySet()){
|
for (Long st : heuristics.keySet()) {
|
||||||
if(heuristics.get(st).size() == 1){
|
if (heuristics.get(st).size() == 1) {
|
||||||
result.put(st,heuristics.get(st).iterator().next());
|
result.put(st, heuristics.get(st).iterator().next());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public Map<Long,Set<SimpleSuffixHeuristic>> getWordWithMorphology(){
|
public Map<Long, Set<SimpleSuffixHeuristic>> getWordWithMorphology() {
|
||||||
HashMap<Long, Set<SimpleSuffixHeuristic>> result = new HashMap<Long, Set<SimpleSuffixHeuristic>>();
|
HashMap<Long, Set<SimpleSuffixHeuristic>> result = new HashMap<Long, Set<SimpleSuffixHeuristic>>();
|
||||||
for(Long st:heuristics.keySet()){
|
for (Long st : heuristics.keySet()) {
|
||||||
if(heuristics.get(st).size() == 1) continue;
|
if (heuristics.get(st).size() == 1) continue;
|
||||||
if(checkSetOnSuffix(heuristics.get(st))) {
|
if (checkSetOnSuffix(heuristics.get(st))) {
|
||||||
result.put(st,heuristics.get(st));
|
result.put(st, heuristics.get(st));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Map<Long,Set<SimpleSuffixHeuristic>> getOnonyms(){
|
public Map<Long, Set<SimpleSuffixHeuristic>> getOnonyms() {
|
||||||
HashMap<Long, Set<SimpleSuffixHeuristic>> result = new HashMap<Long, Set<SimpleSuffixHeuristic>>();
|
HashMap<Long, Set<SimpleSuffixHeuristic>> result = new HashMap<Long, Set<SimpleSuffixHeuristic>>();
|
||||||
for(Long st:heuristics.keySet()){
|
for (Long st : heuristics.keySet()) {
|
||||||
if(heuristics.get(st).size() == 1) continue;
|
if (heuristics.get(st).size() == 1) continue;
|
||||||
if(checkSetOnSuffix(heuristics.get(st))) continue;
|
if (checkSetOnSuffix(heuristics.get(st))) continue;
|
||||||
if(heuristics.get(st).iterator().next().getFormSuffix().length() < 6){
|
if (heuristics.get(st).iterator().next().getFormSuffix().length() < 6) {
|
||||||
result.put(st,heuristics.get(st));
|
result.put(st, heuristics.get(st));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Map<Long,Set<SimpleSuffixHeuristic>> getUnkowns(){
|
public Map<Long, Set<SimpleSuffixHeuristic>> getUnkowns() {
|
||||||
HashMap<Long, Set<SimpleSuffixHeuristic>> result = new HashMap<Long, Set<SimpleSuffixHeuristic>>();
|
HashMap<Long, Set<SimpleSuffixHeuristic>> result = new HashMap<Long, Set<SimpleSuffixHeuristic>>();
|
||||||
for(Long st:heuristics.keySet()){
|
for (Long st : heuristics.keySet()) {
|
||||||
if(heuristics.get(st).size() == 1) continue;
|
if (heuristics.get(st).size() == 1) continue;
|
||||||
if(checkSetOnSuffix(heuristics.get(st))) continue;
|
if (checkSetOnSuffix(heuristics.get(st))) continue;
|
||||||
if(heuristics.get(st).iterator().next().getFormSuffix().length() >= 6){
|
if (heuristics.get(st).iterator().next().getFormSuffix().length() >= 6) {
|
||||||
result.put(st,heuristics.get(st));
|
result.put(st, heuristics.get(st));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
@ -73,10 +73,10 @@ public class HeuristicBySuffixLegth {
|
|||||||
|
|
||||||
private Boolean checkSetOnSuffix(Set<SimpleSuffixHeuristic> sshs) {
|
private Boolean checkSetOnSuffix(Set<SimpleSuffixHeuristic> sshs) {
|
||||||
SimpleSuffixHeuristic heuristic = sshs.iterator().next();
|
SimpleSuffixHeuristic heuristic = sshs.iterator().next();
|
||||||
String normalSuffix = heuristic.getFormSuffix();
|
String normalSuffix = heuristic.getNormalSuffix();
|
||||||
Integer suffixLenght = heuristic.getActualSuffixLength();
|
Integer suffixLenght = heuristic.getActualSuffixLength();
|
||||||
Boolean result = true;
|
Boolean result = true;
|
||||||
for(SimpleSuffixHeuristic ssh:sshs){
|
for (SimpleSuffixHeuristic ssh : sshs) {
|
||||||
result = result && ssh.getActualSuffixLength().equals(suffixLenght) && ssh.getNormalSuffix().endsWith(normalSuffix);
|
result = result && ssh.getActualSuffixLength().equals(suffixLenght) && ssh.getNormalSuffix().endsWith(normalSuffix);
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user