git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@40 d817d54c-26ab-11de-abc9-2f7d1455ff7a

This commit is contained in:
alexander.a.kuznetsov 2009-08-13 06:25:41 +00:00
parent 422f5c3e44
commit d58c45a78c
4 changed files with 25 additions and 8 deletions

View File

@ -63,7 +63,7 @@ public class HeuristicBuilder {
int cont = 0;
for (Set<SimpleSuffixHeuristic> st : map.values()) {
if (cont > 20) break;
if (cont > 50) break;
if (st.size() < 3) {
System.out.println(st);
cont++;

View File

@ -75,9 +75,13 @@ public class HeuristicBySuffixLegth {
SimpleSuffixHeuristic heuristic = sshs.iterator().next();
String normalSuffix = heuristic.getNormalSuffix();
Integer suffixLenght = heuristic.getActualSuffixLength();
String normalFormMorphInfo = heuristic.getNormalFormMorphInfo();
Boolean result = true;
for (SimpleSuffixHeuristic ssh : sshs) {
result = result && ssh.getActualSuffixLength().equals(suffixLenght) && ssh.getNormalSuffix().endsWith(normalSuffix);
result = result &&
ssh.getActualSuffixLength().equals(suffixLenght) &&
ssh.getNormalSuffix().equals(normalSuffix) &&
ssh.getNormalFormMorphInfo().equals(normalFormMorphInfo);
}
return result;
}

View File

@ -27,12 +27,14 @@ public class SimpleSuffixHeuristic {
private Integer actualSuffixLength;
private String normalSuffix;
private String morphInfoCode;
private String normalFormMorphInfo;
public SimpleSuffixHeuristic(String formSuffix, Integer actualSuffixLength, String normalSuffix, String morphInfoCode) {
public SimpleSuffixHeuristic(String formSuffix, Integer actualSuffixLength, String normalSuffix, String morphInfoCode, String normalFormMorphInfo) {
this.formSuffix = formSuffix;
this.actualSuffixLength = actualSuffixLength;
this.normalSuffix = normalSuffix;
this.morphInfoCode = morphInfoCode;
this.normalFormMorphInfo = normalFormMorphInfo;
}
public String getFormSuffix() {
@ -51,6 +53,14 @@ public class SimpleSuffixHeuristic {
return morphInfoCode;
}
/**
 * Returns the value currently stored in the {@code normalFormMorphInfo} field.
 *
 * @return the stored normal-form morphological info string; may be {@code null}
 *         if {@code null} was supplied to the constructor or setter
 */
public String getNormalFormMorphInfo() {
return normalFormMorphInfo;
}
/**
 * Replaces the value stored in the {@code normalFormMorphInfo} field.
 *
 * <p>NOTE(review): this field participates in {@code equals} and {@code hashCode},
 * and instances appear to be used as lookup keys elsewhere; changing it after the
 * object has been placed in a hash-based collection would change its hash — confirm
 * callers only invoke this before such use.
 *
 * @param normalFormMorphInfo the new value to store; no validation is performed
 */
public void setNormalFormMorphInfo(String normalFormMorphInfo) {
this.normalFormMorphInfo = normalFormMorphInfo;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
@ -63,7 +73,8 @@ public class SimpleSuffixHeuristic {
if (formSuffix != null ? !formSuffix.equals(that.formSuffix) : that.formSuffix != null) return false;
if (morphInfoCode != null ? !morphInfoCode.equals(that.morphInfoCode) : that.morphInfoCode != null)
return false;
if (normalSuffix != null ? !normalSuffix.equals(that.normalSuffix) : that.normalSuffix != null)
if (normalSuffix != null ? !normalSuffix.equals(that.normalSuffix) : that.normalSuffix != null) return false;
if (normalFormMorphInfo != null ? !normalFormMorphInfo.equals(that.normalFormMorphInfo) : that.normalFormMorphInfo != null)
return false;
return true;
@ -75,11 +86,12 @@ public class SimpleSuffixHeuristic {
result = 31 * result + (actualSuffixLength != null ? actualSuffixLength.hashCode() : 0);
result = 31 * result + (normalSuffix != null ? normalSuffix.hashCode() : 0);
result = 31 * result + (morphInfoCode != null ? morphInfoCode.hashCode() : 0);
result = 31 * result + (normalFormMorphInfo != null ? normalFormMorphInfo.hashCode() : 0);
return result;
}
@Override
public String toString() {
return formSuffix + " " + actualSuffixLength + " " + normalSuffix + " " + morphInfoCode;
return formSuffix + " " + actualSuffixLength + " " + normalSuffix + " " + morphInfoCode + " nf " + normalFormMorphInfo;
}
}

View File

@ -39,8 +39,9 @@ public class StatiticsCollectors implements WordProccessor {
private Integer ignoredCount = 0;
public void proccess(WordCard wordCard) {
String normalStringMorph = wordCard.getWordsFroms().get(0).getCode();
for (FlexiaModel fm : wordCard.getWordsFroms()) {
SimpleSuffixHeuristic simpleSuffixHeuristic = createEvristic(wordCard.getBase(), wordCard.getCanonicalSuffix(), fm);
SimpleSuffixHeuristic simpleSuffixHeuristic = createEvristic(wordCard.getBase(), wordCard.getCanonicalSuffix(), fm, normalStringMorph);
if (simpleSuffixHeuristic == null) continue;
SuffixCounter suffixCounter = statititics.get(simpleSuffixHeuristic);
if (suffixCounter == null) {
@ -61,13 +62,13 @@ public class StatiticsCollectors implements WordProccessor {
return statititics;
}
private SimpleSuffixHeuristic createEvristic(String wordBase, String canonicalSuffix, FlexiaModel fm) {
private SimpleSuffixHeuristic createEvristic(String wordBase, String canonicalSuffix, FlexiaModel fm, String normalSuffixForm) {
String form = fm.create(wordBase);
int startSymbol = form.length() > RussianSuffixDecoderEncoder.suffixLength ? form.length() - RussianSuffixDecoderEncoder.suffixLength : 0;
String formSuffix = form.substring(startSymbol);
String actualSuffix = fm.getSuffix();
Integer actualSuffixLengh = actualSuffix.length();
return new SimpleSuffixHeuristic(formSuffix, actualSuffixLengh, canonicalSuffix, fm.getCode());
return new SimpleSuffixHeuristic(formSuffix, actualSuffixLengh, canonicalSuffix, fm.getCode(), normalSuffixForm);
}