git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@40 d817d54c-26ab-11de-abc9-2f7d1455ff7a

This commit is contained in:
alexander.a.kuznetsov 2009-08-13 06:25:41 +00:00
parent 422f5c3e44
commit d58c45a78c
4 changed files with 25 additions and 8 deletions

View File

@ -63,7 +63,7 @@ public class HeuristicBuilder {
int cont = 0; int cont = 0;
for (Set<SimpleSuffixHeuristic> st : map.values()) { for (Set<SimpleSuffixHeuristic> st : map.values()) {
if (cont > 20) break; if (cont > 50) break;
if (st.size() < 3) { if (st.size() < 3) {
System.out.println(st); System.out.println(st);
cont++; cont++;

View File

@ -75,9 +75,13 @@ public class HeuristicBySuffixLegth {
SimpleSuffixHeuristic heuristic = sshs.iterator().next(); SimpleSuffixHeuristic heuristic = sshs.iterator().next();
String normalSuffix = heuristic.getNormalSuffix(); String normalSuffix = heuristic.getNormalSuffix();
Integer suffixLenght = heuristic.getActualSuffixLength(); Integer suffixLenght = heuristic.getActualSuffixLength();
String normalFormMorphInfo = heuristic.getNormalFormMorphInfo();
Boolean result = true; Boolean result = true;
for (SimpleSuffixHeuristic ssh : sshs) { for (SimpleSuffixHeuristic ssh : sshs) {
result = result && ssh.getActualSuffixLength().equals(suffixLenght) && ssh.getNormalSuffix().endsWith(normalSuffix); result = result &&
ssh.getActualSuffixLength().equals(suffixLenght) &&
ssh.getNormalSuffix().equals(normalSuffix) &&
ssh.getNormalFormMorphInfo().equals(normalFormMorphInfo);
} }
return result; return result;
} }

View File

@ -27,12 +27,14 @@ public class SimpleSuffixHeuristic {
private Integer actualSuffixLength; private Integer actualSuffixLength;
private String normalSuffix; private String normalSuffix;
private String morphInfoCode; private String morphInfoCode;
private String normalFormMorphInfo;
public SimpleSuffixHeuristic(String formSuffix, Integer actualSuffixLength, String normalSuffix, String morphInfoCode) { public SimpleSuffixHeuristic(String formSuffix, Integer actualSuffixLength, String normalSuffix, String morphInfoCode, String normalFormMorphInfo) {
this.formSuffix = formSuffix; this.formSuffix = formSuffix;
this.actualSuffixLength = actualSuffixLength; this.actualSuffixLength = actualSuffixLength;
this.normalSuffix = normalSuffix; this.normalSuffix = normalSuffix;
this.morphInfoCode = morphInfoCode; this.morphInfoCode = morphInfoCode;
this.normalFormMorphInfo = normalFormMorphInfo;
} }
public String getFormSuffix() { public String getFormSuffix() {
@ -51,6 +53,14 @@ public class SimpleSuffixHeuristic {
return morphInfoCode; return morphInfoCode;
} }
public String getNormalFormMorphInfo() {
return normalFormMorphInfo;
}
public void setNormalFormMorphInfo(String normalFormMorphInfo) {
this.normalFormMorphInfo = normalFormMorphInfo;
}
@Override @Override
public boolean equals(Object o) { public boolean equals(Object o) {
if (this == o) return true; if (this == o) return true;
@ -63,7 +73,8 @@ public class SimpleSuffixHeuristic {
if (formSuffix != null ? !formSuffix.equals(that.formSuffix) : that.formSuffix != null) return false; if (formSuffix != null ? !formSuffix.equals(that.formSuffix) : that.formSuffix != null) return false;
if (morphInfoCode != null ? !morphInfoCode.equals(that.morphInfoCode) : that.morphInfoCode != null) if (morphInfoCode != null ? !morphInfoCode.equals(that.morphInfoCode) : that.morphInfoCode != null)
return false; return false;
if (normalSuffix != null ? !normalSuffix.equals(that.normalSuffix) : that.normalSuffix != null) if (normalSuffix != null ? !normalSuffix.equals(that.normalSuffix) : that.normalSuffix != null) return false;
if (normalFormMorphInfo != null ? !normalFormMorphInfo.equals(that.normalFormMorphInfo) : that.normalFormMorphInfo != null)
return false; return false;
return true; return true;
@ -75,11 +86,12 @@ public class SimpleSuffixHeuristic {
result = 31 * result + (actualSuffixLength != null ? actualSuffixLength.hashCode() : 0); result = 31 * result + (actualSuffixLength != null ? actualSuffixLength.hashCode() : 0);
result = 31 * result + (normalSuffix != null ? normalSuffix.hashCode() : 0); result = 31 * result + (normalSuffix != null ? normalSuffix.hashCode() : 0);
result = 31 * result + (morphInfoCode != null ? morphInfoCode.hashCode() : 0); result = 31 * result + (morphInfoCode != null ? morphInfoCode.hashCode() : 0);
result = 31 * result + (normalFormMorphInfo != null ? normalFormMorphInfo.hashCode() : 0);
return result; return result;
} }
@Override @Override
public String toString() { public String toString() {
return formSuffix + " " + actualSuffixLength + " " + normalSuffix + " " + morphInfoCode; return formSuffix + " " + actualSuffixLength + " " + normalSuffix + " " + morphInfoCode + " nf " + normalFormMorphInfo;
} }
} }

View File

@ -39,8 +39,9 @@ public class StatiticsCollectors implements WordProccessor {
private Integer ignoredCount = 0; private Integer ignoredCount = 0;
public void proccess(WordCard wordCard) { public void proccess(WordCard wordCard) {
String normalStringMorph = wordCard.getWordsFroms().get(0).getCode();
for (FlexiaModel fm : wordCard.getWordsFroms()) { for (FlexiaModel fm : wordCard.getWordsFroms()) {
SimpleSuffixHeuristic simpleSuffixHeuristic = createEvristic(wordCard.getBase(), wordCard.getCanonicalSuffix(), fm); SimpleSuffixHeuristic simpleSuffixHeuristic = createEvristic(wordCard.getBase(), wordCard.getCanonicalSuffix(), fm, normalStringMorph);
if (simpleSuffixHeuristic == null) continue; if (simpleSuffixHeuristic == null) continue;
SuffixCounter suffixCounter = statititics.get(simpleSuffixHeuristic); SuffixCounter suffixCounter = statititics.get(simpleSuffixHeuristic);
if (suffixCounter == null) { if (suffixCounter == null) {
@ -61,13 +62,13 @@ public class StatiticsCollectors implements WordProccessor {
return statititics; return statititics;
} }
private SimpleSuffixHeuristic createEvristic(String wordBase, String canonicalSuffix, FlexiaModel fm) { private SimpleSuffixHeuristic createEvristic(String wordBase, String canonicalSuffix, FlexiaModel fm, String normalSuffixForm) {
String form = fm.create(wordBase); String form = fm.create(wordBase);
int startSymbol = form.length() > RussianSuffixDecoderEncoder.suffixLength ? form.length() - RussianSuffixDecoderEncoder.suffixLength : 0; int startSymbol = form.length() > RussianSuffixDecoderEncoder.suffixLength ? form.length() - RussianSuffixDecoderEncoder.suffixLength : 0;
String formSuffix = form.substring(startSymbol); String formSuffix = form.substring(startSymbol);
String actualSuffix = fm.getSuffix(); String actualSuffix = fm.getSuffix();
Integer actualSuffixLengh = actualSuffix.length(); Integer actualSuffixLengh = actualSuffix.length();
return new SimpleSuffixHeuristic(formSuffix, actualSuffixLengh, canonicalSuffix, fm.getCode()); return new SimpleSuffixHeuristic(formSuffix, actualSuffixLengh, canonicalSuffix, fm.getCode(), normalSuffixForm);
} }