git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@40 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
parent
422f5c3e44
commit
d58c45a78c
@ -63,7 +63,7 @@ public class HeuristicBuilder {
|
||||
int cont = 0;
|
||||
for (Set<SimpleSuffixHeuristic> st : map.values()) {
|
||||
|
||||
if (cont > 20) break;
|
||||
if (cont > 50) break;
|
||||
if (st.size() < 3) {
|
||||
System.out.println(st);
|
||||
cont++;
|
||||
|
@ -75,9 +75,13 @@ public class HeuristicBySuffixLegth {
|
||||
SimpleSuffixHeuristic heuristic = sshs.iterator().next();
|
||||
String normalSuffix = heuristic.getNormalSuffix();
|
||||
Integer suffixLenght = heuristic.getActualSuffixLength();
|
||||
String normalFormMorphInfo = heuristic.getNormalFormMorphInfo();
|
||||
Boolean result = true;
|
||||
for (SimpleSuffixHeuristic ssh : sshs) {
|
||||
result = result && ssh.getActualSuffixLength().equals(suffixLenght) && ssh.getNormalSuffix().endsWith(normalSuffix);
|
||||
result = result &&
|
||||
ssh.getActualSuffixLength().equals(suffixLenght) &&
|
||||
ssh.getNormalSuffix().equals(normalSuffix) &&
|
||||
ssh.getNormalFormMorphInfo().equals(normalFormMorphInfo);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
@ -27,12 +27,14 @@ public class SimpleSuffixHeuristic {
|
||||
private Integer actualSuffixLength;
|
||||
private String normalSuffix;
|
||||
private String morphInfoCode;
|
||||
private String normalFormMorphInfo;
|
||||
|
||||
public SimpleSuffixHeuristic(String formSuffix, Integer actualSuffixLength, String normalSuffix, String morphInfoCode) {
|
||||
public SimpleSuffixHeuristic(String formSuffix, Integer actualSuffixLength, String normalSuffix, String morphInfoCode, String normalFormMorphInfo) {
|
||||
this.formSuffix = formSuffix;
|
||||
this.actualSuffixLength = actualSuffixLength;
|
||||
this.normalSuffix = normalSuffix;
|
||||
this.morphInfoCode = morphInfoCode;
|
||||
this.normalFormMorphInfo = normalFormMorphInfo;
|
||||
}
|
||||
|
||||
public String getFormSuffix() {
|
||||
@ -51,6 +53,14 @@ public class SimpleSuffixHeuristic {
|
||||
return morphInfoCode;
|
||||
}
|
||||
|
||||
public String getNormalFormMorphInfo() {
|
||||
return normalFormMorphInfo;
|
||||
}
|
||||
|
||||
public void setNormalFormMorphInfo(String normalFormMorphInfo) {
|
||||
this.normalFormMorphInfo = normalFormMorphInfo;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
@ -63,7 +73,8 @@ public class SimpleSuffixHeuristic {
|
||||
if (formSuffix != null ? !formSuffix.equals(that.formSuffix) : that.formSuffix != null) return false;
|
||||
if (morphInfoCode != null ? !morphInfoCode.equals(that.morphInfoCode) : that.morphInfoCode != null)
|
||||
return false;
|
||||
if (normalSuffix != null ? !normalSuffix.equals(that.normalSuffix) : that.normalSuffix != null)
|
||||
if (normalSuffix != null ? !normalSuffix.equals(that.normalSuffix) : that.normalSuffix != null) return false;
|
||||
if (normalFormMorphInfo != null ? !normalFormMorphInfo.equals(that.normalFormMorphInfo) : that.normalFormMorphInfo != null)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
@ -75,11 +86,12 @@ public class SimpleSuffixHeuristic {
|
||||
result = 31 * result + (actualSuffixLength != null ? actualSuffixLength.hashCode() : 0);
|
||||
result = 31 * result + (normalSuffix != null ? normalSuffix.hashCode() : 0);
|
||||
result = 31 * result + (morphInfoCode != null ? morphInfoCode.hashCode() : 0);
|
||||
result = 31 * result + (normalFormMorphInfo != null ? normalFormMorphInfo.hashCode() : 0);
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return formSuffix + " " + actualSuffixLength + " " + normalSuffix + " " + morphInfoCode;
|
||||
return formSuffix + " " + actualSuffixLength + " " + normalSuffix + " " + morphInfoCode + " nf " + normalFormMorphInfo;
|
||||
}
|
||||
}
|
||||
|
@ -39,8 +39,9 @@ public class StatiticsCollectors implements WordProccessor {
|
||||
private Integer ignoredCount = 0;
|
||||
|
||||
public void proccess(WordCard wordCard) {
|
||||
String normalStringMorph = wordCard.getWordsFroms().get(0).getCode();
|
||||
for (FlexiaModel fm : wordCard.getWordsFroms()) {
|
||||
SimpleSuffixHeuristic simpleSuffixHeuristic = createEvristic(wordCard.getBase(), wordCard.getCanonicalSuffix(), fm);
|
||||
SimpleSuffixHeuristic simpleSuffixHeuristic = createEvristic(wordCard.getBase(), wordCard.getCanonicalSuffix(), fm, normalStringMorph);
|
||||
if (simpleSuffixHeuristic == null) continue;
|
||||
SuffixCounter suffixCounter = statititics.get(simpleSuffixHeuristic);
|
||||
if (suffixCounter == null) {
|
||||
@ -61,13 +62,13 @@ public class StatiticsCollectors implements WordProccessor {
|
||||
return statititics;
|
||||
}
|
||||
|
||||
private SimpleSuffixHeuristic createEvristic(String wordBase, String canonicalSuffix, FlexiaModel fm) {
|
||||
private SimpleSuffixHeuristic createEvristic(String wordBase, String canonicalSuffix, FlexiaModel fm, String normalSuffixForm) {
|
||||
String form = fm.create(wordBase);
|
||||
int startSymbol = form.length() > RussianSuffixDecoderEncoder.suffixLength ? form.length() - RussianSuffixDecoderEncoder.suffixLength : 0;
|
||||
String formSuffix = form.substring(startSymbol);
|
||||
String actualSuffix = fm.getSuffix();
|
||||
Integer actualSuffixLengh = actualSuffix.length();
|
||||
return new SimpleSuffixHeuristic(formSuffix, actualSuffixLengh, canonicalSuffix, fm.getCode());
|
||||
return new SimpleSuffixHeuristic(formSuffix, actualSuffixLengh, canonicalSuffix, fm.getCode(), normalSuffixForm);
|
||||
}
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user