Fix a problem with "-" inside words: the English morphology did not work correctly because word forms contain it
git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@97 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
@ -19,33 +19,33 @@ import java.io.Serializable;
|
||||
|
||||
|
||||
public class Heuristic implements Serializable {
|
||||
byte actualSuffixLengh;
|
||||
byte actualSuffixLength;
|
||||
String actualNormalSuffix;
|
||||
short formMorphInfo;
|
||||
short normalFormMorphInfo;
|
||||
|
||||
public Heuristic(String s) {
|
||||
String[] strings = s.split("\\|");
|
||||
actualSuffixLengh = Byte.valueOf(strings[0]);
|
||||
actualSuffixLength = Byte.valueOf(strings[0]);
|
||||
actualNormalSuffix = strings[1];
|
||||
formMorphInfo = Short.valueOf(strings[2]);
|
||||
normalFormMorphInfo = Short.valueOf(strings[3]);
|
||||
}
|
||||
|
||||
public Heuristic(byte actualSuffixLengh, String actualNormalSuffix, short formMorphInfo, short normalFormMorphInfo) {
|
||||
this.actualSuffixLengh = actualSuffixLengh;
|
||||
public Heuristic(byte actualSuffixLength, String actualNormalSuffix, short formMorphInfo, short normalFormMorphInfo) {
|
||||
this.actualSuffixLength = actualSuffixLength;
|
||||
this.actualNormalSuffix = actualNormalSuffix;
|
||||
this.formMorphInfo = formMorphInfo;
|
||||
this.normalFormMorphInfo = normalFormMorphInfo;
|
||||
}
|
||||
|
||||
public String transofrmWord(String w) {
|
||||
if (w.length() - actualSuffixLengh < 0) return w;
|
||||
return w.substring(0, w.length() - actualSuffixLengh) + actualNormalSuffix;
|
||||
public String transformWord(String w) {
|
||||
if (w.length() - actualSuffixLength < 0) return w;
|
||||
return w.substring(0, w.length() - actualSuffixLength) + actualNormalSuffix;
|
||||
}
|
||||
|
||||
public byte getActualSuffixLengh() {
|
||||
return actualSuffixLengh;
|
||||
public byte getActualSuffixLength() {
|
||||
return actualSuffixLength;
|
||||
}
|
||||
|
||||
public String getActualNormalSuffix() {
|
||||
@ -67,7 +67,7 @@ public class Heuristic implements Serializable {
|
||||
|
||||
Heuristic heuristic = (Heuristic) o;
|
||||
|
||||
if (actualSuffixLengh != heuristic.actualSuffixLengh) return false;
|
||||
if (actualSuffixLength != heuristic.actualSuffixLength) return false;
|
||||
if (formMorphInfo != heuristic.formMorphInfo) return false;
|
||||
if (normalFormMorphInfo != heuristic.normalFormMorphInfo) return false;
|
||||
if (actualNormalSuffix != null ? !actualNormalSuffix.equals(heuristic.actualNormalSuffix) : heuristic.actualNormalSuffix != null)
|
||||
@ -78,7 +78,7 @@ public class Heuristic implements Serializable {
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int result = (int) actualSuffixLengh;
|
||||
int result = (int) actualSuffixLength;
|
||||
result = 31 * result + (actualNormalSuffix != null ? actualNormalSuffix.hashCode() : 0);
|
||||
result = 31 * result + (int) formMorphInfo;
|
||||
result = 31 * result + (int) normalFormMorphInfo;
|
||||
@ -87,6 +87,6 @@ public class Heuristic implements Serializable {
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "" + actualSuffixLengh + "|" + actualNormalSuffix + "|" + formMorphInfo + "|" + normalFormMorphInfo;
|
||||
return "" + actualSuffixLength + "|" + actualNormalSuffix + "|" + formMorphInfo + "|" + normalFormMorphInfo;
|
||||
}
|
||||
}
|
||||
|
@ -55,7 +55,7 @@ public class LuceneMorphology extends MorphologyImpl {
|
||||
for (Heuristic heuristic : heuristics) {
|
||||
boolean isAdded = true;
|
||||
for (Heuristic ch : result) {
|
||||
isAdded = isAdded && !(ch.getActualNormalSuffix().equals(heuristic.getActualNormalSuffix()) && (ch.getActualSuffixLengh() == heuristic.getActualSuffixLengh()));
|
||||
isAdded = isAdded && !(ch.getActualNormalSuffix().equals(heuristic.getActualNormalSuffix()) && (ch.getActualSuffixLength() == heuristic.getActualSuffixLength()));
|
||||
}
|
||||
if (isAdded) {
|
||||
result.add(heuristic);
|
||||
|
@ -68,7 +68,7 @@ public class MorphologyImpl implements Morphology {
|
||||
int[] ints = decoderEncoder.encodeToArray(revertWord(s));
|
||||
int ruleId = findRuleId(ints);
|
||||
for (Heuristic h : rules[rulesId[ruleId]]) {
|
||||
result.add(h.transofrmWord(s));
|
||||
result.add(h.transformWord(s));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@ -78,7 +78,7 @@ public class MorphologyImpl implements Morphology {
|
||||
int[] ints = decoderEncoder.encodeToArray(revertWord(s));
|
||||
int ruleId = findRuleId(ints);
|
||||
for (Heuristic h : rules[rulesId[ruleId]]) {
|
||||
result.add(h.transofrmWord(s) + "|" + grammaInfo[h.getFormMorphInfo()]);
|
||||
result.add(h.transformWord(s) + "|" + grammaInfo[h.getFormMorphInfo()]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@ -180,9 +180,9 @@ public class MorphologyImpl implements Morphology {
|
||||
rules = new Heuristic[amount][];
|
||||
for (int i = 0; i < amount; i++) {
|
||||
String s1 = bufferedReader.readLine();
|
||||
Integer ruleLenght = Integer.valueOf(s1);
|
||||
rules[i] = new Heuristic[ruleLenght];
|
||||
for (int j = 0; j < ruleLenght; j++) {
|
||||
Integer ruleLength = Integer.valueOf(s1);
|
||||
rules[i] = new Heuristic[ruleLength];
|
||||
for (int j = 0; j < ruleLength; j++) {
|
||||
rules[i][j] = new Heuristic(bufferedReader.readLine());
|
||||
}
|
||||
}
|
||||
@ -197,7 +197,6 @@ public class MorphologyImpl implements Morphology {
|
||||
}
|
||||
|
||||
private void readSeparators(BufferedReader bufferedReader, Integer amount) throws IOException {
|
||||
HashSet intetger = new HashSet<Integer>();
|
||||
separators = new int[amount][];
|
||||
for (int i = 0; i < amount; i++) {
|
||||
String s1 = bufferedReader.readLine();
|
||||
@ -206,7 +205,6 @@ public class MorphologyImpl implements Morphology {
|
||||
for (int j = 0; j < wordLenght; j++) {
|
||||
separators[i][j] = Integer.valueOf(bufferedReader.readLine());
|
||||
}
|
||||
intetger.add(separators[i][0]);
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user