Working on prefix hypotheses

git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@86 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
Alexander.A.Kuznetsov
2009-11-11 22:21:14 +00:00
parent 97fa8fa868
commit 6246f020fd
10 changed files with 577 additions and 15 deletions

View File

@ -34,14 +34,8 @@ public class LuceneMorphology extends Morphology {
}
@Override
public List<String> getMorhInfo(String s) {
ArrayList<String> result = new ArrayList<String>();
int[] ints = decoderEncoder.encodeToArray(revertWord(s));
int ruleId = findRuleId(ints);
for (Heuristic h : rules[rulesId[ruleId]]) {
result.add(h.transofrmWord(s));
}
return result;
protected String createForm(String form, String grammaInfo) {
// Returns the word form exactly as given; the grammaInfo argument is
// intentionally not used, so no grammatical tag is attached to the result.
// NOTE(review): presumably the Lucene-facing output only needs the bare
// form — confirm against the callers of getMorhInfo.
return form;
}
protected void readRules(BufferedReader bufferedReader) throws IOException {

View File

@ -68,11 +68,15 @@ public class Morphology {
int[] ints = decoderEncoder.encodeToArray(revertWord(s));
int ruleId = findRuleId(ints);
for (Heuristic h : rules[rulesId[ruleId]]) {
result.add(h.transofrmWord(s) + "|" + grammaInfo[h.getFormMorphInfo()]);
result.add(createForm(h.transofrmWord(s),grammaInfo[h.getFormMorphInfo()]));
}
return result;
}
/**
 * Builds the textual result entry for one recognised word form.
 *
 * The entry is the form, a vertical bar, and the grammatical description,
 * in that order. Subclasses may override this hook to produce a different
 * representation.
 *
 * @param form       the transformed word form
 * @param grammaInfo the grammatical description associated with the form
 * @return {@code form}, then {@code "|"}, then {@code grammaInfo}
 */
protected String createForm(String form, String grammaInfo) {
    StringBuilder entry = new StringBuilder();
    entry.append(form);
    entry.append("|");
    entry.append(grammaInfo);
    return entry.toString();
}
protected int findRuleId(int[] ints) {
int low = 0;
int high = separators.length - 1;

View File

@ -0,0 +1,52 @@
package org.apache.lucene.morphology;
import java.io.IOException;
import java.io.InputStream;
import java.util.Map;
import java.util.List;
import java.util.HashMap;
import java.util.ArrayList;
/**
 * Morphology that first tries to explain a word as "known prefix + known word"
 * and only falls back to the plain {@link Morphology} lookup when that fails.
 *
 * Prefix rules are looked up by a two-character key made of the first and the
 * last character of the analysed word.
 */
public class MorphologyWithPrefix extends Morphology {
    // NOTE(review): nothing visible in this class ever puts entries into this map,
    // so as written every lookup misses and the superclass result is returned.
    // Presumably it is meant to be keyed by PrefixRule.getHashString() — confirm.
    private final Map<String, PrefixRule> prefixRuleMap = new HashMap<String, PrefixRule>();

    public MorphologyWithPrefix(String fileName, LetterDecoderEncoder decoderEncoder) throws IOException {
        super(fileName, decoderEncoder);
    }

    public MorphologyWithPrefix(InputStream inputStream, LetterDecoderEncoder decoderEncoder) throws IOException {
        super(inputStream, decoderEncoder);
    }

    public MorphologyWithPrefix(int[][] separators, short[] rulesId, Heuristic[][] rules, String[] grammaInfo) {
        super(separators, rulesId, rules, grammaInfo);
    }

    /**
     * Returns the forms for {@code s}, preferring a prefix-based analysis.
     *
     * Words shorter than four characters, words with no matching prefix rule,
     * and words that do not actually start with the rule's prefix are handled
     * by the superclass. Otherwise the prefix is cut off, the remainder is
     * looked up, and every heuristic whose grammatical description is accepted
     * by the rule contributes one entry tagged {@code "pr"}. If no heuristic
     * is accepted, the superclass result is returned.
     *
     * @param s the word to analyse
     * @return the prefix-based entries, or the superclass result when the
     *         prefix analysis does not apply or yields nothing
     */
    @Override
    public List<String> getMorhInfo(String s) {
        if (s.length() < 4) {
            return super.getMorhInfo(s);
        }

        String ruleIndex = "" + s.charAt(0) + s.charAt(s.length() - 1);
        PrefixRule prefixRule = prefixRuleMap.get(ruleIndex);
        // The key only matches on first/last character, so the full prefix must
        // still be verified. The original test was inverted: it bailed out when
        // the word DID start with the prefix and stripped characters when it did not.
        if (prefixRule == null || !s.startsWith(prefixRule.getPrefix())) {
            return super.getMorhInfo(s);
        }

        String sWithoutPrefix = s.substring(prefixRule.getPrefix().length());
        int[] ints = decoderEncoder.encodeToArray(revertWord(sWithoutPrefix));
        int ruleId = findRuleId(ints);

        List<String> result = new ArrayList<String>();
        for (Heuristic h : rules[rulesId[ruleId]]) {
            String morphInfo = grammaInfo[h.getFormMorphInfo()];
            if (prefixRule.getForms().contains(morphInfo)) {
                result.add(createForm(h.transofrmWord(sWithoutPrefix), "pr"));
            }
        }
        return result.isEmpty() ? super.getMorhInfo(s) : result;
    }
}

View File

@ -0,0 +1,61 @@
package org.apache.lucene.morphology;
import java.io.Serializable;
import java.util.HashSet;
public class PrefixRule implements Serializable {
private Character lastLetter;
private String prefix;
private HashSet<String> forms;
public Character getLastLetter() {
return lastLetter;
}
public void setLastLetter(Character lastLetter) {
this.lastLetter = lastLetter;
}
public String getPrefix() {
return prefix;
}
public void setPrefix(String prefix) {
this.prefix = prefix;
}
public HashSet<String> getForms() {
return forms;
}
public void setForms(HashSet<String> forms) {
this.forms = forms;
}
public String getHashString() {
return "" + prefix.charAt(0) + lastLetter;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
PrefixRule that = (PrefixRule) o;
if (forms != null ? !forms.equals(that.forms) : that.forms != null) return false;
if (lastLetter != null ? !lastLetter.equals(that.lastLetter) : that.lastLetter != null) return false;
if (prefix != null ? !prefix.equals(that.prefix) : that.prefix != null) return false;
return true;
}
@Override
public int hashCode() {
int result = lastLetter != null ? lastLetter.hashCode() : 0;
result = 31 * result + (prefix != null ? prefix.hashCode() : 0);
result = 31 * result + (forms != null ? forms.hashCode() : 0);
return result;
}
}