working on prefix hypotheses
git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@86 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
@ -34,14 +34,8 @@ public class LuceneMorphology extends Morphology {
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getMorhInfo(String s) {
|
||||
ArrayList<String> result = new ArrayList<String>();
|
||||
int[] ints = decoderEncoder.encodeToArray(revertWord(s));
|
||||
int ruleId = findRuleId(ints);
|
||||
for (Heuristic h : rules[rulesId[ruleId]]) {
|
||||
result.add(h.transofrmWord(s));
|
||||
}
|
||||
return result;
|
||||
/**
 * Builds the string reported for one analysed word form.
 *
 * This version returns {@code form} exactly as given; {@code grammaInfo} is not used.
 *
 * NOTE(review): this looks like the LuceneMorphology override of the base-class
 * createForm (which joins form and grammar info with "|") - confirm against the full file.
 *
 * @param form       the word form produced by a heuristic
 * @param grammaInfo grammar information for the form (ignored here)
 * @return {@code form}, unchanged
 */
protected String createForm(String form, String grammaInfo) {
|
||||
return form;
|
||||
}
|
||||
|
||||
protected void readRules(BufferedReader bufferedReader) throws IOException {
|
||||
|
@ -68,11 +68,15 @@ public class Morphology {
|
||||
int[] ints = decoderEncoder.encodeToArray(revertWord(s));
|
||||
int ruleId = findRuleId(ints);
|
||||
for (Heuristic h : rules[rulesId[ruleId]]) {
|
||||
result.add(h.transofrmWord(s) + "|" + grammaInfo[h.getFormMorphInfo()]);
|
||||
result.add(createForm(h.transofrmWord(s),grammaInfo[h.getFormMorphInfo()]));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
protected String createForm(String form,String grammaInfo){
|
||||
return form+"|"+grammaInfo;
|
||||
}
|
||||
|
||||
protected int findRuleId(int[] ints) {
|
||||
int low = 0;
|
||||
int high = separators.length - 1;
|
||||
|
@ -0,0 +1,52 @@
|
||||
package org.apache.lucene.morphology;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.Map;
|
||||
import java.util.List;
|
||||
import java.util.HashMap;
|
||||
import java.util.ArrayList;
|
||||
|
||||
|
||||
public class MorphologyWithPrefix extends Morphology {
|
||||
private Map<String, PrefixRule> prefixRuleMap = new HashMap<String, PrefixRule>();
|
||||
|
||||
public MorphologyWithPrefix(String fileName, LetterDecoderEncoder decoderEncoder) throws IOException {
|
||||
super(fileName, decoderEncoder);
|
||||
}
|
||||
|
||||
public MorphologyWithPrefix(InputStream inputStream, LetterDecoderEncoder decoderEncoder) throws IOException {
|
||||
super(inputStream, decoderEncoder);
|
||||
}
|
||||
|
||||
public MorphologyWithPrefix(int[][] separators, short[] rulesId, Heuristic[][] rules, String[] grammaInfo) {
|
||||
super(separators, rulesId, rules, grammaInfo);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getMorhInfo(String s) {
|
||||
if (s.length() < 4) {
|
||||
return super.getMorhInfo(s);
|
||||
}
|
||||
String ruleIndex = "" + s.charAt(0) + s.charAt(s.length() - 1);
|
||||
PrefixRule prefixRule = prefixRuleMap.get(ruleIndex);
|
||||
if (prefixRule == null) {
|
||||
return super.getMorhInfo(s);
|
||||
}
|
||||
if (s.startsWith(prefixRule.getPrefix())) {
|
||||
return super.getMorhInfo(s);
|
||||
}
|
||||
String sWithoutPrefix = s.substring(prefixRule.getPrefix().length());
|
||||
|
||||
int[] ints = decoderEncoder.encodeToArray(revertWord(sWithoutPrefix));
|
||||
int ruleId = findRuleId(ints);
|
||||
ArrayList<String> result = new ArrayList<String>();
|
||||
for (Heuristic h : rules[rulesId[ruleId]]) {
|
||||
String morphInfo = grammaInfo[h.getFormMorphInfo()];
|
||||
if(prefixRule.getForms().contains(morphInfo)){
|
||||
result.add(createForm(h.transofrmWord(sWithoutPrefix),"pr"));
|
||||
}
|
||||
}
|
||||
return result.size() > 0 ? result : super.getMorhInfo(s);
|
||||
}
|
||||
}
|
@ -0,0 +1,61 @@
|
||||
package org.apache.lucene.morphology;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.HashSet;
|
||||
|
||||
|
||||
public class PrefixRule implements Serializable {
|
||||
private Character lastLetter;
|
||||
private String prefix;
|
||||
private HashSet<String> forms;
|
||||
|
||||
public Character getLastLetter() {
|
||||
return lastLetter;
|
||||
}
|
||||
|
||||
public void setLastLetter(Character lastLetter) {
|
||||
this.lastLetter = lastLetter;
|
||||
}
|
||||
|
||||
public String getPrefix() {
|
||||
return prefix;
|
||||
}
|
||||
|
||||
public void setPrefix(String prefix) {
|
||||
this.prefix = prefix;
|
||||
}
|
||||
|
||||
public HashSet<String> getForms() {
|
||||
return forms;
|
||||
}
|
||||
|
||||
public void setForms(HashSet<String> forms) {
|
||||
this.forms = forms;
|
||||
}
|
||||
|
||||
public String getHashString() {
|
||||
return "" + prefix.charAt(0) + lastLetter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
|
||||
PrefixRule that = (PrefixRule) o;
|
||||
|
||||
if (forms != null ? !forms.equals(that.forms) : that.forms != null) return false;
|
||||
if (lastLetter != null ? !lastLetter.equals(that.lastLetter) : that.lastLetter != null) return false;
|
||||
if (prefix != null ? !prefix.equals(that.prefix) : that.prefix != null) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int result = lastLetter != null ? lastLetter.hashCode() : 0;
|
||||
result = 31 * result + (prefix != null ? prefix.hashCode() : 0);
|
||||
result = 31 * result + (forms != null ? forms.hashCode() : 0);
|
||||
return result;
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user