Working on prefix hypotheses

git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@86 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
Alexander.A.Kuznetsov
2009-11-11 22:21:14 +00:00
parent 97fa8fa868
commit 6246f020fd
10 changed files with 577 additions and 15 deletions

View File

@ -1,18 +1,20 @@
package org.apache.lucene.morphology.dictionary;
import org.apache.lucene.morphology.PrefixRule;
import java.util.*;
import java.io.IOException;
import java.io.BufferedReader;
public class PrefixesHypotises extends DictonaryReader {
public class PrefixesRulesBuilder extends DictonaryReader {
private Map<FlexiaModel,Set<FlexiaModel>> rules = new HashMap<FlexiaModel,Set<FlexiaModel>>();
public PrefixesHypotises(String fileName, Set<String> ingnoredForm) {
/**
 * Creates a builder by forwarding both arguments unchanged to the
 * two-argument {@code DictonaryReader} constructor.
 *
 * @param fileName     passed to the superclass; presumably the path of the
 *                     dictionary file to read (confirm against DictonaryReader)
 * @param ingnoredForm passed to the superclass; presumably the forms the
 *                     reader should ignore (confirm against DictonaryReader)
 */
public PrefixesRulesBuilder(String fileName, Set<String> ingnoredForm) {
super(fileName, ingnoredForm);
}
public PrefixesHypotises(String fileName, String fileEncoding, Set<String> ingnoredForm) {
/**
 * Creates a builder by forwarding all three arguments unchanged to the
 * three-argument {@code DictonaryReader} constructor.
 *
 * @param fileName     passed to the superclass; presumably the path of the
 *                     dictionary file to read (confirm against DictonaryReader)
 * @param fileEncoding passed to the superclass; presumably the character
 *                     encoding of that file (confirm against DictonaryReader)
 * @param ingnoredForm passed to the superclass; presumably the forms the
 *                     reader should ignore (confirm against DictonaryReader)
 */
public PrefixesRulesBuilder(String fileName, String fileEncoding, Set<String> ingnoredForm) {
super(fileName, fileEncoding, ingnoredForm);
}
@ -23,6 +25,22 @@ public class PrefixesHypotises extends DictonaryReader {
System.out.println(rules);
}
/**
 * Converts the collected {@code rules} map into a list of {@link PrefixRule} objects.
 *
 * <p>One rule is produced per map entry: its prefix is the key's prefix, its
 * last letter is the first character of the key's suffix, and its forms are
 * the codes of every {@code FlexiaModel} mapped to that key.
 *
 * @return a newly created list holding one {@code PrefixRule} per entry of {@code rules}
 */
public List<PrefixRule> getPrefixRules(){
    List<PrefixRule> result = new ArrayList<PrefixRule>();
    for (Map.Entry<FlexiaModel, Set<FlexiaModel>> entry : rules.entrySet()) {
        FlexiaModel model = entry.getKey();

        PrefixRule rule = new PrefixRule();
        rule.setPrefix(model.getPrefix());
        // NOTE(review): charAt(0) assumes the key's suffix is never empty — confirm
        // against how keys are inserted into rules.
        rule.setLastLetter(model.getSuffix().charAt(0));

        // Gather the code of every model associated with this key.
        HashSet<String> codes = new HashSet<String>();
        for (FlexiaModel related : entry.getValue()) {
            codes.add(related.getCode());
        }
        rule.setForms(codes);

        result.add(rule);
    }
    return result;
}
@Override
protected void readWords(BufferedReader reader, WordProccessor wordProccessor) throws IOException {
sckipBlock(reader);

View File

@ -17,7 +17,6 @@
package org.apache.lucene.morphology.generator;
import org.apache.lucene.morphology.dictionary.*;
import org.apache.lucene.morphology.russian.RussianLetterDecoderEncoder;
import java.io.IOException;
import java.util.HashSet;
@ -26,7 +25,7 @@ import java.util.HashSet;
public class RussianPrefixesBuilder {
public static void main(String[] args) throws IOException {
GrammaReader grammaInfo = new GrammaReader("dictonary/Dicts/Morph/rgramtab.tab");
PrefixesHypotises dictonaryReader = new PrefixesHypotises("dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd", new HashSet<String>());
PrefixesRulesBuilder dictonaryReader = new PrefixesRulesBuilder("dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd", new HashSet<String>());
//RussianLetterDecoderEncoder decoderEncoder = new RussianLetterDecoderEncoder();
//StatiticsCollector statiticsCollector = new StatiticsCollector(grammaInfo, decoderEncoder);