Add string cleaning: implement cleanString in RussianSuffixDecoderEncoder and apply it to WordCard fields

git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@50 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
alexander.a.kuznetsov 2009-10-02 17:02:52 +00:00
parent 710384987c
commit ca1a9be6b7
3 changed files with 15 additions and 7 deletions

View File

@ -33,12 +33,13 @@ public class StatiticsCollector implements WordProccessor {
private LetterDecoderEncoder decoderEncoder; private LetterDecoderEncoder decoderEncoder;
public StatiticsCollector(GrammaReader grammaReader) { public StatiticsCollector(GrammaReader grammaReader, LetterDecoderEncoder decoderEncoder) {
this.grammaReader = grammaReader; this.grammaReader = grammaReader;
this.decoderEncoder = decoderEncoder;
} }
public void proccess(WordCard wordCard) throws IOException { public void proccess(WordCard wordCard) throws IOException {
wordCard = cleanWordCard(wordCard); cleanWordCard(wordCard);
String normalStringMorph = wordCard.getWordsFroms().get(0).getCode(); String normalStringMorph = wordCard.getWordsFroms().get(0).getCode();
String word = wordCard.getBase() + wordCard.getCanonicalSuffix(); String word = wordCard.getBase() + wordCard.getCanonicalSuffix();
if (word.contains("-")) return; if (word.contains("-")) return;
@ -55,8 +56,15 @@ public class StatiticsCollector implements WordProccessor {
} }
} }
private WordCard cleanWordCard(WordCard wordCard) { private void cleanWordCard(WordCard wordCard) {
return wordCard; wordCard.setBase(cleanString(wordCard.getBase()));
wordCard.setCanonicalFrom(cleanString(wordCard.getCanonicalFrom()));
wordCard.setCanonicalSuffix(cleanString(wordCard.getCanonicalSuffix()));
List<FlexiaModel> models = wordCard.getWordsFroms();
for (FlexiaModel m : models) {
m.setSuffix(cleanString(m.getSuffix()));
m.setPrefix(cleanString(m.getPrefix()));
}
} }
@ -141,7 +149,6 @@ public class StatiticsCollector implements WordProccessor {
private String cleanString(String s) { private String cleanString(String s) {
return decoderEncoder.cleanString(s); return decoderEncoder.cleanString(s);
//return s.replace((char) (34 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET), (char) (6 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET));
} }
} }

View File

@ -31,7 +31,8 @@ public class HeuristicBuilder {
GrammaReader grammaInfo = new GrammaReader("dictonary/Dicts/Morph/rgramtab.tab"); GrammaReader grammaInfo = new GrammaReader("dictonary/Dicts/Morph/rgramtab.tab");
DictonaryReader dictonaryReader = new DictonaryReader("dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd", form); DictonaryReader dictonaryReader = new DictonaryReader("dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd", form);
StatiticsCollector statiticsCollector = new StatiticsCollector(grammaInfo); RussianSuffixDecoderEncoder decoderEncoder = new RussianSuffixDecoderEncoder();
StatiticsCollector statiticsCollector = new StatiticsCollector(grammaInfo, decoderEncoder);
dictonaryReader.proccess(statiticsCollector); dictonaryReader.proccess(statiticsCollector);
statiticsCollector.saveHeuristic(); statiticsCollector.saveHeuristic();

View File

@ -107,6 +107,6 @@ public class RussianSuffixDecoderEncoder implements LetterDecoderEncoder {
} }
public String cleanString(String s) { public String cleanString(String s) {
return s; return s.replace((char) (34 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET), (char) (6 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET));
} }
} }