adding clean for strings

git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@50 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
alexander.a.kuznetsov 2009-10-02 17:02:52 +00:00
parent 710384987c
commit ca1a9be6b7
3 changed files with 15 additions and 7 deletions

View File

@ -33,12 +33,13 @@ public class StatiticsCollector implements WordProccessor {
private LetterDecoderEncoder decoderEncoder;
/**
 * Creates a collector, storing the supplied collaborators in the
 * {@code grammaReader} and {@code decoderEncoder} fields.
 *
 * NOTE(review): this listing is a diff rendering whose +/- markers were lost.
 * The one-argument signature looks like the removed line and the two-argument
 * signature like its replacement -- confirm against the repository.
 *
 * @param grammaReader   value stored in the {@code grammaReader} field
 * @param decoderEncoder value stored in the {@code decoderEncoder} field; it is the
 *                       object {@code cleanString} delegates to
 */
public StatiticsCollector(GrammaReader grammaReader) {
public StatiticsCollector(GrammaReader grammaReader, LetterDecoderEncoder decoderEncoder) {
this.grammaReader = grammaReader;
this.decoderEncoder = decoderEncoder;
}
public void proccess(WordCard wordCard) throws IOException {
wordCard = cleanWordCard(wordCard);
cleanWordCard(wordCard);
String normalStringMorph = wordCard.getWordsFroms().get(0).getCode();
String word = wordCard.getBase() + wordCard.getCanonicalSuffix();
if (word.contains("-")) return;
@ -55,8 +56,15 @@ public class StatiticsCollector implements WordProccessor {
}
}
/**
 * Normalizes the text fields of a word card in place by passing each of them
 * through {@code cleanString}: the base, the canonical form, the canonical suffix,
 * and the suffix and prefix of every {@code FlexiaModel} returned by
 * {@code getWordsFroms()}.
 *
 * NOTE(review): this listing is a diff rendering whose +/- markers were lost.
 * The {@code WordCard}-returning signature and its {@code return wordCard;} look like
 * the removed pass-through version, and the {@code void} method below them like the
 * replacement -- confirm against the repository.
 *
 * @param wordCard the card whose strings are rewritten through its setters
 */
private WordCard cleanWordCard(WordCard wordCard) {
return wordCard;
private void cleanWordCard(WordCard wordCard) {
// Card-level strings.
wordCard.setBase(cleanString(wordCard.getBase()));
wordCard.setCanonicalFrom(cleanString(wordCard.getCanonicalFrom()));
wordCard.setCanonicalSuffix(cleanString(wordCard.getCanonicalSuffix()));
// Affixes of every flexia model attached to the card.
List<FlexiaModel> models = wordCard.getWordsFroms();
for (FlexiaModel m : models) {
m.setSuffix(cleanString(m.getSuffix()));
m.setPrefix(cleanString(m.getPrefix()));
}
}
@ -141,7 +149,6 @@ public class StatiticsCollector implements WordProccessor {
/**
 * Cleans a string by delegating to {@code decoderEncoder.cleanString(s)}.
 *
 * @param s the string to clean
 * @return whatever the configured {@code decoderEncoder} returns for {@code s}
 */
private String cleanString(String s) {
return decoderEncoder.cleanString(s);
// NOTE(review): the commented-out line below is the same replace expression that
// RussianSuffixDecoderEncoder.cleanString contains; the hunk header (7 lines -> 6)
// suggests this commit deletes it -- confirm against the repository.
//return s.replace((char) (34 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET), (char) (6 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET));
}
}

View File

@ -31,7 +31,8 @@ public class HeuristicBuilder {
GrammaReader grammaInfo = new GrammaReader("dictonary/Dicts/Morph/rgramtab.tab");
DictonaryReader dictonaryReader = new DictonaryReader("dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd", form);
StatiticsCollector statiticsCollector = new StatiticsCollector(grammaInfo);
RussianSuffixDecoderEncoder decoderEncoder = new RussianSuffixDecoderEncoder();
StatiticsCollector statiticsCollector = new StatiticsCollector(grammaInfo, decoderEncoder);
dictonaryReader.proccess(statiticsCollector);
statiticsCollector.saveHeuristic();

View File

@ -107,6 +107,6 @@ public class RussianSuffixDecoderEncoder implements LetterDecoderEncoder {
}
/**
 * Returns {@code s} with every occurrence of the character
 * {@code (char) (34 + RUSSIAN_SMALL_LETTER_OFFSET)} replaced by the character
 * {@code (char) (6 + RUSSIAN_SMALL_LETTER_OFFSET)}.
 *
 * NOTE(review): presumably this folds the Cyrillic letter "yo" into "ye"; that depends
 * on the value of RUSSIAN_SMALL_LETTER_OFFSET, which is not visible here -- TODO confirm.
 *
 * NOTE(review): this listing is a diff rendering whose +/- markers were lost.
 * {@code return s;} looks like the removed pass-through body and the {@code replace}
 * line like its replacement -- confirm against the repository.
 *
 * @param s the string to clean
 * @return the string with the character substitution applied
 */
public String cleanString(String s) {
return s;
return s.replace((char) (34 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET), (char) (6 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET));
}
}