Add string cleaning for word cards: StatiticsCollector now delegates to LetterDecoderEncoder.cleanString
git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@50 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
parent
710384987c
commit
ca1a9be6b7
@ -33,12 +33,13 @@ public class StatiticsCollector implements WordProccessor {
|
||||
private LetterDecoderEncoder decoderEncoder;
|
||||
|
||||
|
||||
public StatiticsCollector(GrammaReader grammaReader) {
|
||||
public StatiticsCollector(GrammaReader grammaReader, LetterDecoderEncoder decoderEncoder) {
|
||||
this.grammaReader = grammaReader;
|
||||
this.decoderEncoder = decoderEncoder;
|
||||
}
|
||||
|
||||
public void proccess(WordCard wordCard) throws IOException {
|
||||
wordCard = cleanWordCard(wordCard);
|
||||
cleanWordCard(wordCard);
|
||||
String normalStringMorph = wordCard.getWordsFroms().get(0).getCode();
|
||||
String word = wordCard.getBase() + wordCard.getCanonicalSuffix();
|
||||
if (word.contains("-")) return;
|
||||
@ -55,8 +56,15 @@ public class StatiticsCollector implements WordProccessor {
|
||||
}
|
||||
}
|
||||
|
||||
private WordCard cleanWordCard(WordCard wordCard) {
|
||||
return wordCard;
|
||||
private void cleanWordCard(WordCard wordCard) {
|
||||
wordCard.setBase(cleanString(wordCard.getBase()));
|
||||
wordCard.setCanonicalFrom(cleanString(wordCard.getCanonicalFrom()));
|
||||
wordCard.setCanonicalSuffix(cleanString(wordCard.getCanonicalSuffix()));
|
||||
List<FlexiaModel> models = wordCard.getWordsFroms();
|
||||
for (FlexiaModel m : models) {
|
||||
m.setSuffix(cleanString(m.getSuffix()));
|
||||
m.setPrefix(cleanString(m.getPrefix()));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -141,7 +149,6 @@ public class StatiticsCollector implements WordProccessor {
|
||||
|
||||
private String cleanString(String s) {
|
||||
return decoderEncoder.cleanString(s);
|
||||
//return s.replace((char) (34 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET), (char) (6 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET));
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -31,7 +31,8 @@ public class HeuristicBuilder {
|
||||
GrammaReader grammaInfo = new GrammaReader("dictonary/Dicts/Morph/rgramtab.tab");
|
||||
DictonaryReader dictonaryReader = new DictonaryReader("dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd", form);
|
||||
|
||||
StatiticsCollector statiticsCollector = new StatiticsCollector(grammaInfo);
|
||||
RussianSuffixDecoderEncoder decoderEncoder = new RussianSuffixDecoderEncoder();
|
||||
StatiticsCollector statiticsCollector = new StatiticsCollector(grammaInfo, decoderEncoder);
|
||||
dictonaryReader.proccess(statiticsCollector);
|
||||
statiticsCollector.saveHeuristic();
|
||||
|
||||
|
@ -107,6 +107,6 @@ public class RussianSuffixDecoderEncoder implements LetterDecoderEncoder {
|
||||
}
|
||||
|
||||
public String cleanString(String s) {
|
||||
return s;
|
||||
return s.replace((char) (34 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET), (char) (6 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET));
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user