adding clean for strings
git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@50 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
parent
710384987c
commit
ca1a9be6b7
@ -33,12 +33,13 @@ public class StatiticsCollector implements WordProccessor {
|
|||||||
private LetterDecoderEncoder decoderEncoder;
|
private LetterDecoderEncoder decoderEncoder;
|
||||||
|
|
||||||
|
|
||||||
public StatiticsCollector(GrammaReader grammaReader) {
|
public StatiticsCollector(GrammaReader grammaReader, LetterDecoderEncoder decoderEncoder) {
|
||||||
this.grammaReader = grammaReader;
|
this.grammaReader = grammaReader;
|
||||||
|
this.decoderEncoder = decoderEncoder;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void proccess(WordCard wordCard) throws IOException {
|
public void proccess(WordCard wordCard) throws IOException {
|
||||||
wordCard = cleanWordCard(wordCard);
|
cleanWordCard(wordCard);
|
||||||
String normalStringMorph = wordCard.getWordsFroms().get(0).getCode();
|
String normalStringMorph = wordCard.getWordsFroms().get(0).getCode();
|
||||||
String word = wordCard.getBase() + wordCard.getCanonicalSuffix();
|
String word = wordCard.getBase() + wordCard.getCanonicalSuffix();
|
||||||
if (word.contains("-")) return;
|
if (word.contains("-")) return;
|
||||||
@ -55,8 +56,15 @@ public class StatiticsCollector implements WordProccessor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private WordCard cleanWordCard(WordCard wordCard) {
|
private void cleanWordCard(WordCard wordCard) {
|
||||||
return wordCard;
|
wordCard.setBase(cleanString(wordCard.getBase()));
|
||||||
|
wordCard.setCanonicalFrom(cleanString(wordCard.getCanonicalFrom()));
|
||||||
|
wordCard.setCanonicalSuffix(cleanString(wordCard.getCanonicalSuffix()));
|
||||||
|
List<FlexiaModel> models = wordCard.getWordsFroms();
|
||||||
|
for (FlexiaModel m : models) {
|
||||||
|
m.setSuffix(cleanString(m.getSuffix()));
|
||||||
|
m.setPrefix(cleanString(m.getPrefix()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -141,7 +149,6 @@ public class StatiticsCollector implements WordProccessor {
|
|||||||
|
|
||||||
private String cleanString(String s) {
|
private String cleanString(String s) {
|
||||||
return decoderEncoder.cleanString(s);
|
return decoderEncoder.cleanString(s);
|
||||||
//return s.replace((char) (34 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET), (char) (6 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -31,7 +31,8 @@ public class HeuristicBuilder {
|
|||||||
GrammaReader grammaInfo = new GrammaReader("dictonary/Dicts/Morph/rgramtab.tab");
|
GrammaReader grammaInfo = new GrammaReader("dictonary/Dicts/Morph/rgramtab.tab");
|
||||||
DictonaryReader dictonaryReader = new DictonaryReader("dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd", form);
|
DictonaryReader dictonaryReader = new DictonaryReader("dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd", form);
|
||||||
|
|
||||||
StatiticsCollector statiticsCollector = new StatiticsCollector(grammaInfo);
|
RussianSuffixDecoderEncoder decoderEncoder = new RussianSuffixDecoderEncoder();
|
||||||
|
StatiticsCollector statiticsCollector = new StatiticsCollector(grammaInfo, decoderEncoder);
|
||||||
dictonaryReader.proccess(statiticsCollector);
|
dictonaryReader.proccess(statiticsCollector);
|
||||||
statiticsCollector.saveHeuristic();
|
statiticsCollector.saveHeuristic();
|
||||||
|
|
||||||
|
@ -107,6 +107,6 @@ public class RussianSuffixDecoderEncoder implements LetterDecoderEncoder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public String cleanString(String s) {
|
public String cleanString(String s) {
|
||||||
return s;
|
return s.replace((char) (34 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET), (char) (6 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user