working on new version
git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@51 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
@ -31,7 +31,7 @@ public class HeuristicBuilder {
|
||||
GrammaReader grammaInfo = new GrammaReader("dictonary/Dicts/Morph/rgramtab.tab");
|
||||
DictonaryReader dictonaryReader = new DictonaryReader("dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd", form);
|
||||
|
||||
RussianSuffixDecoderEncoder decoderEncoder = new RussianSuffixDecoderEncoder();
|
||||
RussianLetterDecoderEncoder decoderEncoder = new RussianLetterDecoderEncoder();
|
||||
StatiticsCollector statiticsCollector = new StatiticsCollector(grammaInfo, decoderEncoder);
|
||||
dictonaryReader.proccess(statiticsCollector);
|
||||
statiticsCollector.saveHeuristic();
|
||||
|
@ -28,7 +28,7 @@ import java.util.ArrayList;
|
||||
* Assumes that the suffix contains only lowercase Russian letters and the dash.
|
||||
* Also assumes that the letters е and ё coincide (ё is treated as е).
|
||||
*/
|
||||
public class RussianSuffixDecoderEncoder implements LetterDecoderEncoder {
|
||||
public class RussianLetterDecoderEncoder implements LetterDecoderEncoder {
|
||||
public static final int RUSSIAN_SMALL_LETTER_OFFSET = 1071;
|
||||
static public int SUFFIX_LENGTH = 6;
|
||||
public static final int EE_CHAR = 34;
|
||||
@ -107,6 +107,6 @@ public class RussianSuffixDecoderEncoder implements LetterDecoderEncoder {
|
||||
}
|
||||
|
||||
public String cleanString(String s) {
|
||||
return s.replace((char) (34 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET), (char) (6 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET));
|
||||
return s.replace((char) (34 + RussianLetterDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET), (char) (6 + RussianLetterDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET));
|
||||
}
|
||||
}
|
@ -35,7 +35,7 @@ public class Test {
|
||||
|
||||
public static void main(String[] args) throws IOException, ClassNotFoundException {
|
||||
//
|
||||
Morph splitter = new Morph("sep.txt");
|
||||
Morph splitter = new Morph("sep.txt",new RussianLetterDecoderEncoder());
|
||||
TreeSet<Short> shorts = new TreeSet<Short>();
|
||||
int count = 0;
|
||||
TreeMap<Integer, Integer> rulesStat = new TreeMap<Integer, Integer>();
|
||||
@ -57,7 +57,6 @@ public class Test {
|
||||
System.out.println(count);
|
||||
System.out.println(rulesStat);
|
||||
System.gc();
|
||||
System.out.println("Ready");
|
||||
System.in.read();
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,11 @@
|
||||
package org.apache.lucene.morphology.russian;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: akuznetsov
|
||||
* Date: 03/10/2009
|
||||
* Time: 3:52:43 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class AnalayzerTest {
|
||||
}
|
@ -0,0 +1,30 @@
|
||||
package org.apache.lucene.morphology.russian;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
|
||||
public class RussianLetterDecoderEncoderTest extends TestCase {
|
||||
public void testEncode() {
|
||||
// Add your code here
|
||||
}
|
||||
|
||||
public void testEncodeToArray() {
|
||||
// Add your code here
|
||||
}
|
||||
|
||||
public void testDecodeArray() {
|
||||
// Add your code here
|
||||
}
|
||||
|
||||
public void testDecode() {
|
||||
// Add your code here
|
||||
}
|
||||
|
||||
public void testCheckCharacter() {
|
||||
// Add your code here
|
||||
}
|
||||
|
||||
public void testCleanString() {
|
||||
// Add your code here
|
||||
}
|
||||
}
|
@ -0,0 +1,11 @@
|
||||
package org.apache.lucene.morphology.russian;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: akuznetsov
|
||||
* Date: 03/10/2009
|
||||
* Time: 3:52:18 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class RussianMorphTest {
|
||||
}
|
Reference in New Issue
Block a user