test for decoder
git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@8 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
parent
c7c145a014
commit
5576a22a3a
@ -16,14 +16,15 @@ public class RussianSuffixDecoderEncoder {
|
|||||||
|
|
||||||
|
|
||||||
static public Long encode(String string) {
|
static public Long encode(String string) {
|
||||||
if (string.length() > 12) throw new RuntimeException("suffix to long");
|
if (string.length() > 12) throw new SuffixToLongException("Suffix length should not be greater then " + 12);
|
||||||
long result = 0L;
|
long result = 0L;
|
||||||
for (int i = 0; i < string.length(); i++) {
|
for (int i = 0; i < string.length(); i++) {
|
||||||
int c = 0 + string.charAt(i) - RUSSIAN_SMALL_LETTER_OFFSET;
|
int c = 0 + string.charAt(i) - RUSSIAN_SMALL_LETTER_OFFSET;
|
||||||
if (c < 0) {
|
if (c == 45 - RUSSIAN_SMALL_LETTER_OFFSET) {
|
||||||
c = DASH_CODE;
|
c = DASH_CODE;
|
||||||
}
|
}
|
||||||
if (c == EE_CHAR) c = E_CHAR;
|
if (c == EE_CHAR) c = E_CHAR;
|
||||||
|
if (c < 0 || c > 33) throw new WrongCharaterException();
|
||||||
result = result * 35L + c;
|
result = result * 35L + c;
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
|
@ -0,0 +1,12 @@
|
|||||||
|
package org.apache.lucene.russian.morphology;
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Unchecked exception signalling that a suffix passed to the encoder is too long.
 *
 * <p>In this change set it is thrown by {@code RussianSuffixDecoderEncoder.encode(String)}
 * when the input string is longer than 12 characters.
 */
public class SuffixToLongException extends RuntimeException {
|
||||||
|
|
||||||
|
/** Creates the exception without a detail message. */
public SuffixToLongException() {
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Creates the exception with a detail message.
 *
 * @param message text forwarded unchanged to {@link RuntimeException#RuntimeException(String)}
 */
public SuffixToLongException(String message) {
|
||||||
|
super(message);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,11 @@
|
|||||||
|
package org.apache.lucene.russian.morphology;
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Unchecked exception signalling that a suffix contains a character the encoder cannot represent.
 *
 * <p>In this change set it is thrown by {@code RussianSuffixDecoderEncoder.encode(String)}
 * when a character's computed code falls outside the range 0..33.
 */
public class WrongCharaterException extends RuntimeException{
|
||||||
|
/** Creates the exception without a detail message. */
public WrongCharaterException() {
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Creates the exception with a detail message.
 *
 * @param message text forwarded unchanged to {@link RuntimeException#RuntimeException(String)}
 */
public WrongCharaterException(String message) {
|
||||||
|
super(message);
|
||||||
|
}
|
||||||
|
}
|
@ -1,5 +1,39 @@
|
|||||||
package org.apache.lucene.russian.morphology;
|
package org.apache.lucene.russian.morphology;
|
||||||
|
|
||||||
|
import org.junit.Test;
|
||||||
|
import static org.junit.Assert.assertThat;
|
||||||
|
import static org.hamcrest.core.IsEqual.equalTo;
|
||||||
|
import org.apache.lucene.russian.morphology.SuffixToLongException;
|
||||||
|
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
|
||||||
/**
 * JUnit tests for {@code RussianSuffixDecoderEncoder}: an encode/decode round trip driven by a
 * classpath data file, plus two tests for the exceptions raised on rejected input.
 */
public class RussianSuffixDecoderEncoderTest {
|
public class RussianSuffixDecoderEncoderTest {
|
||||||
|
|
||||||
|
/**
 * Reads {@code /decoder-test-data.txt} from the classpath line by line. Each line is trimmed and
 * split on a single space: the first token is encoded, and decoding the result must equal the
 * second token. The expected text may differ from the input (see the row {@code ёж еж} in the
 * data file).
 *
 * @throws IOException if reading the resource fails
 */
@Test
|
||||||
|
public void testShouldCorretDecodeEncode() throws IOException {
|
||||||
|
// NOTE(review): getResourceAsStream returns null when the resource is missing; that is not checked here.
InputStream stream = this.getClass().getResourceAsStream("/decoder-test-data.txt");
|
||||||
|
// NOTE(review): no charset is given, so the platform default is used; the data file holds
// Cyrillic text, so confirm its encoding matches. The reader is also never closed.
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream));
|
||||||
|
String s = bufferedReader.readLine();
|
||||||
|
// readLine() returns null at end of stream, which ends the loop.
while(s != null){
|
||||||
|
// qa[0] = input suffix, qa[1] = expected decoded text.
// NOTE(review): a blank line or a line without a space would make qa[1] throw
// ArrayIndexOutOfBoundsException.
String[] qa = s.trim().split(" ");
|
||||||
|
Long ecodedSuffix = RussianSuffixDecoderEncoder.encode(qa[0]);
|
||||||
|
assertThat(RussianSuffixDecoderEncoder.decode(ecodedSuffix),equalTo(qa[1]));
|
||||||
|
s = bufferedReader.readLine();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Passes a 13-character string to {@code encode}, one more than the 12-character limit checked
 * there, and expects {@code SuffixToLongException}.
 */
@Test(expected = SuffixToLongException.class)
|
||||||
|
public void shouldThrownExeptionIfSuffixToLong(){
|
||||||
|
RussianSuffixDecoderEncoder.encode("1234567890123");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Passes the single character {@code "1"} to {@code encode} and expects
 * {@code WrongCharaterException}.
 */
@Test(expected = WrongCharaterException.class)
|
||||||
|
public void shouldThrownExeptionIfSuffixContainWrongCharater(){
|
||||||
|
RussianSuffixDecoderEncoder.encode("1");
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
4
src/test/resources/decoder-test-data.txt
Normal file
4
src/test/resources/decoder-test-data.txt
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
тест тест
|
||||||
|
ёж еж
|
||||||
|
тестера тестера
|
||||||
|
что-то что-то
|
Loading…
x
Reference in New Issue
Block a user