Update to JDK 11, fix migration issues and some typos, replace deprecated APIs, bump Lucene to 8.11
This commit is contained in:
@ -6,7 +6,6 @@
|
||||
<version>1.5</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>russian</artifactId>
|
||||
<name>russian</name>
|
||||
<version>1.5</version>
|
||||
@ -23,7 +22,7 @@
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>4.13.1</version>
|
||||
<version>${junit.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
|
@ -20,7 +20,6 @@ import org.apache.lucene.morphology.LetterDecoderEncoder;
|
||||
import org.apache.lucene.morphology.SuffixToLongException;
|
||||
import org.apache.lucene.morphology.WrongCharaterException;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedList;
|
||||
|
||||
/**
|
||||
@ -42,7 +41,7 @@ public class RussianLetterDecoderEncoder implements LetterDecoderEncoder {
|
||||
throw new SuffixToLongException("Suffix length should not be greater then " + WORD_PART_LENGHT + " " + string);
|
||||
int result = 0;
|
||||
for (int i = 0; i < string.length(); i++) {
|
||||
int c = 0 + string.charAt(i) - RUSSIAN_SMALL_LETTER_OFFSET;
|
||||
int c = string.charAt(i) - RUSSIAN_SMALL_LETTER_OFFSET;
|
||||
if (c == 45 - RUSSIAN_SMALL_LETTER_OFFSET) {
|
||||
c = DASH_CODE;
|
||||
}
|
||||
@ -58,7 +57,7 @@ public class RussianLetterDecoderEncoder implements LetterDecoderEncoder {
|
||||
}
|
||||
|
||||
public int[] encodeToArray(String s) {
|
||||
LinkedList<Integer> integers = new LinkedList<Integer>();
|
||||
LinkedList<Integer> integers = new LinkedList<>();
|
||||
while (s.length() > WORD_PART_LENGHT) {
|
||||
integers.add(encode(s.substring(0, WORD_PART_LENGHT)));
|
||||
s = s.substring(WORD_PART_LENGHT);
|
||||
@ -74,16 +73,16 @@ public class RussianLetterDecoderEncoder implements LetterDecoderEncoder {
|
||||
}
|
||||
|
||||
public String decodeArray(int[] array) {
|
||||
String result = "";
|
||||
StringBuilder result = new StringBuilder();
|
||||
for (int i : array) {
|
||||
result += decode(i);
|
||||
result.append(decode(i));
|
||||
}
|
||||
return result;
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
|
||||
public String decode(Integer suffixN) {
|
||||
String result = "";
|
||||
StringBuilder result = new StringBuilder();
|
||||
while (suffixN > 33) {
|
||||
int c = suffixN % 34 + RUSSIAN_SMALL_LETTER_OFFSET;
|
||||
if (c == RUSSIAN_SMALL_LETTER_OFFSET) {
|
||||
@ -91,21 +90,20 @@ public class RussianLetterDecoderEncoder implements LetterDecoderEncoder {
|
||||
continue;
|
||||
}
|
||||
if (c == DASH_CODE + RUSSIAN_SMALL_LETTER_OFFSET) c = DASH_CHAR;
|
||||
result = (char) c + result;
|
||||
result.insert(0, (char) c);
|
||||
suffixN /= 34;
|
||||
}
|
||||
long c = suffixN + RUSSIAN_SMALL_LETTER_OFFSET;
|
||||
if (c == DASH_CODE + RUSSIAN_SMALL_LETTER_OFFSET) c = DASH_CHAR;
|
||||
result = (char) c + result;
|
||||
return result;
|
||||
result.insert(0, (char) c);
|
||||
return result.toString();
|
||||
}
|
||||
|
||||
public boolean checkCharacter(char c) {
|
||||
int code = 0 + c;
|
||||
int code = c;
|
||||
if (code == 45) return true;
|
||||
code -= RUSSIAN_SMALL_LETTER_OFFSET;
|
||||
if (code > 0 && code < 33) return true;
|
||||
return false;
|
||||
return code > 0 && code < 33;
|
||||
}
|
||||
|
||||
public boolean checkString(String word) {
|
||||
|
@ -17,6 +17,7 @@ package org.apache.lucene.morphology.russian;
|
||||
|
||||
import org.apache.lucene.morphology.SuffixToLongException;
|
||||
import org.apache.lucene.morphology.WrongCharaterException;
|
||||
import org.hamcrest.MatcherAssert;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
@ -24,9 +25,9 @@ import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import static org.hamcrest.core.IsEqual.equalTo;
|
||||
import static org.junit.Assert.assertThat;
|
||||
|
||||
public class RussianLetterDecoderEncoderTest {
|
||||
private RussianLetterDecoderEncoder decoderEncoder;
|
||||
@ -40,12 +41,12 @@ public class RussianLetterDecoderEncoderTest {
|
||||
@Test
|
||||
public void testShouldPreserverStringComporision() throws IOException {
|
||||
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/decoder-test-monotonic.txt");
|
||||
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
|
||||
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
|
||||
String s = bufferedReader.readLine();
|
||||
while (s != null) {
|
||||
String[] qa = s.trim().split(" ");
|
||||
if (qa[0].length() <= RussianLetterDecoderEncoder.WORD_PART_LENGHT && qa[1].length() <= RussianLetterDecoderEncoder.WORD_PART_LENGHT) {
|
||||
assertThat(decoderEncoder.encode(qa[1]) > decoderEncoder.encode(qa[0]), equalTo(true));
|
||||
MatcherAssert.assertThat(decoderEncoder.encode(qa[1]) > decoderEncoder.encode(qa[0]), equalTo(true));
|
||||
}
|
||||
s = bufferedReader.readLine();
|
||||
}
|
||||
@ -55,13 +56,13 @@ public class RussianLetterDecoderEncoderTest {
|
||||
@Test
|
||||
public void testShouldCorrectDecodeEncode() throws IOException {
|
||||
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/decoder-test-data.txt");
|
||||
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
|
||||
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
|
||||
String s = bufferedReader.readLine();
|
||||
while (s != null) {
|
||||
String[] qa = s.trim().split(" ");
|
||||
if (qa[0].length() <= RussianLetterDecoderEncoder.WORD_PART_LENGHT) {
|
||||
Integer encodedSuffix = decoderEncoder.encode(qa[0]);
|
||||
assertThat(decoderEncoder.decode(encodedSuffix), equalTo(qa[1]));
|
||||
MatcherAssert.assertThat(decoderEncoder.decode(encodedSuffix), equalTo(qa[1]));
|
||||
}
|
||||
s = bufferedReader.readLine();
|
||||
}
|
||||
@ -70,12 +71,12 @@ public class RussianLetterDecoderEncoderTest {
|
||||
@Test
|
||||
public void testShouldCorrectDecodeEncodeStringToArray() throws IOException {
|
||||
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/decoder-test-data-for-array.txt");
|
||||
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
|
||||
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
|
||||
String s = bufferedReader.readLine();
|
||||
while (s != null) {
|
||||
String[] qa = s.trim().split(" ");
|
||||
int[] ecodedSuffix = decoderEncoder.encodeToArray(qa[0]);
|
||||
assertThat(decoderEncoder.decodeArray(ecodedSuffix), equalTo(qa[1]));
|
||||
MatcherAssert.assertThat(decoderEncoder.decodeArray(ecodedSuffix), equalTo(qa[1]));
|
||||
s = bufferedReader.readLine();
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user