Update to JDK 11, fix migration issues, some typos, and deprecated APIs; bump Lucene to 8.11

This commit is contained in:
Konstantin Perikov
2021-12-10 18:15:47 +00:00
parent b2fec940d0
commit 96900e334e
32 changed files with 184 additions and 198 deletions

View File

@ -6,7 +6,6 @@
<version>1.5</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<groupId>org.apache.lucene.morphology</groupId>
<artifactId>russian</artifactId>
<name>russian</name>
<version>1.5</version>
@ -23,7 +22,7 @@
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.1</version>
<version>${junit.version}</version>
<scope>test</scope>
</dependency>

View File

@ -20,7 +20,6 @@ import org.apache.lucene.morphology.LetterDecoderEncoder;
import org.apache.lucene.morphology.SuffixToLongException;
import org.apache.lucene.morphology.WrongCharaterException;
import java.util.ArrayList;
import java.util.LinkedList;
/**
@ -42,7 +41,7 @@ public class RussianLetterDecoderEncoder implements LetterDecoderEncoder {
throw new SuffixToLongException("Suffix length should not be greater then " + WORD_PART_LENGHT + " " + string);
int result = 0;
for (int i = 0; i < string.length(); i++) {
int c = 0 + string.charAt(i) - RUSSIAN_SMALL_LETTER_OFFSET;
int c = string.charAt(i) - RUSSIAN_SMALL_LETTER_OFFSET;
if (c == 45 - RUSSIAN_SMALL_LETTER_OFFSET) {
c = DASH_CODE;
}
@ -58,7 +57,7 @@ public class RussianLetterDecoderEncoder implements LetterDecoderEncoder {
}
public int[] encodeToArray(String s) {
LinkedList<Integer> integers = new LinkedList<Integer>();
LinkedList<Integer> integers = new LinkedList<>();
while (s.length() > WORD_PART_LENGHT) {
integers.add(encode(s.substring(0, WORD_PART_LENGHT)));
s = s.substring(WORD_PART_LENGHT);
@ -74,16 +73,16 @@ public class RussianLetterDecoderEncoder implements LetterDecoderEncoder {
}
public String decodeArray(int[] array) {
String result = "";
StringBuilder result = new StringBuilder();
for (int i : array) {
result += decode(i);
result.append(decode(i));
}
return result;
return result.toString();
}
public String decode(Integer suffixN) {
String result = "";
StringBuilder result = new StringBuilder();
while (suffixN > 33) {
int c = suffixN % 34 + RUSSIAN_SMALL_LETTER_OFFSET;
if (c == RUSSIAN_SMALL_LETTER_OFFSET) {
@ -91,21 +90,20 @@ public class RussianLetterDecoderEncoder implements LetterDecoderEncoder {
continue;
}
if (c == DASH_CODE + RUSSIAN_SMALL_LETTER_OFFSET) c = DASH_CHAR;
result = (char) c + result;
result.insert(0, (char) c);
suffixN /= 34;
}
long c = suffixN + RUSSIAN_SMALL_LETTER_OFFSET;
if (c == DASH_CODE + RUSSIAN_SMALL_LETTER_OFFSET) c = DASH_CHAR;
result = (char) c + result;
return result;
result.insert(0, (char) c);
return result.toString();
}
public boolean checkCharacter(char c) {
int code = 0 + c;
int code = c;
if (code == 45) return true;
code -= RUSSIAN_SMALL_LETTER_OFFSET;
if (code > 0 && code < 33) return true;
return false;
return code > 0 && code < 33;
}
public boolean checkString(String word) {

View File

@ -17,6 +17,7 @@ package org.apache.lucene.morphology.russian;
import org.apache.lucene.morphology.SuffixToLongException;
import org.apache.lucene.morphology.WrongCharaterException;
import org.hamcrest.MatcherAssert;
import org.junit.Before;
import org.junit.Test;
@ -24,9 +25,9 @@ import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import static org.hamcrest.core.IsEqual.equalTo;
import static org.junit.Assert.assertThat;
public class RussianLetterDecoderEncoderTest {
private RussianLetterDecoderEncoder decoderEncoder;
@ -40,12 +41,12 @@ public class RussianLetterDecoderEncoderTest {
@Test
public void testShouldPreserverStringComporision() throws IOException {
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/decoder-test-monotonic.txt");
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
String s = bufferedReader.readLine();
while (s != null) {
String[] qa = s.trim().split(" ");
if (qa[0].length() <= RussianLetterDecoderEncoder.WORD_PART_LENGHT && qa[1].length() <= RussianLetterDecoderEncoder.WORD_PART_LENGHT) {
assertThat(decoderEncoder.encode(qa[1]) > decoderEncoder.encode(qa[0]), equalTo(true));
MatcherAssert.assertThat(decoderEncoder.encode(qa[1]) > decoderEncoder.encode(qa[0]), equalTo(true));
}
s = bufferedReader.readLine();
}
@ -55,13 +56,13 @@ public class RussianLetterDecoderEncoderTest {
@Test
public void testShouldCorrectDecodeEncode() throws IOException {
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/decoder-test-data.txt");
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
String s = bufferedReader.readLine();
while (s != null) {
String[] qa = s.trim().split(" ");
if (qa[0].length() <= RussianLetterDecoderEncoder.WORD_PART_LENGHT) {
Integer encodedSuffix = decoderEncoder.encode(qa[0]);
assertThat(decoderEncoder.decode(encodedSuffix), equalTo(qa[1]));
MatcherAssert.assertThat(decoderEncoder.decode(encodedSuffix), equalTo(qa[1]));
}
s = bufferedReader.readLine();
}
@ -70,12 +71,12 @@ public class RussianLetterDecoderEncoderTest {
@Test
public void testShouldCorrectDecodeEncodeStringToArray() throws IOException {
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/decoder-test-data-for-array.txt");
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
String s = bufferedReader.readLine();
while (s != null) {
String[] qa = s.trim().split(" ");
int[] ecodedSuffix = decoderEncoder.encodeToArray(qa[0]);
assertThat(decoderEncoder.decodeArray(ecodedSuffix), equalTo(qa[1]));
MatcherAssert.assertThat(decoderEncoder.decodeArray(ecodedSuffix), equalTo(qa[1]));
s = bufferedReader.readLine();
}
}