diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml
index 71b2e6c..7bf0260 100644
--- a/.github/workflows/main.yaml
+++ b/.github/workflows/main.yaml
@@ -1,6 +1,6 @@
name: Java CI
-on: [push]
+on: [push, pull_request]
jobs:
tests:
@@ -32,4 +32,4 @@ jobs:
- uses: actions/upload-artifact@v2
with:
name: artifacts
- path: ${{ github.workspace }}/*/target/*.jar
\ No newline at end of file
+ path: ${{ github.workspace }}/*/target/*.jar
diff --git a/dictionary-reader/pom.xml b/dictionary-reader/pom.xml
index fb4d65a..abf4f2e 100644
--- a/dictionary-reader/pom.xml
+++ b/dictionary-reader/pom.xml
@@ -6,7 +6,6 @@
1.5
4.0.0
- org.apache.lucene.morphology
dictionary-reader
dictionary-reader
1.5
diff --git a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/DictionaryReader.java b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/DictionaryReader.java
index 37a4794..27d3da1 100644
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/DictionaryReader.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/DictionaryReader.java
@@ -22,20 +22,19 @@ import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
-import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
* This class contain logic how read
- * dictonary and produce word with it all forms.
+ * dictionary and produce word with all its forms.
*/
public class DictionaryReader {
private String fileName;
private String fileEncoding = "windows-1251";
- private List> wordsFlexias = new ArrayList>();
- private Set ignoredForm = new HashSet();
+ private List> wordsFlexias = new ArrayList<>();
+ private Set ignoredForm;
public DictionaryReader(String fileName, Set ignoredForm) {
this.fileName = fileName;
@@ -55,7 +54,7 @@ public class DictionaryReader {
private void readWords(BufferedReader reader, WordProcessor wordProcessor) throws IOException {
String s = reader.readLine();
- int count = Integer.valueOf(s);
+ int count = Integer.parseInt(s);
int actual = 0;
for (int i = 0; i < count; i++) {
s = reader.readLine();
@@ -79,7 +78,7 @@ public class DictionaryReader {
String wordBase = wd[0].toLowerCase();
if (wordBase.startsWith("-")) return null;
wordBase = "#".equals(wordBase) ? "" : wordBase;
- List models = wordsFlexias.get(Integer.valueOf(wd[1]));
+ List models = wordsFlexias.get(Integer.parseInt(wd[1]));
FlexiaModel flexiaModel = models.get(0);
if (models.size() == 0 || ignoredForm.contains(flexiaModel.getCode())) {
return null;
@@ -96,7 +95,7 @@ public class DictionaryReader {
private void skipBlock(BufferedReader reader) throws IOException {
String s = reader.readLine();
- int count = Integer.valueOf(s);
+ int count = Integer.parseInt(s);
for (int i = 0; i < count; i++) {
reader.readLine();
}
@@ -105,7 +104,7 @@ public class DictionaryReader {
private void readPrefix(BufferedReader reader) throws IOException {
String s = reader.readLine();
- int count = Integer.valueOf(s);
+ int count = Integer.parseInt(s);
for (int i = 0; i < count; i++) {
reader.readLine();
}
@@ -113,10 +112,10 @@ public class DictionaryReader {
private void readFlexias(BufferedReader reader) throws IOException {
String s = reader.readLine();
- int count = Integer.valueOf(s);
+ int count = Integer.parseInt(s);
for (int i = 0; i < count; i++) {
s = reader.readLine();
- ArrayList flexiaModelArrayList = new ArrayList();
+ ArrayList flexiaModelArrayList = new ArrayList<>();
wordsFlexias.add(flexiaModelArrayList);
for (String line : s.split("%")) {
addFlexia(flexiaModelArrayList, line);
diff --git a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/FlexiaModel.java b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/FlexiaModel.java
index 6906a61..510f119 100644
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/FlexiaModel.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/FlexiaModel.java
@@ -16,6 +16,8 @@
package org.apache.lucene.morphology.dictionary;
+import java.util.Objects;
+
/**
* Represent information of how word form created form it imutible part.
*/
@@ -74,11 +76,9 @@ public class FlexiaModel {
FlexiaModel that = (FlexiaModel) o;
- if (code != null ? !code.equals(that.code) : that.code != null) return false;
- if (prefix != null ? !prefix.equals(that.prefix) : that.prefix != null) return false;
- if (suffix != null ? !suffix.equals(that.suffix) : that.suffix != null) return false;
-
- return true;
+ if (!Objects.equals(code, that.code)) return false;
+ if (!Objects.equals(prefix, that.prefix)) return false;
+ return Objects.equals(suffix, that.suffix);
}
@Override
diff --git a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/GrammarReader.java b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/GrammarReader.java
index d56da6a..43f2ca2 100644
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/GrammarReader.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/GrammarReader.java
@@ -29,8 +29,8 @@ import java.util.Map;
public class GrammarReader {
private String fileName;
private String fileEncoding = "windows-1251";
- private List grammarInfo = new ArrayList();
- private Map inverseIndex = new HashMap();
+ private List grammarInfo = new ArrayList<>();
+ private Map inverseIndex = new HashMap<>();
public GrammarReader(String fileName) throws IOException {
this.fileName = fileName;
@@ -50,7 +50,7 @@ public class GrammarReader {
line = line.trim();
if (!line.startsWith("//") && line.length() > 0) {
String[] strings = line.split(" ", 2);
- Integer i = grammarInfo.size();
+ int i = grammarInfo.size();
inverseIndex.put(strings[0], i);
grammarInfo.add(i, strings[1]);
}
@@ -63,7 +63,7 @@ public class GrammarReader {
}
public String[] getGrammarInfoAsArray() {
- return grammarInfo.toArray(new String[grammarInfo.size()]);
+ return grammarInfo.toArray(new String[0]);
}
public Map getGrammarInverseIndex() {
diff --git a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/RemoveFlexiaWithPrefixes.java b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/RemoveFlexiaWithPrefixes.java
index 612896f..cf96823 100644
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/RemoveFlexiaWithPrefixes.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/RemoveFlexiaWithPrefixes.java
@@ -15,7 +15,7 @@
*/
package org.apache.lucene.morphology.dictionary;
-import java.util.Arrays;
+import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
@@ -29,7 +29,7 @@ public class RemoveFlexiaWithPrefixes extends WordFilter {
@Override
public List transform(WordCard wordCard) {
- List flexiaModelsToRemove = new LinkedList();
+ List flexiaModelsToRemove = new LinkedList<>();
for (FlexiaModel fm : wordCard.getWordsForms()) {
if (fm.getPrefix().length() > 0) {
flexiaModelsToRemove.add(fm);
@@ -39,6 +39,6 @@ public class RemoveFlexiaWithPrefixes extends WordFilter {
wordCard.removeFlexia(fm);
}
- return new LinkedList(Arrays.asList(wordCard));
+ return new LinkedList<>(Collections.singletonList(wordCard));
}
}
diff --git a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/RussianAdvSplitterFilter.java b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/RussianAdvSplitterFilter.java
index 6818190..cf07b77 100644
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/RussianAdvSplitterFilter.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/RussianAdvSplitterFilter.java
@@ -32,13 +32,13 @@ public class RussianAdvSplitterFilter extends WordFilter {
@Override
public List transform(WordCard wordCard) {
- LinkedList result = new LinkedList();
+ LinkedList result = new LinkedList<>();
result.add(wordCard);
String baseWord = "";
String canonicalForm = "";
String canonicalSuffix = "";
- List flexiaModels = new LinkedList();
+ List flexiaModels = new LinkedList<>();
for (FlexiaModel flexiaModel : wordCard.getWordsForms()) {
if (flexiaModel.getPrefix().length() > 0) {
flexiaModels.add(new FlexiaModel(flexiaModel.getCode(), flexiaModel.getSuffix(), ""));
diff --git a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/StatisticsCollector.java b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/StatisticsCollector.java
index be65edc..9cf3882 100644
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/StatisticsCollector.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/StatisticsCollector.java
@@ -27,9 +27,9 @@ import java.util.*;
//todo made refactoring this class
public class StatisticsCollector implements WordProcessor {
- private TreeMap> inverseIndex = new TreeMap>();
- private Map, Integer> ruleInverseIndex = new HashMap, Integer>();
- private List> rules = new ArrayList>();
+ private TreeMap> inverseIndex = new TreeMap<>();
+ private Map, Integer> ruleInverseIndex = new HashMap<>();
+ private List> rules = new ArrayList<>();
private GrammarReader grammarReader;
private LetterDecoderEncoder decoderEncoder;
@@ -39,18 +39,14 @@ public class StatisticsCollector implements WordProcessor {
this.decoderEncoder = decoderEncoder;
}
- public void process(WordCard wordCard) throws IOException {
+ public void process(WordCard wordCard) {
cleanWordCard(wordCard);
String normalStringMorph = wordCard.getWordsForms().get(0).getCode();
for (FlexiaModel fm : wordCard.getWordsForms()) {
Heuristic heuristic = createEvristic(wordCard.getBase(), wordCard.getCanonicalSuffix(), fm, normalStringMorph);
String form = revertWord(fm.create(wordCard.getBase()));
- Set suffixHeuristics = inverseIndex.get(form);
- if (suffixHeuristics == null) {
- suffixHeuristics = new HashSet();
- inverseIndex.put(form, suffixHeuristics);
- }
+ Set suffixHeuristics = inverseIndex.computeIfAbsent(form, k -> new HashSet<>());
suffixHeuristics.add(heuristic);
}
}
@@ -69,7 +65,7 @@ public class StatisticsCollector implements WordProcessor {
public void saveHeuristic(String fileName) throws IOException {
- Map dist = new TreeMap();
+ Map dist = new TreeMap<>();
Set prevSet = null;
int count = 0;
for (String key : inverseIndex.keySet()) {
@@ -120,11 +116,11 @@ public class StatisticsCollector implements WordProcessor {
}
private String revertWord(String s) {
- String result = "";
+ StringBuilder result = new StringBuilder();
for (int i = 1; i <= s.length(); i++) {
- result += s.charAt(s.length() - i);
+ result.append(s.charAt(s.length() - i));
}
- return result;
+ return result.toString();
}
@@ -132,15 +128,15 @@ public class StatisticsCollector implements WordProcessor {
String form = fm.create(wordBase);
String normalForm = wordBase + canonicalSuffix;
Integer length = getCommonLength(form, normalForm);
- Integer actualSuffixLengh = form.length() - length;
+ int actualSuffixLengh = form.length() - length;
String actualNormalSuffix = normalForm.substring(length);
Integer integer = grammarReader.getGrammarInverseIndex().get(fm.getCode());
Integer nf = grammarReader.getGrammarInverseIndex().get(normalSuffixForm);
- return new Heuristic((byte) actualSuffixLengh.intValue(), actualNormalSuffix, (short) integer.intValue(), (short) nf.intValue());
+ return new Heuristic((byte) actualSuffixLengh, actualNormalSuffix, (short) integer.intValue(), (short) nf.intValue());
}
public static Integer getCommonLength(String s1, String s2) {
- Integer length = Math.min(s1.length(), s2.length());
+ int length = Math.min(s1.length(), s2.length());
for (int i = 0; i < length; i++) {
if (s1.charAt(i) != s2.charAt(i)) return i;
}
diff --git a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordCard.java b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordCard.java
index 7d10229..2f958a5 100644
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordCard.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordCard.java
@@ -26,7 +26,7 @@ public class WordCard {
private String canonicalForm;
private String base;
private String canonicalSuffix;
- private List wordsForms = new ArrayList();
+ private List wordsForms = new ArrayList<>();
public WordCard(String canonicalForm, String base, String canonicalSuffix) {
this.canonicalForm = canonicalForm;
diff --git a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordCleaner.java b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordCleaner.java
index 4e6ae45..6fa6484 100644
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordCleaner.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordCleaner.java
@@ -17,7 +17,6 @@ package org.apache.lucene.morphology.dictionary;
import org.apache.lucene.morphology.LetterDecoderEncoder;
-import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
@@ -38,7 +37,7 @@ public class WordCleaner extends WordFilter {
if (word.contains("-")) return Collections.emptyList();
if (!decoderEncoder.checkString(word)) return Collections.emptyList();
- List flexiaModelsToRemove = new LinkedList();
+ List flexiaModelsToRemove = new LinkedList<>();
for (FlexiaModel fm : wordCard.getWordsForms()) {
if (!decoderEncoder.checkString(fm.create(wordCard.getBase())) || fm.create(wordCard.getBase()).contains("-")) {
flexiaModelsToRemove.add(fm);
@@ -48,6 +47,6 @@ public class WordCleaner extends WordFilter {
wordCard.removeFlexia(fm);
}
- return new LinkedList(Arrays.asList(wordCard));
+ return new LinkedList<>(Collections.singletonList(wordCard));
}
}
diff --git a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordProcessor.java b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordProcessor.java
index 709bc7a..69cd9fe 100644
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordProcessor.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordProcessor.java
@@ -23,5 +23,5 @@ import java.io.IOException;
*/
public interface WordProcessor {
- public void process(WordCard wordCard) throws IOException;
+ void process(WordCard wordCard) throws IOException;
}
diff --git a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordStringCleaner.java b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordStringCleaner.java
index d6f2c2e..3abb28c 100644
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordStringCleaner.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/dictionary/WordStringCleaner.java
@@ -17,7 +17,7 @@ package org.apache.lucene.morphology.dictionary;
import org.apache.lucene.morphology.LetterDecoderEncoder;
-import java.util.Arrays;
+import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
@@ -42,7 +42,7 @@ public class WordStringCleaner extends WordFilter {
//made correct code
m.setCode(m.getCode().substring(0, 2));
}
- return new LinkedList(Arrays.asList(wordCard));
+ return new LinkedList<>(Collections.singletonList(wordCard));
}
diff --git a/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/EnglishHeuristicBuilder.java b/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/EnglishHeuristicBuilder.java
index db65d15..5370d31 100644
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/EnglishHeuristicBuilder.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/EnglishHeuristicBuilder.java
@@ -29,7 +29,7 @@ public class EnglishHeuristicBuilder {
GrammarReader grammarInfo = new GrammarReader("dictonary/Dicts/Morph/egramtab.tab");
EnglishLetterDecoderEncoder decoderEncoder = new EnglishLetterDecoderEncoder();
- DictionaryReader dictionaryReader = new DictionaryReader("dictonary/Dicts/SrcMorph/EngSrc/morphs.mrd", new HashSet());
+ DictionaryReader dictionaryReader = new DictionaryReader("dictonary/Dicts/SrcMorph/EngSrc/morphs.mrd", new HashSet<>());
StatisticsCollector statisticsCollector = new StatisticsCollector(grammarInfo, decoderEncoder);
WordCleaner wordCleaner = new WordCleaner(decoderEncoder, statisticsCollector);
@@ -39,4 +39,4 @@ public class EnglishHeuristicBuilder {
statisticsCollector.saveHeuristic("english/src/main/resources/org/apache/lucene/morphology/english/morph.info");
}
-}
\ No newline at end of file
+}
diff --git a/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/RussianHeuristicBuilder.java b/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/RussianHeuristicBuilder.java
index 9c68d88..2d1273d 100644
--- a/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/RussianHeuristicBuilder.java
+++ b/dictionary-reader/src/main/java/org/apache/lucene/morphology/generator/RussianHeuristicBuilder.java
@@ -28,7 +28,7 @@ public class RussianHeuristicBuilder {
GrammarReader grammarInfo = new GrammarReader("dictonary/Dicts/Morph/rgramtab.tab");
RussianLetterDecoderEncoder decoderEncoder = new RussianLetterDecoderEncoder();
- DictionaryReader dictionaryReader = new DictionaryReader("dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd", new HashSet());
+ DictionaryReader dictionaryReader = new DictionaryReader("dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd", new HashSet<>());
StatisticsCollector statisticsCollector = new StatisticsCollector(grammarInfo, decoderEncoder);
WordCleaner wordCleaner = new WordCleaner(decoderEncoder, statisticsCollector);
diff --git a/dictionary-reader/src/test/java/org/apache/lucene/TestAllWords.java b/dictionary-reader/src/test/java/org/apache/lucene/TestAllWords.java
index 7ab67db..f58e5d9 100644
--- a/dictionary-reader/src/test/java/org/apache/lucene/TestAllWords.java
+++ b/dictionary-reader/src/test/java/org/apache/lucene/TestAllWords.java
@@ -23,6 +23,7 @@ import org.apache.lucene.morphology.english.EnglishMorphology;
import org.apache.lucene.morphology.russian.RussianLetterDecoderEncoder;
import org.apache.lucene.morphology.russian.RussianLuceneMorphology;
import org.apache.lucene.morphology.russian.RussianMorphology;
+import org.hamcrest.MatcherAssert;
import org.junit.Before;
import org.junit.Test;
@@ -33,7 +34,6 @@ import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import static org.hamcrest.Matchers.hasItem;
-import static org.junit.Assert.assertThat;
public class TestAllWords {
@@ -73,21 +73,19 @@ public class TestAllWords {
final List morphInfo = grammarInfo.getGrammarInfo();
final Map inversIndex = grammarInfo.getGrammarInverseIndex();
- DictionaryReader dictionaryReader = new DictionaryReader(pathToDict, new HashSet());
+ DictionaryReader dictionaryReader = new DictionaryReader(pathToDict, new HashSet<>());
final AtomicLong wordCount = new AtomicLong(0);
- Long startTime = System.currentTimeMillis();
+ long startTime = System.currentTimeMillis();
- WordProcessor wordProcessor = new WordProcessor() {
- public void process(WordCard wordCard) throws IOException {
- String word = wordCard.getBase() + wordCard.getCanonicalSuffix();
- for (FlexiaModel fm : wordCard.getWordsForms()) {
- String wordForm = wordCard.getBase() + fm.getSuffix();
- String morph = morphInfo.get(inversIndex.get(fm.getCode()));
- assertThat(morphology.getMorphInfo(wordForm), hasItem(word + "|" + morph));
- assertThat(morphology.getNormalForms(wordForm), hasItem(word));
- wordCount.set(2L + wordCount.get());
- }
+ WordProcessor wordProcessor = wordCard -> {
+ String word = wordCard.getBase() + wordCard.getCanonicalSuffix();
+ for (FlexiaModel fm : wordCard.getWordsForms()) {
+ String wordForm = wordCard.getBase() + fm.getSuffix();
+ String morph = morphInfo.get(inversIndex.get(fm.getCode()));
+ MatcherAssert.assertThat(morphology.getMorphInfo(wordForm), hasItem(word + "|" + morph));
+ MatcherAssert.assertThat(morphology.getNormalForms(wordForm), hasItem(word));
+ wordCount.set(2L + wordCount.get());
}
};
@@ -123,17 +121,15 @@ public class TestAllWords {
private void testAllWordForLucene(final LuceneMorphology morphology, LetterDecoderEncoder decoderEncoder, String pathToDic) throws IOException {
final AtomicLong wordCount = new AtomicLong(0);
- Long startTime = System.currentTimeMillis();
+ long startTime = System.currentTimeMillis();
- DictionaryReader dictionaryReader = new DictionaryReader(pathToDic, new HashSet());
- WordProcessor wordProcessor = new WordProcessor() {
- public void process(WordCard wordCard) throws IOException {
- String word = wordCard.getBase() + wordCard.getCanonicalSuffix();
- for (FlexiaModel fm : wordCard.getWordsForms()) {
- String wordForm = wordCard.getBase() + fm.getSuffix();
- assertThat(morphology.getNormalForms(wordForm), hasItem(word));
- wordCount.set(1L + wordCount.get());
- }
+ DictionaryReader dictionaryReader = new DictionaryReader(pathToDic, new HashSet<>());
+ WordProcessor wordProcessor = wordCard -> {
+ String word = wordCard.getBase() + wordCard.getCanonicalSuffix();
+ for (FlexiaModel fm : wordCard.getWordsForms()) {
+ String wordForm = wordCard.getBase() + fm.getSuffix();
+ MatcherAssert.assertThat(morphology.getNormalForms(wordForm), hasItem(word));
+ wordCount.set(1L + wordCount.get());
}
};
diff --git a/dictionary-reader/src/test/java/org/apache/lucene/morphology/AnalyzersTest.java b/dictionary-reader/src/test/java/org/apache/lucene/morphology/AnalyzersTest.java
index 02475ae..727b8c9 100644
--- a/dictionary-reader/src/test/java/org/apache/lucene/morphology/AnalyzersTest.java
+++ b/dictionary-reader/src/test/java/org/apache/lucene/morphology/AnalyzersTest.java
@@ -31,9 +31,11 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.morphology.english.EnglishLuceneMorphology;
import org.apache.lucene.morphology.russian.RussianAnalyzer;
import org.apache.lucene.morphology.russian.RussianLuceneMorphology;
+import org.hamcrest.MatcherAssert;
import org.junit.Test;
import java.io.*;
+import java.nio.charset.StandardCharsets;
import java.util.*;
import static org.hamcrest.Matchers.equalTo;
@@ -65,24 +67,24 @@ public class AnalyzersTest extends BaseTokenStreamTestCase {
LuceneMorphology englishLuceneMorphology = new EnglishLuceneMorphology();
MorphologyAnalyzer russianAnalyzer = new MorphologyAnalyzer(russianLuceneMorphology);
- InputStreamReader reader = new InputStreamReader(new ByteArrayInputStream("тест пм тест".getBytes()), "UTF-8");
+ InputStreamReader reader = new InputStreamReader(new ByteArrayInputStream("тест пм тест".getBytes()), StandardCharsets.UTF_8);
TokenStream stream = russianAnalyzer.tokenStream(null, reader);
MorphologyFilter englishFilter = new MorphologyFilter(stream, englishLuceneMorphology);
englishFilter.reset();
while (englishFilter.incrementToken()) {
- System.out.println(englishFilter.toString());
+ System.out.println(englishFilter);
}
}
@Test
public void shouldProvideCorrectIndentForWordWithMelitaForm() throws IOException {
Analyzer morphlogyAnalyzer = new RussianAnalyzer();
- InputStreamReader reader = new InputStreamReader(new ByteArrayInputStream("принеси мне вина на новый год".getBytes()), "UTF-8");
+ InputStreamReader reader = new InputStreamReader(new ByteArrayInputStream("принеси мне вина на новый год".getBytes()), StandardCharsets.UTF_8);
TokenStream tokenStream = morphlogyAnalyzer.tokenStream(null, reader);
tokenStream.reset();
- Set foromsOfWine = new HashSet();
+ Set foromsOfWine = new HashSet<>();
foromsOfWine.add("вина");
foromsOfWine.add("винo");
boolean wordSeen = false;
@@ -90,7 +92,7 @@ public class AnalyzersTest extends BaseTokenStreamTestCase {
CharTermAttribute charTerm = tokenStream.getAttribute(CharTermAttribute.class);
PositionIncrementAttribute position = tokenStream.getAttribute(PositionIncrementAttribute.class);
if(foromsOfWine.contains(charTerm.toString()) && wordSeen){
- assertThat(position.getPositionIncrement(),equalTo(0));
+ MatcherAssert.assertThat(position.getPositionIncrement(),equalTo(0));
}
if(foromsOfWine.contains(charTerm.toString())){
wordSeen = true;
@@ -100,18 +102,18 @@ public class AnalyzersTest extends BaseTokenStreamTestCase {
private void testAnalayzer(Analyzer morphlogyAnalyzer, String answerPath, String testPath) throws IOException {
InputStream stream = this.getClass().getResourceAsStream(answerPath);
- BufferedReader breader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
+ BufferedReader breader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
String[] strings = breader.readLine().replaceAll(" +", " ").trim().split(" ");
- HashSet answer = new HashSet(Arrays.asList(strings));
+ HashSet answer = new HashSet<>(Arrays.asList(strings));
stream.close();
stream = this.getClass().getResourceAsStream(testPath);
- InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
+ InputStreamReader reader = new InputStreamReader(stream, StandardCharsets.UTF_8);
TokenStream tokenStream = morphlogyAnalyzer.tokenStream(null, reader);
tokenStream.reset();
- HashSet result = new HashSet();
+ HashSet result = new HashSet<>();
while (tokenStream.incrementToken()) {
CharTermAttribute attribute1 = tokenStream.getAttribute(CharTermAttribute.class);
result.add(attribute1.toString());
@@ -119,7 +121,7 @@ public class AnalyzersTest extends BaseTokenStreamTestCase {
stream.close();
- assertThat(result, equalTo(answer));
+ MatcherAssert.assertThat(result, equalTo(answer));
}
@Test
diff --git a/dictionary-reader/src/test/java/org/apache/lucene/morphology/LuceneMorphTest.java b/dictionary-reader/src/test/java/org/apache/lucene/morphology/LuceneMorphTest.java
index 838a7e7..c6d5899 100644
--- a/dictionary-reader/src/test/java/org/apache/lucene/morphology/LuceneMorphTest.java
+++ b/dictionary-reader/src/test/java/org/apache/lucene/morphology/LuceneMorphTest.java
@@ -17,19 +17,20 @@ package org.apache.lucene.morphology;
import org.apache.lucene.morphology.russian.RussianLuceneMorphology;
import org.apache.lucene.morphology.english.EnglishLuceneMorphology;
+import org.hamcrest.MatcherAssert;
import org.junit.Test;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import static org.hamcrest.CoreMatchers.equalTo;
-import static org.junit.Assert.assertThat;
public class LuceneMorphTest {
@@ -52,14 +53,13 @@ public class LuceneMorphTest {
private void testMorphology(LuceneMorphology luceneMorph, String pathToTestData) throws IOException {
InputStream stream = this.getClass().getResourceAsStream(pathToTestData);
- BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
+ BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
String s = bufferedReader.readLine();
while (s != null) {
String[] qa = s.trim().split(" ");
- Set result = new HashSet();
- result.addAll(Arrays.asList(qa).subList(1, qa.length));
- Set stringList = new HashSet(luceneMorph.getNormalForms(qa[0]));
- assertThat(stringList, equalTo(result));
+ Set result = new HashSet<>(Arrays.asList(qa).subList(1, qa.length));
+ Set stringList = new HashSet<>(luceneMorph.getNormalForms(qa[0]));
+ MatcherAssert.assertThat(stringList, equalTo(result));
s = bufferedReader.readLine();
}
}
diff --git a/english/pom.xml b/english/pom.xml
index 480eedb..64bac77 100644
--- a/english/pom.xml
+++ b/english/pom.xml
@@ -6,7 +6,6 @@
1.5
4.0.0
- org.apache.lucene.morphology
english
english
1.5
@@ -20,4 +19,4 @@
-
\ No newline at end of file
+
diff --git a/english/src/main/java/org/apache/lucene/morphology/english/EnglishLetterDecoderEncoder.java b/english/src/main/java/org/apache/lucene/morphology/english/EnglishLetterDecoderEncoder.java
index 46b0f85..9f12a9f 100644
--- a/english/src/main/java/org/apache/lucene/morphology/english/EnglishLetterDecoderEncoder.java
+++ b/english/src/main/java/org/apache/lucene/morphology/english/EnglishLetterDecoderEncoder.java
@@ -32,7 +32,7 @@ public class EnglishLetterDecoderEncoder implements LetterDecoderEncoder {
if (string.length() > 6) throw new SuffixToLongException("Suffix length should not be greater then " + 12);
int result = 0;
for (int i = 0; i < string.length(); i++) {
- int c = 0 + string.charAt(i) - ENGLISH_SMALL_LETTER_OFFSET;
+ int c = string.charAt(i) - ENGLISH_SMALL_LETTER_OFFSET;
if (c == 45 - ENGLISH_SMALL_LETTER_OFFSET) {
c = DASH_CODE;
}
@@ -48,7 +48,7 @@ public class EnglishLetterDecoderEncoder implements LetterDecoderEncoder {
public int[] encodeToArray(String s) {
- ArrayList integers = new ArrayList();
+ ArrayList integers = new ArrayList<>();
while (s.length() > 6) {
integers.add(encode(s.substring(0, 6)));
s = s.substring(6);
@@ -64,16 +64,16 @@ public class EnglishLetterDecoderEncoder implements LetterDecoderEncoder {
}
public String decodeArray(int[] array) {
- String result = "";
+ StringBuilder result = new StringBuilder();
for (int i : array) {
- result += decode(i);
+ result.append(decode(i));
}
- return result;
+ return result.toString();
}
public String decode(Integer suffixN) {
- String result = "";
+ StringBuilder result = new StringBuilder();
while (suffixN > 27) {
int c = suffixN % 28 + ENGLISH_SMALL_LETTER_OFFSET;
if (c == ENGLISH_SMALL_LETTER_OFFSET) {
@@ -81,21 +81,20 @@ public class EnglishLetterDecoderEncoder implements LetterDecoderEncoder {
continue;
}
if (c == DASH_CODE + ENGLISH_SMALL_LETTER_OFFSET) c = DASH_CHAR;
- result = (char) c + result;
+ result.insert(0, (char) c);
suffixN /= 28;
}
long c = suffixN + ENGLISH_SMALL_LETTER_OFFSET;
if (c == DASH_CODE + ENGLISH_SMALL_LETTER_OFFSET) c = DASH_CHAR;
- result = (char) c + result;
- return result;
+ result.insert(0, (char) c);
+ return result.toString();
}
public boolean checkCharacter(char c) {
- int code = 0 + c;
+ int code = c;
if (code == 45) return true;
code -= ENGLISH_SMALL_LETTER_OFFSET;
- if (code > 0 && code < 27) return true;
- return false;
+ return code > 0 && code < 27;
}
diff --git a/english/src/test/java/org/apache/lucene/morphology/english/EnglishLetterDecoderEncoderTest.java b/english/src/test/java/org/apache/lucene/morphology/english/EnglishLetterDecoderEncoderTest.java
index bc6f872..55c0aa4 100644
--- a/english/src/test/java/org/apache/lucene/morphology/english/EnglishLetterDecoderEncoderTest.java
+++ b/english/src/test/java/org/apache/lucene/morphology/english/EnglishLetterDecoderEncoderTest.java
@@ -16,7 +16,8 @@
package org.apache.lucene.morphology.english;
import static org.hamcrest.core.IsEqual.equalTo;
-import static org.junit.Assert.assertThat;
+
+import org.hamcrest.MatcherAssert;
import org.junit.Before;
@@ -30,11 +31,11 @@ public class EnglishLetterDecoderEncoderTest {
@org.junit.Test
public void testDecodeEncodeToArray() {
- assertThat(decoderEncoder.decodeArray(decoderEncoder.encodeToArray("abcdefghijklmnopqrstuvwxyz")), equalTo("abcdefghijklmnopqrstuvwxyz"));
- assertThat(decoderEncoder.decodeArray(decoderEncoder.encodeToArray("xyz")), equalTo("xyz"));
- assertThat(decoderEncoder.decodeArray(decoderEncoder.encodeToArray("ytrrty")), equalTo("ytrrty"));
- assertThat(decoderEncoder.decodeArray(decoderEncoder.encodeToArray("ytrrtyz")), equalTo("ytrrtyz"));
- assertThat(decoderEncoder.decodeArray(decoderEncoder.encodeToArray("ytrrtyzqwqwe")), equalTo("ytrrtyzqwqwe"));
+ MatcherAssert.assertThat(decoderEncoder.decodeArray(decoderEncoder.encodeToArray("abcdefghijklmnopqrstuvwxyz")), equalTo("abcdefghijklmnopqrstuvwxyz"));
+ MatcherAssert.assertThat(decoderEncoder.decodeArray(decoderEncoder.encodeToArray("xyz")), equalTo("xyz"));
+ MatcherAssert.assertThat(decoderEncoder.decodeArray(decoderEncoder.encodeToArray("ytrrty")), equalTo("ytrrty"));
+ MatcherAssert.assertThat(decoderEncoder.decodeArray(decoderEncoder.encodeToArray("ytrrtyz")), equalTo("ytrrtyz"));
+ MatcherAssert.assertThat(decoderEncoder.decodeArray(decoderEncoder.encodeToArray("ytrrtyzqwqwe")), equalTo("ytrrtyzqwqwe"));
}
}
diff --git a/english/src/test/java/org/apache/lucene/morphology/english/stemmer/EnglishStemmerTest.java b/english/src/test/java/org/apache/lucene/morphology/english/stemmer/EnglishStemmerTest.java
index 7899279..134857c 100644
--- a/english/src/test/java/org/apache/lucene/morphology/english/stemmer/EnglishStemmerTest.java
+++ b/english/src/test/java/org/apache/lucene/morphology/english/stemmer/EnglishStemmerTest.java
@@ -16,9 +16,9 @@
package org.apache.lucene.morphology.english.stemmer;
import org.apache.lucene.morphology.english.EnglishLuceneMorphology;
+import org.hamcrest.MatcherAssert;
import org.junit.Test;
import static org.hamcrest.core.IsEqual.equalTo;
-import static org.junit.Assert.assertThat;
public class EnglishStemmerTest {
@@ -26,24 +26,24 @@ public class EnglishStemmerTest {
public void testGetStemmedWord() throws Exception {
EnglishLuceneMorphology englishLuceneMorphology = new EnglishLuceneMorphology();
EnglishStemmer englishStemmer = new EnglishStemmer(englishLuceneMorphology);
- assertThat(englishStemmer.getStemmedWord("running"),equalTo("run"));
- assertThat(englishStemmer.getStemmedWord("run"),equalTo("run"));
- assertThat(englishStemmer.getStemmedWord("killed"),equalTo("kill"));
- assertThat(englishStemmer.getStemmedWord("kill"),equalTo("kill"));
- assertThat(englishStemmer.getStemmedWord("networking"),equalTo("network"));
- assertThat(englishStemmer.getStemmedWord("network"),equalTo("network"));
- assertThat(englishStemmer.getStemmedWord("statistics"),equalTo("statistic"));
- assertThat(englishStemmer.getStemmedWord("statistic"),equalTo("statistic"));
- assertThat(englishStemmer.getStemmedWord("stats"),equalTo("stat"));
- assertThat(englishStemmer.getStemmedWord("stat"),equalTo("stat"));
- assertThat(englishStemmer.getStemmedWord("countries"),equalTo("country"));
- assertThat(englishStemmer.getStemmedWord("country"),equalTo("country"));
- assertThat(englishStemmer.getStemmedWord("delete"),equalTo("delete"));
- assertThat(englishStemmer.getStemmedWord("ended"),equalTo("end"));
- assertThat(englishStemmer.getStemmedWord("end"),equalTo("end"));
- assertThat(englishStemmer.getStemmedWord("ends"),equalTo("end"));
- assertThat(englishStemmer.getStemmedWord("given"),equalTo("give"));
- assertThat(englishStemmer.getStemmedWord("give"),equalTo("give"));
- assertThat(englishStemmer.getStemmedWord("log4j"),equalTo("log4j"));
+ MatcherAssert.assertThat(englishStemmer.getStemmedWord("running"),equalTo("run"));
+ MatcherAssert.assertThat(englishStemmer.getStemmedWord("run"),equalTo("run"));
+ MatcherAssert.assertThat(englishStemmer.getStemmedWord("killed"),equalTo("kill"));
+ MatcherAssert.assertThat(englishStemmer.getStemmedWord("kill"),equalTo("kill"));
+ MatcherAssert.assertThat(englishStemmer.getStemmedWord("networking"),equalTo("network"));
+ MatcherAssert.assertThat(englishStemmer.getStemmedWord("network"),equalTo("network"));
+ MatcherAssert.assertThat(englishStemmer.getStemmedWord("statistics"),equalTo("statistic"));
+ MatcherAssert.assertThat(englishStemmer.getStemmedWord("statistic"),equalTo("statistic"));
+ MatcherAssert.assertThat(englishStemmer.getStemmedWord("stats"),equalTo("stat"));
+ MatcherAssert.assertThat(englishStemmer.getStemmedWord("stat"),equalTo("stat"));
+ MatcherAssert.assertThat(englishStemmer.getStemmedWord("countries"),equalTo("country"));
+ MatcherAssert.assertThat(englishStemmer.getStemmedWord("country"),equalTo("country"));
+ MatcherAssert.assertThat(englishStemmer.getStemmedWord("delete"),equalTo("delete"));
+ MatcherAssert.assertThat(englishStemmer.getStemmedWord("ended"),equalTo("end"));
+ MatcherAssert.assertThat(englishStemmer.getStemmedWord("end"),equalTo("end"));
+ MatcherAssert.assertThat(englishStemmer.getStemmedWord("ends"),equalTo("end"));
+ MatcherAssert.assertThat(englishStemmer.getStemmedWord("given"),equalTo("give"));
+ MatcherAssert.assertThat(englishStemmer.getStemmedWord("give"),equalTo("give"));
+ MatcherAssert.assertThat(englishStemmer.getStemmedWord("log4j"),equalTo("log4j"));
}
}
diff --git a/morph/pom.xml b/morph/pom.xml
index b3b0576..9e40c10 100644
--- a/morph/pom.xml
+++ b/morph/pom.xml
@@ -6,7 +6,6 @@
1.5
4.0.0
- org.apache.lucene.morphology
morph
morph
1.5
diff --git a/morph/src/main/java/org/apache/lucene/morphology/BaseLetterDecoderEncoder.java b/morph/src/main/java/org/apache/lucene/morphology/BaseLetterDecoderEncoder.java
index 0598d92..3bfd60a 100644
--- a/morph/src/main/java/org/apache/lucene/morphology/BaseLetterDecoderEncoder.java
+++ b/morph/src/main/java/org/apache/lucene/morphology/BaseLetterDecoderEncoder.java
@@ -21,7 +21,7 @@ import java.util.ArrayList;
public abstract class BaseLetterDecoderEncoder implements LetterDecoderEncoder {
public int[] encodeToArray(String s) {
- ArrayList integers = new ArrayList();
+ ArrayList integers = new ArrayList<>();
while (s.length() > 6) {
integers.add(encode(s.substring(0, 6)));
s = s.substring(6);
@@ -37,11 +37,11 @@ public abstract class BaseLetterDecoderEncoder implements LetterDecoderEncoder {
}
public String decodeArray(int[] array) {
- String result = "";
+ StringBuilder result = new StringBuilder();
for (int i : array) {
- result += decode(i);
+ result.append(decode(i));
}
- return result;
+ return result.toString();
}
public boolean checkString(String word) {
diff --git a/morph/src/main/java/org/apache/lucene/morphology/Heuristic.java b/morph/src/main/java/org/apache/lucene/morphology/Heuristic.java
index be8ea82..8fe5d62 100644
--- a/morph/src/main/java/org/apache/lucene/morphology/Heuristic.java
+++ b/morph/src/main/java/org/apache/lucene/morphology/Heuristic.java
@@ -16,6 +16,7 @@
package org.apache.lucene.morphology;
import java.io.Serializable;
+import java.util.Objects;
public class Heuristic implements Serializable {
@@ -26,10 +27,10 @@ public class Heuristic implements Serializable {
public Heuristic(String s) {
String[] strings = s.split("\\|");
- actualSuffixLength = Byte.valueOf(strings[0]);
+ actualSuffixLength = Byte.parseByte(strings[0]);
actualNormalSuffix = strings[1];
- formMorphInfo = Short.valueOf(strings[2]);
- normalFormMorphInfo = Short.valueOf(strings[3]);
+ formMorphInfo = Short.parseShort(strings[2]);
+ normalFormMorphInfo = Short.parseShort(strings[3]);
}
public Heuristic(byte actualSuffixLength, String actualNormalSuffix, short formMorphInfo, short normalFormMorphInfo) {
@@ -70,15 +71,12 @@ public class Heuristic implements Serializable {
if (actualSuffixLength != heuristic.actualSuffixLength) return false;
if (formMorphInfo != heuristic.formMorphInfo) return false;
if (normalFormMorphInfo != heuristic.normalFormMorphInfo) return false;
- if (actualNormalSuffix != null ? !actualNormalSuffix.equals(heuristic.actualNormalSuffix) : heuristic.actualNormalSuffix != null)
- return false;
-
- return true;
+ return Objects.equals(actualNormalSuffix, heuristic.actualNormalSuffix);
}
@Override
public int hashCode() {
- int result = (int) actualSuffixLength;
+ int result = actualSuffixLength;
result = 31 * result + (actualNormalSuffix != null ? actualNormalSuffix.hashCode() : 0);
result = 31 * result + (int) formMorphInfo;
result = 31 * result + (int) normalFormMorphInfo;
diff --git a/morph/src/main/java/org/apache/lucene/morphology/LetterDecoderEncoder.java b/morph/src/main/java/org/apache/lucene/morphology/LetterDecoderEncoder.java
index 7e186e1..91ff9ed 100644
--- a/morph/src/main/java/org/apache/lucene/morphology/LetterDecoderEncoder.java
+++ b/morph/src/main/java/org/apache/lucene/morphology/LetterDecoderEncoder.java
@@ -17,17 +17,17 @@ package org.apache.lucene.morphology;
public interface LetterDecoderEncoder {
- public Integer encode(String string);
+ Integer encode(String string);
- public int[] encodeToArray(String s);
+ int[] encodeToArray(String s);
- public String decodeArray(int[] array);
+ String decodeArray(int[] array);
- public String decode(Integer suffixN);
+ String decode(Integer suffixN);
- public boolean checkCharacter(char c);
+ boolean checkCharacter(char c);
- public boolean checkString(String word);
+ boolean checkString(String word);
- public String cleanString(String s);
+ String cleanString(String s);
}
diff --git a/morph/src/main/java/org/apache/lucene/morphology/LuceneMorphology.java b/morph/src/main/java/org/apache/lucene/morphology/LuceneMorphology.java
index 7e97b6b..96b4cc4 100644
--- a/morph/src/main/java/org/apache/lucene/morphology/LuceneMorphology.java
+++ b/morph/src/main/java/org/apache/lucene/morphology/LuceneMorphology.java
@@ -34,13 +34,13 @@ public class LuceneMorphology extends MorphologyImpl {
protected void readRules(BufferedReader bufferedReader) throws IOException {
String s;
- Integer amount;
+ int amount;
s = bufferedReader.readLine();
- amount = Integer.valueOf(s);
+ amount = Integer.parseInt(s);
rules = new Heuristic[amount][];
for (int i = 0; i < amount; i++) {
String s1 = bufferedReader.readLine();
- Integer ruleLenght = Integer.valueOf(s1);
+ int ruleLenght = Integer.parseInt(s1);
Heuristic[] heuristics = new Heuristic[ruleLenght];
for (int j = 0; j < ruleLenght; j++) {
heuristics[j] = new Heuristic(bufferedReader.readLine());
@@ -51,7 +51,7 @@ public class LuceneMorphology extends MorphologyImpl {
private Heuristic[] modeifyHeuristic(Heuristic[] heuristics) {
- ArrayList result = new ArrayList();
+ ArrayList result = new ArrayList<>();
for (Heuristic heuristic : heuristics) {
boolean isAdded = true;
for (Heuristic ch : result) {
@@ -61,7 +61,7 @@ public class LuceneMorphology extends MorphologyImpl {
result.add(heuristic);
}
}
- return result.toArray(new Heuristic[result.size()]);
+ return result.toArray(new Heuristic[0]);
}
public boolean checkString(String s) {
diff --git a/morph/src/main/java/org/apache/lucene/morphology/MorphologyImpl.java b/morph/src/main/java/org/apache/lucene/morphology/MorphologyImpl.java
index 9a12d2b..7ed7a0c 100644
--- a/morph/src/main/java/org/apache/lucene/morphology/MorphologyImpl.java
+++ b/morph/src/main/java/org/apache/lucene/morphology/MorphologyImpl.java
@@ -17,6 +17,7 @@ package org.apache.lucene.morphology;
import java.io.*;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
@@ -47,7 +48,7 @@ public class MorphologyImpl implements Morphology {
}
public List getNormalForms(String s) {
- ArrayList result = new ArrayList();
+ ArrayList result = new ArrayList<>();
int[] ints = decoderEncoder.encodeToArray(revertWord(s));
int ruleId = findRuleId(ints);
boolean notSeenEmptyString = true;
@@ -64,7 +65,7 @@ public class MorphologyImpl implements Morphology {
}
public List getMorphInfo(String s) {
- ArrayList result = new ArrayList();
+ ArrayList result = new ArrayList<>();
int[] ints = decoderEncoder.encodeToArray(revertWord(s));
int ruleId = findRuleId(ints);
for (Heuristic h : rules[rulesId[ruleId]]) {
@@ -100,14 +101,14 @@ public class MorphologyImpl implements Morphology {
private int compareToInts(int[] i1, int[] i2) {
int minLength = Math.min(i1.length, i2.length);
for (int i = 0; i < minLength; i++) {
- int i3 = i1[i] < i2[i] ? -1 : (i1[i] == i2[i] ? 0 : 1);
+ int i3 = Integer.compare(i1[i], i2[i]);
if (i3 != 0) return i3;
}
return i1.length - i2.length;
}
public void writeToFile(String fileName) throws IOException {
- OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(fileName), "UTF-8");
+ OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8);
writer.write(separators.length + "\n");
for (int[] i : separators) {
writer.write(i.length + "\n");
@@ -138,7 +139,7 @@ public class MorphologyImpl implements Morphology {
}
private void readFromInputStream(InputStream inputStream) throws IOException {
- BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"));
+ BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8));
String s = bufferedReader.readLine();
Integer amount = Integer.valueOf(s);
@@ -153,9 +154,9 @@ public class MorphologyImpl implements Morphology {
private void readGrammaInfo(BufferedReader bufferedReader) throws IOException {
String s;
- Integer amount;
+ int amount;
s = bufferedReader.readLine();
- amount = Integer.valueOf(s);
+ amount = Integer.parseInt(s);
grammarInfo = new String[amount];
for (int i = 0; i < amount; i++) {
grammarInfo[i] = bufferedReader.readLine();
@@ -164,13 +165,13 @@ public class MorphologyImpl implements Morphology {
protected void readRules(BufferedReader bufferedReader) throws IOException {
String s;
- Integer amount;
+ int amount;
s = bufferedReader.readLine();
- amount = Integer.valueOf(s);
+ amount = Integer.parseInt(s);
rules = new Heuristic[amount][];
for (int i = 0; i < amount; i++) {
String s1 = bufferedReader.readLine();
- Integer ruleLength = Integer.valueOf(s1);
+ int ruleLength = Integer.parseInt(s1);
rules[i] = new Heuristic[ruleLength];
for (int j = 0; j < ruleLength; j++) {
rules[i][j] = new Heuristic(bufferedReader.readLine());
@@ -182,7 +183,7 @@ public class MorphologyImpl implements Morphology {
rulesId = new short[amount];
for (int i = 0; i < amount; i++) {
String s1 = bufferedReader.readLine();
- rulesId[i] = Short.valueOf(s1);
+ rulesId[i] = Short.parseShort(s1);
}
}
@@ -190,10 +191,10 @@ public class MorphologyImpl implements Morphology {
separators = new int[amount][];
for (int i = 0; i < amount; i++) {
String s1 = bufferedReader.readLine();
- Integer wordLenght = Integer.valueOf(s1);
+ int wordLenght = Integer.parseInt(s1);
separators[i] = new int[wordLenght];
for (int j = 0; j < wordLenght; j++) {
- separators[i][j] = Integer.valueOf(bufferedReader.readLine());
+ separators[i][j] = Integer.parseInt(bufferedReader.readLine());
}
}
}
diff --git a/morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphologyAnalyzer.java b/morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphologyAnalyzer.java
index 26f2dcb..53401c4 100644
--- a/morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphologyAnalyzer.java
+++ b/morph/src/main/java/org/apache/lucene/morphology/analyzer/MorphologyAnalyzer.java
@@ -73,6 +73,6 @@ public class MorphologyAnalyzer extends Analyzer {
TokenFilter filter = new LowerCaseFilter(src);
filter = new MorphologyFilter(filter, luceneMorph);
- return new TokenStreamComponents(r -> src.setReader(r), filter);
+ return new TokenStreamComponents(src::setReader, filter);
}
}
diff --git a/pom.xml b/pom.xml
index e539154..ee2163c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -16,7 +16,7 @@
- 8.7.0
+ 8.11.0
1.5
4.13
@@ -46,7 +46,7 @@
org.hamcrest
hamcrest-all
- 1.1
+ 1.3
test
@@ -109,8 +109,8 @@
maven-compiler-plugin
3.8.1
- 1.8
- 1.8
+ 11
+ 11
@@ -147,7 +147,7 @@
maven-source-plugin
- 3.0.1
+ 3.2.1
attach-sources
@@ -159,7 +159,7 @@
maven-javadoc-plugin
- 2.10.4
+ 3.3.1
attach-javadocs
diff --git a/russian/pom.xml b/russian/pom.xml
index c29cb67..d7d7ec2 100644
--- a/russian/pom.xml
+++ b/russian/pom.xml
@@ -6,7 +6,6 @@
1.5
4.0.0
- org.apache.lucene.morphology
russian
russian
1.5
@@ -23,7 +22,7 @@
junit
junit
- 4.13.1
+ ${junit.version}
test
diff --git a/russian/src/main/java/org/apache/lucene/morphology/russian/RussianLetterDecoderEncoder.java b/russian/src/main/java/org/apache/lucene/morphology/russian/RussianLetterDecoderEncoder.java
index 802be45..649a3a8 100644
--- a/russian/src/main/java/org/apache/lucene/morphology/russian/RussianLetterDecoderEncoder.java
+++ b/russian/src/main/java/org/apache/lucene/morphology/russian/RussianLetterDecoderEncoder.java
@@ -20,7 +20,6 @@ import org.apache.lucene.morphology.LetterDecoderEncoder;
import org.apache.lucene.morphology.SuffixToLongException;
import org.apache.lucene.morphology.WrongCharaterException;
-import java.util.ArrayList;
import java.util.LinkedList;
/**
@@ -42,7 +41,7 @@ public class RussianLetterDecoderEncoder implements LetterDecoderEncoder {
throw new SuffixToLongException("Suffix length should not be greater then " + WORD_PART_LENGHT + " " + string);
int result = 0;
for (int i = 0; i < string.length(); i++) {
- int c = 0 + string.charAt(i) - RUSSIAN_SMALL_LETTER_OFFSET;
+ int c = string.charAt(i) - RUSSIAN_SMALL_LETTER_OFFSET;
if (c == 45 - RUSSIAN_SMALL_LETTER_OFFSET) {
c = DASH_CODE;
}
@@ -58,7 +57,7 @@ public class RussianLetterDecoderEncoder implements LetterDecoderEncoder {
}
public int[] encodeToArray(String s) {
- LinkedList integers = new LinkedList();
+ LinkedList integers = new LinkedList<>();
while (s.length() > WORD_PART_LENGHT) {
integers.add(encode(s.substring(0, WORD_PART_LENGHT)));
s = s.substring(WORD_PART_LENGHT);
@@ -74,16 +73,16 @@ public class RussianLetterDecoderEncoder implements LetterDecoderEncoder {
}
public String decodeArray(int[] array) {
- String result = "";
+ StringBuilder result = new StringBuilder();
for (int i : array) {
- result += decode(i);
+ result.append(decode(i));
}
- return result;
+ return result.toString();
}
public String decode(Integer suffixN) {
- String result = "";
+ StringBuilder result = new StringBuilder();
while (suffixN > 33) {
int c = suffixN % 34 + RUSSIAN_SMALL_LETTER_OFFSET;
if (c == RUSSIAN_SMALL_LETTER_OFFSET) {
@@ -91,21 +90,20 @@ public class RussianLetterDecoderEncoder implements LetterDecoderEncoder {
continue;
}
if (c == DASH_CODE + RUSSIAN_SMALL_LETTER_OFFSET) c = DASH_CHAR;
- result = (char) c + result;
+ result.insert(0, (char) c);
suffixN /= 34;
}
long c = suffixN + RUSSIAN_SMALL_LETTER_OFFSET;
if (c == DASH_CODE + RUSSIAN_SMALL_LETTER_OFFSET) c = DASH_CHAR;
- result = (char) c + result;
- return result;
+ result.insert(0, (char) c);
+ return result.toString();
}
public boolean checkCharacter(char c) {
- int code = 0 + c;
+ int code = c;
if (code == 45) return true;
code -= RUSSIAN_SMALL_LETTER_OFFSET;
- if (code > 0 && code < 33) return true;
- return false;
+ return code > 0 && code < 33;
}
public boolean checkString(String word) {
diff --git a/russian/src/test/java/org/apache/lucene/morphology/russian/RussianLetterDecoderEncoderTest.java b/russian/src/test/java/org/apache/lucene/morphology/russian/RussianLetterDecoderEncoderTest.java
index 73a2d38..d089c48 100644
--- a/russian/src/test/java/org/apache/lucene/morphology/russian/RussianLetterDecoderEncoderTest.java
+++ b/russian/src/test/java/org/apache/lucene/morphology/russian/RussianLetterDecoderEncoderTest.java
@@ -17,6 +17,7 @@ package org.apache.lucene.morphology.russian;
import org.apache.lucene.morphology.SuffixToLongException;
import org.apache.lucene.morphology.WrongCharaterException;
+import org.hamcrest.MatcherAssert;
import org.junit.Before;
import org.junit.Test;
@@ -24,9 +25,9 @@ import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
import static org.hamcrest.core.IsEqual.equalTo;
-import static org.junit.Assert.assertThat;
public class RussianLetterDecoderEncoderTest {
private RussianLetterDecoderEncoder decoderEncoder;
@@ -40,12 +41,12 @@ public class RussianLetterDecoderEncoderTest {
@Test
public void testShouldPreserverStringComporision() throws IOException {
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/decoder-test-monotonic.txt");
- BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
+ BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
String s = bufferedReader.readLine();
while (s != null) {
String[] qa = s.trim().split(" ");
if (qa[0].length() <= RussianLetterDecoderEncoder.WORD_PART_LENGHT && qa[1].length() <= RussianLetterDecoderEncoder.WORD_PART_LENGHT) {
- assertThat(decoderEncoder.encode(qa[1]) > decoderEncoder.encode(qa[0]), equalTo(true));
+ MatcherAssert.assertThat(decoderEncoder.encode(qa[1]) > decoderEncoder.encode(qa[0]), equalTo(true));
}
s = bufferedReader.readLine();
}
@@ -55,13 +56,13 @@ public class RussianLetterDecoderEncoderTest {
@Test
public void testShouldCorrectDecodeEncode() throws IOException {
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/decoder-test-data.txt");
- BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
+ BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
String s = bufferedReader.readLine();
while (s != null) {
String[] qa = s.trim().split(" ");
if (qa[0].length() <= RussianLetterDecoderEncoder.WORD_PART_LENGHT) {
Integer encodedSuffix = decoderEncoder.encode(qa[0]);
- assertThat(decoderEncoder.decode(encodedSuffix), equalTo(qa[1]));
+ MatcherAssert.assertThat(decoderEncoder.decode(encodedSuffix), equalTo(qa[1]));
}
s = bufferedReader.readLine();
}
@@ -70,12 +71,12 @@ public class RussianLetterDecoderEncoderTest {
@Test
public void testShouldCorrectDecodeEncodeStringToArray() throws IOException {
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/decoder-test-data-for-array.txt");
- BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
+ BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
String s = bufferedReader.readLine();
while (s != null) {
String[] qa = s.trim().split(" ");
int[] ecodedSuffix = decoderEncoder.encodeToArray(qa[0]);
- assertThat(decoderEncoder.decodeArray(ecodedSuffix), equalTo(qa[1]));
+ MatcherAssert.assertThat(decoderEncoder.decodeArray(ecodedSuffix), equalTo(qa[1]));
s = bufferedReader.readLine();
}
}
diff --git a/solr-morphology-analysis/src/test/java/org/apache/lucene/analysis/morphology/MorphologyFilterFactoryTest.java b/solr-morphology-analysis/src/test/java/org/apache/lucene/analysis/morphology/MorphologyFilterFactoryTest.java
index 7cb225a..b6c814a 100644
--- a/solr-morphology-analysis/src/test/java/org/apache/lucene/analysis/morphology/MorphologyFilterFactoryTest.java
+++ b/solr-morphology-analysis/src/test/java/org/apache/lucene/analysis/morphology/MorphologyFilterFactoryTest.java
@@ -30,7 +30,7 @@ import java.util.Map;
public class MorphologyFilterFactoryTest {
private static final String LANGUAGE_KEY = "language";
- private ResourceLoader loader = new ClasspathResourceLoader();
+ private ResourceLoader loader = new ClasspathResourceLoader(MorphologyFilterFactoryTest.class);
private Map args;
@Before