fixing typo
git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@98 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
parent
7bf8ef7d6f
commit
e8399999c3
@ -28,22 +28,22 @@ import java.util.*;
|
|||||||
* This class contain logic how read
|
* This class contain logic how read
|
||||||
* dictonary and produce word with it all forms.
|
* dictonary and produce word with it all forms.
|
||||||
*/
|
*/
|
||||||
public class DictonaryReader {
|
public class DictionaryReader {
|
||||||
private String fileName;
|
private String fileName;
|
||||||
private String fileEncoding = "windows-1251";
|
private String fileEncoding = "windows-1251";
|
||||||
private List<List<FlexiaModel>> wordsFlexias = new ArrayList<List<FlexiaModel>>();
|
private List<List<FlexiaModel>> wordsFlexias = new ArrayList<List<FlexiaModel>>();
|
||||||
private List<List<String>> wordPrefixes = new ArrayList<List<String>>();
|
private List<List<String>> wordPrefixes = new ArrayList<List<String>>();
|
||||||
private Set<String> ingnoredForm = new HashSet<String>();
|
private Set<String> ignoredForm = new HashSet<String>();
|
||||||
|
|
||||||
public DictonaryReader(String fileName, Set<String> ingnoredForm) {
|
public DictionaryReader(String fileName, Set<String> ignoredForm) {
|
||||||
this.fileName = fileName;
|
this.fileName = fileName;
|
||||||
this.ingnoredForm = ingnoredForm;
|
this.ignoredForm = ignoredForm;
|
||||||
}
|
}
|
||||||
|
|
||||||
public DictonaryReader(String fileName, String fileEncoding, Set<String> ingnoredForm) {
|
public DictionaryReader(String fileName, String fileEncoding, Set<String> ignoredForm) {
|
||||||
this.fileName = fileName;
|
this.fileName = fileName;
|
||||||
this.fileEncoding = fileEncoding;
|
this.fileEncoding = fileEncoding;
|
||||||
this.ingnoredForm = ingnoredForm;
|
this.ignoredForm = ignoredForm;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -70,7 +70,7 @@ public class DictonaryReader {
|
|||||||
wordBase = "#".equals(wordBase) ? "" : wordBase;
|
wordBase = "#".equals(wordBase) ? "" : wordBase;
|
||||||
List<FlexiaModel> models = wordsFlexias.get(Integer.valueOf(wd[1]));
|
List<FlexiaModel> models = wordsFlexias.get(Integer.valueOf(wd[1]));
|
||||||
FlexiaModel flexiaModel = models.get(0);
|
FlexiaModel flexiaModel = models.get(0);
|
||||||
if (models.size() > 0 && !ingnoredForm.contains(flexiaModel.getCode())) {
|
if (models.size() > 0 && !ignoredForm.contains(flexiaModel.getCode())) {
|
||||||
|
|
||||||
WordCard card = new WordCard(flexiaModel.create(wordBase), wordBase, flexiaModel.getSuffix());
|
WordCard card = new WordCard(flexiaModel.create(wordBase), wordBase, flexiaModel.getSuffix());
|
||||||
for (FlexiaModel fm : models) {
|
for (FlexiaModel fm : models) {
|
@ -30,7 +30,7 @@ public class GrammaReader {
|
|||||||
private String fileName;
|
private String fileName;
|
||||||
private String fileEncoding = "windows-1251";
|
private String fileEncoding = "windows-1251";
|
||||||
private List<String> grammaInfo = new ArrayList<String>();
|
private List<String> grammaInfo = new ArrayList<String>();
|
||||||
private Map<String, Integer> inversIndex = new HashMap<String, Integer>();
|
private Map<String, Integer> inverseIndex = new HashMap<String, Integer>();
|
||||||
|
|
||||||
public GrammaReader(String fileName) throws IOException {
|
public GrammaReader(String fileName) throws IOException {
|
||||||
this.fileName = fileName;
|
this.fileName = fileName;
|
||||||
@ -51,7 +51,7 @@ public class GrammaReader {
|
|||||||
if (!line.startsWith("//") && line.length() > 0) {
|
if (!line.startsWith("//") && line.length() > 0) {
|
||||||
String[] strings = line.split(" ", 2);
|
String[] strings = line.split(" ", 2);
|
||||||
Integer i = grammaInfo.size();
|
Integer i = grammaInfo.size();
|
||||||
inversIndex.put(strings[0], i);
|
inverseIndex.put(strings[0], i);
|
||||||
grammaInfo.add(i, strings[1]);
|
grammaInfo.add(i, strings[1]);
|
||||||
}
|
}
|
||||||
line = bufferedReader.readLine();
|
line = bufferedReader.readLine();
|
||||||
@ -67,10 +67,10 @@ public class GrammaReader {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public Map<String, Integer> getGrammInversIndex() {
|
public Map<String, Integer> getGrammInversIndex() {
|
||||||
return inversIndex;
|
return inverseIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setInversIndex(Map<String, Integer> inversIndex) {
|
public void setInverseIndex(Map<String, Integer> inverseIndex) {
|
||||||
this.inversIndex = inversIndex;
|
this.inverseIndex = inverseIndex;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -41,12 +41,12 @@ public class StatisticsCollector implements WordProccessor {
|
|||||||
|
|
||||||
public void process(WordCard wordCard) throws IOException {
|
public void process(WordCard wordCard) throws IOException {
|
||||||
cleanWordCard(wordCard);
|
cleanWordCard(wordCard);
|
||||||
String normalStringMorph = wordCard.getWordsFroms().get(0).getCode();
|
String normalStringMorph = wordCard.getWordsForms().get(0).getCode();
|
||||||
String word = wordCard.getBase() + wordCard.getCanonicalSuffix();
|
String word = wordCard.getBase() + wordCard.getCanonicalSuffix();
|
||||||
if (word.contains("-")) return;
|
if (word.contains("-")) return;
|
||||||
if (!decoderEncoder.checkString(word)) return;
|
if (!decoderEncoder.checkString(word)) return;
|
||||||
|
|
||||||
for (FlexiaModel fm : wordCard.getWordsFroms()) {
|
for (FlexiaModel fm : wordCard.getWordsForms()) {
|
||||||
if (!decoderEncoder.checkString(fm.create(wordCard.getBase())) || fm.create(wordCard.getBase()).contains("-")) continue;
|
if (!decoderEncoder.checkString(fm.create(wordCard.getBase())) || fm.create(wordCard.getBase()).contains("-")) continue;
|
||||||
Heuristic heuristic = createEvristic(wordCard.getBase(), wordCard.getCanonicalSuffix(), fm, normalStringMorph);
|
Heuristic heuristic = createEvristic(wordCard.getBase(), wordCard.getCanonicalSuffix(), fm, normalStringMorph);
|
||||||
String form = revertWord(fm.create(wordCard.getBase()));
|
String form = revertWord(fm.create(wordCard.getBase()));
|
||||||
@ -61,9 +61,9 @@ public class StatisticsCollector implements WordProccessor {
|
|||||||
|
|
||||||
private void cleanWordCard(WordCard wordCard) {
|
private void cleanWordCard(WordCard wordCard) {
|
||||||
wordCard.setBase(cleanString(wordCard.getBase()));
|
wordCard.setBase(cleanString(wordCard.getBase()));
|
||||||
wordCard.setCanonicalFrom(cleanString(wordCard.getCanonicalFrom()));
|
wordCard.setCanonicalForm(cleanString(wordCard.getCanonicalForm()));
|
||||||
wordCard.setCanonicalSuffix(cleanString(wordCard.getCanonicalSuffix()));
|
wordCard.setCanonicalSuffix(cleanString(wordCard.getCanonicalSuffix()));
|
||||||
List<FlexiaModel> models = wordCard.getWordsFroms();
|
List<FlexiaModel> models = wordCard.getWordsForms();
|
||||||
for (FlexiaModel m : models) {
|
for (FlexiaModel m : models) {
|
||||||
m.setSuffix(cleanString(m.getSuffix()));
|
m.setSuffix(cleanString(m.getSuffix()));
|
||||||
m.setPrefix(cleanString(m.getPrefix()));
|
m.setPrefix(cleanString(m.getPrefix()));
|
||||||
|
@ -23,23 +23,23 @@ import java.util.List;
|
|||||||
* Represent word and all it forms.
|
* Represent word and all it forms.
|
||||||
*/
|
*/
|
||||||
public class WordCard {
|
public class WordCard {
|
||||||
private String canonicalFrom;
|
private String canonicalForm;
|
||||||
private String base;
|
private String base;
|
||||||
private String canonicalSuffix;
|
private String canonicalSuffix;
|
||||||
private List<FlexiaModel> wordsFroms = new ArrayList<FlexiaModel>();
|
private List<FlexiaModel> wordsForms = new ArrayList<FlexiaModel>();
|
||||||
|
|
||||||
public WordCard(String canonicalFrom, String base, String canonicalSuffix) {
|
public WordCard(String canonicalForm, String base, String canonicalSuffix) {
|
||||||
this.canonicalFrom = canonicalFrom;
|
this.canonicalForm = canonicalForm;
|
||||||
this.canonicalSuffix = canonicalSuffix;
|
this.canonicalSuffix = canonicalSuffix;
|
||||||
this.base = base;
|
this.base = base;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void addFlexia(FlexiaModel flexiaModel) {
|
public void addFlexia(FlexiaModel flexiaModel) {
|
||||||
wordsFroms.add(flexiaModel);
|
wordsForms.add(flexiaModel);
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getCanonicalFrom() {
|
public String getCanonicalForm() {
|
||||||
return canonicalFrom;
|
return canonicalForm;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String getCanonicalSuffix() {
|
public String getCanonicalSuffix() {
|
||||||
@ -50,12 +50,12 @@ public class WordCard {
|
|||||||
return base;
|
return base;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<FlexiaModel> getWordsFroms() {
|
public List<FlexiaModel> getWordsForms() {
|
||||||
return wordsFroms;
|
return wordsForms;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setCanonicalFrom(String canonicalFrom) {
|
public void setCanonicalForm(String canonicalForm) {
|
||||||
this.canonicalFrom = canonicalFrom;
|
this.canonicalForm = canonicalForm;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setBase(String base) {
|
public void setBase(String base) {
|
||||||
@ -66,17 +66,17 @@ public class WordCard {
|
|||||||
this.canonicalSuffix = canonicalSuffix;
|
this.canonicalSuffix = canonicalSuffix;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setWordsFroms(List<FlexiaModel> wordsFroms) {
|
public void setWordsForms(List<FlexiaModel> wordsForms) {
|
||||||
this.wordsFroms = wordsFroms;
|
this.wordsForms = wordsForms;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "WordCard{" +
|
return "WordCard{" +
|
||||||
"canonicalFrom='" + canonicalFrom + '\'' +
|
"canonicalForm='" + canonicalForm + '\'' +
|
||||||
", base='" + base + '\'' +
|
", base='" + base + '\'' +
|
||||||
", canonicalSuffix='" + canonicalSuffix + '\'' +
|
", canonicalSuffix='" + canonicalSuffix + '\'' +
|
||||||
", wordsFroms=" + wordsFroms +
|
", wordsForms=" + wordsForms +
|
||||||
'}';
|
'}';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -16,7 +16,7 @@
|
|||||||
|
|
||||||
package org.apache.lucene.morphology.generator;
|
package org.apache.lucene.morphology.generator;
|
||||||
|
|
||||||
import org.apache.lucene.morphology.dictionary.DictonaryReader;
|
import org.apache.lucene.morphology.dictionary.DictionaryReader;
|
||||||
import org.apache.lucene.morphology.dictionary.GrammaReader;
|
import org.apache.lucene.morphology.dictionary.GrammaReader;
|
||||||
import org.apache.lucene.morphology.dictionary.StatisticsCollector;
|
import org.apache.lucene.morphology.dictionary.StatisticsCollector;
|
||||||
import org.apache.lucene.morphology.english.EnglishLetterDecoderEncoder;
|
import org.apache.lucene.morphology.english.EnglishLetterDecoderEncoder;
|
||||||
@ -29,11 +29,11 @@ public class EnglishHeuristicBuilder {
|
|||||||
public static void main(String[] args) throws IOException {
|
public static void main(String[] args) throws IOException {
|
||||||
|
|
||||||
GrammaReader grammaInfo = new GrammaReader("dictonary/Dicts/Morph/egramtab.tab");
|
GrammaReader grammaInfo = new GrammaReader("dictonary/Dicts/Morph/egramtab.tab");
|
||||||
DictonaryReader dictonaryReader = new DictonaryReader("dictonary/Dicts/SrcMorph/EngSrc/morphs.mrd", new HashSet<String>());
|
DictionaryReader dictionaryReader = new DictionaryReader("dictonary/Dicts/SrcMorph/EngSrc/morphs.mrd", new HashSet<String>());
|
||||||
|
|
||||||
EnglishLetterDecoderEncoder decoderEncoder = new EnglishLetterDecoderEncoder();
|
EnglishLetterDecoderEncoder decoderEncoder = new EnglishLetterDecoderEncoder();
|
||||||
StatisticsCollector statisticsCollector = new StatisticsCollector(grammaInfo, decoderEncoder);
|
StatisticsCollector statisticsCollector = new StatisticsCollector(grammaInfo, decoderEncoder);
|
||||||
dictonaryReader.proccess(statisticsCollector);
|
dictionaryReader.proccess(statisticsCollector);
|
||||||
statisticsCollector.saveHeuristic("english/src/main/resources/org/apache/lucene/morphology/english/morph.info");
|
statisticsCollector.saveHeuristic("english/src/main/resources/org/apache/lucene/morphology/english/morph.info");
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -16,7 +16,7 @@
|
|||||||
|
|
||||||
package org.apache.lucene.morphology.generator;
|
package org.apache.lucene.morphology.generator;
|
||||||
|
|
||||||
import org.apache.lucene.morphology.dictionary.DictonaryReader;
|
import org.apache.lucene.morphology.dictionary.DictionaryReader;
|
||||||
import org.apache.lucene.morphology.dictionary.GrammaReader;
|
import org.apache.lucene.morphology.dictionary.GrammaReader;
|
||||||
import org.apache.lucene.morphology.dictionary.StatisticsCollector;
|
import org.apache.lucene.morphology.dictionary.StatisticsCollector;
|
||||||
import org.apache.lucene.morphology.russian.RussianLetterDecoderEncoder;
|
import org.apache.lucene.morphology.russian.RussianLetterDecoderEncoder;
|
||||||
@ -28,11 +28,11 @@ import java.util.HashSet;
|
|||||||
public class RussianHeuristicBuilder {
|
public class RussianHeuristicBuilder {
|
||||||
public static void main(String[] args) throws IOException {
|
public static void main(String[] args) throws IOException {
|
||||||
GrammaReader grammaInfo = new GrammaReader("dictonary/Dicts/Morph/rgramtab.tab");
|
GrammaReader grammaInfo = new GrammaReader("dictonary/Dicts/Morph/rgramtab.tab");
|
||||||
DictonaryReader dictonaryReader = new DictonaryReader("dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd", new HashSet<String>());
|
DictionaryReader dictionaryReader = new DictionaryReader("dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd", new HashSet<String>());
|
||||||
|
|
||||||
RussianLetterDecoderEncoder decoderEncoder = new RussianLetterDecoderEncoder();
|
RussianLetterDecoderEncoder decoderEncoder = new RussianLetterDecoderEncoder();
|
||||||
StatisticsCollector statisticsCollector = new StatisticsCollector(grammaInfo, decoderEncoder);
|
StatisticsCollector statisticsCollector = new StatisticsCollector(grammaInfo, decoderEncoder);
|
||||||
dictonaryReader.proccess(statisticsCollector);
|
dictionaryReader.proccess(statisticsCollector);
|
||||||
statisticsCollector.saveHeuristic("russian/src/main/resources/org/apache/lucene/morphology/russian/morph.info");
|
statisticsCollector.saveHeuristic("russian/src/main/resources/org/apache/lucene/morphology/russian/morph.info");
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -15,14 +15,14 @@
|
|||||||
*/
|
*/
|
||||||
package org.apache.lucene.morphology.english;
|
package org.apache.lucene.morphology.english;
|
||||||
|
|
||||||
import org.apache.lucene.morphology.analayzer.MorphlogyAnalayzer;
|
import org.apache.lucene.morphology.analyzer.MorphologyAnalyzer;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
|
||||||
public class EnglishAnalayzer extends MorphlogyAnalayzer {
|
public class EnglishAnalyzer extends MorphologyAnalyzer {
|
||||||
|
|
||||||
public EnglishAnalayzer() throws IOException {
|
public EnglishAnalyzer() throws IOException {
|
||||||
super(new EnglishLuceneMorphology());
|
super(new EnglishLuceneMorphology());
|
||||||
}
|
}
|
||||||
|
|
@ -39,12 +39,12 @@ public class EnglishAnalayzerTest {
|
|||||||
HashSet<String> answer = new HashSet<String>(Arrays.asList(strings));
|
HashSet<String> answer = new HashSet<String>(Arrays.asList(strings));
|
||||||
stream.close();
|
stream.close();
|
||||||
|
|
||||||
EnglishAnalayzer morphlogyAnalayzer = new EnglishAnalayzer();
|
EnglishAnalyzer morphlogyAnalyzer = new EnglishAnalyzer();
|
||||||
stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/english/englsih-analayzer-data.txt");
|
stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/english/englsih-analayzer-data.txt");
|
||||||
|
|
||||||
InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
|
InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
|
||||||
|
|
||||||
TokenStream tokenStream = morphlogyAnalayzer.tokenStream(null, reader);
|
TokenStream tokenStream = morphlogyAnalyzer.tokenStream(null, reader);
|
||||||
HashSet<String> result = new HashSet<String>();
|
HashSet<String> result = new HashSet<String>();
|
||||||
while (tokenStream.incrementToken()) {
|
while (tokenStream.incrementToken()) {
|
||||||
TermAttribute attribute1 = tokenStream.getAttribute(TermAttribute.class);
|
TermAttribute attribute1 = tokenStream.getAttribute(TermAttribute.class);
|
||||||
|
@ -22,6 +22,6 @@ public interface Morphology {
|
|||||||
|
|
||||||
List<String> getNormalForms(String s);
|
List<String> getNormalForms(String s);
|
||||||
|
|
||||||
List<String> getMorfInfo(String s);
|
List<String> getMorphInfo(String s);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -73,7 +73,7 @@ public class MorphologyImpl implements Morphology {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<String> getMorfInfo(String s) {
|
public List<String> getMorphInfo(String s) {
|
||||||
ArrayList<String> result = new ArrayList<String>();
|
ArrayList<String> result = new ArrayList<String>();
|
||||||
int[] ints = decoderEncoder.encodeToArray(revertWord(s));
|
int[] ints = decoderEncoder.encodeToArray(revertWord(s));
|
||||||
int ruleId = findRuleId(ints);
|
int ruleId = findRuleId(ints);
|
||||||
|
@ -14,7 +14,7 @@
|
|||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.apache.lucene.morphology.analayzer;
|
package org.apache.lucene.morphology.analyzer;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenFilter;
|
import org.apache.lucene.analysis.TokenFilter;
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
@ -14,7 +14,7 @@
|
|||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.apache.lucene.morphology.analayzer;
|
package org.apache.lucene.morphology.analyzer;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||||
@ -29,18 +29,18 @@ import java.io.IOException;
|
|||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
|
|
||||||
public class MorphlogyAnalayzer extends Analyzer {
|
public class MorphologyAnalyzer extends Analyzer {
|
||||||
private LuceneMorphology luceneMorph;
|
private LuceneMorphology luceneMorph;
|
||||||
|
|
||||||
public MorphlogyAnalayzer(LuceneMorphology luceneMorph) {
|
public MorphologyAnalyzer(LuceneMorphology luceneMorph) {
|
||||||
this.luceneMorph = luceneMorph;
|
this.luceneMorph = luceneMorph;
|
||||||
}
|
}
|
||||||
|
|
||||||
public MorphlogyAnalayzer(String pathToMorph, LetterDecoderEncoder letterDecoderEncoder) throws IOException {
|
public MorphologyAnalyzer(String pathToMorph, LetterDecoderEncoder letterDecoderEncoder) throws IOException {
|
||||||
luceneMorph = new LuceneMorphology(pathToMorph, letterDecoderEncoder);
|
luceneMorph = new LuceneMorphology(pathToMorph, letterDecoderEncoder);
|
||||||
}
|
}
|
||||||
|
|
||||||
public MorphlogyAnalayzer(InputStream inputStream, LetterDecoderEncoder letterDecoderEncoder) throws IOException {
|
public MorphologyAnalyzer(InputStream inputStream, LetterDecoderEncoder letterDecoderEncoder) throws IOException {
|
||||||
luceneMorph = new LuceneMorphology(inputStream, letterDecoderEncoder);
|
luceneMorph = new LuceneMorphology(inputStream, letterDecoderEncoder);
|
||||||
}
|
}
|
||||||
|
|
@ -15,13 +15,13 @@
|
|||||||
*/
|
*/
|
||||||
package org.apache.lucene.morphology.russian;
|
package org.apache.lucene.morphology.russian;
|
||||||
|
|
||||||
import org.apache.lucene.morphology.analayzer.MorphlogyAnalayzer;
|
import org.apache.lucene.morphology.analyzer.MorphologyAnalyzer;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
|
||||||
public class RussianAnalayzer extends MorphlogyAnalayzer {
|
public class RussianAnalyzer extends MorphologyAnalyzer {
|
||||||
public RussianAnalayzer() throws IOException {
|
public RussianAnalyzer() throws IOException {
|
||||||
super(new RussianLuceneMorphology());
|
super(new RussianLuceneMorphology());
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -39,12 +39,12 @@ public class RussianAnalayzerTest {
|
|||||||
HashSet<String> answer = new HashSet<String>(Arrays.asList(strings));
|
HashSet<String> answer = new HashSet<String>(Arrays.asList(strings));
|
||||||
stream.close();
|
stream.close();
|
||||||
|
|
||||||
RussianAnalayzer morphlogyAnalayzer = new RussianAnalayzer();
|
RussianAnalyzer morphlogyAnalyzer = new RussianAnalyzer();
|
||||||
stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/russian-analayzer-data.txt");
|
stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/russian-analayzer-data.txt");
|
||||||
|
|
||||||
InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
|
InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
|
||||||
|
|
||||||
TokenStream tokenStream = morphlogyAnalayzer.tokenStream(null, reader);
|
TokenStream tokenStream = morphlogyAnalyzer.tokenStream(null, reader);
|
||||||
HashSet<String> result = new HashSet<String>();
|
HashSet<String> result = new HashSet<String>();
|
||||||
while (tokenStream.incrementToken()) {
|
while (tokenStream.incrementToken()) {
|
||||||
TermAttribute attribute1 = tokenStream.getAttribute(TermAttribute.class);
|
TermAttribute attribute1 = tokenStream.getAttribute(TermAttribute.class);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user