adding modules
git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@49 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
22
dictionary-reader/pom.xml
Normal file
22
dictionary-reader/pom.xml
Normal file
@ -0,0 +1,22 @@
|
||||
<?xml version="1.0"?>
|
||||
<project>
|
||||
<parent>
|
||||
<artifactId>morpholgy</artifactId>
|
||||
<groupId>org.apache.lucene.morpholgy</groupId>
|
||||
<version>0.7-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>org.apache.lucene.morpholgy</groupId>
|
||||
<artifactId>dictionary-reader</artifactId>
|
||||
<name>dictionary-reader</name>
|
||||
<version>0.7-SNAPSHOT</version>
|
||||
<url>http://maven.apache.org</url>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene.morpholgy</groupId>
|
||||
<artifactId>morph</artifactId>
|
||||
<version>0.7-SNAPSHOT</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
@ -0,0 +1,125 @@
|
||||
/**
|
||||
* Copyright 2009 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.morpholgy.dictionary;
|
||||
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
* This class contain logic how read
|
||||
* dictonary and produce word with it all forms.
|
||||
*/
|
||||
public class DictonaryReader {
|
||||
private String fileName;
|
||||
private String fileEncoding = "windows-1251";
|
||||
private List<List<FlexiaModel>> wordsFlexias = new ArrayList<List<FlexiaModel>>();
|
||||
private List<List<String>> wordPrefixes = new ArrayList<List<String>>();
|
||||
private Set<String> ingnoredForm = new HashSet<String>();
|
||||
|
||||
public DictonaryReader(String fileName, Set<String> ingnoredForm) {
|
||||
this.fileName = fileName;
|
||||
this.ingnoredForm = ingnoredForm;
|
||||
}
|
||||
|
||||
public DictonaryReader(String fileName, String fileEncoding, Set<String> ingnoredForm) {
|
||||
this.fileName = fileName;
|
||||
this.fileEncoding = fileEncoding;
|
||||
this.ingnoredForm = ingnoredForm;
|
||||
}
|
||||
|
||||
|
||||
public void proccess(WordProccessor wordProccessor) throws IOException {
|
||||
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), fileEncoding));
|
||||
readFlexias(bufferedReader);
|
||||
sckipBlock(bufferedReader);
|
||||
sckipBlock(bufferedReader);
|
||||
readPrefix(bufferedReader);
|
||||
readWords(bufferedReader, wordProccessor);
|
||||
}
|
||||
|
||||
|
||||
private void readWords(BufferedReader reader, WordProccessor wordProccessor) throws IOException {
|
||||
String s = reader.readLine();
|
||||
int count = Integer.valueOf(s);
|
||||
for (int i = 0; i < count; i++) {
|
||||
s = reader.readLine();
|
||||
if (i % 10000 == 0) System.out.println("Proccess " + i + " wordBase of " + count);
|
||||
|
||||
String[] wd = s.split(" ");
|
||||
String wordBase = wd[0].toLowerCase();
|
||||
if (wordBase.startsWith("-")) continue;
|
||||
wordBase = "#".equals(wordBase) ? "" : wordBase;
|
||||
List<FlexiaModel> models = wordsFlexias.get(Integer.valueOf(wd[1]));
|
||||
FlexiaModel flexiaModel = models.get(0);
|
||||
if (models.size() > 0 && !ingnoredForm.contains(flexiaModel.getCode())) {
|
||||
WordCard card = new WordCard(flexiaModel.create(wordBase), wordBase, flexiaModel.getSuffix());
|
||||
for (FlexiaModel fm : models) {
|
||||
card.addFlexia(fm);
|
||||
}
|
||||
wordProccessor.proccess(card);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void sckipBlock(BufferedReader reader) throws IOException {
|
||||
String s = reader.readLine();
|
||||
int count = Integer.valueOf(s);
|
||||
for (int i = 0; i < count; i++) {
|
||||
s = reader.readLine();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void readPrefix(BufferedReader reader) throws IOException {
|
||||
String s = reader.readLine();
|
||||
int count = Integer.valueOf(s);
|
||||
for (int i = 0; i < count; i++) {
|
||||
s = reader.readLine();
|
||||
wordPrefixes.add(Arrays.asList(s.toLowerCase().split(",")));
|
||||
}
|
||||
}
|
||||
|
||||
private void readFlexias(BufferedReader reader) throws IOException {
|
||||
String s = reader.readLine();
|
||||
int count = Integer.valueOf(s);
|
||||
for (int i = 0; i < count; i++) {
|
||||
s = reader.readLine();
|
||||
ArrayList<FlexiaModel> flexiaModelArrayList = new ArrayList<FlexiaModel>();
|
||||
wordsFlexias.add(flexiaModelArrayList);
|
||||
for (String line : s.split("%")) {
|
||||
addFlexia(flexiaModelArrayList, line);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void addFlexia(ArrayList<FlexiaModel> flexiaModelArrayList, String line) {
|
||||
String[] fl = line.split("\\*");
|
||||
// we inored all forms thats
|
||||
if (fl.length == 3) {
|
||||
System.out.println(line);
|
||||
// flexiaModelArrayList.add(new FlexiaModel(fl[1], cleanString(fl[0].toLowerCase()), cleanString(fl[2].toLowerCase())));
|
||||
}
|
||||
if (fl.length == 2) flexiaModelArrayList.add(new FlexiaModel(fl[1], fl[0].toLowerCase(), ""));
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,65 @@
|
||||
/**
|
||||
* Copyright 2009 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.morpholgy.dictionary;
|
||||
|
||||
/**
 * Describes how one word form is built from the immutable part of a word:
 * the prefix goes in front of the base and the suffix is appended after it.
 */
public class FlexiaModel {
    private String code;
    private String suffix;
    private String prefix;

    /**
     * @param code   code attached to this form
     * @param suffix text appended after the word base
     * @param prefix text placed before the word base
     */
    public FlexiaModel(String code, String suffix, String prefix) {
        setCode(code);
        setSuffix(suffix);
        setPrefix(prefix);
    }

    /**
     * Builds the word form for the given base.
     *
     * @param s the immutable part of the word
     * @return prefix, base and suffix joined in that order
     */
    public String create(String s) {
        StringBuilder form = new StringBuilder();
        form.append(prefix);
        form.append(s);
        form.append(suffix);
        return form.toString();
    }

    /**
     * @return prefix and suffix separated by a single space
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append(prefix).append(' ').append(suffix);
        return text.toString();
    }

    public String getCode() {
        return this.code;
    }

    public final void setCode(String code) {
        this.code = code;
    }

    public String getSuffix() {
        return this.suffix;
    }

    public final void setSuffix(String suffix) {
        this.suffix = suffix;
    }

    public String getPrefix() {
        return this.prefix;
    }

    public final void setPrefix(String prefix) {
        this.prefix = prefix;
    }
}
|
@ -0,0 +1,56 @@
|
||||
/**
|
||||
* Copyright 2009 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.morpholgy.dictionary;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
|
||||
/**
 * Reads a space separated frequency file into a map.
 * <p>
 * On every line the second field is parsed as the numeric value and the third
 * field is used as the key; the first field is not used.
 */
public class FrequentyReader {
    private String fileName;
    private String fileEncoding = "windows-1251";

    /**
     * Creates a reader that decodes the file as windows-1251.
     *
     * @param fileName path of the file to read
     */
    public FrequentyReader(String fileName) {
        this.fileName = fileName;
    }

    /**
     * Creates a reader with an explicit file encoding.
     *
     * @param fileName     path of the file to read
     * @param fileEncoding charset name used to decode the file
     */
    public FrequentyReader(String fileName, String fileEncoding) {
        this.fileName = fileName;
        this.fileEncoding = fileEncoding;
    }

    /**
     * Parses the whole file.
     *
     * @return map from the third field of each line to the value in its second field
     * @throws IOException if the file cannot be opened or read
     */
    public Map<String, Double> read() throws IOException {
        Map<String, Double> result = new HashMap<String, Double>();

        FileInputStream stream = new FileInputStream(fileName);
        try {
            BufferedReader bufferedReader = new BufferedReader(
                    new InputStreamReader(stream, fileEncoding));
            String s = bufferedReader.readLine();
            while (s != null) {
                // Blank lines carry no data; skipping them avoids an
                // ArrayIndexOutOfBoundsException on the field access below.
                if (s.trim().length() > 0) {
                    String[] strings = s.split(" ");
                    Double value = Double.valueOf(strings[1]);
                    result.put(strings[2], value);
                }
                s = bufferedReader.readLine();
            }
        } finally {
            // Release the file handle even when parsing fails.
            stream.close();
        }
        return result;
    }
}
|
@ -0,0 +1,76 @@
|
||||
/**
|
||||
* Copyright 2009 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.morpholgy.dictionary;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
// TODO: split this class in two.
/**
 * Loads a grammar description file when constructed.
 * <p>
 * Every line that is neither empty nor a {@code //} comment is split at the
 * first space into a code and a description. Descriptions are stored in file
 * order and each code is mapped to the position of its description.
 */
public class GrammaReader {
    private String fileName;
    private String fileEncoding = "windows-1251";
    private List<String> grammaInfo = new ArrayList<String>();
    private Map<String, Integer> inversIndex = new HashMap<String, Integer>();

    /**
     * Reads the file decoded as windows-1251.
     *
     * @param fileName path of the grammar file
     * @throws IOException if the file cannot be opened or read
     */
    public GrammaReader(String fileName) throws IOException {
        this.fileName = fileName;
        setUp();
    }

    /**
     * Reads the file with an explicit encoding.
     *
     * @param fileName     path of the grammar file
     * @param fileEncoding charset name used to decode the file
     * @throws IOException if the file cannot be opened or read
     */
    public GrammaReader(String fileName, String fileEncoding) throws IOException {
        this.fileName = fileName;
        this.fileEncoding = fileEncoding;
        setUp();
    }

    /**
     * Parses the file into the description list and the code index.
     */
    private void setUp() throws IOException {
        FileInputStream stream = new FileInputStream(fileName);
        try {
            BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, fileEncoding));
            String line = bufferedReader.readLine();
            while (line != null) {
                line = line.trim();
                if (!line.startsWith("//") && line.length() > 0) {
                    String[] strings = line.split(" ", 2);
                    int index = grammaInfo.size();
                    inversIndex.put(strings[0], index);
                    // A line holding only a code has no second part; store an
                    // empty description instead of failing on strings[1].
                    grammaInfo.add(strings.length > 1 ? strings[1] : "");
                }
                line = bufferedReader.readLine();
            }
        } finally {
            // Release the file handle even when parsing fails.
            stream.close();
        }
    }

    /**
     * @return the descriptions in file order (the internal list, not a copy)
     */
    public List<String> getGrammaInfo() {
        return grammaInfo;
    }

    /**
     * @return a new array holding the descriptions in file order
     */
    public String[] getGrammaInfoAsArray() {
        return grammaInfo.toArray(new String[grammaInfo.size()]);
    }

    /**
     * @return map from code to the position of its description
     */
    public Map<String, Integer> getGrammInversIndex() {
        return inversIndex;
    }

    /**
     * Replaces the code index.
     *
     * @param inversIndex the new map from code to description position
     */
    public void setInversIndex(Map<String, Integer> inversIndex) {
        this.inversIndex = inversIndex;
    }
}
|
@ -0,0 +1,54 @@
|
||||
/**
|
||||
* Copyright 2009 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.morpholgy.dictionary;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
|
||||
/**
 * Reads the set of ignored form codes from a text file.
 * <p>
 * Empty lines and lines starting with {@code //} are skipped; on every other
 * line the text before the first space is taken as the code.
 */
public class IgnoredFormReader {
    private String fileName;
    private String fileEncoding = "windows-1251";

    /**
     * Creates a reader that decodes the file as windows-1251.
     *
     * @param fileName path of the file to read
     */
    public IgnoredFormReader(String fileName) {
        this.fileName = fileName;
    }

    /**
     * Creates a reader with an explicit file encoding.
     *
     * @param fileName     path of the file to read
     * @param fileEncoding charset name used to decode the file
     */
    public IgnoredFormReader(String fileName, String fileEncoding) {
        this.fileName = fileName;
        this.fileEncoding = fileEncoding;
    }

    /**
     * Parses the whole file.
     *
     * @return the codes found in the file
     * @throws IOException if the file cannot be opened or read
     */
    public Set<String> getIngnoredFroms() throws IOException {
        HashSet<String> result = new HashSet<String>();

        FileInputStream stream = new FileInputStream(fileName);
        try {
            BufferedReader bufferedReader = new BufferedReader(
                    new InputStreamReader(stream, fileEncoding));
            String s = bufferedReader.readLine();
            while (s != null) {
                // Trim before testing so that indented comments are recognised
                // and blank lines do not add an empty code to the set.
                String line = s.trim();
                if (line.length() > 0 && !line.startsWith("//")) {
                    result.add(line.split(" ")[0]);
                }
                s = bufferedReader.readLine();
            }
        } finally {
            // Release the file handle even when reading fails.
            stream.close();
        }
        return result;
    }
}
|
@ -0,0 +1,147 @@
|
||||
/**
|
||||
* Copyright 2009 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.morpholgy.dictionary;
|
||||
|
||||
|
||||
import org.apache.lucene.morphology.Heuristic;
|
||||
import org.apache.lucene.morphology.LetterDecoderEncoder;
|
||||
import org.apache.lucene.morphology.Morph;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
public class StatiticsCollector implements WordProccessor {
|
||||
private TreeMap<String, Set<Heuristic>> inversIndex = new TreeMap<String, Set<Heuristic>>();
|
||||
private Map<Set<Heuristic>, Integer> ruleInverIndex = new HashMap<Set<Heuristic>, Integer>();
|
||||
private List<Set<Heuristic>> rules = new ArrayList<Set<Heuristic>>();
|
||||
private GrammaReader grammaReader;
|
||||
private LetterDecoderEncoder decoderEncoder;
|
||||
|
||||
|
||||
public StatiticsCollector(GrammaReader grammaReader) {
|
||||
this.grammaReader = grammaReader;
|
||||
}
|
||||
|
||||
public void proccess(WordCard wordCard) throws IOException {
|
||||
wordCard = cleanWordCard(wordCard);
|
||||
String normalStringMorph = wordCard.getWordsFroms().get(0).getCode();
|
||||
String word = wordCard.getBase() + wordCard.getCanonicalSuffix();
|
||||
if (word.contains("-")) return;
|
||||
|
||||
for (FlexiaModel fm : wordCard.getWordsFroms()) {
|
||||
Heuristic heuristic = createEvristic(wordCard.getBase(), wordCard.getCanonicalSuffix(), fm, normalStringMorph);
|
||||
String form = revertWord(fm.create(wordCard.getBase()));
|
||||
Set<Heuristic> suffixHeuristics = inversIndex.get(form);
|
||||
if (suffixHeuristics == null) {
|
||||
suffixHeuristics = new HashSet<Heuristic>();
|
||||
inversIndex.put(form, suffixHeuristics);
|
||||
}
|
||||
suffixHeuristics.add(heuristic);
|
||||
}
|
||||
}
|
||||
|
||||
private WordCard cleanWordCard(WordCard wordCard) {
|
||||
return wordCard;
|
||||
}
|
||||
|
||||
|
||||
public void saveHeuristic() throws IOException {
|
||||
|
||||
Map<Integer, Integer> dist = new TreeMap<Integer, Integer>();
|
||||
Set<Heuristic> prevSet = null;
|
||||
int count = 0;
|
||||
for (String key : inversIndex.keySet()) {
|
||||
Set<Heuristic> currentSet = inversIndex.get(key);
|
||||
if (!currentSet.equals(prevSet)) {
|
||||
Integer d = dist.get(key.length());
|
||||
dist.put(key.length(), 1 + (d == null ? 0 : d));
|
||||
prevSet = currentSet;
|
||||
count++;
|
||||
if (!ruleInverIndex.containsKey(currentSet)) {
|
||||
ruleInverIndex.put(currentSet, rules.size());
|
||||
rules.add(currentSet);
|
||||
}
|
||||
}
|
||||
}
|
||||
System.out.println("Word with diffirent rules " + count);
|
||||
System.out.println("All ivers words " + inversIndex.size());
|
||||
System.out.println(dist);
|
||||
System.out.println("diffirent rule count " + ruleInverIndex.size());
|
||||
Heuristic[][] heuristics = new Heuristic[ruleInverIndex.size()][];
|
||||
int index = 0;
|
||||
for (Set<Heuristic> hs : rules) {
|
||||
heuristics[index] = new Heuristic[hs.size()];
|
||||
int indexj = 0;
|
||||
for (Heuristic h : hs) {
|
||||
heuristics[index][indexj] = h;
|
||||
indexj++;
|
||||
}
|
||||
index++;
|
||||
}
|
||||
|
||||
int[][] ints = new int[count][];
|
||||
short[] rulesId = new short[count];
|
||||
count = 0;
|
||||
prevSet = null;
|
||||
for (String key : inversIndex.keySet()) {
|
||||
Set<Heuristic> currentSet = inversIndex.get(key);
|
||||
if (!currentSet.equals(prevSet)) {
|
||||
ints[count] = decoderEncoder.encodeToArray(key);
|
||||
rulesId[count] = (short) ruleInverIndex.get(currentSet).intValue();
|
||||
count++;
|
||||
prevSet = currentSet;
|
||||
}
|
||||
}
|
||||
Morph morph = new Morph(ints, rulesId, heuristics, grammaReader.getGrammaInfoAsArray());
|
||||
morph.writeToFile("sep.txt");
|
||||
}
|
||||
|
||||
private String revertWord(String s) {
|
||||
String result = "";
|
||||
for (int i = 1; i <= s.length(); i++) {
|
||||
result += s.charAt(s.length() - i);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
private Heuristic createEvristic(String wordBase, String canonicalSuffix, FlexiaModel fm, String normalSuffixForm) {
|
||||
String form = fm.create(wordBase);
|
||||
String normalForm = wordBase + canonicalSuffix;
|
||||
Integer length = getCommonLength(form, normalForm);
|
||||
Integer actualSuffixLengh = form.length() - length;
|
||||
String actualNormalSuffix = normalForm.substring(length);
|
||||
Integer integer = grammaReader.getGrammInversIndex().get(fm.getCode().substring(0, 2));
|
||||
Integer nf = grammaReader.getGrammInversIndex().get(normalSuffixForm.substring(0, 2));
|
||||
return new Heuristic((byte) actualSuffixLengh.intValue(), actualNormalSuffix, (short) integer.intValue(), (short) nf.intValue());
|
||||
}
|
||||
|
||||
public static Integer getCommonLength(String s1, String s2) {
|
||||
Integer length = Math.min(s1.length(), s2.length());
|
||||
for (int i = 0; i < length; i++) {
|
||||
if (s1.charAt(i) != s2.charAt(i)) return i;
|
||||
}
|
||||
return length;
|
||||
}
|
||||
|
||||
private String cleanString(String s) {
|
||||
return decoderEncoder.cleanString(s);
|
||||
//return s.replace((char) (34 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET), (char) (6 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET));
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,72 @@
|
||||
/**
|
||||
* Copyright 2009 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.morpholgy.dictionary;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Represent word and all it forms.
|
||||
*/
|
||||
public class WordCard {
|
||||
private String canonicalFrom;
|
||||
private String base;
|
||||
private String canonicalSuffix;
|
||||
private List<FlexiaModel> wordsFroms = new ArrayList<FlexiaModel>();
|
||||
|
||||
public WordCard(String canonicalFrom, String base, String canonicalSuffix) {
|
||||
this.canonicalFrom = canonicalFrom;
|
||||
this.canonicalSuffix = canonicalSuffix;
|
||||
this.base = base;
|
||||
}
|
||||
|
||||
public void addFlexia(FlexiaModel flexiaModel) {
|
||||
wordsFroms.add(flexiaModel);
|
||||
}
|
||||
|
||||
public String getCanonicalFrom() {
|
||||
return canonicalFrom;
|
||||
}
|
||||
|
||||
public String getCanonicalSuffix() {
|
||||
return canonicalSuffix;
|
||||
}
|
||||
|
||||
public String getBase() {
|
||||
return base;
|
||||
}
|
||||
|
||||
public List<FlexiaModel> getWordsFroms() {
|
||||
return wordsFroms;
|
||||
}
|
||||
|
||||
public void setCanonicalFrom(String canonicalFrom) {
|
||||
this.canonicalFrom = canonicalFrom;
|
||||
}
|
||||
|
||||
public void setBase(String base) {
|
||||
this.base = base;
|
||||
}
|
||||
|
||||
public void setCanonicalSuffix(String canonicalSuffix) {
|
||||
this.canonicalSuffix = canonicalSuffix;
|
||||
}
|
||||
|
||||
public void setWordsFroms(List<FlexiaModel> wordsFroms) {
|
||||
this.wordsFroms = wordsFroms;
|
||||
}
|
||||
}
|
@ -0,0 +1,28 @@
|
||||
/**
|
||||
* Copyright 2009 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.morpholgy.dictionary;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Interface allows get information from
|
||||
* {@org.apache.lucene.russian.morphology.dictonary.DirtonaryReader}.
|
||||
*/
|
||||
public interface WordProccessor {
|
||||
|
||||
public void proccess(WordCard wordCard) throws IOException;
|
||||
}
|
Reference in New Issue
Block a user