adding modules
git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@49 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
		| @@ -0,0 +1,125 @@ | ||||
| /** | ||||
|  * Copyright 2009 Alexander Kuznetsov  | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|  * you may not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  *     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| package org.apache.lucene.morpholgy.dictionary; | ||||
|  | ||||
|  | ||||
| import java.io.BufferedReader; | ||||
| import java.io.FileInputStream; | ||||
| import java.io.IOException; | ||||
| import java.io.InputStreamReader; | ||||
| import java.util.*; | ||||
|  | ||||
|  | ||||
| /** | ||||
|  * This class contain logic how read | ||||
|  * dictonary and produce word with it all forms. | ||||
|  */ | ||||
| public class DictonaryReader { | ||||
|     private String fileName; | ||||
|     private String fileEncoding = "windows-1251"; | ||||
|     private List<List<FlexiaModel>> wordsFlexias = new ArrayList<List<FlexiaModel>>(); | ||||
|     private List<List<String>> wordPrefixes = new ArrayList<List<String>>(); | ||||
|     private Set<String> ingnoredForm = new HashSet<String>(); | ||||
|  | ||||
|     public DictonaryReader(String fileName, Set<String> ingnoredForm) { | ||||
|         this.fileName = fileName; | ||||
|         this.ingnoredForm = ingnoredForm; | ||||
|     } | ||||
|  | ||||
|     public DictonaryReader(String fileName, String fileEncoding, Set<String> ingnoredForm) { | ||||
|         this.fileName = fileName; | ||||
|         this.fileEncoding = fileEncoding; | ||||
|         this.ingnoredForm = ingnoredForm; | ||||
|     } | ||||
|  | ||||
|  | ||||
|     public void proccess(WordProccessor wordProccessor) throws IOException { | ||||
|         BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), fileEncoding)); | ||||
|         readFlexias(bufferedReader); | ||||
|         sckipBlock(bufferedReader); | ||||
|         sckipBlock(bufferedReader); | ||||
|         readPrefix(bufferedReader); | ||||
|         readWords(bufferedReader, wordProccessor); | ||||
|     } | ||||
|  | ||||
|  | ||||
|     private void readWords(BufferedReader reader, WordProccessor wordProccessor) throws IOException { | ||||
|         String s = reader.readLine(); | ||||
|         int count = Integer.valueOf(s); | ||||
|         for (int i = 0; i < count; i++) { | ||||
|             s = reader.readLine(); | ||||
|             if (i % 10000 == 0) System.out.println("Proccess " + i + " wordBase of " + count); | ||||
|  | ||||
|             String[] wd = s.split(" "); | ||||
|             String wordBase = wd[0].toLowerCase(); | ||||
|             if (wordBase.startsWith("-")) continue; | ||||
|             wordBase = "#".equals(wordBase) ? "" : wordBase; | ||||
|             List<FlexiaModel> models = wordsFlexias.get(Integer.valueOf(wd[1])); | ||||
|             FlexiaModel flexiaModel = models.get(0); | ||||
|             if (models.size() > 0 && !ingnoredForm.contains(flexiaModel.getCode())) { | ||||
|                 WordCard card = new WordCard(flexiaModel.create(wordBase), wordBase, flexiaModel.getSuffix()); | ||||
|                 for (FlexiaModel fm : models) { | ||||
|                     card.addFlexia(fm); | ||||
|                 } | ||||
|                 wordProccessor.proccess(card); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|  | ||||
|     private void sckipBlock(BufferedReader reader) throws IOException { | ||||
|         String s = reader.readLine(); | ||||
|         int count = Integer.valueOf(s); | ||||
|         for (int i = 0; i < count; i++) { | ||||
|             s = reader.readLine(); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|  | ||||
|     private void readPrefix(BufferedReader reader) throws IOException { | ||||
|         String s = reader.readLine(); | ||||
|         int count = Integer.valueOf(s); | ||||
|         for (int i = 0; i < count; i++) { | ||||
|             s = reader.readLine(); | ||||
|             wordPrefixes.add(Arrays.asList(s.toLowerCase().split(","))); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     private void readFlexias(BufferedReader reader) throws IOException { | ||||
|         String s = reader.readLine(); | ||||
|         int count = Integer.valueOf(s); | ||||
|         for (int i = 0; i < count; i++) { | ||||
|             s = reader.readLine(); | ||||
|             ArrayList<FlexiaModel> flexiaModelArrayList = new ArrayList<FlexiaModel>(); | ||||
|             wordsFlexias.add(flexiaModelArrayList); | ||||
|             for (String line : s.split("%")) { | ||||
|                 addFlexia(flexiaModelArrayList, line); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     private void addFlexia(ArrayList<FlexiaModel> flexiaModelArrayList, String line) { | ||||
|         String[] fl = line.split("\\*"); | ||||
|         // we inored all forms thats | ||||
|         if (fl.length == 3) { | ||||
|             System.out.println(line); | ||||
|             // flexiaModelArrayList.add(new FlexiaModel(fl[1], cleanString(fl[0].toLowerCase()), cleanString(fl[2].toLowerCase()))); | ||||
|         } | ||||
|         if (fl.length == 2) flexiaModelArrayList.add(new FlexiaModel(fl[1], fl[0].toLowerCase(), "")); | ||||
|     } | ||||
|  | ||||
| } | ||||
| @@ -0,0 +1,65 @@ | ||||
| /** | ||||
|  * Copyright 2009 Alexander Kuznetsov  | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|  * you may not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  *     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| package org.apache.lucene.morpholgy.dictionary; | ||||
|  | ||||
/**
 * Describes how a single word form is assembled from the immutable part of a
 * word: a prefix placed before it, a suffix placed after it, and a code that
 * is stored alongside the form.
 */
public class FlexiaModel {
    private String code;
    private String suffix;
    private String prefix;

    /**
     * @param code   code stored with this form
     * @param suffix text appended after the word base
     * @param prefix text placed before the word base
     */
    public FlexiaModel(String code, String suffix, String prefix) {
        this.prefix = prefix;
        this.suffix = suffix;
        this.code = code;
    }

    /**
     * Builds the word form for the given base.
     *
     * @param s the immutable part of the word
     * @return prefix, base and suffix joined in that order
     */
    public String create(String s) {
        StringBuilder form = new StringBuilder();
        form.append(prefix);
        form.append(s);
        form.append(suffix);
        return form.toString();
    }

    /** @return the prefix and the suffix separated by one space */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append(prefix).append(' ').append(suffix);
        return text.toString();
    }

    public String getCode() {
        return this.code;
    }

    public String getSuffix() {
        return this.suffix;
    }

    public String getPrefix() {
        return this.prefix;
    }

    public void setCode(String code) {
        this.code = code;
    }

    public void setSuffix(String suffix) {
        this.suffix = suffix;
    }

    public void setPrefix(String prefix) {
        this.prefix = prefix;
    }
}
| @@ -0,0 +1,56 @@ | ||||
| /** | ||||
|  * Copyright 2009 Alexander Kuznetsov | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|  * you may not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  *     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| package org.apache.lucene.morpholgy.dictionary; | ||||
|  | ||||
| import java.io.BufferedReader; | ||||
| import java.io.FileInputStream; | ||||
| import java.io.IOException; | ||||
| import java.io.InputStreamReader; | ||||
| import java.util.HashMap; | ||||
| import java.util.Map; | ||||
|  | ||||
|  | ||||
/**
 * Reads a word-frequency file into a map.
 *
 * <p>Every non-blank line is split on single spaces; the second field is
 * parsed as the frequency value and the third field is used as the key.
 */
public class FrequentyReader {
    private String fileName;
    private String fileEncoding = "windows-1251";

    /**
     * Creates a reader that decodes the file as windows-1251.
     *
     * @param fileName path of the frequency file
     */
    public FrequentyReader(String fileName) {
        this.fileName = fileName;
    }

    /**
     * @param fileName     path of the frequency file
     * @param fileEncoding charset name used to decode the file
     */
    public FrequentyReader(String fileName, String fileEncoding) {
        this.fileName = fileName;
        this.fileEncoding = fileEncoding;
    }


    /**
     * Reads the whole file.
     *
     * @return map from the third field of each line to the value of its second field
     * @throws IOException           if the file cannot be read or a non-blank
     *                               line has fewer than three fields
     * @throws NumberFormatException if the second field is not a number
     */
    public Map<String, Double> read() throws IOException {
        Map<String, Double> result = new HashMap<String, Double>();

        FileInputStream stream = new FileInputStream(fileName);
        try {
            BufferedReader bufferedReader = new BufferedReader(
                    new InputStreamReader(stream, fileEncoding));
            String s = bufferedReader.readLine();
            while (s != null) {
                // Blank lines (for example a trailing one) carry no data.
                if (s.trim().length() > 0) {
                    // Split the untrimmed line so field positions stay exactly
                    // as they are in the file.
                    String[] strings = s.split(" ");
                    if (strings.length < 3) {
                        throw new IOException("Malformed frequency line in " + fileName + ": " + s);
                    }
                    result.put(strings[2], Double.valueOf(strings[1]));
                }
                s = bufferedReader.readLine();
            }
        } finally {
            // Release the file handle on success and on failure alike.
            stream.close();
        }
        return result;
    }
}
| @@ -0,0 +1,76 @@ | ||||
| /** | ||||
|  * Copyright 2009 Alexander Kuznetsov | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|  * you may not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  *     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| package org.apache.lucene.morpholgy.dictionary; | ||||
|  | ||||
| import java.io.BufferedReader; | ||||
| import java.io.FileInputStream; | ||||
| import java.io.IOException; | ||||
| import java.io.InputStreamReader; | ||||
| import java.util.ArrayList; | ||||
| import java.util.HashMap; | ||||
| import java.util.List; | ||||
| import java.util.Map; | ||||
|  | ||||
//todo split this class into two.
/**
 * Loads a grammar description file at construction time.
 *
 * <p>Every line that is neither blank nor a {@code //} comment is split at its
 * first space into a key and the remaining text. The remaining text is
 * appended to the info list and the key is mapped to the position it received
 * in that list.
 */
public class GrammaReader {
    private String fileName;
    private String fileEncoding = "windows-1251";
    private List<String> grammaInfo = new ArrayList<String>();
    private Map<String, Integer> inversIndex = new HashMap<String, Integer>();

    /**
     * Loads the file, decoding it as windows-1251.
     *
     * @param fileName path of the grammar file
     * @throws IOException if the file cannot be read
     */
    public GrammaReader(String fileName) throws IOException {
        this.fileName = fileName;
        setUp();
    }

    /**
     * @param fileName     path of the grammar file
     * @param fileEncoding charset name used to decode the file
     * @throws IOException if the file cannot be read
     */
    public GrammaReader(String fileName, String fileEncoding) throws IOException {
        this.fileName = fileName;
        this.fileEncoding = fileEncoding;
        setUp();
    }

    /** Parses the file into {@code grammaInfo} and {@code inversIndex}. */
    private void setUp() throws IOException {
        FileInputStream stream = new FileInputStream(fileName);
        try {
            BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, fileEncoding));
            String line = bufferedReader.readLine();
            while (line != null) {
                line = line.trim();
                if (!line.startsWith("//") && line.length() > 0) {
                    String[] strings = line.split(" ", 2);
                    inversIndex.put(strings[0], grammaInfo.size());
                    // A line holding only a key has no second part; store an
                    // empty description instead of failing on strings[1].
                    grammaInfo.add(strings.length > 1 ? strings[1] : "");
                }
                line = bufferedReader.readLine();
            }
        } finally {
            // Release the file handle on success and on failure alike.
            stream.close();
        }
    }

    /** @return the live list of descriptions, in file order */
    public List<String> getGrammaInfo() {
        return grammaInfo;
    }

    /** @return a new array holding the descriptions in file order */
    public String[] getGrammaInfoAsArray() {
        return grammaInfo.toArray(new String[grammaInfo.size()]);
    }

    /** @return the live map from key to its position in the description list */
    public Map<String, Integer> getGrammInversIndex() {
        return inversIndex;
    }

    /** Replaces the key-to-position map. */
    public void setInversIndex(Map<String, Integer> inversIndex) {
        this.inversIndex = inversIndex;
    }
}
| @@ -0,0 +1,54 @@ | ||||
| /** | ||||
|  * Copyright 2009 Alexander Kuznetsov  | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|  * you may not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  *     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| package org.apache.lucene.morpholgy.dictionary; | ||||
|  | ||||
| import java.io.BufferedReader; | ||||
| import java.io.FileInputStream; | ||||
| import java.io.IOException; | ||||
| import java.io.InputStreamReader; | ||||
| import java.util.HashSet; | ||||
| import java.util.Set; | ||||
|  | ||||
|  | ||||
/**
 * Reads the list of ignored form codes from a text file.
 *
 * <p>Blank lines and lines starting with {@code //} are skipped; from every
 * other line the text before the first space is taken as a code.
 */
public class IgnoredFormReader {
    private String fileName;
    private String fileEncoding = "windows-1251";

    /**
     * Creates a reader that decodes the file as windows-1251.
     *
     * @param fileName path of the file listing the ignored forms
     */
    public IgnoredFormReader(String fileName) {
        this.fileName = fileName;
    }

    /**
     * @param fileName     path of the file listing the ignored forms
     * @param fileEncoding charset name used to decode the file
     */
    public IgnoredFormReader(String fileName, String fileEncoding) {
        this.fileName = fileName;
        this.fileEncoding = fileEncoding;
    }

    /**
     * Reads the whole file.
     *
     * @return the set of codes found in the file
     * @throws IOException if the file cannot be read
     */
    public Set<String> getIngnoredFroms() throws IOException {
        Set<String> result = new HashSet<String>();
        FileInputStream stream = new FileInputStream(fileName);
        try {
            BufferedReader bufferedReader = new BufferedReader(
                    new InputStreamReader(stream, fileEncoding));
            String s = bufferedReader.readLine();
            while (s != null) {
                // Trim before testing so that indented comments and blank lines
                // do not end up in the set as "//" or "".
                String line = s.trim();
                if (line.length() > 0 && !line.startsWith("//")) {
                    result.add(line.split(" ")[0]);
                }
                s = bufferedReader.readLine();
            }
        } finally {
            // Release the file handle on success and on failure alike.
            stream.close();
        }
        return result;
    }
}
| @@ -0,0 +1,147 @@ | ||||
| /** | ||||
|  * Copyright 2009 Alexander Kuznetsov | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|  * you may not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  *     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| package org.apache.lucene.morpholgy.dictionary; | ||||
|  | ||||
|  | ||||
| import org.apache.lucene.morphology.Heuristic; | ||||
| import org.apache.lucene.morphology.LetterDecoderEncoder; | ||||
| import org.apache.lucene.morphology.Morph; | ||||
|  | ||||
| import java.io.IOException; | ||||
| import java.util.*; | ||||
|  | ||||
|  | ||||
| public class StatiticsCollector implements WordProccessor { | ||||
|     private TreeMap<String, Set<Heuristic>> inversIndex = new TreeMap<String, Set<Heuristic>>(); | ||||
|     private Map<Set<Heuristic>, Integer> ruleInverIndex = new HashMap<Set<Heuristic>, Integer>(); | ||||
|     private List<Set<Heuristic>> rules = new ArrayList<Set<Heuristic>>(); | ||||
|     private GrammaReader grammaReader; | ||||
|     private LetterDecoderEncoder decoderEncoder; | ||||
|  | ||||
|  | ||||
|     public StatiticsCollector(GrammaReader grammaReader) { | ||||
|         this.grammaReader = grammaReader; | ||||
|     } | ||||
|  | ||||
|     public void proccess(WordCard wordCard) throws IOException { | ||||
|         wordCard = cleanWordCard(wordCard); | ||||
|         String normalStringMorph = wordCard.getWordsFroms().get(0).getCode(); | ||||
|         String word = wordCard.getBase() + wordCard.getCanonicalSuffix(); | ||||
|         if (word.contains("-")) return; | ||||
|  | ||||
|         for (FlexiaModel fm : wordCard.getWordsFroms()) { | ||||
|             Heuristic heuristic = createEvristic(wordCard.getBase(), wordCard.getCanonicalSuffix(), fm, normalStringMorph); | ||||
|             String form = revertWord(fm.create(wordCard.getBase())); | ||||
|             Set<Heuristic> suffixHeuristics = inversIndex.get(form); | ||||
|             if (suffixHeuristics == null) { | ||||
|                 suffixHeuristics = new HashSet<Heuristic>(); | ||||
|                 inversIndex.put(form, suffixHeuristics); | ||||
|             } | ||||
|             suffixHeuristics.add(heuristic); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     private WordCard cleanWordCard(WordCard wordCard) { | ||||
|         return wordCard; | ||||
|     } | ||||
|  | ||||
|  | ||||
|     public void saveHeuristic() throws IOException { | ||||
|  | ||||
|         Map<Integer, Integer> dist = new TreeMap<Integer, Integer>(); | ||||
|         Set<Heuristic> prevSet = null; | ||||
|         int count = 0; | ||||
|         for (String key : inversIndex.keySet()) { | ||||
|             Set<Heuristic> currentSet = inversIndex.get(key); | ||||
|             if (!currentSet.equals(prevSet)) { | ||||
|                 Integer d = dist.get(key.length()); | ||||
|                 dist.put(key.length(), 1 + (d == null ? 0 : d)); | ||||
|                 prevSet = currentSet; | ||||
|                 count++; | ||||
|                 if (!ruleInverIndex.containsKey(currentSet)) { | ||||
|                     ruleInverIndex.put(currentSet, rules.size()); | ||||
|                     rules.add(currentSet); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|         System.out.println("Word with diffirent rules " + count); | ||||
|         System.out.println("All ivers words " + inversIndex.size()); | ||||
|         System.out.println(dist); | ||||
|         System.out.println("diffirent rule count " + ruleInverIndex.size()); | ||||
|         Heuristic[][] heuristics = new Heuristic[ruleInverIndex.size()][]; | ||||
|         int index = 0; | ||||
|         for (Set<Heuristic> hs : rules) { | ||||
|             heuristics[index] = new Heuristic[hs.size()]; | ||||
|             int indexj = 0; | ||||
|             for (Heuristic h : hs) { | ||||
|                 heuristics[index][indexj] = h; | ||||
|                 indexj++; | ||||
|             } | ||||
|             index++; | ||||
|         } | ||||
|  | ||||
|         int[][] ints = new int[count][]; | ||||
|         short[] rulesId = new short[count]; | ||||
|         count = 0; | ||||
|         prevSet = null; | ||||
|         for (String key : inversIndex.keySet()) { | ||||
|             Set<Heuristic> currentSet = inversIndex.get(key); | ||||
|             if (!currentSet.equals(prevSet)) { | ||||
|                 ints[count] = decoderEncoder.encodeToArray(key); | ||||
|                 rulesId[count] = (short) ruleInverIndex.get(currentSet).intValue(); | ||||
|                 count++; | ||||
|                 prevSet = currentSet; | ||||
|             } | ||||
|         } | ||||
|         Morph morph = new Morph(ints, rulesId, heuristics, grammaReader.getGrammaInfoAsArray()); | ||||
|         morph.writeToFile("sep.txt"); | ||||
|     } | ||||
|  | ||||
|     private String revertWord(String s) { | ||||
|         String result = ""; | ||||
|         for (int i = 1; i <= s.length(); i++) { | ||||
|             result += s.charAt(s.length() - i); | ||||
|         } | ||||
|         return result; | ||||
|     } | ||||
|  | ||||
|  | ||||
|     private Heuristic createEvristic(String wordBase, String canonicalSuffix, FlexiaModel fm, String normalSuffixForm) { | ||||
|         String form = fm.create(wordBase); | ||||
|         String normalForm = wordBase + canonicalSuffix; | ||||
|         Integer length = getCommonLength(form, normalForm); | ||||
|         Integer actualSuffixLengh = form.length() - length; | ||||
|         String actualNormalSuffix = normalForm.substring(length); | ||||
|         Integer integer = grammaReader.getGrammInversIndex().get(fm.getCode().substring(0, 2)); | ||||
|         Integer nf = grammaReader.getGrammInversIndex().get(normalSuffixForm.substring(0, 2)); | ||||
|         return new Heuristic((byte) actualSuffixLengh.intValue(), actualNormalSuffix, (short) integer.intValue(), (short) nf.intValue()); | ||||
|     } | ||||
|  | ||||
|     public static Integer getCommonLength(String s1, String s2) { | ||||
|         Integer length = Math.min(s1.length(), s2.length()); | ||||
|         for (int i = 0; i < length; i++) { | ||||
|             if (s1.charAt(i) != s2.charAt(i)) return i; | ||||
|         } | ||||
|         return length; | ||||
|     } | ||||
|  | ||||
|     private String cleanString(String s) { | ||||
|         return decoderEncoder.cleanString(s); | ||||
|         //return s.replace((char) (34 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET), (char) (6 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET)); | ||||
|     } | ||||
|  | ||||
| } | ||||
| @@ -0,0 +1,72 @@ | ||||
| /** | ||||
|  * Copyright 2009 Alexander Kuznetsov  | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|  * you may not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  *     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| package org.apache.lucene.morpholgy.dictionary; | ||||
|  | ||||
| import java.util.ArrayList; | ||||
| import java.util.List; | ||||
|  | ||||
| /** | ||||
|  * Represent word and all it forms. | ||||
|  */ | ||||
| public class WordCard { | ||||
|     private String canonicalFrom; | ||||
|     private String base; | ||||
|     private String canonicalSuffix; | ||||
|     private List<FlexiaModel> wordsFroms = new ArrayList<FlexiaModel>(); | ||||
|  | ||||
|     public WordCard(String canonicalFrom, String base, String canonicalSuffix) { | ||||
|         this.canonicalFrom = canonicalFrom; | ||||
|         this.canonicalSuffix = canonicalSuffix; | ||||
|         this.base = base; | ||||
|     } | ||||
|  | ||||
|     public void addFlexia(FlexiaModel flexiaModel) { | ||||
|         wordsFroms.add(flexiaModel); | ||||
|     } | ||||
|  | ||||
|     public String getCanonicalFrom() { | ||||
|         return canonicalFrom; | ||||
|     } | ||||
|  | ||||
|     public String getCanonicalSuffix() { | ||||
|         return canonicalSuffix; | ||||
|     } | ||||
|  | ||||
|     public String getBase() { | ||||
|         return base; | ||||
|     } | ||||
|  | ||||
|     public List<FlexiaModel> getWordsFroms() { | ||||
|         return wordsFroms; | ||||
|     } | ||||
|  | ||||
|     public void setCanonicalFrom(String canonicalFrom) { | ||||
|         this.canonicalFrom = canonicalFrom; | ||||
|     } | ||||
|  | ||||
|     public void setBase(String base) { | ||||
|         this.base = base; | ||||
|     } | ||||
|  | ||||
|     public void setCanonicalSuffix(String canonicalSuffix) { | ||||
|         this.canonicalSuffix = canonicalSuffix; | ||||
|     } | ||||
|  | ||||
|     public void setWordsFroms(List<FlexiaModel> wordsFroms) { | ||||
|         this.wordsFroms = wordsFroms; | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,28 @@ | ||||
| /** | ||||
|  * Copyright 2009 Alexander Kuznetsov  | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|  * you may not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  *     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| package org.apache.lucene.morpholgy.dictionary; | ||||
|  | ||||
| import java.io.IOException; | ||||
|  | ||||
| /** | ||||
|  * Interface allows get information from | ||||
|  * {@org.apache.lucene.russian.morphology.dictonary.DirtonaryReader}. | ||||
|  */ | ||||
| public interface WordProccessor { | ||||
|  | ||||
|     public void proccess(WordCard wordCard) throws IOException; | ||||
| } | ||||
		Reference in New Issue
	
	Block a user
	 alexander.a.kuznetsov
					alexander.a.kuznetsov