Clean up: remove FrequentyReader and IgnoredFormReader, and drop the commented-out code that referenced them
git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@63 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
		| @@ -1,56 +0,0 @@ | ||||
| /** | ||||
|  * Copyright 2009 Alexander Kuznetsov | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|  * you may not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  *     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| package org.apache.lucene.morphology.dictionary; | ||||
|  | ||||
| import java.io.BufferedReader; | ||||
| import java.io.FileInputStream; | ||||
| import java.io.IOException; | ||||
| import java.io.InputStreamReader; | ||||
| import java.util.HashMap; | ||||
| import java.util.Map; | ||||
|  | ||||
|  | ||||
/**
 * Reads a space-separated frequency file into a map.
 *
 * <p>Every non-blank line is split on single spaces; the second token is parsed as a
 * {@code Double} and stored under the third token as key. The first token is not used.
 * The file is decoded with the encoding given to the constructor, {@code windows-1251}
 * when none is given.
 */
public class FrequentyReader {
    private static final String DEFAULT_ENCODING = "windows-1251";

    private final String fileName;
    private final String fileEncoding;

    /**
     * Creates a reader for {@code fileName} using the default {@code windows-1251} encoding.
     *
     * @param fileName path of the file to read
     */
    public FrequentyReader(String fileName) {
        this(fileName, DEFAULT_ENCODING);
    }

    /**
     * Creates a reader for {@code fileName} using an explicit encoding.
     *
     * @param fileName     path of the file to read
     * @param fileEncoding charset name used to decode the file
     */
    public FrequentyReader(String fileName, String fileEncoding) {
        this.fileName = fileName;
        this.fileEncoding = fileEncoding;
    }

    /**
     * Reads the whole file and returns the parsed values keyed by the third token of each line.
     * When the same key occurs on several lines, the last line wins.
     *
     * @return a new mutable map; empty when the file has no non-blank lines
     * @throws IOException if the file cannot be opened, decoded or read
     * @throws ArrayIndexOutOfBoundsException if a non-blank line has fewer than three tokens
     * @throws NumberFormatException if the second token of a line is not a valid double
     */
    public Map<String, Double> read() throws IOException {
        Map<String, Double> result = new HashMap<String, Double>();

        // Open the raw stream first so it is released even when the encoding name is
        // rejected by InputStreamReader or a line fails to parse.
        FileInputStream stream = new FileInputStream(fileName);
        try {
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader(stream, fileEncoding));
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                // Blank lines (typically a trailing newline) carry no record.
                if (line.trim().length() == 0) {
                    continue;
                }
                String[] tokens = line.split(" ");
                Double value = Double.valueOf(tokens[1]);
                result.put(tokens[2], value);
            }
        } finally {
            stream.close();
        }
        return result;
    }
}
| @@ -1,54 +0,0 @@ | ||||
| /** | ||||
|  * Copyright 2009 Alexander Kuznetsov  | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|  * you may not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  *     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
|  | ||||
| package org.apache.lucene.morphology.dictionary; | ||||
|  | ||||
| import java.io.BufferedReader; | ||||
| import java.io.FileInputStream; | ||||
| import java.io.IOException; | ||||
| import java.io.InputStreamReader; | ||||
| import java.util.HashSet; | ||||
| import java.util.Set; | ||||
|  | ||||
|  | ||||
/**
 * Reads a list of ignored forms from a text file.
 *
 * <p>Lines starting with {@code //} are skipped. For every other non-blank line, the line is
 * trimmed and its first space-separated token is collected. The file is decoded with the
 * encoding given to the constructor, {@code windows-1251} when none is given.
 */
public class IgnoredFormReader {
    private static final String DEFAULT_ENCODING = "windows-1251";

    private final String fileName;
    private final String fileEncoding;

    /**
     * Creates a reader for {@code fileName} using the default {@code windows-1251} encoding.
     *
     * @param fileName path of the file to read
     */
    public IgnoredFormReader(String fileName) {
        this(fileName, DEFAULT_ENCODING);
    }

    /**
     * Creates a reader for {@code fileName} using an explicit encoding.
     *
     * @param fileName     path of the file to read
     * @param fileEncoding charset name used to decode the file
     */
    public IgnoredFormReader(String fileName, String fileEncoding) {
        this.fileName = fileName;
        this.fileEncoding = fileEncoding;
    }

    /**
     * Reads the whole file and returns the first token of every line that is neither a
     * {@code //} comment nor blank.
     *
     * @return a new mutable set; empty when the file has no such lines
     * @throws IOException if the file cannot be opened, decoded or read
     */
    public Set<String> getIngnoredFroms() throws IOException {
        Set<String> result = new HashSet<String>();

        // Open the raw stream first so it is released even when the encoding name is
        // rejected by InputStreamReader or reading fails part-way.
        FileInputStream stream = new FileInputStream(fileName);
        try {
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader(stream, fileEncoding));
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                // The comment check is done on the raw line, before trimming.
                if (line.startsWith("//")) {
                    continue;
                }
                String trimmed = line.trim();
                // A blank line would otherwise add the empty string as a form.
                if (trimmed.length() == 0) {
                    continue;
                }
                result.add(trimmed.split(" ")[0]);
            }
        } finally {
            stream.close();
        }
        return result;
    }
}
| @@ -25,7 +25,7 @@ import java.io.IOException; | ||||
| import java.util.*; | ||||
|  | ||||
|  | ||||
| //todo made refactoring thi8s class | ||||
| //todo refactor this class | ||||
| public class StatiticsCollector implements WordProccessor { | ||||
|     private TreeMap<String, Set<Heuristic>> inversIndex = new TreeMap<String, Set<Heuristic>>(); | ||||
|     private Map<Set<Heuristic>, Integer> ruleInverIndex = new HashMap<Set<Heuristic>, Integer>(); | ||||
|   | ||||
| @@ -27,8 +27,6 @@ import java.util.HashSet; | ||||
|  | ||||
| public class EnglishHeuristicBuilder { | ||||
|     public static void main(String[] args) throws IOException { | ||||
|         //IgnoredFormReader formReader = new IgnoredFormReader("data/igoredFrom.txt"); | ||||
|         //Set<String> form = formReader.getIngnoredFroms(); | ||||
|  | ||||
|         GrammaReader grammaInfo = new GrammaReader("dictonary/Dicts/Morph/egramtab.tab"); | ||||
|         DictonaryReader dictonaryReader = new DictonaryReader("dictonary/Dicts/SrcMorph/EngSrc/morphs.mrd", new HashSet<String>()); | ||||
|   | ||||
| @@ -27,9 +27,6 @@ import java.util.HashSet; | ||||
|  | ||||
| public class RussianHeuristicBuilder { | ||||
|     public static void main(String[] args) throws IOException { | ||||
|         //IgnoredFormReader formReader = new IgnoredFormReader("data/igoredFrom.txt"); | ||||
|         //Set<String> form = formReader.getIngnoredFroms(); | ||||
|  | ||||
|         GrammaReader grammaInfo = new GrammaReader("dictonary/Dicts/Morph/rgramtab.tab"); | ||||
|         DictonaryReader dictonaryReader = new DictonaryReader("dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd", new HashSet<String>()); | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 alexander.a.kuznetsov
					alexander.a.kuznetsov