Rollback of the wrong version of morphology; adding an interface for morphology
git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@88 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
@ -22,6 +22,6 @@ import java.io.IOException;
|
||||
public class RussianLuceneMorphology extends LuceneMorphology {
|
||||
|
||||
public RussianLuceneMorphology() throws IOException {
|
||||
super(RussianLuceneMorphology.class.getResourceAsStream("/org/apache/lucene/morphology/russian/morph.info"),RussianLuceneMorphology.class.getResourceAsStream("/org/apache/lucene/morphology/russian/prefixes.info"), new RussianLetterDecoderEncoder());
|
||||
super(RussianLuceneMorphology.class.getResourceAsStream("/org/apache/lucene/morphology/russian/morph.info"), new RussianLetterDecoderEncoder());
|
||||
}
|
||||
}
|
@ -15,11 +15,11 @@
|
||||
*/
|
||||
package org.apache.lucene.morphology.russian;
|
||||
|
||||
import org.apache.lucene.morphology.Morphology;
|
||||
import org.apache.lucene.morphology.MorphologyImpl;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class RussianMorphology extends Morphology {
|
||||
public class RussianMorphology extends MorphologyImpl {
|
||||
|
||||
public RussianMorphology() throws IOException {
|
||||
super(RussianMorphology.class.getResourceAsStream("/org/apache/lucene/morphology/russian/morph.info"), new RussianLetterDecoderEncoder());
|
||||
|
@ -1,60 +0,0 @@
|
||||
/**
|
||||
* Copyright 2009 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.morphology.russian;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.HashSet;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: akuznetsov
|
||||
* Date: 31.10.2009
|
||||
* Time: 14:01:11
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class TestSpeed {
|
||||
|
||||
public static void main(String[] args) throws IOException {
|
||||
RussianAnalayzer russianAnalayzer = new RussianAnalayzer();
|
||||
bookProccess(russianAnalayzer, "C:/tmp/_Aleksandr_Suhov_Tanets_na_raskalennyih_uglyah1.fb2");
|
||||
Long stat = System.currentTimeMillis();
|
||||
bookProccess(russianAnalayzer, "C:/tmp/_Aleksandr_Suhov_Tanets_na_raskalennyih_uglyah1.fb2");
|
||||
System.out.println("Done in " + (System.currentTimeMillis() - stat));
|
||||
}
|
||||
|
||||
private static void bookProccess(RussianAnalayzer russianAnalayzer, String bookName) throws IOException {
|
||||
FileInputStream inputStream = new FileInputStream(bookName);
|
||||
TokenStream tokenStream = russianAnalayzer.tokenStream(null,new InputStreamReader(inputStream,"UTF-8"));
|
||||
final Token reusableToken = new Token();
|
||||
long count = 0;
|
||||
Token nextToken;
|
||||
for (; ;) {
|
||||
nextToken = tokenStream.next(reusableToken);
|
||||
// System.out.println(" " + nextToken.term());
|
||||
count++;
|
||||
if (nextToken == null) {
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
//System.out.println("Words " + count);
|
||||
}
|
||||
}
|
@ -1,96 +0,0 @@
|
||||
11
|
||||
наи
|
||||
е
|
||||
8
|
||||
258
|
||||
255
|
||||
289
|
||||
252
|
||||
292
|
||||
262
|
||||
296
|
||||
286
|
||||
наи
|
||||
и
|
||||
2
|
||||
263
|
||||
297
|
||||
наи
|
||||
ю
|
||||
4
|
||||
250
|
||||
249
|
||||
283
|
||||
284
|
||||
по
|
||||
й
|
||||
5
|
||||
250
|
||||
251
|
||||
248
|
||||
247
|
||||
269
|
||||
по
|
||||
е
|
||||
3
|
||||
255
|
||||
252
|
||||
269
|
||||
наи
|
||||
й
|
||||
12
|
||||
239
|
||||
273
|
||||
250
|
||||
251
|
||||
248
|
||||
277
|
||||
247
|
||||
282
|
||||
281
|
||||
243
|
||||
285
|
||||
284
|
||||
наи
|
||||
о
|
||||
6
|
||||
274
|
||||
253
|
||||
276
|
||||
287
|
||||
242
|
||||
240
|
||||
наи
|
||||
м
|
||||
10
|
||||
256
|
||||
290
|
||||
257
|
||||
291
|
||||
279
|
||||
278
|
||||
294
|
||||
260
|
||||
244
|
||||
245
|
||||
наи
|
||||
х
|
||||
6
|
||||
259
|
||||
293
|
||||
261
|
||||
295
|
||||
264
|
||||
298
|
||||
наи
|
||||
я
|
||||
2
|
||||
246
|
||||
280
|
||||
наи
|
||||
у
|
||||
4
|
||||
275
|
||||
254
|
||||
288
|
||||
241
|
@ -33,7 +33,7 @@ public class RussianLuceneMorphTest {
|
||||
|
||||
@Before
|
||||
public void setUp() throws IOException {
|
||||
luceneMorph = new RussianLuceneMorphology();
|
||||
luceneMorph = new LuceneMorphology(this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/morph.info"), new RussianLetterDecoderEncoder());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -1,4 +1,3 @@
|
||||
наилучший хороший
|
||||
еду еда ехать
|
||||
тестов тест
|
||||
вина вино вина
|
||||
@ -17,8 +16,4 @@
|
||||
тосклив тоскливый
|
||||
лучший хороший
|
||||
на на
|
||||
тест тест тесто
|
||||
спам спам
|
||||
спама спам
|
||||
наигранный наигранный
|
||||
наивный наивный
|
||||
тест тест тесто
|
Reference in New Issue
Block a user