Working on prefix hypotheses

git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@86 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
Alexander.A.Kuznetsov
2009-11-11 22:21:14 +00:00
parent 97fa8fa868
commit 6246f020fd
10 changed files with 577 additions and 15 deletions

View File

@ -0,0 +1,60 @@
/**
* Copyright 2009 Alexander Kuznetsov
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.morphology.russian;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
import java.io.IOException;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.HashSet;
/**
 * Rough speed check for {@link RussianAnalayzer}: runs the analyzer over one book
 * twice and prints how long the second pass took, in milliseconds.
 *
 * <p>The first pass runs before the timer is started, so only the second pass is
 * measured (presumably to keep one-off start-up costs out of the figure).
 *
 * <p>The book path may be given as the first command-line argument; when it is
 * omitted, {@link #DEFAULT_BOOK} is used.
 *
 * User: akuznetsov
 * Date: 31.10.2009
 */
public class TestSpeed {

    /** Book that is analyzed when no path is supplied on the command line. */
    private static final String DEFAULT_BOOK = "C:/tmp/_Aleksandr_Suhov_Tanets_na_raskalennyih_uglyah1.fb2";

    /**
     * Entry point.
     *
     * @param args optional; {@code args[0]} is the path of the book to analyze
     * @throws IOException if the book cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String bookName = (args != null && args.length > 0) ? args[0] : DEFAULT_BOOK;
        RussianAnalayzer russianAnalayzer = new RussianAnalayzer();

        // Untimed first pass.
        bookProccess(russianAnalayzer, bookName);

        // nanoTime is monotonic, so the measured interval cannot be skewed by
        // wall-clock adjustments; the result is still reported in milliseconds.
        long start = System.nanoTime();
        bookProccess(russianAnalayzer, bookName);
        long elapsedMillis = (System.nanoTime() - start) / 1000000L;
        System.out.println("Done in " + elapsedMillis);
    }

    /**
     * Reads the given file as UTF-8 text and pulls every token the analyzer
     * produces for it. The tokens themselves are discarded.
     *
     * @param russianAnalayzer analyzer used to build the token stream
     * @param bookName         path of the file to analyze
     * @throws IOException if the file cannot be opened or read
     */
    private static void bookProccess(RussianAnalayzer russianAnalayzer, String bookName) throws IOException {
        FileInputStream inputStream = new FileInputStream(bookName);
        try {
            TokenStream tokenStream = russianAnalayzer.tokenStream(null, new InputStreamReader(inputStream, "UTF-8"));
            final Token reusableToken = new Token();
            // next(...) returns null once the stream is exhausted.
            while (tokenStream.next(reusableToken) != null) {
                // Nothing to do with the token: only the analysis work matters here.
            }
        } finally {
            // Release the file handle even if analysis fails part-way through.
            inputStream.close();
        }
    }
}

View File

@ -18,4 +18,6 @@
на на
тест тест тесто
спам спам
спама спам
спама спам
наигранный наигранный
наивный наивный