adding licence
git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@14 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
@ -1,39 +1,54 @@
|
||||
/**
|
||||
* Copyright 2009 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.russian.morphology;
|
||||
|
||||
import org.junit.Test;
|
||||
import static org.junit.Assert.assertThat;
|
||||
import static org.hamcrest.core.IsEqual.equalTo;
|
||||
import org.apache.lucene.russian.morphology.SuffixToLongException;
|
||||
import static org.junit.Assert.assertThat;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
|
||||
|
||||
public class RussianSuffixDecoderEncoderTest {
|
||||
|
||||
@Test
|
||||
public void testShouldCorretDecodeEncode() throws IOException {
|
||||
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/decoder-test-data.txt");
|
||||
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream,"UTF-8"));
|
||||
String s = bufferedReader.readLine();
|
||||
while(s != null){
|
||||
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/decoder-test-data.txt");
|
||||
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
|
||||
String s = bufferedReader.readLine();
|
||||
while (s != null) {
|
||||
String[] qa = s.trim().split(" ");
|
||||
Long ecodedSuffix = RussianSuffixDecoderEncoder.encode(qa[0]);
|
||||
assertThat(RussianSuffixDecoderEncoder.decode(ecodedSuffix),equalTo(qa[1]));
|
||||
assertThat(RussianSuffixDecoderEncoder.decode(ecodedSuffix), equalTo(qa[1]));
|
||||
s = bufferedReader.readLine();
|
||||
}
|
||||
}
|
||||
|
||||
@Test(expected = SuffixToLongException.class)
|
||||
public void shouldThrownExeptionIfSuffixToLong(){
|
||||
RussianSuffixDecoderEncoder.encode("1234567890123");
|
||||
public void shouldThrownExeptionIfSuffixToLong() {
|
||||
RussianSuffixDecoderEncoder.encode("1234567890123");
|
||||
}
|
||||
|
||||
@Test(expected = WrongCharaterException.class)
|
||||
public void shouldThrownExeptionIfSuffixContainWrongCharater(){
|
||||
RussianSuffixDecoderEncoder.encode("1");
|
||||
}
|
||||
|
||||
public void shouldThrownExeptionIfSuffixContainWrongCharater() {
|
||||
RussianSuffixDecoderEncoder.encode("1");
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,13 +1,28 @@
|
||||
/**
|
||||
* Copyright 2009 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.russian.morphology.analayzer;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
import org.junit.Test;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.InputStreamReader;
|
||||
|
||||
|
||||
@ -17,24 +32,22 @@ public class RussianMorphlogyAnalayzerTest {
|
||||
public void shouldCorrectProccessText() throws IOException {
|
||||
RussianMorphlogyAnalayzer morphlogyAnalayzer = new RussianMorphlogyAnalayzer();
|
||||
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/analayzer/russian-text.txt");
|
||||
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream,"UTF-8"));
|
||||
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
|
||||
|
||||
final Token reusableToken = new Token();
|
||||
|
||||
Token nextToken;
|
||||
Token nextToken;
|
||||
|
||||
|
||||
TokenStream in = morphlogyAnalayzer.tokenStream(null, bufferedReader);
|
||||
for (;;)
|
||||
{
|
||||
nextToken = in.next(reusableToken);
|
||||
for (; ;) {
|
||||
nextToken = in.next(reusableToken);
|
||||
|
||||
if (nextToken == null)
|
||||
{
|
||||
break;
|
||||
}
|
||||
if (nextToken == null) {
|
||||
break;
|
||||
}
|
||||
|
||||
System.out.println(nextToken.term());
|
||||
System.out.println(nextToken.term());
|
||||
// nextSampleToken = sample.next(reusableSampleToken);
|
||||
// assertEquals(
|
||||
// "Unicode",
|
||||
@ -42,7 +55,7 @@ public class RussianMorphlogyAnalayzerTest {
|
||||
// nextSampleToken == null
|
||||
// ? null
|
||||
// : nextSampleToken.term());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -1,24 +1,42 @@
|
||||
/**
|
||||
* Copyright 2009 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.russian.morphology.analayzer;
|
||||
|
||||
import org.junit.Test;
|
||||
import static org.junit.Assert.assertThat;
|
||||
import org.apache.lucene.russian.morphology.RussianSuffixDecoderEncoder;
|
||||
import static org.hamcrest.core.IsEqual.equalTo;
|
||||
import static org.junit.Assert.assertThat;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.*;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
|
||||
|
||||
public class SuffixEvristicsTest {
|
||||
|
||||
@Test
|
||||
public void testShouldDefineCorretCononicalWordForm() throws IOException {
|
||||
SuffixEvristics suffixEvristics = new SuffixEvristics();
|
||||
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/analayzer/suffix-evristics-test-data.txt");
|
||||
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream,"UTF-8"));
|
||||
String s = bufferedReader.readLine();
|
||||
while(s != null){
|
||||
SuffixEvristics suffixEvristics = new SuffixEvristics();
|
||||
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/analayzer/suffix-evristics-test-data.txt");
|
||||
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
|
||||
String s = bufferedReader.readLine();
|
||||
while (s != null) {
|
||||
String[] qa = s.trim().split(" ");
|
||||
assertThat(suffixEvristics.getCanonicalForm(qa[0]),equalTo(qa[1]));
|
||||
assertThat(suffixEvristics.getCanonicalForm(qa[0]), equalTo(qa[1]));
|
||||
s = bufferedReader.readLine();
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,3 @@
|
||||
В условиях нарастающей пурги было сделано 4 успешных захода на посадку. После завершения облета и демонтажа оборудования
|
||||
Рубен Есаян дал устную оценку эксперимента:"Все нормально, будем рекомендовать систему к внедрению".
|
||||
Летом - с ноября по март - рейсы из Кейптауна (ЮАР) на станцию "Новолазаревская" (Антарктида) совершаются
|
||||
примерно один раз в две недели. Туда привозят людей, питание, оборудование, ГСМ и т.д.
|
||||
что-то
|
||||
В условиях нарастающей пурги было сделано 4 успешных захода на посадку. "Все нормально, будем рекомендовать систему к внедрению".
|
||||
Рейсы из Кейптауна (ЮАР) на станцию "Новолазаревская" (Антарктида) совершаются
|
||||
примерно один раз в две недели.
|
Reference in New Issue
Block a user