adding licence

git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@14 d817d54c-26ab-11de-abc9-2f7d1455ff7a
2009-04-14 07:47:27 +00:00
parent fe855dfa51
commit 3f26888bde
23 changed files with 541 additions and 129 deletions
--- a/etc/header.txt
+++ b/etc/header.txt
@@ -0,0 +1,13 @@
 Copyright 2009 Alexander Kuznetsov 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
--- a/pom.xml
+++ b/pom.xml
@@ -4,9 +4,27 @@
    <groupId>org.apache.lucene</groupId>
    <artifactId>russian-morpholgy</artifactId>
    <packaging>jar</packaging>
-    <version>1.0-SNAPSHOT</version>
+    <version>0.5-SNAPSHOT</version>
    <name>russian-morpholgy</name>
    <url>http://maven.apache.org</url>
    <distributionManagement>
        <repository>
            <id>russian-morpholgy</id>
            <url>
                ../repo/releases
            </url>
        </repository>
        <snapshotRepository>
            <id>russian-morpholgy-snapshots</id>
            <url>
                ../repo/snapshots
            </url>
            <uniqueVersion>true</uniqueVersion>
        </snapshotRepository>
    </distributionManagement>
    <dependencies>
        <dependency>
            <groupId>junit</groupId>
@@ -29,8 +47,68 @@
        </dependency>
    </dependencies>
-    <build>
+    <repositories>
        <repository>
            <id>maven2-repository.dev.java.net</id>
            <name>Java.net Repository for Maven</name>
            <url>http://download.java.net/maven/2/</url>
        </repository>
    </repositories>
    <pluginRepositories>
        <pluginRepository>
            <id>mc-release</id>
            <name>maven-license-plugin repository of releases</name>
            <url>http://mc-repo.googlecode.com/svn/maven2/releases</url>
            <snapshots>
                <enabled>false</enabled>
            </snapshots>
            <releases>
                <enabled>true</enabled>
            </releases>
        </pluginRepository>
    </pluginRepositories>
    <reporting>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-javadoc-plugin</artifactId>
            </plugin>
            <plugin>
                <groupId>org.codehaus.mojo</groupId>
                <artifactId>cobertura-maven-plugin</artifactId>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-pmd-plugin</artifactId>
                <configuration>
                    <linkXref>true</linkXref>
                    <sourceEncoding>utf-8</sourceEncoding>
                    <minimumTokens>100</minimumTokens>
                    <targetJdk>1.5</targetJdk>
                </configuration>
            </plugin>
        </plugins>
    </reporting>
    <build>
        <extensions>
            <extension>
                <groupId>org.jvnet.wagon-svn</groupId>
                <artifactId>wagon-svn</artifactId>
                <version>1.8</version>
            </extension>
        </extensions>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
@@ -39,6 +117,34 @@
                    <target>1.5</target>
                </configuration>
            </plugin>
            <plugin>
                <!--
                 usage: http://code.google.com/p/maven-license-plugin/wiki/HowTo
                -->
                <artifactId>maven-license-plugin</artifactId>
                <groupId>com.mathieucarbou.mojo</groupId>
                <configuration>
                    <basedir>${project.parent.basedir}</basedir>
                    <header>etc/header.txt</header>
                    <excludes>
                        <exclude>**/*.txt</exclude>
                    </excludes>
                    <includes>
                        <include>**/src/**</include>
                        <include>**/pom.xml</include>
                    </includes>
                </configuration>
                <executions>
                    <execution>
                        <phase>test</phase>
                        <goals>
                            <goal>check</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
--- a/src/main/java/org/apache/lucene/App.java
+++ b/src/main/java/org/apache/lucene/App.java
@@ -1,13 +0,0 @@
 package org.apache.lucene;
 /**
 * Hello world!
 *
 */
 public class App 
 {
    public static void main( String[] args )
    {
        System.out.println( "Hello World!" );
    }
 }
--- a/src/main/java/org/apache/lucene/russian/morphology/EvristicBuilder.java
+++ b/src/main/java/org/apache/lucene/russian/morphology/EvristicBuilder.java
@@ -1,14 +1,31 @@
 /**
 * Copyright 2009 Alexander Kuznetsov 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.lucene.russian.morphology;
 import org.apache.lucene.russian.morphology.dictonary.DictonaryReader;
 import org.apache.lucene.russian.morphology.dictonary.IgnoredFormReader;
 import org.apache.lucene.russian.morphology.evristics.Evristic;
 import org.apache.lucene.russian.morphology.evristics.StatiticsCollectors;
 import org.apache.lucene.russian.morphology.evristics.SuffixCounter;
 import org.apache.lucene.russian.morphology.evristics.Evristic;
-import java.io.*;
+import java.io.IOException;
-import java.util.*;
+import java.util.Arrays;
-import java.util.concurrent.atomic.AtomicInteger;
+import java.util.Collection;
 import java.util.Set;
 public class EvristicBuilder {
@@ -23,12 +40,12 @@ public class EvristicBuilder {
        Object[] objects = counterCollection.toArray();
        Arrays.sort(objects);
        System.out.println("Length " + objects.length + " ingored words " + statiticsCollectors.getIgnoredCount());
-        for(int i = 0; i < 10; i++){
+        for (int i = 0; i < 10; i++) {
            System.out.println(objects[i]);
        }
        final Evristic evristic = new Evristic();
-        for(int i = 0; i < objects.length; i++){
+        for (int i = 0; i < objects.length; i++) {
            evristic.addEvristic(((SuffixCounter) objects[i]).getSuffixEvristic());
        }
--- a/src/main/java/org/apache/lucene/russian/morphology/RussianSuffixDecoderEncoder.java
+++ b/src/main/java/org/apache/lucene/russian/morphology/RussianSuffixDecoderEncoder.java
@@ -1,3 +1,19 @@
 /**
 * Copyright 2009 Alexander Kuznetsov 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.lucene.russian.morphology;
 /**
@@ -44,12 +60,12 @@ public class RussianSuffixDecoderEncoder {
        return result;
    }
-    static public boolean checkCharacter(char c){
+    static public boolean checkCharacter(char c) {
-         int code = 0 + c;
+        int code = 0 + c;
-         if(code == 45) return true;
+        if (code == 45) return true;
-         code -= RUSSIAN_SMALL_LETTER_OFFSET;
+        code -= RUSSIAN_SMALL_LETTER_OFFSET;
-         if(code == 34) return true;
+        if (code == 34) return true;
-         if(code > 0 && code < 33) return true;
+        if (code > 0 && code < 33) return true;
-         return false;
+        return false;
    }
 }
--- a/src/main/java/org/apache/lucene/russian/morphology/SuffixToLongException.java
+++ b/src/main/java/org/apache/lucene/russian/morphology/SuffixToLongException.java
@@ -1,3 +1,19 @@
 /**
 * Copyright 2009 Alexander Kuznetsov 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.lucene.russian.morphology;
--- a/src/main/java/org/apache/lucene/russian/morphology/WrongCharaterException.java
+++ b/src/main/java/org/apache/lucene/russian/morphology/WrongCharaterException.java
@@ -1,7 +1,23 @@
 /**
 * Copyright 2009 Alexander Kuznetsov 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.lucene.russian.morphology;
-public class WrongCharaterException extends RuntimeException{
+public class WrongCharaterException extends RuntimeException {
    public WrongCharaterException() {
    }
--- a/src/main/java/org/apache/lucene/russian/morphology/analayzer/RussianMorphlogyAnalayzer.java
+++ b/src/main/java/org/apache/lucene/russian/morphology/analayzer/RussianMorphlogyAnalayzer.java
@@ -1,15 +1,31 @@
 /**
 * Copyright 2009 Alexander Kuznetsov 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.lucene.russian.morphology.analayzer;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.LowerCaseFilter;
-import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import java.io.Reader;
 import java.io.IOException;
 import java.io.Reader;
-public class RussianMorphlogyAnalayzer  extends Analyzer {
+public class RussianMorphlogyAnalayzer extends Analyzer {
    private SuffixEvristics suffixEvristics;
    public RussianMorphlogyAnalayzer() throws IOException {
@@ -20,6 +36,6 @@ public class RussianMorphlogyAnalayzer  extends Analyzer {
        TokenStream result = new StandardTokenizer(reader);
        result = new StandardFilter(result);
        result = new LowerCaseFilter(result);
-        return new RussianMorphlogyFilter(result,suffixEvristics);
+        return new RussianMorphlogyFilter(result, suffixEvristics);
    }
 }
--- a/src/main/java/org/apache/lucene/russian/morphology/analayzer/RussianMorphlogyFilter.java
+++ b/src/main/java/org/apache/lucene/russian/morphology/analayzer/RussianMorphlogyFilter.java
@@ -1,3 +1,19 @@
 /**
 * Copyright 2009 Alexander Kuznetsov 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.lucene.russian.morphology.analayzer;
 import org.apache.lucene.analysis.Token;
@@ -17,11 +33,11 @@ public class RussianMorphlogyFilter extends TokenFilter {
    public Token next(final Token reusableToken) throws IOException {
        Token nextToken = input.next(reusableToken);
-        if(nextToken == null || nextToken.term().length() == 0) return nextToken;
+        if (nextToken == null || nextToken.term().length() == 0) return nextToken;
        String word = nextToken.term();
        Character testC = word.charAt(0);
-        if (Character.UnicodeBlock.of(testC) != Character.UnicodeBlock.CYRILLIC){
+        if (Character.UnicodeBlock.of(testC) != Character.UnicodeBlock.CYRILLIC) {
-            return  nextToken;
+            return nextToken;
        }
        Token current = (Token) nextToken.clone();
        return createToken(suffixEvristics.getCanonicalForm(word), current, reusableToken);
--- a/src/main/java/org/apache/lucene/russian/morphology/analayzer/SuffixEvristics.java
+++ b/src/main/java/org/apache/lucene/russian/morphology/analayzer/SuffixEvristics.java
@@ -1,10 +1,25 @@
 /**
 * Copyright 2009 Alexander Kuznetsov 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.lucene.russian.morphology.analayzer;
 import org.apache.lucene.russian.morphology.RussianSuffixDecoderEncoder;
 import java.io.*;
 import java.util.Arrays;
 import java.util.HashSet;
 public class SuffixEvristics {
@@ -46,23 +61,23 @@ public class SuffixEvristics {
        int startSymbol = form.length() > RussianSuffixDecoderEncoder.SUFFIX_LENGTH ? form.length() - RussianSuffixDecoderEncoder.SUFFIX_LENGTH : 0;
        String suffixS = form.substring(startSymbol);
-        if(!chechSuffix(suffixS)) return form;
+        if (!chechSuffix(suffixS)) return form;
        Long suffix = RussianSuffixDecoderEncoder.encode(suffixS);
-        int index = Arrays.binarySearch(keys,suffix);
+        int index = Arrays.binarySearch(keys, suffix);
-        if(index < -1){
+        if (index < -1) {
            System.out.println(" " + form);
            return form;
-        }else{
+        } else {
            String nSuffix = RussianSuffixDecoderEncoder.decode(values[index]);
            return startSymbol > 0 ? form.substring(0, startSymbol) + nSuffix : nSuffix;
        }
    }
-    private boolean chechSuffix(String suffix){
+    private boolean chechSuffix(String suffix) {
-        for(int i = 0; i < suffix.length(); i++){
+        for (int i = 0; i < suffix.length(); i++) {
            if (!RussianSuffixDecoderEncoder.checkCharacter(suffix.charAt(i))) return false;
        }
        return true;
--- a/src/main/java/org/apache/lucene/russian/morphology/dictonary/DictonaryReader.java
+++ b/src/main/java/org/apache/lucene/russian/morphology/dictonary/DictonaryReader.java
@@ -1,9 +1,28 @@
 /**
 * Copyright 2009 Alexander Kuznetsov 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.lucene.russian.morphology.dictonary;
 import org.apache.lucene.russian.morphology.RussianSuffixDecoderEncoder;
 import java.io.BufferedReader;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.util.*;
 import java.io.*;
 /**
@@ -15,7 +34,7 @@ public class DictonaryReader {
    private String fileEncoding = "windows-1251";
    private List<List<FlexiaModel>> wordsFlexias = new ArrayList<List<FlexiaModel>>();
    private List<List<String>> wordPrefixes = new ArrayList<List<String>>();
-    private Set<String> ingnoredForm =  new HashSet<String>();
+    private Set<String> ingnoredForm = new HashSet<String>();
    public DictonaryReader(String fileName, Set<String> ingnoredForm) {
        this.fileName = fileName;
@@ -35,11 +54,11 @@ public class DictonaryReader {
        sckipBlock(bufferedReader);
        sckipBlock(bufferedReader);
        readPrefix(bufferedReader);
-        readWords(bufferedReader,wordProccessor);
+        readWords(bufferedReader, wordProccessor);
    }
-    private void readWords(BufferedReader reader,WordProccessor wordProccessor) throws IOException {
+    private void readWords(BufferedReader reader, WordProccessor wordProccessor) throws IOException {
        String s = reader.readLine();
        int count = Integer.valueOf(s);
        for (int i = 0; i < count; i++) {
@@ -54,15 +73,15 @@ public class DictonaryReader {
            if (models.size() > 0 && !ingnoredForm.contains(models.get(0).getCode())) {
                WordCard card = new WordCard(cleanString(models.get(0).create(word)));
                for (FlexiaModel fm : models) {
-                       card.addFrom(cleanString(fm.create(word)));
+                    card.addFrom(cleanString(fm.create(word)));
                }
                wordProccessor.proccess(card);
            }
        }
    }
-    private String cleanString(String s){
+    private String cleanString(String s) {
-        return s.replace((char)(34 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET),(char)(6 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET));
+        return s.replace((char) (34 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET), (char) (6 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET));
    }
    private void sckipBlock(BufferedReader reader) throws IOException {
@@ -99,8 +118,8 @@ public class DictonaryReader {
    private void addFlexia(ArrayList<FlexiaModel> flexiaModelArrayList, String line) {
        String[] fl = line.split("\\*");
        // forms that split into three '*'-separated parts are currently ignored
-      //  if (fl.length == 3)
+        //  if (fl.length == 3)
-      //      flexiaModelArrayList.add(new FlexiaModel(fl[1], fl[0].toLowerCase(), fl[2].toLowerCase()));
+        //      flexiaModelArrayList.add(new FlexiaModel(fl[1], fl[0].toLowerCase(), fl[2].toLowerCase()));
        if (fl.length == 2) flexiaModelArrayList.add(new FlexiaModel(fl[1], fl[0].toLowerCase(), ""));
    }
--- a/src/main/java/org/apache/lucene/russian/morphology/dictonary/FlexiaModel.java
+++ b/src/main/java/org/apache/lucene/russian/morphology/dictonary/FlexiaModel.java
@@ -1,3 +1,19 @@
 /**
 * Copyright 2009 Alexander Kuznetsov 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.lucene.russian.morphology.dictonary;
 /**
--- a/src/main/java/org/apache/lucene/russian/morphology/dictonary/IgnoredFormReader.java
+++ b/src/main/java/org/apache/lucene/russian/morphology/dictonary/IgnoredFormReader.java
@@ -1,11 +1,27 @@
 /**
 * Copyright 2009 Alexander Kuznetsov 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.lucene.russian.morphology.dictonary;
 import java.util.Set;
 import java.util.HashSet;
 import java.io.BufferedReader;
 import java.io.InputStreamReader;
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.util.HashSet;
 import java.util.Set;
 public class IgnoredFormReader {
--- a/src/main/java/org/apache/lucene/russian/morphology/dictonary/WordCard.java
+++ b/src/main/java/org/apache/lucene/russian/morphology/dictonary/WordCard.java
@@ -1,7 +1,23 @@
 /**
 * Copyright 2009 Alexander Kuznetsov 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.lucene.russian.morphology.dictonary;
 import java.util.List;
 import java.util.ArrayList;
 import java.util.List;
 /**
 * Represents a word and all its forms.
@@ -14,7 +30,7 @@ public class WordCard {
        this.canonicalFrom = canonicalFrom;
    }
-    protected void addFrom(String word){
+    protected void addFrom(String word) {
        wordsFroms.add(word);
    }
--- a/src/main/java/org/apache/lucene/russian/morphology/dictonary/WordProccessor.java
+++ b/src/main/java/org/apache/lucene/russian/morphology/dictonary/WordProccessor.java
@@ -1,3 +1,19 @@
 /**
 * Copyright 2009 Alexander Kuznetsov 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.lucene.russian.morphology.dictonary;
 import java.io.IOException;
--- a/src/main/java/org/apache/lucene/russian/morphology/evristics/Evristic.java
+++ b/src/main/java/org/apache/lucene/russian/morphology/evristics/Evristic.java
@@ -1,9 +1,28 @@
 /**
 * Copyright 2009 Alexander Kuznetsov 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.lucene.russian.morphology.evristics;
 import org.apache.lucene.russian.morphology.RussianSuffixDecoderEncoder;
-import java.util.*;
+import java.io.BufferedReader;
-import java.io.*;
+import java.io.FileReader;
 import java.io.FileWriter;
 import java.io.IOException;
 import java.util.TreeMap;
 public class Evristic {
@@ -35,7 +54,7 @@ public class Evristic {
        String s = reader.readLine();
        while (s != null) {
            String[] sfns = s.split(" ");
-            if(sfns.length == 2){
+            if (sfns.length == 2) {
                encodedSuffixesPairs.put(Long.valueOf(sfns[0]), Long.valueOf(sfns[0]));
            }
            s = reader.readLine();
@@ -45,8 +64,8 @@ public class Evristic {
    public void writeToFile(String file) throws IOException {
        FileWriter writer = new FileWriter(file);
-        writer.write(encodedSuffixesPairs.size()+"\n");
+        writer.write(encodedSuffixesPairs.size() + "\n");
-        for(Long k:encodedSuffixesPairs.keySet()){
+        for (Long k : encodedSuffixesPairs.keySet()) {
            writer.write("" + k + " " + encodedSuffixesPairs.get(k) + "\n");
        }
        writer.close();
--- a/src/main/java/org/apache/lucene/russian/morphology/evristics/StatiticsCollectors.java
+++ b/src/main/java/org/apache/lucene/russian/morphology/evristics/StatiticsCollectors.java
@@ -1,26 +1,42 @@
 /**
 * Copyright 2009 Alexander Kuznetsov 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.lucene.russian.morphology.evristics;
 import org.apache.lucene.russian.morphology.dictonary.WordProccessor;
 import org.apache.lucene.russian.morphology.dictonary.WordCard;
 import org.apache.lucene.russian.morphology.RussianSuffixDecoderEncoder;
 import org.apache.lucene.russian.morphology.dictonary.WordCard;
 import org.apache.lucene.russian.morphology.dictonary.WordProccessor;
 import java.util.Map;
 import java.util.HashMap;
 import java.util.Map;
-public class StatiticsCollectors implements WordProccessor{
+public class StatiticsCollectors implements WordProccessor {
-    Map<SuffixEvristic,SuffixCounter> statititics = new HashMap<SuffixEvristic,SuffixCounter>();
+    Map<SuffixEvristic, SuffixCounter> statititics = new HashMap<SuffixEvristic, SuffixCounter>();
    private Integer ignoredCount = 0;
    public void proccess(WordCard wordCard) {
-        for(String form:wordCard.getWordsFroms()){
+        for (String form : wordCard.getWordsFroms()) {
            SuffixEvristic suffixEvristic = createEvristic(wordCard.getCanonicalFrom(), form);
            if (suffixEvristic == null) continue;
            SuffixCounter suffixCounter = statititics.get(suffixEvristic);
-            if(suffixCounter == null){
+            if (suffixCounter == null) {
                suffixCounter = new SuffixCounter(suffixEvristic);
-                statititics.put(suffixEvristic,suffixCounter);
+                statititics.put(suffixEvristic, suffixCounter);
            }
            suffixCounter.incrementAmount();
        }
@@ -30,19 +46,19 @@ public class StatiticsCollectors implements WordProccessor{
        return statititics;
    }
-    private SuffixEvristic createEvristic(String word,String form){
+    private SuffixEvristic createEvristic(String word, String form) {
        int startSymbol = form.length() > RussianSuffixDecoderEncoder.SUFFIX_LENGTH ? form.length() - RussianSuffixDecoderEncoder.SUFFIX_LENGTH : 0;
        String formSuffix = form.substring(startSymbol);
-        if(word.length() < startSymbol){
+        if (word.length() < startSymbol) {
            ignoredCount++;
            return null;
        }
        String wordSuffix = word.length() > startSymbol ? word.substring(startSymbol) : "";
-        if (wordSuffix.length() > 12){
+        if (wordSuffix.length() > 12) {
            System.out.println(word + " " + form);
            return null;
        }
-        return new SuffixEvristic(formSuffix,wordSuffix);
+        return new SuffixEvristic(formSuffix, wordSuffix);
    }
--- a/src/main/java/org/apache/lucene/russian/morphology/evristics/SuffixCounter.java
+++ b/src/main/java/org/apache/lucene/russian/morphology/evristics/SuffixCounter.java
@@ -1,10 +1,26 @@
 /**
 * Copyright 2009 Alexander Kuznetsov 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.lucene.russian.morphology.evristics;
 /**
 * Contains information about the frequency of a suffix evristic
- *  in dictionary.
+ * in dictionary.
 */
-public class SuffixCounter implements Comparable{
+public class SuffixCounter implements Comparable {
    private SuffixEvristic suffixEvristic;
    private Double amnout = 0.0;
@@ -12,7 +28,7 @@ public class SuffixCounter implements Comparable{
        this.suffixEvristic = suffixEvristic;
    }
-    public void incrementAmount(){
+    public void incrementAmount() {
        amnout++;
    }
@@ -33,12 +49,12 @@ public class SuffixCounter implements Comparable{
    }
    public int compareTo(Object o) {
-        if(o instanceof SuffixCounter) return (int) Math.round(Math.signum(((SuffixCounter)o).amnout - amnout));
+        if (o instanceof SuffixCounter) return (int) Math.round(Math.signum(((SuffixCounter) o).amnout - amnout));
        return -1;
    }
    @Override
    public String toString() {
-        return ""+amnout + " " + suffixEvristic.toString();
+        return "" + amnout + " " + suffixEvristic.toString();
    }
 }
--- a/src/main/java/org/apache/lucene/russian/morphology/evristics/SuffixEvristic.java
+++ b/src/main/java/org/apache/lucene/russian/morphology/evristics/SuffixEvristic.java
@@ -1,3 +1,19 @@
 /**
 * Copyright 2009 Alexander Kuznetsov 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.lucene.russian.morphology.evristics;
 /**
--- a/src/test/java/org/apache/lucene/russian/morphology/RussianSuffixDecoderEncoderTest.java
+++ b/src/test/java/org/apache/lucene/russian/morphology/RussianSuffixDecoderEncoderTest.java
@@ -1,39 +1,54 @@
 /**
 * Copyright 2009 Alexander Kuznetsov 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.lucene.russian.morphology;
 import org.junit.Test;
 import static org.junit.Assert.assertThat;
 import static org.hamcrest.core.IsEqual.equalTo;
-import org.apache.lucene.russian.morphology.SuffixToLongException;
+import static org.junit.Assert.assertThat;
 import org.junit.Test;
 import java.io.InputStream;
 import java.io.BufferedReader;
 import java.io.InputStreamReader;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 public class RussianSuffixDecoderEncoderTest {
    @Test
    public void testShouldCorretDecodeEncode() throws IOException {
-       InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/decoder-test-data.txt");
+        InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/decoder-test-data.txt");
-       BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream,"UTF-8"));
+        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
-       String s = bufferedReader.readLine();
+        String s = bufferedReader.readLine();
-        while(s != null){
+        while (s != null) {
            String[] qa = s.trim().split(" ");
            Long ecodedSuffix = RussianSuffixDecoderEncoder.encode(qa[0]);
-            assertThat(RussianSuffixDecoderEncoder.decode(ecodedSuffix),equalTo(qa[1]));
+            assertThat(RussianSuffixDecoderEncoder.decode(ecodedSuffix), equalTo(qa[1]));
            s = bufferedReader.readLine();
        }
    }
    @Test(expected = SuffixToLongException.class)
-    public void shouldThrownExeptionIfSuffixToLong(){
+    public void shouldThrownExeptionIfSuffixToLong() {
-         RussianSuffixDecoderEncoder.encode("1234567890123");
+        RussianSuffixDecoderEncoder.encode("1234567890123");
    }
    @Test(expected = WrongCharaterException.class)
-    public void shouldThrownExeptionIfSuffixContainWrongCharater(){
+    public void shouldThrownExeptionIfSuffixContainWrongCharater() {
-         RussianSuffixDecoderEncoder.encode("1");
+        RussianSuffixDecoderEncoder.encode("1");
    }
 }
--- a/src/test/java/org/apache/lucene/russian/morphology/analayzer/RussianMorphlogyAnalayzerTest.java
+++ b/src/test/java/org/apache/lucene/russian/morphology/analayzer/RussianMorphlogyAnalayzerTest.java
@@ -1,13 +1,28 @@
 /**
 * Copyright 2009 Alexander Kuznetsov 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.lucene.russian.morphology.analayzer;
 import junit.framework.TestCase;
 import org.junit.Test;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.junit.Test;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.BufferedReader;
 import java.io.InputStreamReader;
@@ -17,24 +32,22 @@ public class RussianMorphlogyAnalayzerTest {
    public void shouldCorrectProccessText() throws IOException {
        RussianMorphlogyAnalayzer morphlogyAnalayzer = new RussianMorphlogyAnalayzer();
        InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/analayzer/russian-text.txt");
-        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream,"UTF-8"));
+        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
        final Token reusableToken = new Token();
-              Token nextToken;
+        Token nextToken;
        TokenStream in = morphlogyAnalayzer.tokenStream(null, bufferedReader);
-        for (;;)
+        for (; ;) {
-               {
+            nextToken = in.next(reusableToken);
                   nextToken = in.next(reusableToken);
-                   if (nextToken == null)
+            if (nextToken == null) {
-                   {
+                break;
-                       break;
+            }
                   }
-                   System.out.println(nextToken.term());
+            System.out.println(nextToken.term());
 //                   nextSampleToken = sample.next(reusableSampleToken);
 //                   assertEquals(
 //                       "Unicode",
@@ -42,7 +55,7 @@ public class RussianMorphlogyAnalayzerTest {
 //                       nextSampleToken == null
 //                       ? null
 //                       : nextSampleToken.term());
-               }
+        }
    }
 }
--- a/src/test/java/org/apache/lucene/russian/morphology/analayzer/SuffixEvristicsTest.java
+++ b/src/test/java/org/apache/lucene/russian/morphology/analayzer/SuffixEvristicsTest.java
@@ -1,24 +1,42 @@
 /**
 * Copyright 2009 Alexander Kuznetsov 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.lucene.russian.morphology.analayzer;
 import org.junit.Test;
 import static org.junit.Assert.assertThat;
 import org.apache.lucene.russian.morphology.RussianSuffixDecoderEncoder;
 import static org.hamcrest.core.IsEqual.equalTo;
 import static org.junit.Assert.assertThat;
 import org.junit.Test;
-import java.io.*;
+import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 public class SuffixEvristicsTest {
    @Test
    public void testShouldDefineCorretCononicalWordForm() throws IOException {
-       SuffixEvristics suffixEvristics = new SuffixEvristics();
+        SuffixEvristics suffixEvristics = new SuffixEvristics();
-       InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/analayzer/suffix-evristics-test-data.txt");
+        InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/analayzer/suffix-evristics-test-data.txt");
-       BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream,"UTF-8"));
+        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
-       String s = bufferedReader.readLine();
+        String s = bufferedReader.readLine();
-        while(s != null){
+        while (s != null) {
            String[] qa = s.trim().split(" ");
-            assertThat(suffixEvristics.getCanonicalForm(qa[0]),equalTo(qa[1]));
+            assertThat(suffixEvristics.getCanonicalForm(qa[0]), equalTo(qa[1]));
            s = bufferedReader.readLine();
        }
    }
--- a/src/test/resources/org/apache/lucene/russian/morphology/analayzer/russian-text.txt
+++ b/src/test/resources/org/apache/lucene/russian/morphology/analayzer/russian-text.txt
@@ -1,5 +1,3 @@
-В условиях нарастающей пурги было сделано 4 успешных захода на посадку. После завершения облета и демонтажа оборудования
+В условиях нарастающей пурги было сделано 4 успешных захода на посадку. "Все нормально, будем рекомендовать систему к внедрению".
-Рубен Есаян дал устную оценку эксперимента:"Все нормально, будем рекомендовать систему к внедрению".
+Рейсы из Кейптауна (ЮАР) на станцию "Новолазаревская" (Антарктида) совершаются
-Летом - с ноября по март - рейсы из Кейптауна (ЮАР) на станцию "Новолазаревская" (Антарктида) совершаются
+примерно один раз в две недели.
 примерно один раз в две недели. Туда привозят людей, питание, оборудование, ГСМ и т.д.
 что-то