adding licence

git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@14 d817d54c-26ab-11de-abc9-2f7d1455ff7a
2009-04-14 07:47:27 +00:00
parent fe855dfa51
commit 3f26888bde
23 changed files with 541 additions and 129 deletions
@@ -0,0 +1,13 @@
+Copyright 2009 Alexander Kuznetsov 
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
@@ -4,9 +4,27 @@
    <groupId>org.apache.lucene</groupId>
    <artifactId>russian-morpholgy</artifactId>
    <packaging>jar</packaging>
-    <version>1.0-SNAPSHOT</version>
+    <version>0.5-SNAPSHOT</version>
    <name>russian-morpholgy</name>
    <url>http://maven.apache.org</url>
+
+    <distributionManagement>
+        <repository>
+            <id>russian-morpholgy</id>
+            <url>
+                ../repo/releases
+            </url>
+        </repository>
+        <snapshotRepository>
+            <id>russian-morpholgy-snapshots</id>
+            <url>
+                ../repo/snapshots
+            </url>
+            <uniqueVersion>true</uniqueVersion>
+        </snapshotRepository>
+    </distributionManagement>
+
+
    <dependencies>
        <dependency>
            <groupId>junit</groupId>
@@ -29,8 +47,68 @@
        </dependency>
    </dependencies>

-    <build>
+    <repositories>
+        <repository>
+            <id>maven2-repository.dev.java.net</id>
+            <name>Java.net Repository for Maven</name>
+            <url>http://download.java.net/maven/2/</url>
+        </repository>
+    </repositories>
+
+    <pluginRepositories>
+        <pluginRepository>
+            <id>mc-release</id>
+            <name>maven-license-plugin repository of releases</name>
+            <url>http://mc-repo.googlecode.com/svn/maven2/releases</url>
+
+            <snapshots>
+                <enabled>false</enabled>
+            </snapshots>
+
+            <releases>
+                <enabled>true</enabled>
+            </releases>
+        </pluginRepository>
+    </pluginRepositories>
+
+    <reporting>
        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-javadoc-plugin</artifactId>
+            </plugin>
+
+
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>cobertura-maven-plugin</artifactId>
+            </plugin>
+
+
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-pmd-plugin</artifactId>
+                <configuration>
+                    <linkXref>true</linkXref>
+                    <sourceEncoding>utf-8</sourceEncoding>
+                    <minimumTokens>100</minimumTokens>
+                    <targetJdk>1.5</targetJdk>
+                </configuration>
+            </plugin>
+        </plugins>
+    </reporting>
+
+    <build>
+        <extensions>
+            <extension>
+                <groupId>org.jvnet.wagon-svn</groupId>
+                <artifactId>wagon-svn</artifactId>
+                <version>1.8</version>
+            </extension>
+        </extensions>
+
+        <plugins>
+
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
@@ -39,6 +117,34 @@
                    <target>1.5</target>
                </configuration>
            </plugin>
+            <plugin>
+                <!--
+                 usage: http://code.google.com/p/maven-license-plugin/wiki/HowTo
+                -->
+                <artifactId>maven-license-plugin</artifactId>
+                <groupId>com.mathieucarbou.mojo</groupId>
+
+                <configuration>
+                    <basedir>${project.parent.basedir}</basedir>
+                    <header>etc/header.txt</header>
+                    <excludes>
+                        <exclude>**/*.txt</exclude>
+                    </excludes>
+                    <includes>
+                        <include>**/src/**</include>
+                        <include>**/pom.xml</include>
+                    </includes>
+                </configuration>
+
+                <executions>
+                    <execution>
+                        <phase>test</phase>
+                        <goals>
+                            <goal>check</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
        </plugins>
    </build>

@@ -1,13 +0,0 @@
-package org.apache.lucene;
-
-/**
- * Hello world!
- *
- */
-public class App 
-{
-    public static void main( String[] args )
-    {
-        System.out.println( "Hello World!" );
-    }
-}
@@ -1,14 +1,31 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.lucene.russian.morphology;

 import org.apache.lucene.russian.morphology.dictonary.DictonaryReader;
 import org.apache.lucene.russian.morphology.dictonary.IgnoredFormReader;
+import org.apache.lucene.russian.morphology.evristics.Evristic;
 import org.apache.lucene.russian.morphology.evristics.StatiticsCollectors;
 import org.apache.lucene.russian.morphology.evristics.SuffixCounter;
-import org.apache.lucene.russian.morphology.evristics.Evristic;

-import java.io.*;
-import java.util.*;
-import java.util.concurrent.atomic.AtomicInteger;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Set;


 public class EvristicBuilder {
@@ -23,12 +40,12 @@ public class EvristicBuilder {
        Object[] objects = counterCollection.toArray();
        Arrays.sort(objects);
        System.out.println("Length " + objects.length + " ingored words " + statiticsCollectors.getIgnoredCount());
-        for(int i = 0; i < 10; i++){
+        for (int i = 0; i < 10; i++) {
            System.out.println(objects[i]);
        }

        final Evristic evristic = new Evristic();
-        for(int i = 0; i < objects.length; i++){
+        for (int i = 0; i < objects.length; i++) {
            evristic.addEvristic(((SuffixCounter) objects[i]).getSuffixEvristic());
        }

@@ -1,3 +1,19 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.lucene.russian.morphology;

 /**
@@ -24,7 +40,7 @@ public class RussianSuffixDecoderEncoder {
                c = DASH_CODE;
            }
            if (c == EE_CHAR) c = E_CHAR;
-            if (c < 0 || c > 33) throw new WrongCharaterException(); 
+            if (c < 0 || c > 33) throw new WrongCharaterException();
            result = result * 35L + c;
        }
        return result;
@@ -44,12 +60,12 @@ public class RussianSuffixDecoderEncoder {
        return result;
    }

-    static public boolean checkCharacter(char c){
-         int code = 0 + c;
-         if(code == 45) return true;
-         code -= RUSSIAN_SMALL_LETTER_OFFSET;
-         if(code == 34) return true;
-         if(code > 0 && code < 33) return true;
-         return false;
+    static public boolean checkCharacter(char c) {
+        int code = 0 + c;
+        if (code == 45) return true;
+        code -= RUSSIAN_SMALL_LETTER_OFFSET;
+        if (code == 34) return true;
+        if (code > 0 && code < 33) return true;
+        return false;
    }
 }
@@ -1,3 +1,19 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.lucene.russian.morphology;


@@ -1,7 +1,23 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.lucene.russian.morphology;


-public class WrongCharaterException extends RuntimeException{
+public class WrongCharaterException extends RuntimeException {
    public WrongCharaterException() {
    }

@@ -1,15 +1,31 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.lucene.russian.morphology.analayzer;

 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.LowerCaseFilter;
-import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.standard.StandardFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;

-import java.io.Reader;
 import java.io.IOException;
+import java.io.Reader;

-public class RussianMorphlogyAnalayzer  extends Analyzer {
+public class RussianMorphlogyAnalayzer extends Analyzer {
    private SuffixEvristics suffixEvristics;

    public RussianMorphlogyAnalayzer() throws IOException {
@@ -20,6 +36,6 @@ public class RussianMorphlogyAnalayzer  extends Analyzer {
        TokenStream result = new StandardTokenizer(reader);
        result = new StandardFilter(result);
        result = new LowerCaseFilter(result);
-        return new RussianMorphlogyFilter(result,suffixEvristics);
+        return new RussianMorphlogyFilter(result, suffixEvristics);
    }
 }
@@ -1,3 +1,19 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.lucene.russian.morphology.analayzer;

 import org.apache.lucene.analysis.Token;
@@ -17,11 +33,11 @@ public class RussianMorphlogyFilter extends TokenFilter {

    public Token next(final Token reusableToken) throws IOException {
        Token nextToken = input.next(reusableToken);
-        if(nextToken == null || nextToken.term().length() == 0) return nextToken;
+        if (nextToken == null || nextToken.term().length() == 0) return nextToken;
        String word = nextToken.term();
        Character testC = word.charAt(0);
-        if (Character.UnicodeBlock.of(testC) != Character.UnicodeBlock.CYRILLIC){
-            return  nextToken;
+        if (Character.UnicodeBlock.of(testC) != Character.UnicodeBlock.CYRILLIC) {
+            return nextToken;
        }
        Token current = (Token) nextToken.clone();
        return createToken(suffixEvristics.getCanonicalForm(word), current, reusableToken);
@@ -1,10 +1,25 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.lucene.russian.morphology.analayzer;

 import org.apache.lucene.russian.morphology.RussianSuffixDecoderEncoder;

 import java.io.*;
 import java.util.Arrays;
-import java.util.HashSet;


 public class SuffixEvristics {
@@ -46,23 +61,23 @@ public class SuffixEvristics {
        int startSymbol = form.length() > RussianSuffixDecoderEncoder.SUFFIX_LENGTH ? form.length() - RussianSuffixDecoderEncoder.SUFFIX_LENGTH : 0;
        String suffixS = form.substring(startSymbol);

-        if(!chechSuffix(suffixS)) return form;
+        if (!chechSuffix(suffixS)) return form;

        Long suffix = RussianSuffixDecoderEncoder.encode(suffixS);

-        int index = Arrays.binarySearch(keys,suffix);
-        if(index < -1){
+        int index = Arrays.binarySearch(keys, suffix);
+        if (index < -1) {
            System.out.println(" " + form);
            return form;
-        }else{
+        } else {
            String nSuffix = RussianSuffixDecoderEncoder.decode(values[index]);
            return startSymbol > 0 ? form.substring(0, startSymbol) + nSuffix : nSuffix;
        }
    }


-    private boolean chechSuffix(String suffix){
-        for(int i = 0; i < suffix.length(); i++){
+    private boolean chechSuffix(String suffix) {
+        for (int i = 0; i < suffix.length(); i++) {
            if (!RussianSuffixDecoderEncoder.checkCharacter(suffix.charAt(i))) return false;
        }
        return true;
@@ -1,9 +1,28 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.lucene.russian.morphology.dictonary;

 import org.apache.lucene.russian.morphology.RussianSuffixDecoderEncoder;

+import java.io.BufferedReader;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
 import java.util.*;
-import java.io.*;


 /**
@@ -15,7 +34,7 @@ public class DictonaryReader {
    private String fileEncoding = "windows-1251";
    private List<List<FlexiaModel>> wordsFlexias = new ArrayList<List<FlexiaModel>>();
    private List<List<String>> wordPrefixes = new ArrayList<List<String>>();
-    private Set<String> ingnoredForm =  new HashSet<String>();
+    private Set<String> ingnoredForm = new HashSet<String>();

    public DictonaryReader(String fileName, Set<String> ingnoredForm) {
        this.fileName = fileName;
@@ -35,11 +54,11 @@ public class DictonaryReader {
        sckipBlock(bufferedReader);
        sckipBlock(bufferedReader);
        readPrefix(bufferedReader);
-        readWords(bufferedReader,wordProccessor);
+        readWords(bufferedReader, wordProccessor);
    }


-    private void readWords(BufferedReader reader,WordProccessor wordProccessor) throws IOException {
+    private void readWords(BufferedReader reader, WordProccessor wordProccessor) throws IOException {
        String s = reader.readLine();
        int count = Integer.valueOf(s);
        for (int i = 0; i < count; i++) {
@@ -54,15 +73,15 @@ public class DictonaryReader {
            if (models.size() > 0 && !ingnoredForm.contains(models.get(0).getCode())) {
                WordCard card = new WordCard(cleanString(models.get(0).create(word)));
                for (FlexiaModel fm : models) {
-                       card.addFrom(cleanString(fm.create(word)));
+                    card.addFrom(cleanString(fm.create(word)));
                }
                wordProccessor.proccess(card);
            }
        }
    }

-    private String cleanString(String s){
-        return s.replace((char)(34 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET),(char)(6 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET));
+    private String cleanString(String s) {
+        return s.replace((char) (34 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET), (char) (6 + RussianSuffixDecoderEncoder.RUSSIAN_SMALL_LETTER_OFFSET));
    }

    private void sckipBlock(BufferedReader reader) throws IOException {
@@ -99,8 +118,8 @@ public class DictonaryReader {
    private void addFlexia(ArrayList<FlexiaModel> flexiaModelArrayList, String line) {
        String[] fl = line.split("\\*");
        // forms with three '*'-separated fields are ignored; only two-field forms are added
-      //  if (fl.length == 3)
-      //      flexiaModelArrayList.add(new FlexiaModel(fl[1], fl[0].toLowerCase(), fl[2].toLowerCase()));
+        //  if (fl.length == 3)
+        //      flexiaModelArrayList.add(new FlexiaModel(fl[1], fl[0].toLowerCase(), fl[2].toLowerCase()));
        if (fl.length == 2) flexiaModelArrayList.add(new FlexiaModel(fl[1], fl[0].toLowerCase(), ""));
    }

@@ -1,3 +1,19 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.lucene.russian.morphology.dictonary;

 /**
@@ -1,11 +1,27 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.lucene.russian.morphology.dictonary;

-import java.util.Set;
-import java.util.HashSet;
 import java.io.BufferedReader;
-import java.io.InputStreamReader;
 import java.io.FileInputStream;
 import java.io.IOException;
+import java.io.InputStreamReader;
+import java.util.HashSet;
+import java.util.Set;


 public class IgnoredFormReader {
@@ -1,7 +1,23 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.lucene.russian.morphology.dictonary;

-import java.util.List;
 import java.util.ArrayList;
+import java.util.List;

 /**
 * Represents a word and all of its forms.
@@ -14,7 +30,7 @@ public class WordCard {
        this.canonicalFrom = canonicalFrom;
    }

-    protected void addFrom(String word){
+    protected void addFrom(String word) {
        wordsFroms.add(word);
    }

@@ -1,9 +1,25 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.lucene.russian.morphology.dictonary;

 import java.io.IOException;

 /**
- * Interface allows get information from 
+ * Interface allows get information from
 * {@link org.apache.lucene.russian.morphology.dictonary.DictonaryReader}.
 */
 public interface WordProccessor {
@@ -1,9 +1,28 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.lucene.russian.morphology.evristics;

 import org.apache.lucene.russian.morphology.RussianSuffixDecoderEncoder;

-import java.util.*;
-import java.io.*;
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.TreeMap;


 public class Evristic {
@@ -35,7 +54,7 @@ public class Evristic {
        String s = reader.readLine();
        while (s != null) {
            String[] sfns = s.split(" ");
-            if(sfns.length == 2){
+            if (sfns.length == 2) {
                encodedSuffixesPairs.put(Long.valueOf(sfns[0]), Long.valueOf(sfns[0]));
            }
            s = reader.readLine();
@@ -45,8 +64,8 @@ public class Evristic {

    public void writeToFile(String file) throws IOException {
        FileWriter writer = new FileWriter(file);
-        writer.write(encodedSuffixesPairs.size()+"\n");
-        for(Long k:encodedSuffixesPairs.keySet()){
+        writer.write(encodedSuffixesPairs.size() + "\n");
+        for (Long k : encodedSuffixesPairs.keySet()) {
            writer.write("" + k + " " + encodedSuffixesPairs.get(k) + "\n");
        }
        writer.close();
@@ -1,26 +1,42 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.lucene.russian.morphology.evristics;

-import org.apache.lucene.russian.morphology.dictonary.WordProccessor;
-import org.apache.lucene.russian.morphology.dictonary.WordCard;
 import org.apache.lucene.russian.morphology.RussianSuffixDecoderEncoder;
+import org.apache.lucene.russian.morphology.dictonary.WordCard;
+import org.apache.lucene.russian.morphology.dictonary.WordProccessor;

-import java.util.Map;
 import java.util.HashMap;
+import java.util.Map;


-public class StatiticsCollectors implements WordProccessor{
-    Map<SuffixEvristic,SuffixCounter> statititics = new HashMap<SuffixEvristic,SuffixCounter>();
+public class StatiticsCollectors implements WordProccessor {
+    Map<SuffixEvristic, SuffixCounter> statititics = new HashMap<SuffixEvristic, SuffixCounter>();

    private Integer ignoredCount = 0;

    public void proccess(WordCard wordCard) {
-        for(String form:wordCard.getWordsFroms()){
+        for (String form : wordCard.getWordsFroms()) {
            SuffixEvristic suffixEvristic = createEvristic(wordCard.getCanonicalFrom(), form);
            if (suffixEvristic == null) continue;
            SuffixCounter suffixCounter = statititics.get(suffixEvristic);
-            if(suffixCounter == null){
+            if (suffixCounter == null) {
                suffixCounter = new SuffixCounter(suffixEvristic);
-                statititics.put(suffixEvristic,suffixCounter);
+                statititics.put(suffixEvristic, suffixCounter);
            }
            suffixCounter.incrementAmount();
        }
@@ -30,19 +46,19 @@ public class StatiticsCollectors implements WordProccessor{
        return statititics;
    }

-    private SuffixEvristic createEvristic(String word,String form){
+    private SuffixEvristic createEvristic(String word, String form) {
        int startSymbol = form.length() > RussianSuffixDecoderEncoder.SUFFIX_LENGTH ? form.length() - RussianSuffixDecoderEncoder.SUFFIX_LENGTH : 0;
        String formSuffix = form.substring(startSymbol);
-        if(word.length() < startSymbol){
+        if (word.length() < startSymbol) {
            ignoredCount++;
-            return null;            
+            return null;
        }
        String wordSuffix = word.length() > startSymbol ? word.substring(startSymbol) : "";
-        if (wordSuffix.length() > 12){
+        if (wordSuffix.length() > 12) {
            System.out.println(word + " " + form);
            return null;
        }
-        return new SuffixEvristic(formSuffix,wordSuffix);
+        return new SuffixEvristic(formSuffix, wordSuffix);
    }


@@ -1,10 +1,26 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.lucene.russian.morphology.evristics;

 /**
 * Contains information about the frequency of a suffix evristic (heuristic)
- *  in dictionary.
+ * in dictionary.
 */
-public class SuffixCounter implements Comparable{
+public class SuffixCounter implements Comparable {
    private SuffixEvristic suffixEvristic;
    private Double amnout = 0.0;

@@ -12,7 +28,7 @@ public class SuffixCounter implements Comparable{
        this.suffixEvristic = suffixEvristic;
    }

-    public void incrementAmount(){
+    public void incrementAmount() {
        amnout++;
    }

@@ -33,12 +49,12 @@ public class SuffixCounter implements Comparable{
    }

    public int compareTo(Object o) {
-        if(o instanceof SuffixCounter) return (int) Math.round(Math.signum(((SuffixCounter)o).amnout - amnout));
+        if (o instanceof SuffixCounter) return (int) Math.round(Math.signum(((SuffixCounter) o).amnout - amnout));
        return -1;
    }

    @Override
    public String toString() {
-        return ""+amnout + " " + suffixEvristic.toString();
+        return "" + amnout + " " + suffixEvristic.toString();
    }
 }
@@ -1,3 +1,19 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.lucene.russian.morphology.evristics;

 /**
@@ -1,39 +1,54 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.lucene.russian.morphology;

-import org.junit.Test;
-import static org.junit.Assert.assertThat;
 import static org.hamcrest.core.IsEqual.equalTo;
-import org.apache.lucene.russian.morphology.SuffixToLongException;
+import static org.junit.Assert.assertThat;
+import org.junit.Test;

-import java.io.InputStream;
 import java.io.BufferedReader;
-import java.io.InputStreamReader;
 import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;


 public class RussianSuffixDecoderEncoderTest {

    @Test
    public void testShouldCorretDecodeEncode() throws IOException {
-       InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/decoder-test-data.txt");
-       BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream,"UTF-8"));
-       String s = bufferedReader.readLine();
-        while(s != null){
+        InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/decoder-test-data.txt");
+        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
+        String s = bufferedReader.readLine();
+        while (s != null) {
            String[] qa = s.trim().split(" ");
            Long ecodedSuffix = RussianSuffixDecoderEncoder.encode(qa[0]);
-            assertThat(RussianSuffixDecoderEncoder.decode(ecodedSuffix),equalTo(qa[1]));
+            assertThat(RussianSuffixDecoderEncoder.decode(ecodedSuffix), equalTo(qa[1]));
            s = bufferedReader.readLine();
        }
    }

    @Test(expected = SuffixToLongException.class)
-    public void shouldThrownExeptionIfSuffixToLong(){
-         RussianSuffixDecoderEncoder.encode("1234567890123");
+    public void shouldThrownExeptionIfSuffixToLong() {
+        RussianSuffixDecoderEncoder.encode("1234567890123");
    }

    @Test(expected = WrongCharaterException.class)
-    public void shouldThrownExeptionIfSuffixContainWrongCharater(){
-         RussianSuffixDecoderEncoder.encode("1");
-    }    
-    
+    public void shouldThrownExeptionIfSuffixContainWrongCharater() {
+        RussianSuffixDecoderEncoder.encode("1");
+    }
+
 }
@@ -1,13 +1,28 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.lucene.russian.morphology.analayzer;

-import junit.framework.TestCase;
-import org.junit.Test;
-import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.junit.Test;

+import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.BufferedReader;
 import java.io.InputStreamReader;


@@ -17,24 +32,22 @@ public class RussianMorphlogyAnalayzerTest {
    public void shouldCorrectProccessText() throws IOException {
        RussianMorphlogyAnalayzer morphlogyAnalayzer = new RussianMorphlogyAnalayzer();
        InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/analayzer/russian-text.txt");
-        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream,"UTF-8"));
+        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));

        final Token reusableToken = new Token();

-              Token nextToken;
+        Token nextToken;


        TokenStream in = morphlogyAnalayzer.tokenStream(null, bufferedReader);
-        for (;;)
-               {
-                   nextToken = in.next(reusableToken);
+        for (; ;) {
+            nextToken = in.next(reusableToken);

-                   if (nextToken == null)
-                   {
-                       break;
-                   }
+            if (nextToken == null) {
+                break;
+            }

-                   System.out.println(nextToken.term());
+            System.out.println(nextToken.term());
 //                   nextSampleToken = sample.next(reusableSampleToken);
 //                   assertEquals(
 //                       "Unicode",
@@ -42,7 +55,7 @@ public class RussianMorphlogyAnalayzerTest {
 //                       nextSampleToken == null
 //                       ? null
 //                       : nextSampleToken.term());
-               }
+        }

    }
 }
@@ -1,24 +1,42 @@
+/**
+ * Copyright 2009 Alexander Kuznetsov 
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package org.apache.lucene.russian.morphology.analayzer;

-import org.junit.Test;
-import static org.junit.Assert.assertThat;
-import org.apache.lucene.russian.morphology.RussianSuffixDecoderEncoder;
 import static org.hamcrest.core.IsEqual.equalTo;
+import static org.junit.Assert.assertThat;
+import org.junit.Test;

-import java.io.*;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;


 public class SuffixEvristicsTest {

    @Test
    public void testShouldDefineCorretCononicalWordForm() throws IOException {
-       SuffixEvristics suffixEvristics = new SuffixEvristics();
-       InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/analayzer/suffix-evristics-test-data.txt");
-       BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream,"UTF-8"));
-       String s = bufferedReader.readLine();
-        while(s != null){
+        SuffixEvristics suffixEvristics = new SuffixEvristics();
+        InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/russian/morphology/analayzer/suffix-evristics-test-data.txt");
+        BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
+        String s = bufferedReader.readLine();
+        while (s != null) {
            String[] qa = s.trim().split(" ");
-            assertThat(suffixEvristics.getCanonicalForm(qa[0]),equalTo(qa[1]));
+            assertThat(suffixEvristics.getCanonicalForm(qa[0]), equalTo(qa[1]));
            s = bufferedReader.readLine();
        }
    }
@@ -1,5 +1,3 @@
-В условиях нарастающей пурги было сделано 4 успешных захода на посадку. После завершения облета и демонтажа оборудования
-Рубен Есаян дал устную оценку эксперимента:"Все нормально, будем рекомендовать систему к внедрению".
-Летом - с ноября по март - рейсы из Кейптауна (ЮАР) на станцию "Новолазаревская" (Антарктида) совершаются
-примерно один раз в две недели. Туда привозят людей, питание, оборудование, ГСМ и т.д.
-что-то
+В условиях нарастающей пурги было сделано 4 успешных захода на посадку. "Все нормально, будем рекомендовать систему к внедрению".
+Рейсы из Кейптауна (ЮАР) на станцию "Новолазаревская" (Антарктида) совершаются
+примерно один раз в две недели.