Compare commits
2 Commits
master
...
ambiguousl
Author | SHA1 | Date | |
---|---|---|---|
![]() |
f095cbe7c0 | ||
![]() |
3b2e48821a |
35
.github/workflows/main.yaml
vendored
35
.github/workflows/main.yaml
vendored
@ -1,35 +0,0 @@
|
||||
name: Java CI
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
jobs:
|
||||
tests:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up JDK 11
|
||||
uses: actions/setup-java@v2
|
||||
with:
|
||||
java-version: '11'
|
||||
distribution: 'adopt'
|
||||
- name: Build with Maven
|
||||
run: mvn --batch-mode --update-snapshots verify
|
||||
|
||||
pack-artifacts:
|
||||
runs-on: ubuntu-latest
|
||||
needs: tests
|
||||
if: github.ref == 'refs/heads/master'
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up JDK 11
|
||||
uses: actions/setup-java@v2
|
||||
with:
|
||||
java-version: '11'
|
||||
distribution: 'adopt'
|
||||
- name: Build with Maven
|
||||
run: mvn --batch-mode --update-snapshots verify
|
||||
- uses: actions/upload-artifact@v2
|
||||
with:
|
||||
name: artifacts
|
||||
path: ${{ github.workspace }}/*/target/*.jar
|
202
LICENSE
202
LICENSE
@ -1,202 +0,0 @@
|
||||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
64
README.md
64
README.md
@ -1,17 +1,54 @@
|
||||
# Russian Morphology for Apache Lucene
|
||||
# Russian Morphology for lucene
|
||||
|
||||
Russian and English morphology for Java and [Apache Lucene](http://lucene.apache.org) 9.3 framework based on open source dictionary from site [АОТ](http://aot.ru). It uses dictionary base morphology with some heuristics for unknown words. It supports a homonym for example for a Russian word "вина" it gives two variants "вино" and "вина".
|
||||
Russian and English morphology for java and lucene 3.0 framework based on open source dictionary from site [АОТ](http://aot.ru). It use dictionary base morphology with some heuristics for unknown words. It support homonym for example for Russian word "вина" it gives two variants "вино" and "вина".
|
||||
|
||||
|
||||
### How to use
|
||||
|
||||
Build project, by running `mvn clean package`, this will provide you the latest versions of the artifacts - 1.5, add it to your classpath. You could select which version to use - Russian or English.
|
||||
First download
|
||||
[morph-1.0.jar](https://bintray.com/artifact/download/akuznetsov/russianmorphology/org/apache/lucene/morphology/morph/1.1/morph-1.1.jar)
|
||||
and add it to your class path. When download [Russian](https://bintray.com/artifact/download/akuznetsov/russianmorphology/org/apache/lucene/morphology/russian/1.1/russian-1.1.jar) or
|
||||
[English](https://bintray.com/artifact/download/akuznetsov/russianmorphology/org/apache/lucene/morphology/english/1.1/english-1.1.jar) package.
|
||||
|
||||
If you use maven you can add dependency
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>russian</artifactId>
|
||||
<version>1.1</version>
|
||||
</dependency>
|
||||
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>english</artifactId>
|
||||
<version>1.1</version>
|
||||
</dependency>
|
||||
|
||||
Don't forget to add a link to the repository
|
||||
|
||||
|
||||
<repositories>
|
||||
...............
|
||||
<repository>
|
||||
<snapshots>
|
||||
<enabled>false</enabled>
|
||||
</snapshots>
|
||||
<id>bintray-akuznetsov-russianmorphology</id>
|
||||
<name>bintray</name>
|
||||
<url>http://dl.bintray.com/akuznetsov/russianmorphology</url>
|
||||
</repository>
|
||||
</repositories>
|
||||
|
||||
|
||||
|
||||
Now you can create a Lucene Analyzer
|
||||
|
||||
Now you can create a Lucene Analyzer:
|
||||
|
||||
RussianAnalayzer russian = new RussianAnalayzer();
|
||||
EnglishAnalayzer english = new EnglishAnalayzer();
|
||||
|
||||
|
||||
You can write your own analyzer using a filter that converts words into their correct forms.
|
||||
|
||||
LuceneMorphology luceneMorph = new EnglishLuceneMorphology();
|
||||
@ -25,28 +62,9 @@ Also if you need get a list of base forms of word, you can use following example
|
||||
LuceneMorphology luceneMorph = new EnglishLuceneMorphology();
|
||||
List<String> wordBaseForms = luceneMorph.getMorphInfo(word);
|
||||
|
||||
### Solr
|
||||
|
||||
You can use the LuceneMorphology as morphology filter in a Solr _schema.xml_ using a **MorphologyFilterFactory:**
|
||||
|
||||
```xml
|
||||
<fieldType name="content" class="solr.TextField" positionIncrementGap="100">
|
||||
<analyzer>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="org.apache.lucene.analysis.morphology.MorphologyFilterFactory" language="Russian"/>
|
||||
<filter class="org.apache.lucene.analysis.morphology.MorphologyFilterFactory" language="English"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
```
|
||||
|
||||
Just add _morphology-1.5.jar_ in your Solr lib-directories
|
||||
|
||||
### Restrictions
|
||||
|
||||
* It works only with UTF-8.
|
||||
* It assumes that the letters е and ё are the same.
|
||||
* Word forms with prefixes like "наибольший" are treated as separate words.
|
||||
|
||||
### License
|
||||
|
||||
Apache License, Version 2.0
|
||||
|
@ -1,40 +1,36 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<parent>
|
||||
<artifactId>morphology</artifactId>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<version>1.5</version>
|
||||
</parent>
|
||||
<?xml version="1.0"?>
|
||||
<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
|
||||
xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
<groupId>org.apache.lucene.analysis</groupId>
|
||||
<artifactId>morphology</artifactId>
|
||||
<name>solr-morphology-analysis</name>
|
||||
<version>${morphology.version}</version>
|
||||
<parent>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>morphology</artifactId>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
</parent>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>context</artifactId>
|
||||
<version>1.0-SNAPSHOT</version>
|
||||
<name>context</name>
|
||||
<url>http://maven.apache.org</url>
|
||||
|
||||
<dependencies>
|
||||
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>4.8.2</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>russian</artifactId>
|
||||
<version>${morphology.version}</version>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>english</artifactId>
|
||||
<version>${morphology.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>${junit.version}</version>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
||||
</project>
|
||||
</project>
|
@ -0,0 +1,52 @@
|
||||
/**
|
||||
* Copyright 2015 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.morphology.context;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
public class CalculateContextItem {
|
||||
|
||||
public List<ContextItem> createContextItems(String text) throws IOException {
|
||||
Analyzer statAnalyzer = new StatAnalyzer();
|
||||
InputStreamReader reader = new InputStreamReader(new ByteArrayInputStream("принеси мне вина на новый год? - и что я жду тебя, где вино".getBytes()), "UTF-8");
|
||||
|
||||
|
||||
// new RussianMorphology();
|
||||
|
||||
TokenStream tokenStream = statAnalyzer.tokenStream(null, reader);
|
||||
tokenStream.reset();
|
||||
|
||||
List<List<String>> listedLink = new LinkedList<>();
|
||||
while (tokenStream.incrementToken()) {
|
||||
CharTermAttribute charTerm = tokenStream.getAttribute(CharTermAttribute.class);
|
||||
PositionIncrementAttribute position = tokenStream.getAttribute(PositionIncrementAttribute.class);
|
||||
|
||||
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
@ -0,0 +1,80 @@
|
||||
/**
|
||||
* Copyright 2015 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.morphology.context;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
public class ContextItem implements Comparable<ContextItem> {
|
||||
String[][] morphInfo;
|
||||
|
||||
public ContextItem(String[][] morphInfo) {
|
||||
this.morphInfo = morphInfo;
|
||||
}
|
||||
|
||||
public String[][] getMorphInfo() {
|
||||
return morphInfo;
|
||||
}
|
||||
|
||||
public void setMorphInfo(String[][] morphInfo) {
|
||||
this.morphInfo = morphInfo;
|
||||
}
|
||||
|
||||
public int hashCode() {
|
||||
int h = 0;
|
||||
for (String[] m : morphInfo) {
|
||||
for (String s : m) {
|
||||
h = 31 * h + s.hashCode();
|
||||
}
|
||||
}
|
||||
return h;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
|
||||
ContextItem that = (ContextItem) o;
|
||||
|
||||
if (that.morphInfo.length != this.morphInfo.length) {
|
||||
return false;
|
||||
}
|
||||
for (int i = 0; i < morphInfo.length; i++) {
|
||||
if (!Arrays.equals(morphInfo[i], that.morphInfo[i])) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int compareTo(ContextItem o) {
|
||||
int i = o.morphInfo.length - morphInfo.length;
|
||||
if (i != 0) return i;
|
||||
for (int j = 0; j < morphInfo.length; j++) {
|
||||
i = o.morphInfo[j].length - morphInfo[j].length;
|
||||
if (i != 0) return i;
|
||||
for (int k = 0; k < morphInfo[j].length; k++) {
|
||||
i = morphInfo[j][k].compareTo(o.morphInfo[j][k]);
|
||||
if (i != 0) return i;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
@ -0,0 +1,37 @@
|
||||
/**
|
||||
* Copyright 2015 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.morphology.context;
|
||||
|
||||
/**
 * Value holder pairing a set of morphological tags with the probability
 * computed for them. Plain mutable bean; not thread-safe.
 */
public class ContextStats {
    // Morphological tags this statistic refers to.
    String[] morphInfo;
    // Probability value associated with the tags above.
    double prob;

    /** @return the morphological tags, or {@code null} if not yet set */
    public String[] getMorphInfo() {
        return this.morphInfo;
    }

    public void setMorphInfo(String[] morphInfo) {
        this.morphInfo = morphInfo;
    }

    /** @return the probability associated with {@link #getMorphInfo()} */
    public double getProb() {
        return this.prob;
    }

    public void setProb(double prob) {
        this.prob = prob;
    }
}
|
@ -0,0 +1,21 @@
|
||||
/**
|
||||
* Copyright 2015 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.morphology.context;
|
||||
|
||||
/**
 * Placeholder for probability-calculation logic; contains no behavior yet.
 *
 * <p>NOTE(review): the class name looks like a typo for "ProbCalculator",
 * but renaming would break any external references, so it is kept as-is —
 * confirm before renaming.
 */
public class ProbClalucator {
}
|
@ -0,0 +1,116 @@
|
||||
/**
|
||||
* Copyright 2015 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.morphology.context;
|
||||
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.util.AttributeFactory;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.Set;
|
||||
|
||||
public class SimpleTokenizer extends Tokenizer {
|
||||
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
|
||||
LinkedList<String> terms;
|
||||
|
||||
public final static Set<Character> SEPARATION_LETTERS = new HashSet<>(Arrays.asList(' ', '(', ')', ',', '|', '\t',
|
||||
'\n', '"', ':', '!', '?', ',', ';', '•'));
|
||||
|
||||
public final static Set<Character> MEANING_CHARS = new HashSet<>(Arrays.asList('(', ')', ',', '|',
|
||||
'"', ':', '!', '?', ',', ';', '•', '.'));
|
||||
|
||||
public SimpleTokenizer() {
|
||||
}
|
||||
|
||||
public SimpleTokenizer(AttributeFactory factory) {
|
||||
super(factory);
|
||||
}
|
||||
|
||||
@Override
|
||||
final public boolean incrementToken() throws IOException {
|
||||
if (terms == null) {
|
||||
createTeams();
|
||||
}
|
||||
if (terms.size() > 0) {
|
||||
String str = terms.poll();
|
||||
termAtt.setEmpty();
|
||||
termAtt.append(str);
|
||||
posAtt.setPositionIncrement(1);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private void createTeams() throws IOException {
|
||||
terms = new LinkedList<>();
|
||||
|
||||
BufferedReader br = new BufferedReader(input);
|
||||
StringBuilder sb = new StringBuilder();
|
||||
String s = "";
|
||||
while ((s = br.readLine()) != null) {
|
||||
sb.append(s).append(" ");
|
||||
}
|
||||
|
||||
s = sb.toString();
|
||||
CharTermAttributeImpl currentTerm = new CharTermAttributeImpl();
|
||||
for (int i = 0; i < s.length(); i++) {
|
||||
if (checkIsCharSepartor(s, i)) {
|
||||
if (checkIsCharHasMeaning(s, i)) {
|
||||
terms.add(s.substring(i, i + 1));
|
||||
}
|
||||
String term = currentTerm.toString();
|
||||
currentTerm.clear();
|
||||
if (term.length() > 0) {
|
||||
terms.add(term);
|
||||
}
|
||||
} else {
|
||||
currentTerm.append(s.charAt(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private boolean checkIsCharHasMeaning(String s, int i) {
|
||||
return MEANING_CHARS.contains(s.charAt(i));
|
||||
}
|
||||
|
||||
private boolean checkIsCharSepartor(String s, int i) {
|
||||
char c = s.charAt(i);
|
||||
if (SEPARATION_LETTERS.contains(c)) {
|
||||
return true;
|
||||
}
|
||||
if ('.' == c
|
||||
&& s.length() > i + 1
|
||||
&& SEPARATION_LETTERS.contains(s.charAt(i + 1))) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() throws IOException {
|
||||
this.terms = null;
|
||||
super.reset();
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,34 @@
|
||||
package org.apache.lucene.morphology.context;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
/**
|
||||
* Created by akuznetsov on 6/24/15.
|
||||
*/
|
||||
public class StatAnalyzer extends Analyzer {
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String s) {
|
||||
|
||||
SimpleTokenizer src = new SimpleTokenizer();
|
||||
TokenFilter filter = new StandardFilter(src);
|
||||
filter = new LowerCaseFilter(filter);
|
||||
|
||||
return new TokenStreamComponents(src, filter) {
|
||||
@Override
|
||||
protected void setReader(final Reader reader) throws IOException {
|
||||
super.setReader(reader);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -0,0 +1,32 @@
|
||||
package org.apache.lucene.morphology.context;
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
|
||||
public class SimpleTokenizerTest {
|
||||
|
||||
@Test
|
||||
public void testSimpleTokenizer() throws IOException {
|
||||
Analyzer statAnalyzer = new StatAnalyzer();
|
||||
InputStreamReader reader = new InputStreamReader(new ByteArrayInputStream("принеси мне вина на новый год? - и что я жду тебя, где вино".getBytes()), "UTF-8");
|
||||
|
||||
TokenStream tokenStream = statAnalyzer.tokenStream(null, reader);
|
||||
tokenStream.reset();
|
||||
|
||||
boolean wordSeen = false;
|
||||
while (tokenStream.incrementToken()) {
|
||||
CharTermAttribute charTerm = tokenStream.getAttribute(CharTermAttribute.class);
|
||||
PositionIncrementAttribute position = tokenStream.getAttribute(PositionIncrementAttribute.class);
|
||||
System.out.println(charTerm.toString());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -3,26 +3,27 @@
|
||||
<parent>
|
||||
<artifactId>morphology</artifactId>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<version>1.5</version>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>dictionary-reader</artifactId>
|
||||
<name>dictionary-reader</name>
|
||||
<version>1.5</version>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
<url>http://maven.apache.org</url>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>russian</artifactId>
|
||||
<version>1.5</version>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
</dependency>
|
||||
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>english</artifactId>
|
||||
<version>1.5</version>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
|
@ -22,19 +22,20 @@ import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
|
||||
/**
|
||||
* This class contain logic how read
|
||||
* dictionary and produce word with it all forms.
|
||||
* dictonary and produce word with it all forms.
|
||||
*/
|
||||
public class DictionaryReader {
|
||||
private String fileName;
|
||||
private String fileEncoding = "windows-1251";
|
||||
private List<List<FlexiaModel>> wordsFlexias = new ArrayList<>();
|
||||
private Set<String> ignoredForm;
|
||||
private List<List<FlexiaModel>> wordsFlexias = new ArrayList<List<FlexiaModel>>();
|
||||
private Set<String> ignoredForm = new HashSet<String>();
|
||||
|
||||
public DictionaryReader(String fileName, Set<String> ignoredForm) {
|
||||
this.fileName = fileName;
|
||||
@ -54,7 +55,7 @@ public class DictionaryReader {
|
||||
|
||||
private void readWords(BufferedReader reader, WordProcessor wordProcessor) throws IOException {
|
||||
String s = reader.readLine();
|
||||
int count = Integer.parseInt(s);
|
||||
int count = Integer.valueOf(s);
|
||||
int actual = 0;
|
||||
for (int i = 0; i < count; i++) {
|
||||
s = reader.readLine();
|
||||
@ -78,7 +79,7 @@ public class DictionaryReader {
|
||||
String wordBase = wd[0].toLowerCase();
|
||||
if (wordBase.startsWith("-")) return null;
|
||||
wordBase = "#".equals(wordBase) ? "" : wordBase;
|
||||
List<FlexiaModel> models = wordsFlexias.get(Integer.parseInt(wd[1]));
|
||||
List<FlexiaModel> models = wordsFlexias.get(Integer.valueOf(wd[1]));
|
||||
FlexiaModel flexiaModel = models.get(0);
|
||||
if (models.size() == 0 || ignoredForm.contains(flexiaModel.getCode())) {
|
||||
return null;
|
||||
@ -95,7 +96,7 @@ public class DictionaryReader {
|
||||
|
||||
private void skipBlock(BufferedReader reader) throws IOException {
|
||||
String s = reader.readLine();
|
||||
int count = Integer.parseInt(s);
|
||||
int count = Integer.valueOf(s);
|
||||
for (int i = 0; i < count; i++) {
|
||||
reader.readLine();
|
||||
}
|
||||
@ -104,7 +105,7 @@ public class DictionaryReader {
|
||||
|
||||
private void readPrefix(BufferedReader reader) throws IOException {
|
||||
String s = reader.readLine();
|
||||
int count = Integer.parseInt(s);
|
||||
int count = Integer.valueOf(s);
|
||||
for (int i = 0; i < count; i++) {
|
||||
reader.readLine();
|
||||
}
|
||||
@ -112,10 +113,10 @@ public class DictionaryReader {
|
||||
|
||||
private void readFlexias(BufferedReader reader) throws IOException {
|
||||
String s = reader.readLine();
|
||||
int count = Integer.parseInt(s);
|
||||
int count = Integer.valueOf(s);
|
||||
for (int i = 0; i < count; i++) {
|
||||
s = reader.readLine();
|
||||
ArrayList<FlexiaModel> flexiaModelArrayList = new ArrayList<>();
|
||||
ArrayList<FlexiaModel> flexiaModelArrayList = new ArrayList<FlexiaModel>();
|
||||
wordsFlexias.add(flexiaModelArrayList);
|
||||
for (String line : s.split("%")) {
|
||||
addFlexia(flexiaModelArrayList, line);
|
||||
|
@ -16,8 +16,6 @@
|
||||
|
||||
package org.apache.lucene.morphology.dictionary;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Represent information of how word form created form it imutible part.
|
||||
*/
|
||||
@ -76,9 +74,11 @@ public class FlexiaModel {
|
||||
|
||||
FlexiaModel that = (FlexiaModel) o;
|
||||
|
||||
if (!Objects.equals(code, that.code)) return false;
|
||||
if (!Objects.equals(prefix, that.prefix)) return false;
|
||||
return Objects.equals(suffix, that.suffix);
|
||||
if (code != null ? !code.equals(that.code) : that.code != null) return false;
|
||||
if (prefix != null ? !prefix.equals(that.prefix) : that.prefix != null) return false;
|
||||
if (suffix != null ? !suffix.equals(that.suffix) : that.suffix != null) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -29,8 +29,8 @@ import java.util.Map;
|
||||
public class GrammarReader {
|
||||
private String fileName;
|
||||
private String fileEncoding = "windows-1251";
|
||||
private List<String> grammarInfo = new ArrayList<>();
|
||||
private Map<String, Integer> inverseIndex = new HashMap<>();
|
||||
private List<String> grammarInfo = new ArrayList<String>();
|
||||
private Map<String, Integer> inverseIndex = new HashMap<String, Integer>();
|
||||
|
||||
public GrammarReader(String fileName) throws IOException {
|
||||
this.fileName = fileName;
|
||||
@ -50,7 +50,7 @@ public class GrammarReader {
|
||||
line = line.trim();
|
||||
if (!line.startsWith("//") && line.length() > 0) {
|
||||
String[] strings = line.split(" ", 2);
|
||||
int i = grammarInfo.size();
|
||||
Integer i = grammarInfo.size();
|
||||
inverseIndex.put(strings[0], i);
|
||||
grammarInfo.add(i, strings[1]);
|
||||
}
|
||||
@ -63,7 +63,7 @@ public class GrammarReader {
|
||||
}
|
||||
|
||||
public String[] getGrammarInfoAsArray() {
|
||||
return grammarInfo.toArray(new String[0]);
|
||||
return grammarInfo.toArray(new String[grammarInfo.size()]);
|
||||
}
|
||||
|
||||
public Map<String, Integer> getGrammarInverseIndex() {
|
||||
|
@ -15,7 +15,7 @@
|
||||
*/
|
||||
package org.apache.lucene.morphology.dictionary;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
@ -29,7 +29,7 @@ public class RemoveFlexiaWithPrefixes extends WordFilter {
|
||||
@Override
|
||||
public List<WordCard> transform(WordCard wordCard) {
|
||||
|
||||
List<FlexiaModel> flexiaModelsToRemove = new LinkedList<>();
|
||||
List<FlexiaModel> flexiaModelsToRemove = new LinkedList<FlexiaModel>();
|
||||
for (FlexiaModel fm : wordCard.getWordsForms()) {
|
||||
if (fm.getPrefix().length() > 0) {
|
||||
flexiaModelsToRemove.add(fm);
|
||||
@ -39,6 +39,6 @@ public class RemoveFlexiaWithPrefixes extends WordFilter {
|
||||
wordCard.removeFlexia(fm);
|
||||
}
|
||||
|
||||
return new LinkedList<>(Collections.singletonList(wordCard));
|
||||
return new LinkedList<WordCard>(Arrays.asList(wordCard));
|
||||
}
|
||||
}
|
||||
|
@ -32,13 +32,13 @@ public class RussianAdvSplitterFilter extends WordFilter {
|
||||
|
||||
@Override
|
||||
public List<WordCard> transform(WordCard wordCard) {
|
||||
LinkedList<WordCard> result = new LinkedList<>();
|
||||
LinkedList<WordCard> result = new LinkedList<WordCard>();
|
||||
result.add(wordCard);
|
||||
|
||||
String baseWord = "";
|
||||
String canonicalForm = "";
|
||||
String canonicalSuffix = "";
|
||||
List<FlexiaModel> flexiaModels = new LinkedList<>();
|
||||
List<FlexiaModel> flexiaModels = new LinkedList<FlexiaModel>();
|
||||
for (FlexiaModel flexiaModel : wordCard.getWordsForms()) {
|
||||
if (flexiaModel.getPrefix().length() > 0) {
|
||||
flexiaModels.add(new FlexiaModel(flexiaModel.getCode(), flexiaModel.getSuffix(), ""));
|
||||
|
@ -27,9 +27,9 @@ import java.util.*;
|
||||
|
||||
//todo made refactoring this class
|
||||
public class StatisticsCollector implements WordProcessor {
|
||||
private TreeMap<String, Set<Heuristic>> inverseIndex = new TreeMap<>();
|
||||
private Map<Set<Heuristic>, Integer> ruleInverseIndex = new HashMap<>();
|
||||
private List<Set<Heuristic>> rules = new ArrayList<>();
|
||||
private TreeMap<String, Set<Heuristic>> inverseIndex = new TreeMap<String, Set<Heuristic>>();
|
||||
private Map<Set<Heuristic>, Integer> ruleInverseIndex = new HashMap<Set<Heuristic>, Integer>();
|
||||
private List<Set<Heuristic>> rules = new ArrayList<Set<Heuristic>>();
|
||||
private GrammarReader grammarReader;
|
||||
private LetterDecoderEncoder decoderEncoder;
|
||||
|
||||
@ -39,14 +39,18 @@ public class StatisticsCollector implements WordProcessor {
|
||||
this.decoderEncoder = decoderEncoder;
|
||||
}
|
||||
|
||||
public void process(WordCard wordCard) {
|
||||
public void process(WordCard wordCard) throws IOException {
|
||||
cleanWordCard(wordCard);
|
||||
String normalStringMorph = wordCard.getWordsForms().get(0).getCode();
|
||||
|
||||
for (FlexiaModel fm : wordCard.getWordsForms()) {
|
||||
Heuristic heuristic = createEvristic(wordCard.getBase(), wordCard.getCanonicalSuffix(), fm, normalStringMorph);
|
||||
String form = revertWord(fm.create(wordCard.getBase()));
|
||||
Set<Heuristic> suffixHeuristics = inverseIndex.computeIfAbsent(form, k -> new HashSet<>());
|
||||
Set<Heuristic> suffixHeuristics = inverseIndex.get(form);
|
||||
if (suffixHeuristics == null) {
|
||||
suffixHeuristics = new HashSet<Heuristic>();
|
||||
inverseIndex.put(form, suffixHeuristics);
|
||||
}
|
||||
suffixHeuristics.add(heuristic);
|
||||
}
|
||||
}
|
||||
@ -65,7 +69,7 @@ public class StatisticsCollector implements WordProcessor {
|
||||
|
||||
public void saveHeuristic(String fileName) throws IOException {
|
||||
|
||||
Map<Integer, Integer> dist = new TreeMap<>();
|
||||
Map<Integer, Integer> dist = new TreeMap<Integer, Integer>();
|
||||
Set<Heuristic> prevSet = null;
|
||||
int count = 0;
|
||||
for (String key : inverseIndex.keySet()) {
|
||||
@ -116,11 +120,11 @@ public class StatisticsCollector implements WordProcessor {
|
||||
}
|
||||
|
||||
private String revertWord(String s) {
|
||||
StringBuilder result = new StringBuilder();
|
||||
String result = "";
|
||||
for (int i = 1; i <= s.length(); i++) {
|
||||
result.append(s.charAt(s.length() - i));
|
||||
result += s.charAt(s.length() - i);
|
||||
}
|
||||
return result.toString();
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
@ -128,15 +132,15 @@ public class StatisticsCollector implements WordProcessor {
|
||||
String form = fm.create(wordBase);
|
||||
String normalForm = wordBase + canonicalSuffix;
|
||||
Integer length = getCommonLength(form, normalForm);
|
||||
int actualSuffixLengh = form.length() - length;
|
||||
Integer actualSuffixLengh = form.length() - length;
|
||||
String actualNormalSuffix = normalForm.substring(length);
|
||||
Integer integer = grammarReader.getGrammarInverseIndex().get(fm.getCode());
|
||||
Integer nf = grammarReader.getGrammarInverseIndex().get(normalSuffixForm);
|
||||
return new Heuristic((byte) actualSuffixLengh, actualNormalSuffix, (short) integer.intValue(), (short) nf.intValue());
|
||||
return new Heuristic((byte) actualSuffixLengh.intValue(), actualNormalSuffix, (short) integer.intValue(), (short) nf.intValue());
|
||||
}
|
||||
|
||||
public static Integer getCommonLength(String s1, String s2) {
|
||||
int length = Math.min(s1.length(), s2.length());
|
||||
Integer length = Math.min(s1.length(), s2.length());
|
||||
for (int i = 0; i < length; i++) {
|
||||
if (s1.charAt(i) != s2.charAt(i)) return i;
|
||||
}
|
||||
|
@ -26,7 +26,7 @@ public class WordCard {
|
||||
private String canonicalForm;
|
||||
private String base;
|
||||
private String canonicalSuffix;
|
||||
private List<FlexiaModel> wordsForms = new ArrayList<>();
|
||||
private List<FlexiaModel> wordsForms = new ArrayList<FlexiaModel>();
|
||||
|
||||
public WordCard(String canonicalForm, String base, String canonicalSuffix) {
|
||||
this.canonicalForm = canonicalForm;
|
||||
|
@ -17,6 +17,7 @@ package org.apache.lucene.morphology.dictionary;
|
||||
|
||||
import org.apache.lucene.morphology.LetterDecoderEncoder;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
@ -37,7 +38,7 @@ public class WordCleaner extends WordFilter {
|
||||
if (word.contains("-")) return Collections.emptyList();
|
||||
if (!decoderEncoder.checkString(word)) return Collections.emptyList();
|
||||
|
||||
List<FlexiaModel> flexiaModelsToRemove = new LinkedList<>();
|
||||
List<FlexiaModel> flexiaModelsToRemove = new LinkedList<FlexiaModel>();
|
||||
for (FlexiaModel fm : wordCard.getWordsForms()) {
|
||||
if (!decoderEncoder.checkString(fm.create(wordCard.getBase())) || fm.create(wordCard.getBase()).contains("-")) {
|
||||
flexiaModelsToRemove.add(fm);
|
||||
@ -47,6 +48,6 @@ public class WordCleaner extends WordFilter {
|
||||
wordCard.removeFlexia(fm);
|
||||
}
|
||||
|
||||
return new LinkedList<>(Collections.singletonList(wordCard));
|
||||
return new LinkedList<WordCard>(Arrays.asList(wordCard));
|
||||
}
|
||||
}
|
||||
|
@ -23,5 +23,5 @@ import java.io.IOException;
|
||||
*/
|
||||
public interface WordProcessor {
|
||||
|
||||
void process(WordCard wordCard) throws IOException;
|
||||
public void process(WordCard wordCard) throws IOException;
|
||||
}
|
||||
|
@ -17,7 +17,7 @@ package org.apache.lucene.morphology.dictionary;
|
||||
|
||||
import org.apache.lucene.morphology.LetterDecoderEncoder;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
@ -42,7 +42,7 @@ public class WordStringCleaner extends WordFilter {
|
||||
//made correct code
|
||||
m.setCode(m.getCode().substring(0, 2));
|
||||
}
|
||||
return new LinkedList<>(Collections.singletonList(wordCard));
|
||||
return new LinkedList<WordCard>(Arrays.asList(wordCard));
|
||||
}
|
||||
|
||||
|
||||
|
@ -29,7 +29,7 @@ public class EnglishHeuristicBuilder {
|
||||
GrammarReader grammarInfo = new GrammarReader("dictonary/Dicts/Morph/egramtab.tab");
|
||||
EnglishLetterDecoderEncoder decoderEncoder = new EnglishLetterDecoderEncoder();
|
||||
|
||||
DictionaryReader dictionaryReader = new DictionaryReader("dictonary/Dicts/SrcMorph/EngSrc/morphs.mrd", new HashSet<>());
|
||||
DictionaryReader dictionaryReader = new DictionaryReader("dictonary/Dicts/SrcMorph/EngSrc/morphs.mrd", new HashSet<String>());
|
||||
|
||||
StatisticsCollector statisticsCollector = new StatisticsCollector(grammarInfo, decoderEncoder);
|
||||
WordCleaner wordCleaner = new WordCleaner(decoderEncoder, statisticsCollector);
|
||||
@ -39,4 +39,4 @@ public class EnglishHeuristicBuilder {
|
||||
statisticsCollector.saveHeuristic("english/src/main/resources/org/apache/lucene/morphology/english/morph.info");
|
||||
|
||||
}
|
||||
}
|
||||
}
|
@ -28,7 +28,7 @@ public class RussianHeuristicBuilder {
|
||||
GrammarReader grammarInfo = new GrammarReader("dictonary/Dicts/Morph/rgramtab.tab");
|
||||
RussianLetterDecoderEncoder decoderEncoder = new RussianLetterDecoderEncoder();
|
||||
|
||||
DictionaryReader dictionaryReader = new DictionaryReader("dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd", new HashSet<>());
|
||||
DictionaryReader dictionaryReader = new DictionaryReader("dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd", new HashSet<String>());
|
||||
|
||||
StatisticsCollector statisticsCollector = new StatisticsCollector(grammarInfo, decoderEncoder);
|
||||
WordCleaner wordCleaner = new WordCleaner(decoderEncoder, statisticsCollector);
|
||||
|
@ -23,7 +23,6 @@ import org.apache.lucene.morphology.english.EnglishMorphology;
|
||||
import org.apache.lucene.morphology.russian.RussianLetterDecoderEncoder;
|
||||
import org.apache.lucene.morphology.russian.RussianLuceneMorphology;
|
||||
import org.apache.lucene.morphology.russian.RussianMorphology;
|
||||
import org.hamcrest.MatcherAssert;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
@ -34,6 +33,7 @@ import java.util.Map;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import static org.hamcrest.Matchers.hasItem;
|
||||
import static org.junit.Assert.assertThat;
|
||||
|
||||
|
||||
public class TestAllWords {
|
||||
@ -73,19 +73,21 @@ public class TestAllWords {
|
||||
final List<String> morphInfo = grammarInfo.getGrammarInfo();
|
||||
final Map<String, Integer> inversIndex = grammarInfo.getGrammarInverseIndex();
|
||||
|
||||
DictionaryReader dictionaryReader = new DictionaryReader(pathToDict, new HashSet<>());
|
||||
DictionaryReader dictionaryReader = new DictionaryReader(pathToDict, new HashSet<String>());
|
||||
|
||||
final AtomicLong wordCount = new AtomicLong(0);
|
||||
long startTime = System.currentTimeMillis();
|
||||
Long startTime = System.currentTimeMillis();
|
||||
|
||||
WordProcessor wordProcessor = wordCard -> {
|
||||
String word = wordCard.getBase() + wordCard.getCanonicalSuffix();
|
||||
for (FlexiaModel fm : wordCard.getWordsForms()) {
|
||||
String wordForm = wordCard.getBase() + fm.getSuffix();
|
||||
String morph = morphInfo.get(inversIndex.get(fm.getCode()));
|
||||
MatcherAssert.assertThat(morphology.getMorphInfo(wordForm), hasItem(word + "|" + morph));
|
||||
MatcherAssert.assertThat(morphology.getNormalForms(wordForm), hasItem(word));
|
||||
wordCount.set(2L + wordCount.get());
|
||||
WordProcessor wordProcessor = new WordProcessor() {
|
||||
public void process(WordCard wordCard) throws IOException {
|
||||
String word = wordCard.getBase() + wordCard.getCanonicalSuffix();
|
||||
for (FlexiaModel fm : wordCard.getWordsForms()) {
|
||||
String wordForm = wordCard.getBase() + fm.getSuffix();
|
||||
String morph = morphInfo.get(inversIndex.get(fm.getCode()));
|
||||
assertThat(morphology.getMorphInfo(wordForm), hasItem(word + "|" + morph));
|
||||
assertThat(morphology.getNormalForms(wordForm), hasItem(word));
|
||||
wordCount.set(2L + wordCount.get());
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
@ -121,15 +123,17 @@ public class TestAllWords {
|
||||
|
||||
private void testAllWordForLucene(final LuceneMorphology morphology, LetterDecoderEncoder decoderEncoder, String pathToDic) throws IOException {
|
||||
final AtomicLong wordCount = new AtomicLong(0);
|
||||
long startTime = System.currentTimeMillis();
|
||||
Long startTime = System.currentTimeMillis();
|
||||
|
||||
DictionaryReader dictionaryReader = new DictionaryReader(pathToDic, new HashSet<>());
|
||||
WordProcessor wordProcessor = wordCard -> {
|
||||
String word = wordCard.getBase() + wordCard.getCanonicalSuffix();
|
||||
for (FlexiaModel fm : wordCard.getWordsForms()) {
|
||||
String wordForm = wordCard.getBase() + fm.getSuffix();
|
||||
MatcherAssert.assertThat(morphology.getNormalForms(wordForm), hasItem(word));
|
||||
wordCount.set(1L + wordCount.get());
|
||||
DictionaryReader dictionaryReader = new DictionaryReader(pathToDic, new HashSet<String>());
|
||||
WordProcessor wordProcessor = new WordProcessor() {
|
||||
public void process(WordCard wordCard) throws IOException {
|
||||
String word = wordCard.getBase() + wordCard.getCanonicalSuffix();
|
||||
for (FlexiaModel fm : wordCard.getWordsForms()) {
|
||||
String wordForm = wordCard.getBase() + fm.getSuffix();
|
||||
assertThat(morphology.getNormalForms(wordForm), hasItem(word));
|
||||
wordCount.set(1L + wordCount.get());
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -16,12 +16,6 @@
|
||||
package org.apache.lucene.morphology;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
|
||||
import org.apache.lucene.analysis.CharArraySet;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.morphology.analyzer.MorphologyAnalyzer;
|
||||
@ -31,17 +25,16 @@ import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.morphology.english.EnglishLuceneMorphology;
|
||||
import org.apache.lucene.morphology.russian.RussianAnalyzer;
|
||||
import org.apache.lucene.morphology.russian.RussianLuceneMorphology;
|
||||
import org.hamcrest.MatcherAssert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.*;
|
||||
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
import static org.junit.Assert.assertThat;
|
||||
|
||||
|
||||
public class TestAnalyzers extends BaseTokenStreamTestCase {
|
||||
public class AnalyzersTest {
|
||||
|
||||
@Test
|
||||
public void shouldGiveCorrectWordsForEnglish() throws IOException {
|
||||
@ -67,24 +60,24 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
|
||||
LuceneMorphology englishLuceneMorphology = new EnglishLuceneMorphology();
|
||||
|
||||
MorphologyAnalyzer russianAnalyzer = new MorphologyAnalyzer(russianLuceneMorphology);
|
||||
InputStreamReader reader = new InputStreamReader(new ByteArrayInputStream("тест пм тест".getBytes()), StandardCharsets.UTF_8);
|
||||
InputStreamReader reader = new InputStreamReader(new ByteArrayInputStream("тест пм тест".getBytes()), "UTF-8");
|
||||
TokenStream stream = russianAnalyzer.tokenStream(null, reader);
|
||||
MorphologyFilter englishFilter = new MorphologyFilter(stream, englishLuceneMorphology);
|
||||
|
||||
englishFilter.reset();
|
||||
while (englishFilter.incrementToken()) {
|
||||
System.out.println(englishFilter);
|
||||
System.out.println(englishFilter.toString());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void shouldProvideCorrectIndentForWordWithMelitaForm() throws IOException {
|
||||
Analyzer morphlogyAnalyzer = new RussianAnalyzer();
|
||||
InputStreamReader reader = new InputStreamReader(new ByteArrayInputStream("принеси мне вина на новый год".getBytes()), StandardCharsets.UTF_8);
|
||||
InputStreamReader reader = new InputStreamReader(new ByteArrayInputStream("принеси мне вина на новый год".getBytes()), "UTF-8");
|
||||
|
||||
TokenStream tokenStream = morphlogyAnalyzer.tokenStream(null, reader);
|
||||
tokenStream.reset();
|
||||
Set<String> foromsOfWine = new HashSet<>();
|
||||
Set<String> foromsOfWine = new HashSet<String>();
|
||||
foromsOfWine.add("вина");
|
||||
foromsOfWine.add("винo");
|
||||
boolean wordSeen = false;
|
||||
@ -92,7 +85,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
|
||||
CharTermAttribute charTerm = tokenStream.getAttribute(CharTermAttribute.class);
|
||||
PositionIncrementAttribute position = tokenStream.getAttribute(PositionIncrementAttribute.class);
|
||||
if(foromsOfWine.contains(charTerm.toString()) && wordSeen){
|
||||
MatcherAssert.assertThat(position.getPositionIncrement(),equalTo(0));
|
||||
assertThat(position.getPositionIncrement(),equalTo(0));
|
||||
}
|
||||
if(foromsOfWine.contains(charTerm.toString())){
|
||||
wordSeen = true;
|
||||
@ -102,18 +95,18 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
|
||||
|
||||
private void testAnalayzer(Analyzer morphlogyAnalyzer, String answerPath, String testPath) throws IOException {
|
||||
InputStream stream = this.getClass().getResourceAsStream(answerPath);
|
||||
BufferedReader breader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
|
||||
BufferedReader breader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
|
||||
String[] strings = breader.readLine().replaceAll(" +", " ").trim().split(" ");
|
||||
HashSet<String> answer = new HashSet<>(Arrays.asList(strings));
|
||||
HashSet<String> answer = new HashSet<String>(Arrays.asList(strings));
|
||||
stream.close();
|
||||
|
||||
stream = this.getClass().getResourceAsStream(testPath);
|
||||
|
||||
InputStreamReader reader = new InputStreamReader(stream, StandardCharsets.UTF_8);
|
||||
InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
|
||||
|
||||
TokenStream tokenStream = morphlogyAnalyzer.tokenStream(null, reader);
|
||||
tokenStream.reset();
|
||||
HashSet<String> result = new HashSet<>();
|
||||
HashSet<String> result = new HashSet<String>();
|
||||
while (tokenStream.incrementToken()) {
|
||||
CharTermAttribute attribute1 = tokenStream.getAttribute(CharTermAttribute.class);
|
||||
result.add(attribute1.toString());
|
||||
@ -121,45 +114,6 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
|
||||
|
||||
stream.close();
|
||||
|
||||
MatcherAssert.assertThat(result, equalTo(answer));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPositionIncrement() throws IOException {
|
||||
EnglishAnalyzer englishAnalyzer = new EnglishAnalyzer();
|
||||
assertTokenStreamContents(
|
||||
englishAnalyzer.tokenStream("test", "There are tests!"),
|
||||
new String[]{"there", "are", "be", "test"},
|
||||
new int[]{0, 6, 6, 10},
|
||||
new int[]{5, 9, 9, 15},
|
||||
new String[]{"<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>"},
|
||||
new int[]{1, 1, 0, 1}
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testKeywordHandling() throws IOException {
|
||||
Analyzer analyzer = new EnglishKeywordTestAnalyzer();
|
||||
assertTokenStreamContents(
|
||||
analyzer.tokenStream("test", "Tests shouldn't be stemmed, but tests should!"),
|
||||
new String[]{"tests", "shouldn't", "be", "stem", "but", "test", "shall"}
|
||||
);
|
||||
}
|
||||
|
||||
private static class EnglishKeywordTestAnalyzer extends Analyzer {
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String s) {
|
||||
StandardTokenizer src = new StandardTokenizer();
|
||||
CharArraySet dontStem = new CharArraySet(1, false);
|
||||
dontStem.add("Tests");
|
||||
TokenFilter filter = new SetKeywordMarkerFilter(src, dontStem);
|
||||
filter = new LowerCaseFilter(filter);
|
||||
try {
|
||||
filter = new MorphologyFilter(filter, new EnglishLuceneMorphology());
|
||||
} catch (IOException ex) {
|
||||
throw new RuntimeException("cannot create EnglishLuceneMorphology", ex);
|
||||
}
|
||||
return new TokenStreamComponents(src, filter);
|
||||
}
|
||||
assertThat(result, equalTo(answer));
|
||||
}
|
||||
}
|
@ -17,23 +17,22 @@ package org.apache.lucene.morphology;
|
||||
|
||||
import org.apache.lucene.morphology.russian.RussianLuceneMorphology;
|
||||
import org.apache.lucene.morphology.english.EnglishLuceneMorphology;
|
||||
import org.hamcrest.MatcherAssert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import static org.hamcrest.CoreMatchers.equalTo;
|
||||
import static org.junit.Assert.assertThat;
|
||||
|
||||
|
||||
public class TestLuceneMorph {
|
||||
public class LuceneMorphTest {
|
||||
|
||||
@Test
|
||||
public void englishMorphologyShouldGetCorrectNormalForm() throws IOException {
|
||||
@ -53,13 +52,14 @@ public class TestLuceneMorph {
|
||||
|
||||
private void testMorphology(LuceneMorphology luceneMorph, String pathToTestData) throws IOException {
|
||||
InputStream stream = this.getClass().getResourceAsStream(pathToTestData);
|
||||
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
|
||||
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
|
||||
String s = bufferedReader.readLine();
|
||||
while (s != null) {
|
||||
String[] qa = s.trim().split(" ");
|
||||
Set<String> result = new HashSet<>(Arrays.asList(qa).subList(1, qa.length));
|
||||
Set<String> stringList = new HashSet<>(luceneMorph.getNormalForms(qa[0]));
|
||||
MatcherAssert.assertThat(stringList, equalTo(result));
|
||||
Set<String> result = new HashSet<String>();
|
||||
result.addAll(Arrays.asList(qa).subList(1, qa.length));
|
||||
Set<String> stringList = new HashSet<String>(luceneMorph.getNormalForms(qa[0]));
|
||||
assertThat(stringList, equalTo(result));
|
||||
s = bufferedReader.readLine();
|
||||
}
|
||||
}
|
@ -3,20 +3,27 @@
|
||||
<parent>
|
||||
<artifactId>morphology</artifactId>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<version>1.5</version>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>english</artifactId>
|
||||
<name>english</name>
|
||||
<version>1.5</version>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
<url>http://maven.apache.org</url>
|
||||
<dependencies>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>morph</artifactId>
|
||||
<version>1.5</version>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>4.8.2</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
||||
</project>
|
@ -32,7 +32,7 @@ public class EnglishLetterDecoderEncoder implements LetterDecoderEncoder {
|
||||
if (string.length() > 6) throw new SuffixToLongException("Suffix length should not be greater then " + 12);
|
||||
int result = 0;
|
||||
for (int i = 0; i < string.length(); i++) {
|
||||
int c = string.charAt(i) - ENGLISH_SMALL_LETTER_OFFSET;
|
||||
int c = 0 + string.charAt(i) - ENGLISH_SMALL_LETTER_OFFSET;
|
||||
if (c == 45 - ENGLISH_SMALL_LETTER_OFFSET) {
|
||||
c = DASH_CODE;
|
||||
}
|
||||
@ -48,7 +48,7 @@ public class EnglishLetterDecoderEncoder implements LetterDecoderEncoder {
|
||||
|
||||
public int[] encodeToArray(String s) {
|
||||
|
||||
ArrayList<Integer> integers = new ArrayList<>();
|
||||
ArrayList<Integer> integers = new ArrayList<Integer>();
|
||||
while (s.length() > 6) {
|
||||
integers.add(encode(s.substring(0, 6)));
|
||||
s = s.substring(6);
|
||||
@ -64,16 +64,16 @@ public class EnglishLetterDecoderEncoder implements LetterDecoderEncoder {
|
||||
}
|
||||
|
||||
public String decodeArray(int[] array) {
|
||||
StringBuilder result = new StringBuilder();
|
||||
String result = "";
|
||||
for (int i : array) {
|
||||
result.append(decode(i));
|
||||
result += decode(i);
|
||||
}
|
||||
return result.toString();
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
public String decode(Integer suffixN) {
|
||||
StringBuilder result = new StringBuilder();
|
||||
String result = "";
|
||||
while (suffixN > 27) {
|
||||
int c = suffixN % 28 + ENGLISH_SMALL_LETTER_OFFSET;
|
||||
if (c == ENGLISH_SMALL_LETTER_OFFSET) {
|
||||
@ -81,20 +81,21 @@ public class EnglishLetterDecoderEncoder implements LetterDecoderEncoder {
|
||||
continue;
|
||||
}
|
||||
if (c == DASH_CODE + ENGLISH_SMALL_LETTER_OFFSET) c = DASH_CHAR;
|
||||
result.insert(0, (char) c);
|
||||
result = (char) c + result;
|
||||
suffixN /= 28;
|
||||
}
|
||||
long c = suffixN + ENGLISH_SMALL_LETTER_OFFSET;
|
||||
if (c == DASH_CODE + ENGLISH_SMALL_LETTER_OFFSET) c = DASH_CHAR;
|
||||
result.insert(0, (char) c);
|
||||
return result.toString();
|
||||
result = (char) c + result;
|
||||
return result;
|
||||
}
|
||||
|
||||
public boolean checkCharacter(char c) {
|
||||
int code = c;
|
||||
int code = 0 + c;
|
||||
if (code == 45) return true;
|
||||
code -= ENGLISH_SMALL_LETTER_OFFSET;
|
||||
return code > 0 && code < 27;
|
||||
if (code > 0 && code < 27) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
|
@ -16,8 +16,7 @@
|
||||
package org.apache.lucene.morphology.english;
|
||||
|
||||
import static org.hamcrest.core.IsEqual.equalTo;
|
||||
|
||||
import org.hamcrest.MatcherAssert;
|
||||
import static org.junit.Assert.assertThat;
|
||||
import org.junit.Before;
|
||||
|
||||
|
||||
@ -31,11 +30,11 @@ public class EnglishLetterDecoderEncoderTest {
|
||||
|
||||
@org.junit.Test
|
||||
public void testDecodeEncodeToArray() {
|
||||
MatcherAssert.assertThat(decoderEncoder.decodeArray(decoderEncoder.encodeToArray("abcdefghijklmnopqrstuvwxyz")), equalTo("abcdefghijklmnopqrstuvwxyz"));
|
||||
MatcherAssert.assertThat(decoderEncoder.decodeArray(decoderEncoder.encodeToArray("xyz")), equalTo("xyz"));
|
||||
MatcherAssert.assertThat(decoderEncoder.decodeArray(decoderEncoder.encodeToArray("ytrrty")), equalTo("ytrrty"));
|
||||
MatcherAssert.assertThat(decoderEncoder.decodeArray(decoderEncoder.encodeToArray("ytrrtyz")), equalTo("ytrrtyz"));
|
||||
MatcherAssert.assertThat(decoderEncoder.decodeArray(decoderEncoder.encodeToArray("ytrrtyzqwqwe")), equalTo("ytrrtyzqwqwe"));
|
||||
assertThat(decoderEncoder.decodeArray(decoderEncoder.encodeToArray("abcdefghijklmnopqrstuvwxyz")), equalTo("abcdefghijklmnopqrstuvwxyz"));
|
||||
assertThat(decoderEncoder.decodeArray(decoderEncoder.encodeToArray("xyz")), equalTo("xyz"));
|
||||
assertThat(decoderEncoder.decodeArray(decoderEncoder.encodeToArray("ytrrty")), equalTo("ytrrty"));
|
||||
assertThat(decoderEncoder.decodeArray(decoderEncoder.encodeToArray("ytrrtyz")), equalTo("ytrrtyz"));
|
||||
assertThat(decoderEncoder.decodeArray(decoderEncoder.encodeToArray("ytrrtyzqwqwe")), equalTo("ytrrtyzqwqwe"));
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -16,9 +16,9 @@
|
||||
package org.apache.lucene.morphology.english.stemmer;
|
||||
|
||||
import org.apache.lucene.morphology.english.EnglishLuceneMorphology;
|
||||
import org.hamcrest.MatcherAssert;
|
||||
import org.junit.Test;
|
||||
import static org.hamcrest.core.IsEqual.equalTo;
|
||||
import static org.junit.Assert.assertThat;
|
||||
|
||||
|
||||
public class EnglishStemmerTest {
|
||||
@ -26,24 +26,24 @@ public class EnglishStemmerTest {
|
||||
public void testGetStemmedWord() throws Exception {
|
||||
EnglishLuceneMorphology englishLuceneMorphology = new EnglishLuceneMorphology();
|
||||
EnglishStemmer englishStemmer = new EnglishStemmer(englishLuceneMorphology);
|
||||
MatcherAssert.assertThat(englishStemmer.getStemmedWord("running"),equalTo("run"));
|
||||
MatcherAssert.assertThat(englishStemmer.getStemmedWord("run"),equalTo("run"));
|
||||
MatcherAssert.assertThat(englishStemmer.getStemmedWord("killed"),equalTo("kill"));
|
||||
MatcherAssert.assertThat(englishStemmer.getStemmedWord("kill"),equalTo("kill"));
|
||||
MatcherAssert.assertThat(englishStemmer.getStemmedWord("networking"),equalTo("network"));
|
||||
MatcherAssert.assertThat(englishStemmer.getStemmedWord("network"),equalTo("network"));
|
||||
MatcherAssert.assertThat(englishStemmer.getStemmedWord("statistics"),equalTo("statistic"));
|
||||
MatcherAssert.assertThat(englishStemmer.getStemmedWord("statistic"),equalTo("statistic"));
|
||||
MatcherAssert.assertThat(englishStemmer.getStemmedWord("stats"),equalTo("stat"));
|
||||
MatcherAssert.assertThat(englishStemmer.getStemmedWord("stat"),equalTo("stat"));
|
||||
MatcherAssert.assertThat(englishStemmer.getStemmedWord("countries"),equalTo("country"));
|
||||
MatcherAssert.assertThat(englishStemmer.getStemmedWord("country"),equalTo("country"));
|
||||
MatcherAssert.assertThat(englishStemmer.getStemmedWord("delete"),equalTo("delete"));
|
||||
MatcherAssert.assertThat(englishStemmer.getStemmedWord("ended"),equalTo("end"));
|
||||
MatcherAssert.assertThat(englishStemmer.getStemmedWord("end"),equalTo("end"));
|
||||
MatcherAssert.assertThat(englishStemmer.getStemmedWord("ends"),equalTo("end"));
|
||||
MatcherAssert.assertThat(englishStemmer.getStemmedWord("given"),equalTo("give"));
|
||||
MatcherAssert.assertThat(englishStemmer.getStemmedWord("give"),equalTo("give"));
|
||||
MatcherAssert.assertThat(englishStemmer.getStemmedWord("log4j"),equalTo("log4j"));
|
||||
assertThat(englishStemmer.getStemmedWord("running"),equalTo("run"));
|
||||
assertThat(englishStemmer.getStemmedWord("run"),equalTo("run"));
|
||||
assertThat(englishStemmer.getStemmedWord("killed"),equalTo("kill"));
|
||||
assertThat(englishStemmer.getStemmedWord("kill"),equalTo("kill"));
|
||||
assertThat(englishStemmer.getStemmedWord("networking"),equalTo("network"));
|
||||
assertThat(englishStemmer.getStemmedWord("network"),equalTo("network"));
|
||||
assertThat(englishStemmer.getStemmedWord("statistics"),equalTo("statistic"));
|
||||
assertThat(englishStemmer.getStemmedWord("statistic"),equalTo("statistic"));
|
||||
assertThat(englishStemmer.getStemmedWord("stats"),equalTo("stat"));
|
||||
assertThat(englishStemmer.getStemmedWord("stat"),equalTo("stat"));
|
||||
assertThat(englishStemmer.getStemmedWord("countries"),equalTo("country"));
|
||||
assertThat(englishStemmer.getStemmedWord("country"),equalTo("country"));
|
||||
assertThat(englishStemmer.getStemmedWord("delete"),equalTo("delete"));
|
||||
assertThat(englishStemmer.getStemmedWord("ended"),equalTo("end"));
|
||||
assertThat(englishStemmer.getStemmedWord("end"),equalTo("end"));
|
||||
assertThat(englishStemmer.getStemmedWord("ends"),equalTo("end"));
|
||||
assertThat(englishStemmer.getStemmedWord("given"),equalTo("give"));
|
||||
assertThat(englishStemmer.getStemmedWord("give"),equalTo("give"));
|
||||
assertThat(englishStemmer.getStemmedWord("log4j"),equalTo("log4j"));
|
||||
}
|
||||
}
|
||||
|
@ -1,4 +1,4 @@
|
||||
Copyright 2009 Alexander Kuznetsov
|
||||
Copyright ${project.inceptionYear} ${owner}
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
|
@ -3,12 +3,13 @@
|
||||
<parent>
|
||||
<artifactId>morphology</artifactId>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<version>1.5</version>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>morph</artifactId>
|
||||
<name>morph</name>
|
||||
<version>1.5</version>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
<url>http://maven.apache.org</url>
|
||||
|
||||
</project>
|
||||
|
@ -21,7 +21,7 @@ import java.util.ArrayList;
|
||||
|
||||
public abstract class BaseLetterDecoderEncoder implements LetterDecoderEncoder {
|
||||
public int[] encodeToArray(String s) {
|
||||
ArrayList<Integer> integers = new ArrayList<>();
|
||||
ArrayList<Integer> integers = new ArrayList<Integer>();
|
||||
while (s.length() > 6) {
|
||||
integers.add(encode(s.substring(0, 6)));
|
||||
s = s.substring(6);
|
||||
@ -37,11 +37,11 @@ public abstract class BaseLetterDecoderEncoder implements LetterDecoderEncoder {
|
||||
}
|
||||
|
||||
public String decodeArray(int[] array) {
|
||||
StringBuilder result = new StringBuilder();
|
||||
String result = "";
|
||||
for (int i : array) {
|
||||
result.append(decode(i));
|
||||
result += decode(i);
|
||||
}
|
||||
return result.toString();
|
||||
return result;
|
||||
}
|
||||
|
||||
public boolean checkString(String word) {
|
||||
|
@ -16,7 +16,6 @@
|
||||
package org.apache.lucene.morphology;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.Objects;
|
||||
|
||||
|
||||
public class Heuristic implements Serializable {
|
||||
@ -27,10 +26,10 @@ public class Heuristic implements Serializable {
|
||||
|
||||
public Heuristic(String s) {
|
||||
String[] strings = s.split("\\|");
|
||||
actualSuffixLength = Byte.parseByte(strings[0]);
|
||||
actualSuffixLength = Byte.valueOf(strings[0]);
|
||||
actualNormalSuffix = strings[1];
|
||||
formMorphInfo = Short.parseShort(strings[2]);
|
||||
normalFormMorphInfo = Short.parseShort(strings[3]);
|
||||
formMorphInfo = Short.valueOf(strings[2]);
|
||||
normalFormMorphInfo = Short.valueOf(strings[3]);
|
||||
}
|
||||
|
||||
public Heuristic(byte actualSuffixLength, String actualNormalSuffix, short formMorphInfo, short normalFormMorphInfo) {
|
||||
@ -71,12 +70,15 @@ public class Heuristic implements Serializable {
|
||||
if (actualSuffixLength != heuristic.actualSuffixLength) return false;
|
||||
if (formMorphInfo != heuristic.formMorphInfo) return false;
|
||||
if (normalFormMorphInfo != heuristic.normalFormMorphInfo) return false;
|
||||
return Objects.equals(actualNormalSuffix, heuristic.actualNormalSuffix);
|
||||
if (actualNormalSuffix != null ? !actualNormalSuffix.equals(heuristic.actualNormalSuffix) : heuristic.actualNormalSuffix != null)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int result = actualSuffixLength;
|
||||
int result = (int) actualSuffixLength;
|
||||
result = 31 * result + (actualNormalSuffix != null ? actualNormalSuffix.hashCode() : 0);
|
||||
result = 31 * result + (int) formMorphInfo;
|
||||
result = 31 * result + (int) normalFormMorphInfo;
|
||||
|
@ -17,17 +17,17 @@ package org.apache.lucene.morphology;
|
||||
|
||||
|
||||
public interface LetterDecoderEncoder {
|
||||
Integer encode(String string);
|
||||
public Integer encode(String string);
|
||||
|
||||
int[] encodeToArray(String s);
|
||||
public int[] encodeToArray(String s);
|
||||
|
||||
String decodeArray(int[] array);
|
||||
public String decodeArray(int[] array);
|
||||
|
||||
String decode(Integer suffixN);
|
||||
public String decode(Integer suffixN);
|
||||
|
||||
boolean checkCharacter(char c);
|
||||
public boolean checkCharacter(char c);
|
||||
|
||||
boolean checkString(String word);
|
||||
public boolean checkString(String word);
|
||||
|
||||
String cleanString(String s);
|
||||
public String cleanString(String s);
|
||||
}
|
||||
|
@ -34,13 +34,13 @@ public class LuceneMorphology extends MorphologyImpl {
|
||||
|
||||
protected void readRules(BufferedReader bufferedReader) throws IOException {
|
||||
String s;
|
||||
int amount;
|
||||
Integer amount;
|
||||
s = bufferedReader.readLine();
|
||||
amount = Integer.parseInt(s);
|
||||
amount = Integer.valueOf(s);
|
||||
rules = new Heuristic[amount][];
|
||||
for (int i = 0; i < amount; i++) {
|
||||
String s1 = bufferedReader.readLine();
|
||||
int ruleLenght = Integer.parseInt(s1);
|
||||
Integer ruleLenght = Integer.valueOf(s1);
|
||||
Heuristic[] heuristics = new Heuristic[ruleLenght];
|
||||
for (int j = 0; j < ruleLenght; j++) {
|
||||
heuristics[j] = new Heuristic(bufferedReader.readLine());
|
||||
@ -51,7 +51,7 @@ public class LuceneMorphology extends MorphologyImpl {
|
||||
|
||||
|
||||
private Heuristic[] modeifyHeuristic(Heuristic[] heuristics) {
|
||||
ArrayList<Heuristic> result = new ArrayList<>();
|
||||
ArrayList<Heuristic> result = new ArrayList<Heuristic>();
|
||||
for (Heuristic heuristic : heuristics) {
|
||||
boolean isAdded = true;
|
||||
for (Heuristic ch : result) {
|
||||
@ -61,7 +61,7 @@ public class LuceneMorphology extends MorphologyImpl {
|
||||
result.add(heuristic);
|
||||
}
|
||||
}
|
||||
return result.toArray(new Heuristic[0]);
|
||||
return result.toArray(new Heuristic[result.size()]);
|
||||
}
|
||||
|
||||
public boolean checkString(String s) {
|
||||
|
@ -17,7 +17,6 @@ package org.apache.lucene.morphology;
|
||||
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
@ -48,7 +47,7 @@ public class MorphologyImpl implements Morphology {
|
||||
}
|
||||
|
||||
public List<String> getNormalForms(String s) {
|
||||
ArrayList<String> result = new ArrayList<>();
|
||||
ArrayList<String> result = new ArrayList<String>();
|
||||
int[] ints = decoderEncoder.encodeToArray(revertWord(s));
|
||||
int ruleId = findRuleId(ints);
|
||||
boolean notSeenEmptyString = true;
|
||||
@ -65,7 +64,7 @@ public class MorphologyImpl implements Morphology {
|
||||
}
|
||||
|
||||
public List<String> getMorphInfo(String s) {
|
||||
ArrayList<String> result = new ArrayList<>();
|
||||
ArrayList<String> result = new ArrayList<String>();
|
||||
int[] ints = decoderEncoder.encodeToArray(revertWord(s));
|
||||
int ruleId = findRuleId(ints);
|
||||
for (Heuristic h : rules[rulesId[ruleId]]) {
|
||||
@ -101,14 +100,14 @@ public class MorphologyImpl implements Morphology {
|
||||
private int compareToInts(int[] i1, int[] i2) {
|
||||
int minLength = Math.min(i1.length, i2.length);
|
||||
for (int i = 0; i < minLength; i++) {
|
||||
int i3 = Integer.compare(i1[i], i2[i]);
|
||||
int i3 = i1[i] < i2[i] ? -1 : (i1[i] == i2[i] ? 0 : 1);
|
||||
if (i3 != 0) return i3;
|
||||
}
|
||||
return i1.length - i2.length;
|
||||
}
|
||||
|
||||
public void writeToFile(String fileName) throws IOException {
|
||||
OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8);
|
||||
OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(fileName), "UTF-8");
|
||||
writer.write(separators.length + "\n");
|
||||
for (int[] i : separators) {
|
||||
writer.write(i.length + "\n");
|
||||
@ -139,7 +138,7 @@ public class MorphologyImpl implements Morphology {
|
||||
}
|
||||
|
||||
private void readFromInputStream(InputStream inputStream) throws IOException {
|
||||
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8));
|
||||
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"));
|
||||
String s = bufferedReader.readLine();
|
||||
Integer amount = Integer.valueOf(s);
|
||||
|
||||
@ -154,9 +153,9 @@ public class MorphologyImpl implements Morphology {
|
||||
|
||||
private void readGrammaInfo(BufferedReader bufferedReader) throws IOException {
|
||||
String s;
|
||||
int amount;
|
||||
Integer amount;
|
||||
s = bufferedReader.readLine();
|
||||
amount = Integer.parseInt(s);
|
||||
amount = Integer.valueOf(s);
|
||||
grammarInfo = new String[amount];
|
||||
for (int i = 0; i < amount; i++) {
|
||||
grammarInfo[i] = bufferedReader.readLine();
|
||||
@ -165,13 +164,13 @@ public class MorphologyImpl implements Morphology {
|
||||
|
||||
protected void readRules(BufferedReader bufferedReader) throws IOException {
|
||||
String s;
|
||||
int amount;
|
||||
Integer amount;
|
||||
s = bufferedReader.readLine();
|
||||
amount = Integer.parseInt(s);
|
||||
amount = Integer.valueOf(s);
|
||||
rules = new Heuristic[amount][];
|
||||
for (int i = 0; i < amount; i++) {
|
||||
String s1 = bufferedReader.readLine();
|
||||
int ruleLength = Integer.parseInt(s1);
|
||||
Integer ruleLength = Integer.valueOf(s1);
|
||||
rules[i] = new Heuristic[ruleLength];
|
||||
for (int j = 0; j < ruleLength; j++) {
|
||||
rules[i][j] = new Heuristic(bufferedReader.readLine());
|
||||
@ -183,7 +182,7 @@ public class MorphologyImpl implements Morphology {
|
||||
rulesId = new short[amount];
|
||||
for (int i = 0; i < amount; i++) {
|
||||
String s1 = bufferedReader.readLine();
|
||||
rulesId[i] = Short.parseShort(s1);
|
||||
rulesId[i] = Short.valueOf(s1);
|
||||
}
|
||||
}
|
||||
|
||||
@ -191,10 +190,10 @@ public class MorphologyImpl implements Morphology {
|
||||
separators = new int[amount][];
|
||||
for (int i = 0; i < amount; i++) {
|
||||
String s1 = bufferedReader.readLine();
|
||||
int wordLenght = Integer.parseInt(s1);
|
||||
Integer wordLenght = Integer.valueOf(s1);
|
||||
separators[i] = new int[wordLenght];
|
||||
for (int j = 0; j < wordLenght; j++) {
|
||||
separators[i][j] = Integer.parseInt(bufferedReader.readLine());
|
||||
separators[i][j] = Integer.valueOf(bufferedReader.readLine());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -17,10 +17,11 @@
|
||||
package org.apache.lucene.morphology.analyzer;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.payloads.PayloadEncoder;
|
||||
import org.apache.lucene.analysis.payloads.PayloadHelper;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.morphology.LetterDecoderEncoder;
|
||||
import org.apache.lucene.morphology.LuceneMorphology;
|
||||
@ -28,7 +29,7 @@ import org.apache.lucene.util.BytesRef;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import java.io.Reader;
|
||||
|
||||
public class MorphologyAnalyzer extends Analyzer {
|
||||
private LuceneMorphology luceneMorph;
|
||||
@ -50,29 +51,17 @@ public class MorphologyAnalyzer extends Analyzer {
|
||||
protected TokenStreamComponents createComponents(String s) {
|
||||
|
||||
StandardTokenizer src = new StandardTokenizer();
|
||||
final PayloadEncoder encoder = new PayloadEncoder() {
|
||||
@Override
|
||||
public BytesRef encode(char[] buffer) {
|
||||
final Float payload = Float.valueOf(new String(buffer));
|
||||
System.out.println(payload);
|
||||
final byte[] bytes = PayloadHelper.encodeFloat(payload);
|
||||
return new BytesRef(bytes, 0, bytes.length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef encode(char[] buffer, int offset, int length) {
|
||||
|
||||
final Float payload = Float.valueOf(new String(buffer, offset, length));
|
||||
System.out.println(payload);
|
||||
final byte[] bytes = PayloadHelper.encodeFloat(payload);
|
||||
|
||||
return new BytesRef(bytes, 0, bytes.length);
|
||||
}
|
||||
};
|
||||
|
||||
TokenFilter filter = new LowerCaseFilter(src);
|
||||
TokenFilter filter = new StandardFilter(src);
|
||||
filter = new LowerCaseFilter(filter);
|
||||
filter = new MorphologyFilter(filter, luceneMorph);
|
||||
|
||||
return new TokenStreamComponents(src::setReader, filter);
|
||||
return new TokenStreamComponents(src, filter) {
|
||||
@Override
|
||||
protected void setReader(final Reader reader) throws IOException {
|
||||
super.setReader(reader);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -19,22 +19,18 @@ package org.apache.lucene.morphology.analyzer;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.morphology.LuceneMorphology;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
public class MorphologyFilter extends TokenFilter {
|
||||
private LuceneMorphology luceneMorph;
|
||||
private Iterator<String> iterator;
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
|
||||
private final PositionIncrementAttribute position = addAttribute(PositionIncrementAttribute.class);
|
||||
private State state = null;
|
||||
|
||||
public MorphologyFilter(TokenStream tokenStream, LuceneMorphology luceneMorph) {
|
||||
super(tokenStream);
|
||||
@ -43,45 +39,27 @@ public class MorphologyFilter extends TokenFilter {
|
||||
|
||||
|
||||
final public boolean incrementToken() throws IOException {
|
||||
if (iterator != null) {
|
||||
if (iterator.hasNext()) {
|
||||
restoreState(state);
|
||||
position.setPositionIncrement(0);
|
||||
termAtt.setEmpty().append(iterator.next());
|
||||
return true;
|
||||
} else {
|
||||
state = null;
|
||||
iterator = null;
|
||||
}
|
||||
}
|
||||
while (true) {
|
||||
boolean oldToken = true;
|
||||
while (iterator == null || !iterator.hasNext()) {
|
||||
boolean b = input.incrementToken();
|
||||
if (!b) {
|
||||
return false;
|
||||
}
|
||||
if (!keywordAttr.isKeyword() && termAtt.length() > 0) {
|
||||
String s = new String(termAtt.buffer(), 0, termAtt.length());
|
||||
if (luceneMorph.checkString(s)) {
|
||||
List<String> forms = luceneMorph.getNormalForms(s);
|
||||
if (forms.isEmpty()) {
|
||||
continue;
|
||||
} else if (forms.size() == 1) {
|
||||
termAtt.setEmpty().append(forms.get(0));
|
||||
} else {
|
||||
state = captureState();
|
||||
iterator = forms.iterator();
|
||||
termAtt.setEmpty().append(iterator.next());
|
||||
}
|
||||
}
|
||||
String s = new String(termAtt.buffer(), 0, termAtt.length());
|
||||
if (luceneMorph.checkString(s)) {
|
||||
oldToken = false;
|
||||
iterator = luceneMorph.getNormalForms(s).iterator();
|
||||
} else {
|
||||
return true;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
String s = iterator.next();
|
||||
termAtt.setEmpty();
|
||||
termAtt.append(s);
|
||||
if (oldToken) {
|
||||
position.setPositionIncrement(0);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() throws IOException {
|
||||
super.reset();
|
||||
state = null;
|
||||
iterator = null;
|
||||
}
|
||||
}
|
||||
|
122
pom.xml
122
pom.xml
@ -1,10 +1,11 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>morphology</artifactId>
|
||||
<packaging>pom</packaging>
|
||||
<version>1.5</version>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
<name>morphology</name>
|
||||
<url>http://maven.apache.org</url>
|
||||
|
||||
@ -15,12 +16,6 @@
|
||||
<tag>HEAD</tag>
|
||||
</scm>
|
||||
|
||||
<properties>
|
||||
<lucene.version>9.3.0</lucene.version>
|
||||
<morphology.version>1.5</morphology.version>
|
||||
<junit.version>4.13</junit.version>
|
||||
</properties>
|
||||
|
||||
<distributionManagement>
|
||||
<repository>
|
||||
<id>bintray</id>
|
||||
@ -28,36 +23,28 @@
|
||||
</repository>
|
||||
</distributionManagement>
|
||||
|
||||
<licenses>
|
||||
<license>
|
||||
<name>Apache License, Version 2.0</name>
|
||||
<url>https://www.apache.org/licenses/LICENSE-2.0.txt</url>
|
||||
<distribution>repo</distribution>
|
||||
</license>
|
||||
</licenses>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-test-framework</artifactId>
|
||||
<version>${lucene.version}</version>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>4.8.2</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.hamcrest</groupId>
|
||||
<artifactId>hamcrest-all</artifactId>
|
||||
<version>1.3</version>
|
||||
<version>1.1</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-core</artifactId>
|
||||
<version>${lucene.version}</version>
|
||||
<version>5.1.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene</groupId>
|
||||
<artifactId>lucene-analysis-common</artifactId>
|
||||
<version>${lucene.version}</version>
|
||||
<artifactId>lucene-analyzers-common</artifactId>
|
||||
<version>5.1.0</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
@ -65,11 +52,11 @@
|
||||
<repository>
|
||||
<id>maven2-repository.dev.java.net</id>
|
||||
<name>Java.net Repository for Maven</name>
|
||||
<url>https://download.java.net/maven/2/</url>
|
||||
<url>http://download.java.net/maven/2/</url>
|
||||
</repository>
|
||||
<repository>
|
||||
<id>bintray</id>
|
||||
<url>https://dl.bintray.com/akuznetsov/russianmorphology</url>
|
||||
<url>http://dl.bintray.com/akuznetsov/russianmorphology</url>
|
||||
<releases>
|
||||
<enabled>true</enabled>
|
||||
</releases>
|
||||
@ -78,25 +65,13 @@
|
||||
</snapshots>
|
||||
</repository>
|
||||
</repositories>
|
||||
|
||||
<pluginRepositories>
|
||||
<pluginRepository>
|
||||
<id>mc-release</id>
|
||||
<name>maven-license-plugin repository of releases</name>
|
||||
<url>https://mc-repo.googlecode.com/svn/maven2/releases</url>
|
||||
<snapshots>
|
||||
<enabled>false</enabled>
|
||||
</snapshots>
|
||||
<releases>
|
||||
<enabled>true</enabled>
|
||||
</releases>
|
||||
</pluginRepository>
|
||||
</pluginRepositories>
|
||||
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<artifactId>maven-release-plugin</artifactId>
|
||||
<version>2.5.3</version>
|
||||
<version>2.5.2</version>
|
||||
<configuration>
|
||||
<useReleaseProfile>false</useReleaseProfile>
|
||||
<releaseProfiles>release</releaseProfiles>
|
||||
@ -107,37 +82,42 @@
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>3.8.1</version>
|
||||
<configuration>
|
||||
<source>11</source>
|
||||
<target>11</target>
|
||||
<source>1.7</source>
|
||||
<target>1.7</target>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin> <!-- usage: http://code.google.com/p/maven-license-plugin/wiki/HowTo -->
|
||||
<artifactId>maven-license-plugin</artifactId>
|
||||
<groupId>com.google.code.maven-license-plugin</groupId>
|
||||
<version>1.4.0</version>
|
||||
<configuration>
|
||||
<basedir>${project.parent.basedir}</basedir>
|
||||
<header>etc/header.txt</header>
|
||||
<excludes>
|
||||
<exclude>**/*.txt</exclude>
|
||||
<exclude>**/*.info</exclude>
|
||||
<exclude>**/pom.xml</exclude>
|
||||
</excludes>
|
||||
<includes>
|
||||
<include>**/src/**</include>
|
||||
</includes>
|
||||
</configuration>
|
||||
<executions>
|
||||
<execution>
|
||||
<phase>test</phase>
|
||||
<goals>
|
||||
<goal>check</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<!--<plugin> <!– usage: http://code.google.com/p/maven-license-plugin/wiki/HowTo –>-->
|
||||
|
||||
<!--<groupId>com.mycila</groupId>-->
|
||||
<!--<artifactId>license-maven-plugin</artifactId>-->
|
||||
<!--<version>2.11</version>-->
|
||||
|
||||
<!--<configuration>-->
|
||||
<!--<properties>-->
|
||||
<!--<owner>Alexander Kuznetsov</owner>-->
|
||||
<!--<!–<email>mathieu.carbou@gmail.com</email>–>-->
|
||||
<!--</properties>-->
|
||||
<!--<basedir>${project.parent.basedir}</basedir>-->
|
||||
<!--<header>etc/header.txt</header>-->
|
||||
<!--<excludes>-->
|
||||
<!--<exclude>**/*.txt</exclude>-->
|
||||
<!--<exclude>**/*.info</exclude>-->
|
||||
<!--<exclude>**/pom.xml</exclude>-->
|
||||
<!--</excludes>-->
|
||||
<!--<includes>-->
|
||||
<!--<include>**/src/**</include>-->
|
||||
<!--</includes>-->
|
||||
<!--</configuration>-->
|
||||
<!--<executions>-->
|
||||
<!--<execution>-->
|
||||
<!--<phase>test</phase>-->
|
||||
<!--<goals>-->
|
||||
<!--<goal>check</goal>-->
|
||||
<!--</goals>-->
|
||||
<!--</execution>-->
|
||||
<!--</executions>-->
|
||||
<!--</plugin>-->
|
||||
</plugins>
|
||||
</build>
|
||||
<profiles>
|
||||
@ -147,7 +127,6 @@
|
||||
<plugins>
|
||||
<plugin>
|
||||
<artifactId>maven-source-plugin</artifactId>
|
||||
<version>3.2.1</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>attach-sources</id>
|
||||
@ -159,7 +138,6 @@
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-javadoc-plugin</artifactId>
|
||||
<version>3.3.1</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>attach-javadocs</id>
|
||||
@ -178,6 +156,6 @@
|
||||
<module>dictionary-reader</module>
|
||||
<module>russian</module>
|
||||
<module>english</module>
|
||||
<module>solr-morphology-analysis</module>
|
||||
<module>context</module>
|
||||
</modules>
|
||||
</project>
|
||||
</project>
|
@ -3,12 +3,13 @@
|
||||
<parent>
|
||||
<artifactId>morphology</artifactId>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<version>1.5</version>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>russian</artifactId>
|
||||
<name>russian</name>
|
||||
<version>1.5</version>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
<url>http://maven.apache.org</url>
|
||||
<dependencies>
|
||||
|
||||
@ -16,15 +17,15 @@
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>morph</artifactId>
|
||||
<version>1.5</version>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>${junit.version}</version>
|
||||
<version>4.8.2</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
</project>
|
||||
</project>
|
@ -20,6 +20,7 @@ import org.apache.lucene.morphology.LetterDecoderEncoder;
|
||||
import org.apache.lucene.morphology.SuffixToLongException;
|
||||
import org.apache.lucene.morphology.WrongCharaterException;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedList;
|
||||
|
||||
/**
|
||||
@ -41,7 +42,7 @@ public class RussianLetterDecoderEncoder implements LetterDecoderEncoder {
|
||||
throw new SuffixToLongException("Suffix length should not be greater then " + WORD_PART_LENGHT + " " + string);
|
||||
int result = 0;
|
||||
for (int i = 0; i < string.length(); i++) {
|
||||
int c = string.charAt(i) - RUSSIAN_SMALL_LETTER_OFFSET;
|
||||
int c = 0 + string.charAt(i) - RUSSIAN_SMALL_LETTER_OFFSET;
|
||||
if (c == 45 - RUSSIAN_SMALL_LETTER_OFFSET) {
|
||||
c = DASH_CODE;
|
||||
}
|
||||
@ -57,7 +58,7 @@ public class RussianLetterDecoderEncoder implements LetterDecoderEncoder {
|
||||
}
|
||||
|
||||
public int[] encodeToArray(String s) {
|
||||
LinkedList<Integer> integers = new LinkedList<>();
|
||||
LinkedList<Integer> integers = new LinkedList<Integer>();
|
||||
while (s.length() > WORD_PART_LENGHT) {
|
||||
integers.add(encode(s.substring(0, WORD_PART_LENGHT)));
|
||||
s = s.substring(WORD_PART_LENGHT);
|
||||
@ -73,16 +74,16 @@ public class RussianLetterDecoderEncoder implements LetterDecoderEncoder {
|
||||
}
|
||||
|
||||
public String decodeArray(int[] array) {
|
||||
StringBuilder result = new StringBuilder();
|
||||
String result = "";
|
||||
for (int i : array) {
|
||||
result.append(decode(i));
|
||||
result += decode(i);
|
||||
}
|
||||
return result.toString();
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
public String decode(Integer suffixN) {
|
||||
StringBuilder result = new StringBuilder();
|
||||
String result = "";
|
||||
while (suffixN > 33) {
|
||||
int c = suffixN % 34 + RUSSIAN_SMALL_LETTER_OFFSET;
|
||||
if (c == RUSSIAN_SMALL_LETTER_OFFSET) {
|
||||
@ -90,20 +91,21 @@ public class RussianLetterDecoderEncoder implements LetterDecoderEncoder {
|
||||
continue;
|
||||
}
|
||||
if (c == DASH_CODE + RUSSIAN_SMALL_LETTER_OFFSET) c = DASH_CHAR;
|
||||
result.insert(0, (char) c);
|
||||
result = (char) c + result;
|
||||
suffixN /= 34;
|
||||
}
|
||||
long c = suffixN + RUSSIAN_SMALL_LETTER_OFFSET;
|
||||
if (c == DASH_CODE + RUSSIAN_SMALL_LETTER_OFFSET) c = DASH_CHAR;
|
||||
result.insert(0, (char) c);
|
||||
return result.toString();
|
||||
result = (char) c + result;
|
||||
return result;
|
||||
}
|
||||
|
||||
public boolean checkCharacter(char c) {
|
||||
int code = c;
|
||||
int code = 0 + c;
|
||||
if (code == 45) return true;
|
||||
code -= RUSSIAN_SMALL_LETTER_OFFSET;
|
||||
return code > 0 && code < 33;
|
||||
if (code > 0 && code < 33) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean checkString(String word) {
|
||||
|
@ -17,7 +17,6 @@ package org.apache.lucene.morphology.russian;
|
||||
|
||||
import org.apache.lucene.morphology.SuffixToLongException;
|
||||
import org.apache.lucene.morphology.WrongCharaterException;
|
||||
import org.hamcrest.MatcherAssert;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
@ -25,9 +24,9 @@ import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
|
||||
import static org.hamcrest.core.IsEqual.equalTo;
|
||||
import static org.junit.Assert.assertThat;
|
||||
|
||||
public class RussianLetterDecoderEncoderTest {
|
||||
private RussianLetterDecoderEncoder decoderEncoder;
|
||||
@ -41,12 +40,12 @@ public class RussianLetterDecoderEncoderTest {
|
||||
@Test
|
||||
public void testShouldPreserverStringComporision() throws IOException {
|
||||
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/decoder-test-monotonic.txt");
|
||||
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
|
||||
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
|
||||
String s = bufferedReader.readLine();
|
||||
while (s != null) {
|
||||
String[] qa = s.trim().split(" ");
|
||||
if (qa[0].length() <= RussianLetterDecoderEncoder.WORD_PART_LENGHT && qa[1].length() <= RussianLetterDecoderEncoder.WORD_PART_LENGHT) {
|
||||
MatcherAssert.assertThat(decoderEncoder.encode(qa[1]) > decoderEncoder.encode(qa[0]), equalTo(true));
|
||||
assertThat(decoderEncoder.encode(qa[1]) > decoderEncoder.encode(qa[0]), equalTo(true));
|
||||
}
|
||||
s = bufferedReader.readLine();
|
||||
}
|
||||
@ -56,13 +55,13 @@ public class RussianLetterDecoderEncoderTest {
|
||||
@Test
|
||||
public void testShouldCorrectDecodeEncode() throws IOException {
|
||||
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/decoder-test-data.txt");
|
||||
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
|
||||
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
|
||||
String s = bufferedReader.readLine();
|
||||
while (s != null) {
|
||||
String[] qa = s.trim().split(" ");
|
||||
if (qa[0].length() <= RussianLetterDecoderEncoder.WORD_PART_LENGHT) {
|
||||
Integer encodedSuffix = decoderEncoder.encode(qa[0]);
|
||||
MatcherAssert.assertThat(decoderEncoder.decode(encodedSuffix), equalTo(qa[1]));
|
||||
assertThat(decoderEncoder.decode(encodedSuffix), equalTo(qa[1]));
|
||||
}
|
||||
s = bufferedReader.readLine();
|
||||
}
|
||||
@ -71,12 +70,12 @@ public class RussianLetterDecoderEncoderTest {
|
||||
@Test
|
||||
public void testShouldCorrectDecodeEncodeStringToArray() throws IOException {
|
||||
InputStream stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/decoder-test-data-for-array.txt");
|
||||
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
|
||||
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
|
||||
String s = bufferedReader.readLine();
|
||||
while (s != null) {
|
||||
String[] qa = s.trim().split(" ");
|
||||
int[] ecodedSuffix = decoderEncoder.encodeToArray(qa[0]);
|
||||
MatcherAssert.assertThat(decoderEncoder.decodeArray(ecodedSuffix), equalTo(qa[1]));
|
||||
assertThat(decoderEncoder.decodeArray(ecodedSuffix), equalTo(qa[1]));
|
||||
s = bufferedReader.readLine();
|
||||
}
|
||||
}
|
||||
|
@ -1,70 +0,0 @@
|
||||
/**
|
||||
* Copyright 2009 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.morphology;
|
||||
|
||||
import org.apache.lucene.analysis.TokenFilterFactory;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
|
||||
import org.apache.lucene.morphology.LuceneMorphology;
|
||||
import org.apache.lucene.morphology.analyzer.MorphologyFilter;
|
||||
import org.apache.lucene.util.ResourceLoader;
|
||||
import org.apache.lucene.util.ResourceLoaderAware;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Factory for {@link MorphologyFilter}, with configurable language
|
||||
* <p>
|
||||
* <b>Note:</b> Two languages are available now: English (default value) and Russian.
|
||||
* <pre class="prettyprint">
|
||||
* <fieldType name="content" class="solr.TextField" positionIncrementGap="100">
|
||||
* <analyzer>
|
||||
* <tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
* <filter class="solr.LowerCaseFilterFactory"/>
|
||||
* <filter class="solr.MorphologyFilterFactory" language="English"/>
|
||||
* </analyzer>
|
||||
* </fieldType></pre>
|
||||
*/
|
||||
public class MorphologyFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
|
||||
|
||||
private static final String LANGUAGE_KEY = "language";
|
||||
|
||||
private String language;
|
||||
private LuceneMorphology luceneMorphology;
|
||||
|
||||
public MorphologyFilterFactory(Map<String, String> args) {
|
||||
super(args);
|
||||
|
||||
language = get(args, LANGUAGE_KEY, "English");
|
||||
if (!args.isEmpty()) {
|
||||
throw new IllegalArgumentException("Unknown parameters: " + args);
|
||||
}
|
||||
}
|
||||
|
||||
public TokenStream create(TokenStream input) {
|
||||
return new MorphologyFilter(input, luceneMorphology);
|
||||
}
|
||||
|
||||
public void inform(ResourceLoader loader) {
|
||||
|
||||
String className = "org.apache.lucene.morphology." + language.toLowerCase() + "." + language + "LuceneMorphology";
|
||||
luceneMorphology = loader.newInstance(className, LuceneMorphology.class);
|
||||
}
|
||||
|
||||
public LuceneMorphology getLuceneMorphology() {
|
||||
return luceneMorphology;
|
||||
}
|
||||
}
|
@ -1,75 +0,0 @@
|
||||
/**
|
||||
* Copyright 2009 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.analysis.morphology;
|
||||
|
||||
import org.apache.lucene.morphology.LuceneMorphology;
|
||||
import org.apache.lucene.morphology.english.EnglishLuceneMorphology;
|
||||
import org.apache.lucene.morphology.russian.RussianLuceneMorphology;
|
||||
import org.apache.lucene.util.ClasspathResourceLoader;
|
||||
import org.apache.lucene.util.ResourceLoader;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
public class MorphologyFilterFactoryTest {
|
||||
|
||||
private static final String LANGUAGE_KEY = "language";
|
||||
private ResourceLoader loader = new ClasspathResourceLoader(MorphologyFilterFactoryTest.class);
|
||||
private Map<String, String> args;
|
||||
|
||||
@Before
|
||||
public void setUp() {
|
||||
args = new HashMap<>();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void if_RussianLanguageKey_then_CreateRussianMorphologyFilter() {
|
||||
|
||||
args.put(LANGUAGE_KEY, "Russian");
|
||||
MorphologyFilterFactory morphologyFilterFactory = new MorphologyFilterFactory(args);
|
||||
morphologyFilterFactory.inform(loader);
|
||||
|
||||
LuceneMorphology luceneMorphology = morphologyFilterFactory.getLuceneMorphology();
|
||||
|
||||
Assert.assertTrue("Creation the MorphologyFilterFactory with a Russian language key", luceneMorphology instanceof RussianLuceneMorphology);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void if_EnglishLanguageKey_then_CreateEnglishMorphologyFilter() {
|
||||
|
||||
args.put(LANGUAGE_KEY, "English");
|
||||
MorphologyFilterFactory morphologyFilterFactory = new MorphologyFilterFactory(args);
|
||||
morphologyFilterFactory.inform(loader);
|
||||
|
||||
LuceneMorphology luceneMorphology = morphologyFilterFactory.getLuceneMorphology();
|
||||
|
||||
Assert.assertTrue("Creation the MorphologyFilterFactory with a English language key", luceneMorphology instanceof EnglishLuceneMorphology);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void if_NoLanguageKey_then_CreateEnglishMorphologyFilter() {
|
||||
|
||||
MorphologyFilterFactory morphologyFilterFactory = new MorphologyFilterFactory(args);
|
||||
morphologyFilterFactory.inform(loader);
|
||||
|
||||
LuceneMorphology luceneMorphology = morphologyFilterFactory.getLuceneMorphology();
|
||||
|
||||
Assert.assertTrue("Creation the MorphologyFilterFactory without any language keys", luceneMorphology instanceof EnglishLuceneMorphology);
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user