moving to lucene 3.0.0
git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@92 d817d54c-26ab-11de-abc9-2f7d1455ff7a
parent 36012f2943
commit ebc367f16c
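The change drops the Lucene 2.4 Token next(Token) loop in favour of the 3.0 attribute-based TokenStream API used throughout the hunks below. A minimal consumption sketch, not part of the patch (class and method names are illustrative; any Analyzer, such as the project's MorphlogyAnalayzer, can be passed in):

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

public class Lucene30ConsumptionSketch {
    // Prints every term the analyzer produces for the given text.
    // In Lucene 3.0 a stream is advanced with incrementToken() and the
    // current term is read from its TermAttribute.
    static void printTerms(Analyzer analyzer, String text) throws IOException {
        Reader reader = new StringReader(text);
        TokenStream stream = analyzer.tokenStream("field", reader); // field name is arbitrary here; the tests pass null
        TermAttribute term = stream.getAttribute(TermAttribute.class);
        while (stream.incrementToken()) {
            System.out.println(term.term());
        }
        stream.close();
    }
}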
@@ -15,8 +15,8 @@
  */
 package org.apache.lucene.morphology.english;
 
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import static org.hamcrest.Matchers.equalTo;
 import static org.junit.Assert.assertThat;
 import org.junit.Test;
@@ -43,19 +43,12 @@ public class EnglishAnalayzerTest {
         stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/english/englsih-analayzer-data.txt");
 
         InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
-        final Token reusableToken = new Token();
-
-        Token nextToken;
-        TokenStream in = morphlogyAnalayzer.tokenStream(null, reader);
+        TokenStream tokenStream = morphlogyAnalayzer.tokenStream(null, reader);
         HashSet<String> result = new HashSet<String>();
-        for (; ;) {
-            nextToken = in.next(reusableToken);
-
-            if (nextToken == null) {
-                break;
-            }
-
-            result.add(nextToken.term());
+        while (tokenStream.incrementToken()) {
+            TermAttribute attribute1 = tokenStream.getAttribute(TermAttribute.class);
+            result.add(attribute1.term());
         }
 
         stream.close();
@@ -1,5 +1,6 @@
 <?xml version="1.0"?>
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
     <parent>
         <artifactId>morphology</artifactId>
         <groupId>org.apache.lucene.morphology</groupId>
@@ -13,11 +14,6 @@
     <url>http://maven.apache.org</url>
 
     <dependencies>
-        <dependency>
-            <groupId>junit</groupId>
-            <artifactId>junit</artifactId>
-            <version>3.8.1</version>
-            <scope>test</scope>
-        </dependency>
 
     </dependencies>
 </project>
@@ -23,6 +23,7 @@ import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.morphology.LetterDecoderEncoder;
 import org.apache.lucene.morphology.LuceneMorphology;
+import org.apache.lucene.util.Version;
 
 import java.io.IOException;
 import java.io.InputStream;
@@ -44,7 +45,7 @@ public class MorphlogyAnalayzer extends Analyzer {
     }
 
     public TokenStream tokenStream(String fieldName, Reader reader) {
-        TokenStream result = new StandardTokenizer(reader);
+        TokenStream result = new StandardTokenizer(Version.LUCENE_30, reader);
         result = new StandardFilter(result);
         result = new LowerCaseFilter(result);
         return new MorphlogyFilter(result, luceneMorph);
@@ -16,67 +16,43 @@
 
 package org.apache.lucene.morphology.analayzer;
 
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.morphology.LuceneMorphology;
 
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
+import java.util.Iterator;
 
 
 public class MorphlogyFilter extends TokenFilter {
     private LuceneMorphology luceneMorph;
+    private Iterator<String> iterator;
+    private TermAttribute termAtt;
 
     public MorphlogyFilter(TokenStream tokenStream, LuceneMorphology luceneMorph) {
         super(tokenStream);
         this.luceneMorph = luceneMorph;
+        termAtt = addAttribute(TermAttribute.class);
     }
 
-
-    private List<String> stack = new ArrayList<String>();
-    private int index = 0;
-    private Token current = null;
-
-    /**
-     * Returns the next token in the stream, or null at EOS.
-     */
-    public Token next(final Token reusableToken) throws IOException {
-        assert reusableToken != null;
-        while (index < stack.size()) { // pop from stack
-            Token nextToken = createToken(stack.get(index++), current, reusableToken);
-            if (nextToken != null) {
-                return nextToken;
-            }
-        }
-
-        Token nextToken = input.next(reusableToken);
-        if (nextToken == null) return null; // EOS; iterator exhausted
-        if (!luceneMorph.checkString(nextToken.term())) {
-            return nextToken;
-        }
-        stack = luceneMorph.getNormalForms(nextToken.term());
-        index = 0;
-        current = (Token) nextToken.clone();
-        nextToken = createToken(stack.get(index++), current, reusableToken);
-        return nextToken;
-    }
-
-    /**
-     * Creates and returns a token for the given synonym of the current input
-     * token; Override for custom (stateless or stateful) behavior, if desired.
-     *
-     * @param synonym a synonym for the current token's term
-     * @param current the current token from the underlying child stream
-     * @param reusableToken the token to reuse
-     * @return a new token, or null to indicate that the given synonym should be
-     *         ignored
-     */
-    protected Token createToken(String synonym, Token current, final Token reusableToken) {
-        reusableToken.reinit(current, synonym);
-        reusableToken.setTermBuffer(synonym);
-        reusableToken.setPositionIncrement(0);
-        return reusableToken;
-    }
+    public boolean incrementToken() throws IOException {
+        while (iterator == null || !iterator.hasNext()) {
+            boolean b = input.incrementToken();
+            if (!b) {
+                return false;
+            }
+            String s = termAtt.term();
+            if (luceneMorph.checkString(s)) {
+                iterator = luceneMorph.getNormalForms(termAtt.term()).iterator();
+            } else {
+                return true;
+            }
+        }
+        String s = iterator.next();
+        termAtt.setTermBuffer(s);
+        return true;
+    }
 }
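The rewritten filter keeps an Iterator over the normal forms of the current input term and emits one form per incrementToken() call, instead of popping a stack of cloned Tokens. A small usage sketch of the new chain, illustrative only: it mirrors MorphlogyAnalayzer.tokenStream() and assumes a ready LuceneMorphology instance, whose construction is not part of this diff.

import java.io.Reader;
import java.io.StringReader;

import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.morphology.LuceneMorphology;
import org.apache.lucene.morphology.analayzer.MorphlogyFilter;
import org.apache.lucene.util.Version;

public class MorphlogyFilterSketch {
    // Builds the same tokenizer/filter chain as MorphlogyAnalayzer and prints
    // every emitted term; for words the morphology dictionary recognises,
    // MorphlogyFilter emits their normal forms.
    static void demo(LuceneMorphology luceneMorph, String text) throws Exception {
        Reader reader = new StringReader(text);
        TokenStream stream = new StandardTokenizer(Version.LUCENE_30, reader);
        stream = new StandardFilter(stream);
        stream = new LowerCaseFilter(stream);
        stream = new MorphlogyFilter(stream, luceneMorph);
        while (stream.incrementToken()) {
            System.out.println(stream.getAttribute(TermAttribute.class).term());
        }
        stream.close();
    }
}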
pom.xml
@@ -1,5 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
     <modelVersion>4.0.0</modelVersion>
     <groupId>org.apache.lucene.morphology</groupId>
     <artifactId>morphology</artifactId>
@@ -49,7 +50,7 @@
         <dependency>
             <groupId>org.apache.lucene</groupId>
             <artifactId>lucene-core</artifactId>
-            <version>2.4.1</version>
+            <version>3.0.0</version>
         </dependency>
     </dependencies>
 
@@ -15,8 +15,8 @@
  */
 package org.apache.lucene.morphology.russian;
 
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import static org.hamcrest.Matchers.equalTo;
 import static org.junit.Assert.assertThat;
 import org.junit.Test;
@@ -43,21 +43,12 @@ public class RussianAnalayzerTest {
         stream = this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/russian-analayzer-data.txt");
 
         InputStreamReader reader = new InputStreamReader(stream, "UTF-8");
-        final Token reusableToken = new Token();
-
-        Token nextToken;
-        TokenStream in = morphlogyAnalayzer.tokenStream(null, reader);
+        TokenStream tokenStream = morphlogyAnalayzer.tokenStream(null, reader);
         HashSet<String> result = new HashSet<String>();
-        for (; ;) {
-            nextToken = in.next(reusableToken);
-
-            if (nextToken == null) {
-                break;
-            }
-
-            result.add(nextToken.term());
-            //
-
+        while (tokenStream.incrementToken()) {
+            TermAttribute attribute1 = tokenStream.getAttribute(TermAttribute.class);
+            result.add(attribute1.term());
         }
 
         stream.close();