adding morph classes for language
git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@64 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
parent
c858d2560a
commit
9ee47e2f99
@ -19,7 +19,7 @@ package org.apache.lucene.morphology.dictionary;
|
||||
|
||||
import org.apache.lucene.morphology.Heuristic;
|
||||
import org.apache.lucene.morphology.LetterDecoderEncoder;
|
||||
import org.apache.lucene.morphology.Morph;
|
||||
import org.apache.lucene.morphology.Morphology;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
@ -119,8 +119,8 @@ public class StatiticsCollector implements WordProccessor {
|
||||
prevSet = currentSet;
|
||||
}
|
||||
}
|
||||
Morph morph = new Morph(ints, rulesId, heuristics, grammaReader.getGrammaInfoAsArray());
|
||||
morph.writeToFile(fileName);
|
||||
Morphology morphology = new Morphology(ints, rulesId, heuristics, grammaReader.getGrammaInfoAsArray());
|
||||
morphology.writeToFile(fileName);
|
||||
}
|
||||
|
||||
private String revertWord(String s) {
|
||||
|
@ -28,7 +28,7 @@ import java.util.HashSet;
|
||||
public class EnglishHeuristicBuilder {
|
||||
public static void main(String[] args) throws IOException {
|
||||
|
||||
GrammaReader grammaInfo = new GrammaReader("dictonary/Dicts/Morph/egramtab.tab");
|
||||
GrammaReader grammaInfo = new GrammaReader("dictonary/Dicts/Morphology/egramtab.tab");
|
||||
DictonaryReader dictonaryReader = new DictonaryReader("dictonary/Dicts/SrcMorph/EngSrc/morphs.mrd", new HashSet<String>());
|
||||
|
||||
EnglishLetterDecoderEncoder decoderEncoder = new EnglishLetterDecoderEncoder();
|
||||
|
@ -27,7 +27,7 @@ import java.util.HashSet;
|
||||
|
||||
public class RussianHeuristicBuilder {
|
||||
public static void main(String[] args) throws IOException {
|
||||
GrammaReader grammaInfo = new GrammaReader("dictonary/Dicts/Morph/rgramtab.tab");
|
||||
GrammaReader grammaInfo = new GrammaReader("dictonary/Dicts/Morphology/rgramtab.tab");
|
||||
DictonaryReader dictonaryReader = new DictonaryReader("dictonary/Dicts/SrcMorph/RusSrc/morphs.mrd", new HashSet<String>());
|
||||
|
||||
RussianLetterDecoderEncoder decoderEncoder = new RussianLetterDecoderEncoder();
|
||||
|
@ -22,7 +22,6 @@ import org.apache.lucene.morphology.WrongCharaterException;
|
||||
import java.util.ArrayList;
|
||||
|
||||
|
||||
//todo extract super class for the methods shared with the Russian letter decoder
|
||||
public class EnglishLetterDecoderEncoder implements LetterDecoderEncoder {
|
||||
public static final int ENGLISH_SMALL_LETTER_OFFSET = 96;
|
||||
static public int SUFFIX_LENGTH = 6;
|
||||
|
@ -0,0 +1,28 @@
|
||||
/**
|
||||
* Copyright 2009 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.morphology.english;
|
||||
|
||||
import org.apache.lucene.morphology.LuceneMorphology;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
|
||||
public class EnglishLuceneMorphology extends LuceneMorphology {
|
||||
|
||||
public EnglishLuceneMorphology() throws IOException {
|
||||
super(EnglishLuceneMorphology.class.getResourceAsStream("/org/apache/lucene/morphology/english/morph.info"), new EnglishLetterDecoderEncoder());
|
||||
}
|
||||
}
|
@ -0,0 +1,28 @@
|
||||
/**
|
||||
* Copyright 2009 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.morphology.english;
|
||||
|
||||
import org.apache.lucene.morphology.Morphology;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
|
||||
public class EnglishMorphology extends Morphology {
|
||||
|
||||
public EnglishMorphology() throws IOException {
|
||||
super(EnglishLuceneMorphology.class.getResourceAsStream("/org/apache/lucene/morphology/english/morph.info"), new EnglishLetterDecoderEncoder());
|
||||
}
|
||||
}
|
@ -15,7 +15,7 @@
|
||||
*/
|
||||
package org.apache.lucene.morphology.english;
|
||||
|
||||
import org.apache.lucene.morphology.LuceneMorph;
|
||||
import org.apache.lucene.morphology.LuceneMorphology;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
import static org.junit.Assert.assertThat;
|
||||
import org.junit.Before;
|
||||
@ -29,11 +29,11 @@ import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
public class EnglishLuceneMorphTest {
|
||||
private LuceneMorph luceneMorph;
|
||||
private LuceneMorphology luceneMorph;
|
||||
|
||||
@Before
|
||||
public void setUp() throws IOException {
|
||||
luceneMorph = new LuceneMorph(this.getClass().getResourceAsStream("/org/apache/lucene/morphology/english/morph.info"), new EnglishLetterDecoderEncoder());
|
||||
luceneMorph = new LuceneMorphology(this.getClass().getResourceAsStream("/org/apache/lucene/morphology/english/morph.info"), new EnglishLetterDecoderEncoder());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -23,13 +23,13 @@ import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
public class LuceneMorph extends Morph {
|
||||
public class LuceneMorphology extends Morphology {
|
||||
|
||||
public LuceneMorph(String fileName, LetterDecoderEncoder decoderEncoder) throws IOException {
|
||||
public LuceneMorphology(String fileName, LetterDecoderEncoder decoderEncoder) throws IOException {
|
||||
super(fileName, decoderEncoder);
|
||||
}
|
||||
|
||||
public LuceneMorph(InputStream inputStream, LetterDecoderEncoder decoderEncoder) throws IOException {
|
||||
public LuceneMorphology(InputStream inputStream, LetterDecoderEncoder decoderEncoder) throws IOException {
|
||||
super(inputStream, decoderEncoder);
|
||||
}
|
||||
|
@ -21,7 +21,7 @@ import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
public class Morph {
|
||||
public class Morphology {
|
||||
protected int[][] separators;
|
||||
protected short[] rulesId;
|
||||
protected Heuristic[][] rules;
|
||||
@ -29,17 +29,17 @@ public class Morph {
|
||||
protected LetterDecoderEncoder decoderEncoder;
|
||||
|
||||
|
||||
public Morph(String fileName, LetterDecoderEncoder decoderEncoder) throws IOException {
|
||||
public Morphology(String fileName, LetterDecoderEncoder decoderEncoder) throws IOException {
|
||||
readFromFile(fileName);
|
||||
this.decoderEncoder = decoderEncoder;
|
||||
}
|
||||
|
||||
public Morph(InputStream inputStream, LetterDecoderEncoder decoderEncoder) throws IOException {
|
||||
public Morphology(InputStream inputStream, LetterDecoderEncoder decoderEncoder) throws IOException {
|
||||
readFromInputStream(inputStream);
|
||||
this.decoderEncoder = decoderEncoder;
|
||||
}
|
||||
|
||||
public Morph(int[][] separators, short[] rulesId, Heuristic[][] rules, String[] grammaInfo) {
|
||||
public Morphology(int[][] separators, short[] rulesId, Heuristic[][] rules, String[] grammaInfo) {
|
||||
this.separators = separators;
|
||||
this.rulesId = rulesId;
|
||||
this.rules = rules;
|
@ -22,21 +22,21 @@ import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.morphology.LetterDecoderEncoder;
|
||||
import org.apache.lucene.morphology.LuceneMorph;
|
||||
import org.apache.lucene.morphology.LuceneMorphology;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.Reader;
|
||||
|
||||
public class MorphlogyAnalayzer extends Analyzer {
|
||||
private LuceneMorph luceneMorph;
|
||||
private LuceneMorphology luceneMorph;
|
||||
|
||||
public MorphlogyAnalayzer(String pathToMorph, LetterDecoderEncoder letterDecoderEncoder) throws IOException {
|
||||
luceneMorph = new LuceneMorph(pathToMorph, letterDecoderEncoder);
|
||||
luceneMorph = new LuceneMorphology(pathToMorph, letterDecoderEncoder);
|
||||
}
|
||||
|
||||
public MorphlogyAnalayzer(InputStream inputStream, LetterDecoderEncoder letterDecoderEncoder) throws IOException {
|
||||
luceneMorph = new LuceneMorph(inputStream, letterDecoderEncoder);
|
||||
luceneMorph = new LuceneMorphology(inputStream, letterDecoderEncoder);
|
||||
}
|
||||
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
|
@ -19,7 +19,7 @@ package org.apache.lucene.morphology.analayzer;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.morphology.LuceneMorph;
|
||||
import org.apache.lucene.morphology.LuceneMorphology;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
@ -27,9 +27,9 @@ import java.util.List;
|
||||
|
||||
|
||||
public class MorphlogyFilter extends TokenFilter {
|
||||
private LuceneMorph luceneMorph;
|
||||
private LuceneMorphology luceneMorph;
|
||||
|
||||
public MorphlogyFilter(TokenStream tokenStream, LuceneMorph luceneMorph) {
|
||||
public MorphlogyFilter(TokenStream tokenStream, LuceneMorphology luceneMorph) {
|
||||
super(tokenStream);
|
||||
this.luceneMorph = luceneMorph;
|
||||
}
|
||||
|
@ -0,0 +1,27 @@
|
||||
/**
|
||||
* Copyright 2009 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.morphology.russian;
|
||||
|
||||
import org.apache.lucene.morphology.LuceneMorphology;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class RussianLuceneMorphology extends LuceneMorphology {
|
||||
|
||||
public RussianLuceneMorphology() throws IOException {
|
||||
super(RussianLuceneMorphology.class.getResourceAsStream("/org/apache/lucene/morphology/english/morph.info"), new RussianLetterDecoderEncoder());
|
||||
}
|
||||
}
|
@ -0,0 +1,27 @@
|
||||
/**
|
||||
* Copyright 2009 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.morphology.russian;
|
||||
|
||||
import org.apache.lucene.morphology.Morphology;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class RussianMorphology extends Morphology {
|
||||
|
||||
public RussianMorphology() throws IOException {
|
||||
super(RussianMorphology.class.getResourceAsStream("/org/apache/lucene/morphology/english/morph.info"), new RussianLetterDecoderEncoder());
|
||||
}
|
||||
}
|
@ -15,7 +15,7 @@
|
||||
*/
|
||||
package org.apache.lucene.morphology.russian;
|
||||
|
||||
import org.apache.lucene.morphology.LuceneMorph;
|
||||
import org.apache.lucene.morphology.LuceneMorphology;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
import static org.junit.Assert.assertThat;
|
||||
import org.junit.Before;
|
||||
@ -29,11 +29,11 @@ import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
public class RussianLuceneMorphTest {
|
||||
private LuceneMorph luceneMorph;
|
||||
private LuceneMorphology luceneMorph;
|
||||
|
||||
@Before
|
||||
public void setUp() throws IOException {
|
||||
luceneMorph = new LuceneMorph(this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/morph.info"), new RussianLetterDecoderEncoder());
|
||||
luceneMorph = new LuceneMorphology(this.getClass().getResourceAsStream("/org/apache/lucene/morphology/russian/morph.info"), new RussianLetterDecoderEncoder());
|
||||
}
|
||||
|
||||
@Test
|
||||
|
Loading…
x
Reference in New Issue
Block a user