Compare commits
4 Commits
morphology
...
ambiguousl
Author | SHA1 | Date | |
---|---|---|---|
f095cbe7c0 | |||
3b2e48821a | |||
6ca2b27781 | |||
393665f08a |
70
README.md
Normal file
70
README.md
Normal file
@ -0,0 +1,70 @@
|
||||
# Russian Morphology for lucene
|
||||
|
||||
Russian and English morphology for Java and the Lucene 3.0 framework, based on the open source dictionary from the site [АОТ](http://aot.ru). It uses dictionary-based morphology with some heuristics for unknown words. It supports homonyms: for example, for the Russian word "вина" it gives two variants, "вино" and "вина".
|
||||
|
||||
|
||||
### How to use
|
||||
|
||||
First download
|
||||
[morph-1.1.jar](https://bintray.com/artifact/download/akuznetsov/russianmorphology/org/apache/lucene/morphology/morph/1.1/morph-1.1.jar)
|
||||
and add it to your class path. Then download the [Russian](https://bintray.com/artifact/download/akuznetsov/russianmorphology/org/apache/lucene/morphology/russian/1.1/russian-1.1.jar) or
|
||||
[English](https://bintray.com/artifact/download/akuznetsov/russianmorphology/org/apache/lucene/morphology/english/1.1/english-1.1.jar) package.
|
||||
|
||||
If you use Maven, you can add the dependencies
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>russian</artifactId>
|
||||
<version>1.1</version>
|
||||
</dependency>
|
||||
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>english</artifactId>
|
||||
<version>1.1</version>
|
||||
</dependency>
|
||||
|
||||
Don't forget to add a link to the repository
|
||||
|
||||
|
||||
<repositories>
|
||||
...............
|
||||
<repository>
|
||||
<snapshots>
|
||||
<enabled>false</enabled>
|
||||
</snapshots>
|
||||
<id>bintray-akuznetsov-russianmorphology</id>
|
||||
<name>bintray</name>
|
||||
<url>http://dl.bintray.com/akuznetsov/russianmorphology</url>
|
||||
</repository>
|
||||
</repositories>
|
||||
|
||||
|
||||
|
||||
Now you can create a Lucene Analyzer
|
||||
|
||||
|
||||
RussianAnalyzer russian = new RussianAnalyzer();
|
||||
EnglishAnalyzer english = new EnglishAnalyzer();
|
||||
|
||||
|
||||
You can write your own analyzer using a filter that converts a word into its correct forms.
|
||||
|
||||
LuceneMorphology luceneMorph = new EnglishLuceneMorphology();
|
||||
TokenStream tokenStream = new MorphologyFilter(result, luceneMorph);
|
||||
|
||||
Because LuceneMorphology usually holds a lot of data that it needs in order to work, it is better not to create this object for each MorphologyFilter.
|
||||
|
||||
Also, if you need to get a list of the base forms of a word, you can use the following example
|
||||
|
||||
|
||||
LuceneMorphology luceneMorph = new EnglishLuceneMorphology();
|
||||
List<String> wordBaseForms = luceneMorph.getMorphInfo(word);
|
||||
|
||||
|
||||
### Restrictions
|
||||
|
||||
* It works only with UTF-8.
|
||||
* It assumes that the letters е and ё are the same.
|
||||
* Word forms with prefixes, like "наибольший", are treated as separate words.
|
36
context/pom.xml
Normal file
36
context/pom.xml
Normal file
@ -0,0 +1,36 @@
|
||||
<?xml version="1.0"?>
|
||||
<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
|
||||
xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>morphology</artifactId>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
</parent>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>context</artifactId>
|
||||
<version>1.0-SNAPSHOT</version>
|
||||
<name>context</name>
|
||||
<url>http://maven.apache.org</url>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>4.8.2</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>russian</artifactId>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>english</artifactId>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
@ -0,0 +1,52 @@
|
||||
/**
|
||||
* Copyright 2015 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.morphology.context;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
public class CalculateContextItem {
|
||||
|
||||
public List<ContextItem> createContextItems(String text) throws IOException {
|
||||
Analyzer statAnalyzer = new StatAnalyzer();
|
||||
InputStreamReader reader = new InputStreamReader(new ByteArrayInputStream("принеси мне вина на новый год? - и что я жду тебя, где вино".getBytes()), "UTF-8");
|
||||
|
||||
|
||||
// new RussianMorphology();
|
||||
|
||||
TokenStream tokenStream = statAnalyzer.tokenStream(null, reader);
|
||||
tokenStream.reset();
|
||||
|
||||
List<List<String>> listedLink = new LinkedList<>();
|
||||
while (tokenStream.incrementToken()) {
|
||||
CharTermAttribute charTerm = tokenStream.getAttribute(CharTermAttribute.class);
|
||||
PositionIncrementAttribute position = tokenStream.getAttribute(PositionIncrementAttribute.class);
|
||||
|
||||
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
@ -0,0 +1,80 @@
|
||||
/**
|
||||
* Copyright 2015 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.morphology.context;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
/**
 * Holder for a two-dimensional array of morphological information strings.
 *
 * <p>Equality and hashing are based on the array contents. The natural ordering
 * puts items with more rows first, then (row by row) rows with more entries
 * first, and finally compares entries lexicographically.
 */
public class ContextItem implements Comparable<ContextItem> {
    String[][] morphInfo;

    /**
     * @param morphInfo the morphological information; the array is stored as is, not copied
     */
    public ContextItem(String[][] morphInfo) {
        this.morphInfo = morphInfo;
    }

    /**
     * @return the stored array itself (not a copy)
     */
    public String[][] getMorphInfo() {
        return morphInfo;
    }

    /**
     * @param morphInfo the new morphological information; stored as is, not copied
     */
    public void setMorphInfo(String[][] morphInfo) {
        this.morphInfo = morphInfo;
    }

    /**
     * Hashes all entries in row-major order with the usual {@code 31 * h + x} scheme.
     * Null rows are skipped and null entries contribute {@code 0}, so any two items
     * that {@link #equals(Object)} considers equal also hash alike.
     */
    @Override
    public int hashCode() {
        int h = 0;
        if (morphInfo == null) {
            return h;
        }
        for (String[] row : morphInfo) {
            if (row == null) {
                continue;
            }
            for (String s : row) {
                h = 31 * h + (s == null ? 0 : s.hashCode());
            }
        }
        return h;
    }

    /**
     * Two items are equal when they are of the same class and their arrays hold
     * equal strings at every position (null-safe at every level).
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        ContextItem that = (ContextItem) o;
        return Arrays.deepEquals(morphInfo, that.morphInfo);
    }

    /**
     * Compares by row count (more rows sort first), then per row by entry count
     * (more entries sort first), then by the entries themselves.
     *
     * @throws NullPointerException if either item contains a null array, row or entry
     */
    @Override
    public int compareTo(ContextItem o) {
        // Arguments are deliberately swapped: the larger array orders first.
        int result = Integer.compare(o.morphInfo.length, morphInfo.length);
        if (result != 0) {
            return result;
        }
        for (int row = 0; row < morphInfo.length; row++) {
            result = Integer.compare(o.morphInfo[row].length, morphInfo[row].length);
            if (result != 0) {
                return result;
            }
            for (int col = 0; col < morphInfo[row].length; col++) {
                result = morphInfo[row][col].compareTo(o.morphInfo[row][col]);
                if (result != 0) {
                    return result;
                }
            }
        }
        return 0;
    }
}
|
@ -0,0 +1,37 @@
|
||||
/**
|
||||
* Copyright 2015 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.morphology.context;
|
||||
|
||||
/**
 * Mutable bean pairing an array of morphological information strings with a
 * probability value.
 */
public class ContextStats {
    String[] morphInfo;
    double prob;

    /**
     * @return the probability value last set, {@code 0.0} if none was set
     */
    public double getProb() {
        return this.prob;
    }

    /**
     * @param prob the probability value to store
     */
    public void setProb(double prob) {
        this.prob = prob;
    }

    /**
     * @return the stored array itself (not a copy), {@code null} if none was set
     */
    public String[] getMorphInfo() {
        return this.morphInfo;
    }

    /**
     * @param morphInfo the array to store; kept as is, not copied
     */
    public void setMorphInfo(String[] morphInfo) {
        this.morphInfo = morphInfo;
    }
}
|
@ -0,0 +1,21 @@
|
||||
/**
|
||||
* Copyright 2015 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.morphology.context;
|
||||
|
||||
/**
 * Empty placeholder class: it declares no fields or methods yet.
 *
 * NOTE(review): the name looks like a misspelling of "ProbCalculator" — confirm
 * before anything starts depending on it.
 */
public class ProbClalucator {
|
||||
|
||||
|
||||
}
|
@ -0,0 +1,116 @@
|
||||
/**
|
||||
* Copyright 2015 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.morphology.context;
|
||||
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.util.AttributeFactory;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.Set;
|
||||
|
||||
public class SimpleTokenizer extends Tokenizer {
|
||||
|
||||
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
|
||||
private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
|
||||
LinkedList<String> terms;
|
||||
|
||||
public final static Set<Character> SEPARATION_LETTERS = new HashSet<>(Arrays.asList(' ', '(', ')', ',', '|', '\t',
|
||||
'\n', '"', ':', '!', '?', ',', ';', '•'));
|
||||
|
||||
public final static Set<Character> MEANING_CHARS = new HashSet<>(Arrays.asList('(', ')', ',', '|',
|
||||
'"', ':', '!', '?', ',', ';', '•', '.'));
|
||||
|
||||
public SimpleTokenizer() {
|
||||
}
|
||||
|
||||
public SimpleTokenizer(AttributeFactory factory) {
|
||||
super(factory);
|
||||
}
|
||||
|
||||
@Override
|
||||
final public boolean incrementToken() throws IOException {
|
||||
if (terms == null) {
|
||||
createTeams();
|
||||
}
|
||||
if (terms.size() > 0) {
|
||||
String str = terms.poll();
|
||||
termAtt.setEmpty();
|
||||
termAtt.append(str);
|
||||
posAtt.setPositionIncrement(1);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private void createTeams() throws IOException {
|
||||
terms = new LinkedList<>();
|
||||
|
||||
BufferedReader br = new BufferedReader(input);
|
||||
StringBuilder sb = new StringBuilder();
|
||||
String s = "";
|
||||
while ((s = br.readLine()) != null) {
|
||||
sb.append(s).append(" ");
|
||||
}
|
||||
|
||||
s = sb.toString();
|
||||
CharTermAttributeImpl currentTerm = new CharTermAttributeImpl();
|
||||
for (int i = 0; i < s.length(); i++) {
|
||||
if (checkIsCharSepartor(s, i)) {
|
||||
if (checkIsCharHasMeaning(s, i)) {
|
||||
terms.add(s.substring(i, i + 1));
|
||||
}
|
||||
String term = currentTerm.toString();
|
||||
currentTerm.clear();
|
||||
if (term.length() > 0) {
|
||||
terms.add(term);
|
||||
}
|
||||
} else {
|
||||
currentTerm.append(s.charAt(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private boolean checkIsCharHasMeaning(String s, int i) {
|
||||
return MEANING_CHARS.contains(s.charAt(i));
|
||||
}
|
||||
|
||||
private boolean checkIsCharSepartor(String s, int i) {
|
||||
char c = s.charAt(i);
|
||||
if (SEPARATION_LETTERS.contains(c)) {
|
||||
return true;
|
||||
}
|
||||
if ('.' == c
|
||||
&& s.length() > i + 1
|
||||
&& SEPARATION_LETTERS.contains(s.charAt(i + 1))) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() throws IOException {
|
||||
this.terms = null;
|
||||
super.reset();
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,34 @@
|
||||
package org.apache.lucene.morphology.context;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.core.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
/**
|
||||
* Created by akuznetsov on 6/24/15.
|
||||
*/
|
||||
public class StatAnalyzer extends Analyzer {
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
protected TokenStreamComponents createComponents(String s) {
|
||||
|
||||
SimpleTokenizer src = new SimpleTokenizer();
|
||||
TokenFilter filter = new StandardFilter(src);
|
||||
filter = new LowerCaseFilter(filter);
|
||||
|
||||
return new TokenStreamComponents(src, filter) {
|
||||
@Override
|
||||
protected void setReader(final Reader reader) throws IOException {
|
||||
super.setReader(reader);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -0,0 +1,32 @@
|
||||
package org.apache.lucene.morphology.context;
|
||||
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
|
||||
public class SimpleTokenizerTest {
|
||||
|
||||
@Test
|
||||
public void testSimpleTokenizer() throws IOException {
|
||||
Analyzer statAnalyzer = new StatAnalyzer();
|
||||
InputStreamReader reader = new InputStreamReader(new ByteArrayInputStream("принеси мне вина на новый год? - и что я жду тебя, где вино".getBytes()), "UTF-8");
|
||||
|
||||
TokenStream tokenStream = statAnalyzer.tokenStream(null, reader);
|
||||
tokenStream.reset();
|
||||
|
||||
boolean wordSeen = false;
|
||||
while (tokenStream.incrementToken()) {
|
||||
CharTermAttribute charTerm = tokenStream.getAttribute(CharTermAttribute.class);
|
||||
PositionIncrementAttribute position = tokenStream.getAttribute(PositionIncrementAttribute.class);
|
||||
System.out.println(charTerm.toString());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -3,27 +3,27 @@
|
||||
<parent>
|
||||
<artifactId>morphology</artifactId>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<version>1.1</version>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>dictionary-reader</artifactId>
|
||||
<name>dictionary-reader</name>
|
||||
<version>1.1</version>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
<url>http://maven.apache.org</url>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>russian</artifactId>
|
||||
<version>1.1</version>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
</dependency>
|
||||
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>english</artifactId>
|
||||
<version>1.1</version>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
|
@ -3,20 +3,20 @@
|
||||
<parent>
|
||||
<artifactId>morphology</artifactId>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<version>1.1</version>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>english</artifactId>
|
||||
<name>english</name>
|
||||
<version>1.1</version>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
<url>http://maven.apache.org</url>
|
||||
<dependencies>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>morph</artifactId>
|
||||
<version>1.1</version>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
|
@ -1,4 +1,4 @@
|
||||
Copyright 2009 Alexander Kuznetsov
|
||||
Copyright ${project.inceptionYear} ${owner}
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
|
@ -3,13 +3,13 @@
|
||||
<parent>
|
||||
<artifactId>morphology</artifactId>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<version>1.1</version>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>morph</artifactId>
|
||||
<name>morph</name>
|
||||
<version>1.1</version>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
<url>http://maven.apache.org</url>
|
||||
|
||||
</project>
|
||||
|
@ -51,25 +51,6 @@ public class MorphologyAnalyzer extends Analyzer {
|
||||
protected TokenStreamComponents createComponents(String s) {
|
||||
|
||||
StandardTokenizer src = new StandardTokenizer();
|
||||
final PayloadEncoder encoder = new PayloadEncoder() {
|
||||
@Override
|
||||
public BytesRef encode(char[] buffer) {
|
||||
final Float payload = Float.valueOf(new String(buffer));
|
||||
System.out.println(payload);
|
||||
final byte[] bytes = PayloadHelper.encodeFloat(payload);
|
||||
return new BytesRef(bytes, 0, bytes.length);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef encode(char[] buffer, int offset, int length) {
|
||||
|
||||
final Float payload = Float.valueOf(new String(buffer, offset, length));
|
||||
System.out.println(payload);
|
||||
final byte[] bytes = PayloadHelper.encodeFloat(payload);
|
||||
|
||||
return new BytesRef(bytes, 0, bytes.length);
|
||||
}
|
||||
};
|
||||
TokenFilter filter = new StandardFilter(src);
|
||||
filter = new LowerCaseFilter(filter);
|
||||
filter = new MorphologyFilter(filter, luceneMorph);
|
||||
|
83
pom.xml
83
pom.xml
@ -1,10 +1,11 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>morphology</artifactId>
|
||||
<packaging>pom</packaging>
|
||||
<version>1.1</version>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
<name>morphology</name>
|
||||
<url>http://maven.apache.org</url>
|
||||
|
||||
@ -12,7 +13,7 @@
|
||||
<connection>scm:git:https://github.com/AKuznetsov/russianmorphology.git</connection>
|
||||
<developerConnection>scm:git:git@github.com:AKuznetsov/russianmorphology.git</developerConnection>
|
||||
<url>https://github.com/AKuznetsov/russianmorphology</url>
|
||||
<tag>morphology-1.1</tag>
|
||||
<tag>HEAD</tag>
|
||||
</scm>
|
||||
|
||||
<distributionManagement>
|
||||
@ -64,20 +65,8 @@
|
||||
</snapshots>
|
||||
</repository>
|
||||
</repositories>
|
||||
|
||||
<pluginRepositories>
|
||||
<pluginRepository>
|
||||
<id>mc-release</id>
|
||||
<name>maven-license-plugin repository of releases</name>
|
||||
<url>http://mc-repo.googlecode.com/svn/maven2/releases</url>
|
||||
<snapshots>
|
||||
<enabled>false</enabled>
|
||||
</snapshots>
|
||||
<releases>
|
||||
<enabled>true</enabled>
|
||||
</releases>
|
||||
</pluginRepository>
|
||||
</pluginRepositories>
|
||||
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
@ -94,34 +83,41 @@
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<configuration>
|
||||
<source>1.5</source>
|
||||
<target>1.5</target>
|
||||
<source>1.7</source>
|
||||
<target>1.7</target>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin> <!-- usage: http://code.google.com/p/maven-license-plugin/wiki/HowTo -->
|
||||
<artifactId>maven-license-plugin</artifactId>
|
||||
<groupId>com.mathieucarbou.mojo</groupId>
|
||||
<configuration>
|
||||
<basedir>${project.parent.basedir}</basedir>
|
||||
<header>etc/header.txt</header>
|
||||
<excludes>
|
||||
<exclude>**/*.txt</exclude>
|
||||
<exclude>**/*.info</exclude>
|
||||
<exclude>**/pom.xml</exclude>
|
||||
</excludes>
|
||||
<includes>
|
||||
<include>**/src/**</include>
|
||||
</includes>
|
||||
</configuration>
|
||||
<executions>
|
||||
<execution>
|
||||
<phase>test</phase>
|
||||
<goals>
|
||||
<goal>check</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<!--<plugin> <!– usage: http://code.google.com/p/maven-license-plugin/wiki/HowTo –>-->
|
||||
|
||||
<!--<groupId>com.mycila</groupId>-->
|
||||
<!--<artifactId>license-maven-plugin</artifactId>-->
|
||||
<!--<version>2.11</version>-->
|
||||
|
||||
<!--<configuration>-->
|
||||
<!--<properties>-->
|
||||
<!--<owner>Alexander Kuznetsov</owner>-->
|
||||
<!--<!–<email>mathieu.carbou@gmail.com</email>–>-->
|
||||
<!--</properties>-->
|
||||
<!--<basedir>${project.parent.basedir}</basedir>-->
|
||||
<!--<header>etc/header.txt</header>-->
|
||||
<!--<excludes>-->
|
||||
<!--<exclude>**/*.txt</exclude>-->
|
||||
<!--<exclude>**/*.info</exclude>-->
|
||||
<!--<exclude>**/pom.xml</exclude>-->
|
||||
<!--</excludes>-->
|
||||
<!--<includes>-->
|
||||
<!--<include>**/src/**</include>-->
|
||||
<!--</includes>-->
|
||||
<!--</configuration>-->
|
||||
<!--<executions>-->
|
||||
<!--<execution>-->
|
||||
<!--<phase>test</phase>-->
|
||||
<!--<goals>-->
|
||||
<!--<goal>check</goal>-->
|
||||
<!--</goals>-->
|
||||
<!--</execution>-->
|
||||
<!--</executions>-->
|
||||
<!--</plugin>-->
|
||||
</plugins>
|
||||
</build>
|
||||
<profiles>
|
||||
@ -160,5 +156,6 @@
|
||||
<module>dictionary-reader</module>
|
||||
<module>russian</module>
|
||||
<module>english</module>
|
||||
<module>context</module>
|
||||
</modules>
|
||||
</project>
|
@ -3,13 +3,13 @@
|
||||
<parent>
|
||||
<artifactId>morphology</artifactId>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<version>1.1</version>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
</parent>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>russian</artifactId>
|
||||
<name>russian</name>
|
||||
<version>1.1</version>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
<url>http://maven.apache.org</url>
|
||||
<dependencies>
|
||||
|
||||
@ -17,7 +17,7 @@
|
||||
<dependency>
|
||||
<groupId>org.apache.lucene.morphology</groupId>
|
||||
<artifactId>morph</artifactId>
|
||||
<version>1.1</version>
|
||||
<version>1.2-SNAPSHOT</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
|
Reference in New Issue
Block a user