Compare commits
	
		
			4 Commits
		
	
	
		
			morphology
			...
			ambiguousl
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | f095cbe7c0 | ||
|   | 3b2e48821a | ||
|   | 6ca2b27781 | ||
|   | 393665f08a | 
							
								
								
									
										70
									
								
								README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										70
									
								
								README.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,70 @@ | ||||
| # Russian Morphology for lucene | ||||
|  | ||||
| Russian and English morphology for Java and the Lucene 3.0 framework, based on the open-source dictionary from the [АОТ](http://aot.ru) site. It uses dictionary-based morphology with some heuristics for unknown words. It supports homonyms: for example, for the Russian word "вина" it gives two variants, "вино" and "вина".  | ||||
|  | ||||
|  | ||||
| ### How to use | ||||
|  | ||||
| First download  | ||||
| [morph-1.1.jar](https://bintray.com/artifact/download/akuznetsov/russianmorphology/org/apache/lucene/morphology/morph/1.1/morph-1.1.jar)   | ||||
| and add it to your classpath. Then download the [Russian](https://bintray.com/artifact/download/akuznetsov/russianmorphology/org/apache/lucene/morphology/russian/1.1/russian-1.1.jar) or  | ||||
| [English](https://bintray.com/artifact/download/akuznetsov/russianmorphology/org/apache/lucene/morphology/english/1.1/english-1.1.jar) package.  | ||||
|  | ||||
| If you use Maven, you can add the dependencies  | ||||
|  | ||||
|         <dependency> | ||||
|             <groupId>org.apache.lucene.morphology</groupId> | ||||
|             <artifactId>russian</artifactId> | ||||
|             <version>1.1</version> | ||||
|         </dependency> | ||||
|  | ||||
|  | ||||
|         <dependency> | ||||
|             <groupId>org.apache.lucene.morphology</groupId> | ||||
|             <artifactId>english</artifactId> | ||||
|             <version>1.1</version> | ||||
|         </dependency> | ||||
|  | ||||
| Don't forget to add a link to the repository | ||||
|  | ||||
|  | ||||
|     <repositories> | ||||
|     ............... | ||||
|       <repository> | ||||
|         <snapshots> | ||||
|           <enabled>false</enabled> | ||||
|         </snapshots> | ||||
|         <id>bintray-akuznetsov-russianmorphology</id> | ||||
|         <name>bintray</name> | ||||
|         <url>http://dl.bintray.com/akuznetsov/russianmorphology</url> | ||||
|       </repository> | ||||
|     </repositories> | ||||
|  | ||||
|  | ||||
|  | ||||
| Now you can create a Lucene Analyzer  | ||||
|  | ||||
|  | ||||
|       RussianAnalyzer russian = new RussianAnalyzer(); | ||||
|       EnglishAnalyzer english = new EnglishAnalyzer(); | ||||
|  | ||||
|  | ||||
| You can write your own analyzer using the filter that converts each word into its correct forms.  | ||||
|  | ||||
|       LuceneMorphology luceneMorph = new EnglishLuceneMorphology(); | ||||
|       TokenStream tokenStream = new MorphologyFilter(result, luceneMorph); | ||||
|  | ||||
| Because a LuceneMorphology instance usually holds a lot of data needed for its functionality, it is better not to create a new one for each MorphologyFilter; create it once and reuse it. | ||||
|  | ||||
| Also, if you need to get a list of base forms of a word, you can use the following example  | ||||
|  | ||||
|  | ||||
|      LuceneMorphology luceneMorph = new EnglishLuceneMorphology(); | ||||
|      List<String> wordBaseForms = luceneMorph.getMorphInfo(word); | ||||
|  | ||||
|  | ||||
| ### Restrictions | ||||
|    | ||||
|   * It works only with UTF-8. | ||||
|   * It assumes that the letters е and ё are the same. | ||||
|   * Word forms with prefixes, like "наибольший", are treated as separate words.  | ||||
							
								
								
									
										36
									
								
								context/pom.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								context/pom.xml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,36 @@ | ||||
| <?xml version="1.0"?> | ||||
| <project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" | ||||
|          xmlns="http://maven.apache.org/POM/4.0.0" | ||||
|          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"> | ||||
|     <modelVersion>4.0.0</modelVersion> | ||||
|     <parent> | ||||
|         <groupId>org.apache.lucene.morphology</groupId> | ||||
|         <artifactId>morphology</artifactId> | ||||
|         <version>1.2-SNAPSHOT</version> | ||||
|     </parent> | ||||
|     <groupId>org.apache.lucene.morphology</groupId> | ||||
|     <artifactId>context</artifactId> | ||||
|     <version>1.0-SNAPSHOT</version> | ||||
|     <name>context</name> | ||||
|     <url>http://maven.apache.org</url> | ||||
|     <dependencies> | ||||
|         <dependency> | ||||
|             <groupId>junit</groupId> | ||||
|             <artifactId>junit</artifactId> | ||||
|             <version>4.8.2</version> | ||||
|             <scope>test</scope> | ||||
|         </dependency> | ||||
|         <dependency> | ||||
|             <groupId>org.apache.lucene.morphology</groupId> | ||||
|             <artifactId>russian</artifactId> | ||||
|             <version>1.2-SNAPSHOT</version> | ||||
|             <scope>test</scope> | ||||
|         </dependency> | ||||
|         <dependency> | ||||
|             <groupId>org.apache.lucene.morphology</groupId> | ||||
|             <artifactId>english</artifactId> | ||||
|             <version>1.2-SNAPSHOT</version> | ||||
|             <scope>test</scope> | ||||
|         </dependency> | ||||
|     </dependencies> | ||||
| </project> | ||||
| @@ -0,0 +1,52 @@ | ||||
| /** | ||||
|  * Copyright 2015 Alexander Kuznetsov | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|  * you may not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  *     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
| package org.apache.lucene.morphology.context; | ||||
|  | ||||
| import org.apache.lucene.analysis.Analyzer; | ||||
| import org.apache.lucene.analysis.TokenStream; | ||||
| import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; | ||||
| import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; | ||||
|  | ||||
| import java.io.ByteArrayInputStream; | ||||
| import java.io.IOException; | ||||
| import java.io.InputStreamReader; | ||||
| import java.util.ArrayList; | ||||
| import java.util.LinkedList; | ||||
| import java.util.List; | ||||
|  | ||||
| public class CalculateContextItem { | ||||
|  | ||||
|     public List<ContextItem> createContextItems(String text) throws IOException { | ||||
|         Analyzer statAnalyzer = new StatAnalyzer(); | ||||
|         InputStreamReader reader = new InputStreamReader(new ByteArrayInputStream("принеси мне вина на новый год? - и что я жду тебя, где вино".getBytes()), "UTF-8"); | ||||
|  | ||||
|  | ||||
| //        new RussianMorphology(); | ||||
|  | ||||
|         TokenStream tokenStream = statAnalyzer.tokenStream(null, reader); | ||||
|         tokenStream.reset(); | ||||
|  | ||||
|         List<List<String>> listedLink =  new LinkedList<>(); | ||||
|         while (tokenStream.incrementToken()) { | ||||
|             CharTermAttribute charTerm = tokenStream.getAttribute(CharTermAttribute.class); | ||||
|             PositionIncrementAttribute position = tokenStream.getAttribute(PositionIncrementAttribute.class); | ||||
|  | ||||
|  | ||||
|         } | ||||
|  | ||||
|         return null; | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,80 @@ | ||||
| /** | ||||
|  * Copyright 2015 Alexander Kuznetsov | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|  * you may not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  *     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
| package org.apache.lucene.morphology.context; | ||||
|  | ||||
| import java.util.Arrays; | ||||
|  | ||||
| public class ContextItem implements Comparable<ContextItem> { | ||||
|     String[][] morphInfo; | ||||
|  | ||||
|     public ContextItem(String[][] morphInfo) { | ||||
|         this.morphInfo = morphInfo; | ||||
|     } | ||||
|  | ||||
|     public String[][] getMorphInfo() { | ||||
|         return morphInfo; | ||||
|     } | ||||
|  | ||||
|     public void setMorphInfo(String[][] morphInfo) { | ||||
|         this.morphInfo = morphInfo; | ||||
|     } | ||||
|  | ||||
|     public int hashCode() { | ||||
|         int h = 0; | ||||
|         for (String[] m : morphInfo) { | ||||
|             for (String s : m) { | ||||
|                 h = 31 * h + s.hashCode(); | ||||
|             } | ||||
|         } | ||||
|         return h; | ||||
|     } | ||||
|  | ||||
|  | ||||
|     @Override | ||||
|     public boolean equals(Object o) { | ||||
|         if (this == o) return true; | ||||
|         if (o == null || getClass() != o.getClass()) return false; | ||||
|  | ||||
|         ContextItem that = (ContextItem) o; | ||||
|  | ||||
|         if (that.morphInfo.length != this.morphInfo.length) { | ||||
|             return false; | ||||
|         } | ||||
|         for (int i = 0; i < morphInfo.length; i++) { | ||||
|             if (!Arrays.equals(morphInfo[i], that.morphInfo[i])) { | ||||
|                 return false; | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         return true; | ||||
|     } | ||||
|  | ||||
|  | ||||
|     @Override | ||||
|     public int compareTo(ContextItem o) { | ||||
|         int i = o.morphInfo.length - morphInfo.length; | ||||
|         if (i != 0) return i; | ||||
|         for (int j = 0; j < morphInfo.length; j++) { | ||||
|             i = o.morphInfo[j].length - morphInfo[j].length; | ||||
|             if (i != 0) return i; | ||||
|             for (int k = 0; k < morphInfo[j].length; k++) { | ||||
|                 i = morphInfo[j][k].compareTo(o.morphInfo[j][k]); | ||||
|                 if (i != 0) return i; | ||||
|             } | ||||
|         } | ||||
|         return 0; | ||||
|     } | ||||
| } | ||||
| @@ -0,0 +1,37 @@ | ||||
| /** | ||||
|  * Copyright 2015 Alexander Kuznetsov | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|  * you may not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  *     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
| package org.apache.lucene.morphology.context; | ||||
|  | ||||
/**
 * Mutable holder that pairs an array of morphological-information strings with
 * a probability value. Both fields start at their Java defaults
 * ({@code null} and {@code 0.0}) and are stored exactly as given.
 */
public class ContextStats {
    String[] morphInfo;
    double prob;

    /**
     * @return the stored probability value
     */
    public double getProb() {
        return this.prob;
    }

    /**
     * @param prob the probability value to store
     */
    public void setProb(double prob) {
        this.prob = prob;
    }

    /**
     * @return the stored array itself (not a copy), or {@code null} if none was set
     */
    public String[] getMorphInfo() {
        return this.morphInfo;
    }

    /**
     * @param morphInfo the array to store; kept by reference
     */
    public void setMorphInfo(String[] morphInfo) {
        this.morphInfo = morphInfo;
    }
}
| @@ -0,0 +1,21 @@ | ||||
| /** | ||||
|  * Copyright 2015 Alexander Kuznetsov | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|  * you may not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  *     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
| package org.apache.lucene.morphology.context; | ||||
|  | ||||
/**
 * Empty placeholder class: it declares no fields, constructors or methods yet.
 */
public class ProbClalucator {
}
| @@ -0,0 +1,116 @@ | ||||
| /** | ||||
|  * Copyright 2015 Alexander Kuznetsov | ||||
|  * | ||||
|  * Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|  * you may not use this file except in compliance with the License. | ||||
|  * You may obtain a copy of the License at | ||||
|  * | ||||
|  *     http://www.apache.org/licenses/LICENSE-2.0 | ||||
|  * | ||||
|  * Unless required by applicable law or agreed to in writing, software | ||||
|  * distributed under the License is distributed on an "AS IS" BASIS, | ||||
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
|  * See the License for the specific language governing permissions and | ||||
|  * limitations under the License. | ||||
|  */ | ||||
| package org.apache.lucene.morphology.context; | ||||
|  | ||||
| import org.apache.lucene.analysis.Tokenizer; | ||||
| import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; | ||||
| import org.apache.lucene.analysis.tokenattributes.CharTermAttributeImpl; | ||||
| import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; | ||||
| import org.apache.lucene.util.AttributeFactory; | ||||
|  | ||||
| import java.io.BufferedReader; | ||||
| import java.io.IOException; | ||||
| import java.util.Arrays; | ||||
| import java.util.HashSet; | ||||
| import java.util.LinkedList; | ||||
| import java.util.Set; | ||||
|  | ||||
| public class SimpleTokenizer extends Tokenizer { | ||||
|  | ||||
|     private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); | ||||
|     private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class); | ||||
|     LinkedList<String> terms; | ||||
|  | ||||
|     public final static Set<Character> SEPARATION_LETTERS = new HashSet<>(Arrays.asList(' ', '(', ')', ',', '|', '\t', | ||||
|             '\n', '"', ':', '!', '?', ',', ';', '•')); | ||||
|  | ||||
|     public final static Set<Character> MEANING_CHARS = new HashSet<>(Arrays.asList('(', ')', ',', '|', | ||||
|             '"', ':', '!', '?', ',', ';', '•', '.')); | ||||
|  | ||||
|     public SimpleTokenizer() { | ||||
|     } | ||||
|  | ||||
|     public SimpleTokenizer(AttributeFactory factory) { | ||||
|         super(factory); | ||||
|     } | ||||
|  | ||||
|     @Override | ||||
|     final public boolean incrementToken() throws IOException { | ||||
|         if (terms == null) { | ||||
|             createTeams(); | ||||
|         } | ||||
|         if (terms.size() > 0) { | ||||
|             String str = terms.poll(); | ||||
|             termAtt.setEmpty(); | ||||
|             termAtt.append(str); | ||||
|             posAtt.setPositionIncrement(1); | ||||
|             return true; | ||||
|         } | ||||
|         return false; | ||||
|     } | ||||
|  | ||||
|     private void createTeams() throws IOException { | ||||
|         terms = new LinkedList<>(); | ||||
|  | ||||
|         BufferedReader br = new BufferedReader(input); | ||||
|         StringBuilder sb = new StringBuilder(); | ||||
|         String s = ""; | ||||
|         while ((s = br.readLine()) != null) { | ||||
|             sb.append(s).append(" "); | ||||
|         } | ||||
|  | ||||
|         s = sb.toString(); | ||||
|         CharTermAttributeImpl currentTerm = new CharTermAttributeImpl(); | ||||
|         for (int i = 0; i < s.length(); i++) { | ||||
|             if (checkIsCharSepartor(s, i)) { | ||||
|                 if (checkIsCharHasMeaning(s, i)) { | ||||
|                     terms.add(s.substring(i, i + 1)); | ||||
|                 } | ||||
|                 String term = currentTerm.toString(); | ||||
|                 currentTerm.clear(); | ||||
|                 if (term.length() > 0) { | ||||
|                     terms.add(term); | ||||
|                 } | ||||
|             } else { | ||||
|                 currentTerm.append(s.charAt(i)); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     private boolean checkIsCharHasMeaning(String s, int i) { | ||||
|         return MEANING_CHARS.contains(s.charAt(i)); | ||||
|     } | ||||
|  | ||||
|     private boolean checkIsCharSepartor(String s, int i) { | ||||
|         char c = s.charAt(i); | ||||
|         if (SEPARATION_LETTERS.contains(c)) { | ||||
|             return true; | ||||
|         } | ||||
|         if ('.' == c | ||||
|                 && s.length() > i + 1 | ||||
|                 && SEPARATION_LETTERS.contains(s.charAt(i + 1))) { | ||||
|             return true; | ||||
|         } | ||||
|         return false; | ||||
|     } | ||||
|  | ||||
|     @Override | ||||
|     public void reset() throws IOException { | ||||
|         this.terms = null; | ||||
|         super.reset(); | ||||
|     } | ||||
|  | ||||
| } | ||||
| @@ -0,0 +1,34 @@ | ||||
| package org.apache.lucene.morphology.context; | ||||
|  | ||||
| import org.apache.lucene.analysis.Analyzer; | ||||
| import org.apache.lucene.analysis.TokenFilter; | ||||
| import org.apache.lucene.analysis.core.LowerCaseFilter; | ||||
| import org.apache.lucene.analysis.standard.StandardFilter; | ||||
|  | ||||
| import java.io.IOException; | ||||
| import java.io.Reader; | ||||
|  | ||||
| /** | ||||
|  * Created by akuznetsov on 6/24/15. | ||||
|  */ | ||||
| public class StatAnalyzer extends Analyzer { | ||||
|  | ||||
|  | ||||
|  | ||||
|     @Override | ||||
|     protected TokenStreamComponents createComponents(String s) { | ||||
|  | ||||
|         SimpleTokenizer src = new SimpleTokenizer(); | ||||
|         TokenFilter filter = new StandardFilter(src); | ||||
|         filter = new LowerCaseFilter(filter); | ||||
|  | ||||
|         return new TokenStreamComponents(src, filter) { | ||||
|             @Override | ||||
|             protected void setReader(final Reader reader) throws IOException { | ||||
|                 super.setReader(reader); | ||||
|             } | ||||
|         }; | ||||
|     } | ||||
|  | ||||
|  | ||||
| } | ||||
| @@ -0,0 +1,32 @@ | ||||
| package org.apache.lucene.morphology.context; | ||||
|  | ||||
|  | ||||
| import org.apache.lucene.analysis.Analyzer; | ||||
| import org.apache.lucene.analysis.TokenStream; | ||||
| import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; | ||||
| import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; | ||||
| import org.junit.Test; | ||||
|  | ||||
| import java.io.ByteArrayInputStream; | ||||
| import java.io.IOException; | ||||
| import java.io.InputStreamReader; | ||||
|  | ||||
| public class SimpleTokenizerTest { | ||||
|  | ||||
|     @Test | ||||
|     public void testSimpleTokenizer() throws IOException { | ||||
|         Analyzer statAnalyzer = new StatAnalyzer(); | ||||
|         InputStreamReader reader = new InputStreamReader(new ByteArrayInputStream("принеси мне вина на новый год? - и что я жду тебя, где вино".getBytes()), "UTF-8"); | ||||
|  | ||||
|         TokenStream tokenStream = statAnalyzer.tokenStream(null, reader); | ||||
|         tokenStream.reset(); | ||||
|  | ||||
|         boolean wordSeen = false; | ||||
|         while (tokenStream.incrementToken()) { | ||||
|             CharTermAttribute charTerm = tokenStream.getAttribute(CharTermAttribute.class); | ||||
|             PositionIncrementAttribute position = tokenStream.getAttribute(PositionIncrementAttribute.class); | ||||
|             System.out.println(charTerm.toString()); | ||||
|         } | ||||
|     } | ||||
|  | ||||
| } | ||||
| @@ -3,27 +3,27 @@ | ||||
|     <parent> | ||||
|         <artifactId>morphology</artifactId> | ||||
|         <groupId>org.apache.lucene.morphology</groupId> | ||||
|         <version>1.1</version> | ||||
|         <version>1.2-SNAPSHOT</version> | ||||
|     </parent> | ||||
|     <modelVersion>4.0.0</modelVersion> | ||||
|     <groupId>org.apache.lucene.morphology</groupId> | ||||
|     <artifactId>dictionary-reader</artifactId> | ||||
|     <name>dictionary-reader</name> | ||||
|     <version>1.1</version> | ||||
|     <version>1.2-SNAPSHOT</version> | ||||
|     <url>http://maven.apache.org</url> | ||||
|  | ||||
|     <dependencies> | ||||
|         <dependency> | ||||
|             <groupId>org.apache.lucene.morphology</groupId> | ||||
|             <artifactId>russian</artifactId> | ||||
|             <version>1.1</version> | ||||
|             <version>1.2-SNAPSHOT</version> | ||||
|         </dependency> | ||||
|  | ||||
|  | ||||
|         <dependency> | ||||
|             <groupId>org.apache.lucene.morphology</groupId> | ||||
|             <artifactId>english</artifactId> | ||||
|             <version>1.1</version> | ||||
|             <version>1.2-SNAPSHOT</version> | ||||
|         </dependency> | ||||
|     </dependencies> | ||||
|  | ||||
|   | ||||
| @@ -3,20 +3,20 @@ | ||||
|     <parent> | ||||
|         <artifactId>morphology</artifactId> | ||||
|         <groupId>org.apache.lucene.morphology</groupId> | ||||
|         <version>1.1</version> | ||||
|         <version>1.2-SNAPSHOT</version> | ||||
|     </parent> | ||||
|     <modelVersion>4.0.0</modelVersion> | ||||
|     <groupId>org.apache.lucene.morphology</groupId> | ||||
|     <artifactId>english</artifactId> | ||||
|     <name>english</name> | ||||
|     <version>1.1</version> | ||||
|     <version>1.2-SNAPSHOT</version> | ||||
|     <url>http://maven.apache.org</url> | ||||
|     <dependencies> | ||||
|  | ||||
|         <dependency> | ||||
|             <groupId>org.apache.lucene.morphology</groupId> | ||||
|             <artifactId>morph</artifactId> | ||||
|             <version>1.1</version> | ||||
|             <version>1.2-SNAPSHOT</version> | ||||
|         </dependency> | ||||
|  | ||||
|         <dependency> | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| Copyright 2009 Alexander Kuznetsov  | ||||
| Copyright ${project.inceptionYear} ${owner} | ||||
|  | ||||
| Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| you may not use this file except in compliance with the License. | ||||
|   | ||||
| @@ -3,13 +3,13 @@ | ||||
|     <parent> | ||||
|         <artifactId>morphology</artifactId> | ||||
|         <groupId>org.apache.lucene.morphology</groupId> | ||||
|         <version>1.1</version> | ||||
|         <version>1.2-SNAPSHOT</version> | ||||
|     </parent> | ||||
|     <modelVersion>4.0.0</modelVersion> | ||||
|     <groupId>org.apache.lucene.morphology</groupId> | ||||
|     <artifactId>morph</artifactId> | ||||
|     <name>morph</name> | ||||
|     <version>1.1</version> | ||||
|     <version>1.2-SNAPSHOT</version> | ||||
|     <url>http://maven.apache.org</url> | ||||
|  | ||||
| </project> | ||||
|   | ||||
| @@ -51,25 +51,6 @@ public class MorphologyAnalyzer extends Analyzer { | ||||
|     protected TokenStreamComponents createComponents(String s) { | ||||
|  | ||||
|         StandardTokenizer src = new StandardTokenizer(); | ||||
|         final PayloadEncoder encoder = new PayloadEncoder() { | ||||
|             @Override | ||||
|             public BytesRef encode(char[] buffer) { | ||||
|                 final Float payload = Float.valueOf(new String(buffer)); | ||||
|                 System.out.println(payload); | ||||
|                 final byte[] bytes = PayloadHelper.encodeFloat(payload); | ||||
|                 return new BytesRef(bytes, 0, bytes.length); | ||||
|             } | ||||
|  | ||||
|             @Override | ||||
|             public BytesRef encode(char[] buffer, int offset, int length) { | ||||
|  | ||||
|                 final Float payload = Float.valueOf(new String(buffer, offset, length)); | ||||
|                 System.out.println(payload); | ||||
|                 final byte[] bytes = PayloadHelper.encodeFloat(payload); | ||||
|  | ||||
|                 return new BytesRef(bytes, 0, bytes.length); | ||||
|             } | ||||
|         }; | ||||
|         TokenFilter filter = new StandardFilter(src); | ||||
|         filter = new LowerCaseFilter(filter); | ||||
|         filter = new MorphologyFilter(filter, luceneMorph); | ||||
|   | ||||
							
								
								
									
										81
									
								
								pom.xml
									
									
									
									
									
								
							
							
						
						
									
										81
									
								
								pom.xml
									
									
									
									
									
								
							| @@ -1,10 +1,11 @@ | ||||
| <?xml version="1.0" encoding="UTF-8"?> | ||||
| <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> | ||||
| <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | ||||
|          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> | ||||
|     <modelVersion>4.0.0</modelVersion> | ||||
|     <groupId>org.apache.lucene.morphology</groupId> | ||||
|     <artifactId>morphology</artifactId> | ||||
|     <packaging>pom</packaging> | ||||
|     <version>1.1</version> | ||||
|     <version>1.2-SNAPSHOT</version> | ||||
|     <name>morphology</name> | ||||
|     <url>http://maven.apache.org</url> | ||||
|  | ||||
| @@ -12,7 +13,7 @@ | ||||
|         <connection>scm:git:https://github.com/AKuznetsov/russianmorphology.git</connection> | ||||
|         <developerConnection>scm:git:git@github.com:AKuznetsov/russianmorphology.git</developerConnection> | ||||
|         <url>https://github.com/AKuznetsov/russianmorphology</url> | ||||
|         <tag>morphology-1.1</tag> | ||||
|         <tag>HEAD</tag> | ||||
|     </scm> | ||||
|  | ||||
|     <distributionManagement> | ||||
| @@ -65,19 +66,7 @@ | ||||
|         </repository> | ||||
|     </repositories> | ||||
|  | ||||
|     <pluginRepositories> | ||||
|         <pluginRepository> | ||||
|             <id>mc-release</id> | ||||
|             <name>maven-license-plugin repository of releases</name> | ||||
|             <url>http://mc-repo.googlecode.com/svn/maven2/releases</url> | ||||
|             <snapshots> | ||||
|                 <enabled>false</enabled> | ||||
|             </snapshots> | ||||
|             <releases> | ||||
|                 <enabled>true</enabled> | ||||
|             </releases> | ||||
|         </pluginRepository> | ||||
|     </pluginRepositories> | ||||
|  | ||||
|     <build> | ||||
|         <plugins> | ||||
|             <plugin> | ||||
| @@ -94,34 +83,41 @@ | ||||
|                 <groupId>org.apache.maven.plugins</groupId> | ||||
|                 <artifactId>maven-compiler-plugin</artifactId> | ||||
|                 <configuration> | ||||
|                     <source>1.5</source> | ||||
|                     <target>1.5</target> | ||||
|                     <source>1.7</source> | ||||
|                     <target>1.7</target> | ||||
|                 </configuration> | ||||
|             </plugin> | ||||
|             <plugin>                <!--                 usage: http://code.google.com/p/maven-license-plugin/wiki/HowTo                --> | ||||
|                 <artifactId>maven-license-plugin</artifactId> | ||||
|                 <groupId>com.mathieucarbou.mojo</groupId> | ||||
|                 <configuration> | ||||
|                     <basedir>${project.parent.basedir}</basedir> | ||||
|                     <header>etc/header.txt</header> | ||||
|                     <excludes> | ||||
|                         <exclude>**/*.txt</exclude> | ||||
|                         <exclude>**/*.info</exclude> | ||||
|                         <exclude>**/pom.xml</exclude> | ||||
|                     </excludes> | ||||
|                     <includes> | ||||
|                         <include>**/src/**</include> | ||||
|                     </includes> | ||||
|                 </configuration> | ||||
|                 <executions> | ||||
|                     <execution> | ||||
|                         <phase>test</phase> | ||||
|                         <goals> | ||||
|                             <goal>check</goal> | ||||
|                         </goals> | ||||
|                     </execution> | ||||
|                 </executions> | ||||
|             </plugin> | ||||
|             <!--<plugin>                <!–                 usage: http://code.google.com/p/maven-license-plugin/wiki/HowTo                –>--> | ||||
|  | ||||
|                 <!--<groupId>com.mycila</groupId>--> | ||||
|                 <!--<artifactId>license-maven-plugin</artifactId>--> | ||||
|                 <!--<version>2.11</version>--> | ||||
|  | ||||
|                 <!--<configuration>--> | ||||
|                     <!--<properties>--> | ||||
|                         <!--<owner>Alexander Kuznetsov</owner>--> | ||||
|                         <!--<!–<email>mathieu.carbou@gmail.com</email>–>--> | ||||
|                     <!--</properties>--> | ||||
|                     <!--<basedir>${project.parent.basedir}</basedir>--> | ||||
|                     <!--<header>etc/header.txt</header>--> | ||||
|                     <!--<excludes>--> | ||||
|                         <!--<exclude>**/*.txt</exclude>--> | ||||
|                         <!--<exclude>**/*.info</exclude>--> | ||||
|                         <!--<exclude>**/pom.xml</exclude>--> | ||||
|                     <!--</excludes>--> | ||||
|                     <!--<includes>--> | ||||
|                         <!--<include>**/src/**</include>--> | ||||
|                     <!--</includes>--> | ||||
|                 <!--</configuration>--> | ||||
|                 <!--<executions>--> | ||||
|                     <!--<execution>--> | ||||
|                         <!--<phase>test</phase>--> | ||||
|                         <!--<goals>--> | ||||
|                             <!--<goal>check</goal>--> | ||||
|                         <!--</goals>--> | ||||
|                     <!--</execution>--> | ||||
|                 <!--</executions>--> | ||||
|             <!--</plugin>--> | ||||
|         </plugins> | ||||
|     </build> | ||||
|     <profiles> | ||||
| @@ -160,5 +156,6 @@ | ||||
|         <module>dictionary-reader</module> | ||||
|         <module>russian</module> | ||||
|         <module>english</module> | ||||
|         <module>context</module> | ||||
|     </modules> | ||||
| </project> | ||||
| @@ -3,13 +3,13 @@ | ||||
|     <parent> | ||||
|         <artifactId>morphology</artifactId> | ||||
|         <groupId>org.apache.lucene.morphology</groupId> | ||||
|         <version>1.1</version> | ||||
|         <version>1.2-SNAPSHOT</version> | ||||
|     </parent> | ||||
|     <modelVersion>4.0.0</modelVersion> | ||||
|     <groupId>org.apache.lucene.morphology</groupId> | ||||
|     <artifactId>russian</artifactId> | ||||
|     <name>russian</name> | ||||
|     <version>1.1</version> | ||||
|     <version>1.2-SNAPSHOT</version> | ||||
|     <url>http://maven.apache.org</url> | ||||
|     <dependencies> | ||||
|  | ||||
| @@ -17,7 +17,7 @@ | ||||
|         <dependency> | ||||
|             <groupId>org.apache.lucene.morphology</groupId> | ||||
|             <artifactId>morph</artifactId> | ||||
|             <version>1.1</version> | ||||
|             <version>1.2-SNAPSHOT</version> | ||||
|         </dependency> | ||||
|  | ||||
|         <dependency> | ||||
|   | ||||
		Reference in New Issue
	
	Block a user