adding modules
git-svn-id: https://russianmorphology.googlecode.com/svn/trunk@49 d817d54c-26ab-11de-abc9-2f7d1455ff7a
This commit is contained in:
23
morph/pom.xml
Normal file
23
morph/pom.xml
Normal file
@ -0,0 +1,23 @@
|
||||
<?xml version="1.0"?>
<!-- Maven descriptor for the "morph" module; it inherits from the "morpholgy" parent POM. -->
<project>
    <modelVersion>4.0.0</modelVersion>

    <!-- Parent coordinates. The spelling "morpholgy" is the actual identifier and must stay as is. -->
    <parent>
        <groupId>org.apache.lucene.morpholgy</groupId>
        <artifactId>morpholgy</artifactId>
        <version>0.7-SNAPSHOT</version>
    </parent>

    <!-- Coordinates of this module (group and version repeat the parent's values). -->
    <groupId>org.apache.lucene.morpholgy</groupId>
    <artifactId>morph</artifactId>
    <version>0.7-SNAPSHOT</version>
    <name>morph</name>
    <url>http://maven.apache.org</url>

    <!-- NOTE(review): only JUnit is declared here; other libraries are presumably
         supplied by the parent POM - confirm. -->
    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>3.8.1</version>
            <scope>test</scope>
        </dependency>
    </dependencies>
</project>
|
@ -0,0 +1,91 @@
|
||||
/**
|
||||
* Copyright 2009 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.morphology;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
|
||||
/**
 * A suffix-rewriting rule: {@link #transofrmWord(String)} drops a fixed number
 * of trailing characters from a word and appends a replacement suffix.
 *
 * <p>Each rule also carries two {@code short} codes, {@code formMorphInfo} and
 * {@code normalFormMorphInfo}. This class only stores and reports them.
 *
 * <p>The textual form accepted by {@link #Heuristic(String)} and produced by
 * {@link #toString()} is four fields joined by {@code '|'}:
 * {@code suffixLength|normalSuffix|formMorphInfo|normalFormMorphInfo}.
 */
public class Heuristic implements Serializable {
    byte actualSuffixLengh;
    String actualNormalSuffix;
    short formMorphInfo;
    short normalFormMorphInfo;

    /**
     * Parses a rule from its {@code '|'}-separated textual form.
     *
     * @param s text of the form {@code length|suffix|formInfo|normalFormInfo}
     * @throws NumberFormatException          if a numeric field cannot be parsed
     * @throws ArrayIndexOutOfBoundsException if fewer than four fields are present
     */
    public Heuristic(String s) {
        String[] fields = s.split("\\|");
        this.actualSuffixLengh = Byte.parseByte(fields[0]);
        this.actualNormalSuffix = fields[1];
        this.formMorphInfo = Short.parseShort(fields[2]);
        this.normalFormMorphInfo = Short.parseShort(fields[3]);
    }

    /**
     * Creates a rule from already-parsed values.
     *
     * @param actualSuffixLengh   number of trailing characters to strip
     * @param actualNormalSuffix  suffix appended after stripping
     * @param formMorphInfo       code stored for the word form
     * @param normalFormMorphInfo code stored for the normal form
     */
    public Heuristic(byte actualSuffixLengh, String actualNormalSuffix, short formMorphInfo, short normalFormMorphInfo) {
        this.actualSuffixLengh = actualSuffixLengh;
        this.actualNormalSuffix = actualNormalSuffix;
        this.formMorphInfo = formMorphInfo;
        this.normalFormMorphInfo = normalFormMorphInfo;
    }

    /**
     * Applies the rule to a word.
     *
     * @param w the word to rewrite
     * @return {@code w} without its last {@code actualSuffixLengh} characters,
     *         followed by {@code actualNormalSuffix}
     * @throws StringIndexOutOfBoundsException if {@code w} is shorter than the suffix length
     */
    public String transofrmWord(String w) {
        int stemLength = w.length() - actualSuffixLengh;
        String stem = w.substring(0, stemLength);
        return stem + actualNormalSuffix;
    }

    /** @return the number of trailing characters this rule strips */
    public byte getActualSuffixLengh() {
        return actualSuffixLengh;
    }

    /** @return the suffix this rule appends */
    public String getActualNormalSuffix() {
        return actualNormalSuffix;
    }

    /** @return the code stored for the word form */
    public short getFormMorphInfo() {
        return formMorphInfo;
    }

    /** @return the code stored for the normal form */
    public short getNormalFormMorphInfo() {
        return normalFormMorphInfo;
    }

    /** Two rules are equal when they have the same exact class and all four fields match. */
    @Override
    public boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        if (o == null || o.getClass() != getClass()) {
            return false;
        }
        Heuristic that = (Heuristic) o;
        return actualSuffixLengh == that.actualSuffixLengh
                && formMorphInfo == that.formMorphInfo
                && normalFormMorphInfo == that.normalFormMorphInfo
                && (actualNormalSuffix == null
                        ? that.actualNormalSuffix == null
                        : actualNormalSuffix.equals(that.actualNormalSuffix));
    }

    /** Hash over the same four fields compared by {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        int suffixHash = actualNormalSuffix == null ? 0 : actualNormalSuffix.hashCode();
        return ((actualSuffixLengh * 31 + suffixHash) * 31 + formMorphInfo) * 31 + normalFormMorphInfo;
    }

    /** @return the {@code '|'}-separated textual form accepted by {@link #Heuristic(String)} */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append(actualSuffixLengh).append("|");
        text.append(actualNormalSuffix).append("|");
        text.append(formMorphInfo).append("|");
        text.append(normalFormMorphInfo);
        return text.toString();
    }
}
|
@ -0,0 +1,31 @@
|
||||
/**
|
||||
* Copyright 2009 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.morphology;
|
||||
|
||||
|
||||
/**
 * Converts between strings and integer codes.
 *
 * <p>The exact encoding scheme is defined by implementations, which are not
 * part of this file; the descriptions below state only what the signatures show.
 */
public interface LetterDecoderEncoder {

    /**
     * Encodes a string as a single integer.
     *
     * @param string the text to encode
     * @return the integer code for {@code string}
     */
    Integer encode(String string);

    /**
     * Encodes a string as an array of integers.
     *
     * @param s the text to encode
     * @return the integer codes for {@code s}
     */
    int[] encodeToArray(String s);

    /**
     * Decodes an integer array back to text; presumably the inverse of
     * {@link #encodeToArray(String)} (verify against implementations).
     *
     * @param array the codes to decode
     * @return the decoded text
     */
    String decodeArray(int[] array);

    /**
     * Decodes a single integer code back to text; presumably the inverse of
     * {@link #encode(String)} (verify against implementations).
     *
     * @param suffixN the code to decode
     * @return the decoded text
     */
    String decode(Integer suffixN);

    /**
     * Tests a single character.
     * NOTE(review): presumably reports whether the character can be encoded - confirm.
     *
     * @param c the character to test
     * @return the result of the check
     */
    boolean checkCharacter(char c);

    /**
     * Returns a cleaned version of a string.
     * NOTE(review): presumably removes characters that cannot be encoded - confirm.
     *
     * @param s the text to clean
     * @return the cleaned text
     */
    String cleanString(String s);
}
|
@ -0,0 +1,74 @@
|
||||
/**
|
||||
* Copyright 2009 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.morphology;
|
||||
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
public class LuceneMorph extends Morph {
|
||||
LetterDecoderEncoder decoderEncoder;
|
||||
|
||||
public LuceneMorph(String fileName) throws IOException {
|
||||
super(fileName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getMorhInfo(String s) {
|
||||
ArrayList<String> result = new ArrayList<String>();
|
||||
int[] ints = decoderEncoder.encodeToArray(revertWord(s));
|
||||
int ruleId = findRuleId(ints);
|
||||
for (Heuristic h : rules[rulesId[ruleId]]) {
|
||||
result.add(h.transofrmWord(s));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
protected void readRules(BufferedReader bufferedReader) throws IOException {
|
||||
String s;
|
||||
Integer amount;
|
||||
s = bufferedReader.readLine();
|
||||
amount = Integer.valueOf(s);
|
||||
rules = new Heuristic[amount][];
|
||||
for (int i = 0; i < amount; i++) {
|
||||
String s1 = bufferedReader.readLine();
|
||||
Integer ruleLenght = Integer.valueOf(s1);
|
||||
Heuristic[] heuristics = new Heuristic[ruleLenght];
|
||||
for (int j = 0; j < ruleLenght; j++) {
|
||||
heuristics[j] = new Heuristic(bufferedReader.readLine());
|
||||
}
|
||||
rules[i] = modeifyHeuristic(heuristics);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private Heuristic[] modeifyHeuristic(Heuristic[] heuristics) {
|
||||
ArrayList<Heuristic> result = new ArrayList<Heuristic>();
|
||||
for (Heuristic heuristic : heuristics) {
|
||||
boolean isAdded = true;
|
||||
for (Heuristic ch : result) {
|
||||
isAdded = isAdded && !(ch.getActualNormalSuffix().equals(heuristic.getActualNormalSuffix()) && (ch.getActualSuffixLengh() == heuristic.getActualSuffixLengh()));
|
||||
}
|
||||
if (isAdded) {
|
||||
result.add(heuristic);
|
||||
}
|
||||
}
|
||||
return result.toArray(new Heuristic[result.size()]);
|
||||
}
|
||||
}
|
199
morph/src/main/java/org/apache/lucene/morphology/Morph.java
Normal file
199
morph/src/main/java/org/apache/lucene/morphology/Morph.java
Normal file
@ -0,0 +1,199 @@
|
||||
/**
|
||||
* Copyright 2009 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.morphology;
|
||||
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.FileReader;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
public class Morph {
|
||||
protected int[][] separators;
|
||||
protected short[] rulesId;
|
||||
protected Heuristic[][] rules;
|
||||
protected String[] grammaInfo;
|
||||
LetterDecoderEncoder decoderEncoder;
|
||||
|
||||
|
||||
public Morph(String fileName) throws IOException {
|
||||
readFromFile(fileName);
|
||||
}
|
||||
|
||||
public Morph(int[][] separators, short[] rulesId, Heuristic[][] rules, String[] grammaInfo) {
|
||||
this.separators = separators;
|
||||
this.rulesId = rulesId;
|
||||
this.rules = rules;
|
||||
this.grammaInfo = grammaInfo;
|
||||
}
|
||||
|
||||
public int[][] getSeparators() {
|
||||
return separators;
|
||||
}
|
||||
|
||||
public short[] getRulesId() {
|
||||
return rulesId;
|
||||
}
|
||||
|
||||
public Heuristic[][] getRules() {
|
||||
return rules;
|
||||
}
|
||||
|
||||
public String[] getGrammaInfo() {
|
||||
return grammaInfo;
|
||||
}
|
||||
|
||||
public List<String> getMorhInfo(String s) {
|
||||
ArrayList<String> result = new ArrayList<String>();
|
||||
int[] ints = decoderEncoder.encodeToArray(revertWord(s));
|
||||
int ruleId = findRuleId(ints);
|
||||
for (Heuristic h : rules[rulesId[ruleId]]) {
|
||||
result.add(h.transofrmWord(s) + "|" + grammaInfo[h.getFormMorphInfo()]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
protected int findRuleId(int[] ints) {
|
||||
int low = 0;
|
||||
int high = separators.length - 1;
|
||||
int mid = 0;
|
||||
while (low <= high) {
|
||||
mid = (low + high) >>> 1;
|
||||
int[] midVal = separators[mid];
|
||||
|
||||
int comResult = compareToInts(ints, midVal);
|
||||
if (comResult > 0)
|
||||
low = mid + 1;
|
||||
else if (comResult < 0)
|
||||
high = mid - 1;
|
||||
else
|
||||
break;
|
||||
}
|
||||
if (compareToInts(ints, separators[mid]) >= 0) {
|
||||
return mid;
|
||||
} else {
|
||||
return mid - 1;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private int compareToInts(int[] i1, int[] i2) {
|
||||
int minLength = Math.min(i1.length, i2.length);
|
||||
for (int i = 0; i < minLength; i++) {
|
||||
int i3 = i1[i] < i2[i] ? -1 : (i1[i] == i2[i] ? 0 : 1);
|
||||
if (i3 != 0) return i3;
|
||||
}
|
||||
return i2.length - i1.length;
|
||||
}
|
||||
|
||||
public void writeToFile(String fileName) throws IOException {
|
||||
FileWriter writer = new FileWriter(fileName);
|
||||
writer.write(separators.length + "\n");
|
||||
for (int[] i : separators) {
|
||||
writer.write(i.length + "\n");
|
||||
for (int j : i) {
|
||||
writer.write(j + "\n");
|
||||
}
|
||||
}
|
||||
for (short i : rulesId) {
|
||||
writer.write(i + "\n");
|
||||
}
|
||||
writer.write(rules.length + "\n");
|
||||
for (Heuristic[] heuristics : rules) {
|
||||
writer.write(heuristics.length + "\n");
|
||||
for (Heuristic heuristic : heuristics) {
|
||||
writer.write(heuristic.toString() + "\n");
|
||||
}
|
||||
}
|
||||
writer.write(grammaInfo.length + "\n");
|
||||
for (String s : grammaInfo) {
|
||||
writer.write(s + "\n");
|
||||
}
|
||||
writer.close();
|
||||
}
|
||||
|
||||
public void readFromFile(String fileName) throws IOException {
|
||||
BufferedReader bufferedReader = new BufferedReader(new FileReader(fileName));
|
||||
String s = bufferedReader.readLine();
|
||||
Integer amount = Integer.valueOf(s);
|
||||
|
||||
readSeparators(bufferedReader, amount);
|
||||
|
||||
readRulesId(bufferedReader, amount);
|
||||
|
||||
readRules(bufferedReader);
|
||||
readGrammaInfo(bufferedReader);
|
||||
bufferedReader.close();
|
||||
}
|
||||
|
||||
private void readGrammaInfo(BufferedReader bufferedReader) throws IOException {
|
||||
String s;
|
||||
Integer amount;
|
||||
s = bufferedReader.readLine();
|
||||
amount = Integer.valueOf(s);
|
||||
grammaInfo = new String[amount];
|
||||
for (int i = 0; i < amount; i++) {
|
||||
grammaInfo[i] = bufferedReader.readLine();
|
||||
}
|
||||
}
|
||||
|
||||
protected void readRules(BufferedReader bufferedReader) throws IOException {
|
||||
String s;
|
||||
Integer amount;
|
||||
s = bufferedReader.readLine();
|
||||
amount = Integer.valueOf(s);
|
||||
rules = new Heuristic[amount][];
|
||||
for (int i = 0; i < amount; i++) {
|
||||
String s1 = bufferedReader.readLine();
|
||||
Integer ruleLenght = Integer.valueOf(s1);
|
||||
rules[i] = new Heuristic[ruleLenght];
|
||||
for (int j = 0; j < ruleLenght; j++) {
|
||||
rules[i][j] = new Heuristic(bufferedReader.readLine());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void readRulesId(BufferedReader bufferedReader, Integer amount) throws IOException {
|
||||
rulesId = new short[amount];
|
||||
for (int i = 0; i < amount; i++) {
|
||||
String s1 = bufferedReader.readLine();
|
||||
rulesId[i] = Short.valueOf(s1);
|
||||
}
|
||||
}
|
||||
|
||||
private void readSeparators(BufferedReader bufferedReader, Integer amount) throws IOException {
|
||||
separators = new int[amount][];
|
||||
for (int i = 0; i < amount; i++) {
|
||||
String s1 = bufferedReader.readLine();
|
||||
Integer wordLenght = Integer.valueOf(s1);
|
||||
separators[i] = new int[wordLenght];
|
||||
for (int j = 0; j < wordLenght; j++) {
|
||||
separators[i][j] = Integer.valueOf(bufferedReader.readLine());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected String revertWord(String s) {
|
||||
String result = "";
|
||||
for (int i = 1; i <= s.length(); i++) {
|
||||
result += s.charAt(s.length() - i);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
@ -0,0 +1,28 @@
|
||||
/**
|
||||
* Copyright 2009 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.morphology;
|
||||
|
||||
|
||||
/**
 * Unchecked exception type of the morphology package.
 * NOTE(review): judging by the name it signals a suffix that exceeds a
 * supported length; no throw site is visible here - confirm.
 */
public class SuffixToLongException extends RuntimeException {

    /** Creates the exception without a detail message. */
    public SuffixToLongException() {
        super();
    }

    /**
     * Creates the exception with a detail message.
     *
     * @param message text returned by {@link #getMessage()}
     */
    public SuffixToLongException(String message) {
        super(message);
    }
}
|
@ -0,0 +1,27 @@
|
||||
/**
|
||||
* Copyright 2009 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.morphology;
|
||||
|
||||
|
||||
/**
 * Unchecked exception type of the morphology package.
 * NOTE(review): judging by the name it signals an unsupported character;
 * no throw site is visible here - confirm.
 */
public class WrongCharaterException extends RuntimeException {

    /** Creates the exception without a detail message. */
    public WrongCharaterException() {
        super();
    }

    /**
     * Creates the exception with a detail message.
     *
     * @param message text returned by {@link #getMessage()}
     */
    public WrongCharaterException(String message) {
        super(message);
    }
}
|
@ -0,0 +1,42 @@
|
||||
/**
|
||||
* Copyright 2009 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.morphology.analayzer;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.LowerCaseFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.standard.StandardFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
import org.apache.lucene.morphology.LuceneMorph;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
public class RussianMorphlogyAnalayzer extends Analyzer {
|
||||
private LuceneMorph luceneMorph;
|
||||
|
||||
public RussianMorphlogyAnalayzer() throws IOException {
|
||||
luceneMorph = new LuceneMorph("sep.txt");
|
||||
}
|
||||
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
TokenStream result = new StandardTokenizer(reader);
|
||||
result = new StandardFilter(result);
|
||||
result = new LowerCaseFilter(result);
|
||||
return new RussianMorphlogyFilter(result, luceneMorph);
|
||||
}
|
||||
}
|
@ -0,0 +1,83 @@
|
||||
/**
|
||||
* Copyright 2009 Alexander Kuznetsov
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.morphology.analayzer;
|
||||
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenFilter;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.morphology.LuceneMorph;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
public class RussianMorphlogyFilter extends TokenFilter {
|
||||
private LuceneMorph luceneMorph;
|
||||
|
||||
public RussianMorphlogyFilter(TokenStream tokenStream, LuceneMorph luceneMorph) {
|
||||
super(tokenStream);
|
||||
this.luceneMorph = luceneMorph;
|
||||
}
|
||||
|
||||
|
||||
private List<String> stack = new ArrayList<String>();
|
||||
private int index = 0;
|
||||
private Token current = null;
|
||||
|
||||
/**
|
||||
* Returns the next token in the stream, or null at EOS.
|
||||
*/
|
||||
public Token next(final Token reusableToken) throws IOException {
|
||||
assert reusableToken != null;
|
||||
while (index < stack.size()) { // pop from stack
|
||||
Token nextToken = createToken(stack.get(index++), current, reusableToken);
|
||||
if (nextToken != null) {
|
||||
return nextToken;
|
||||
}
|
||||
}
|
||||
|
||||
Token nextToken = input.next(reusableToken);
|
||||
if (nextToken == null) return null; // EOS; iterator exhausted
|
||||
Character testC = nextToken.term().charAt(0);
|
||||
if (Character.UnicodeBlock.of(testC) != Character.UnicodeBlock.CYRILLIC) {
|
||||
return nextToken;
|
||||
}
|
||||
stack = luceneMorph.getMorhInfo(nextToken.term());
|
||||
index = 0;
|
||||
current = (Token) nextToken.clone();
|
||||
nextToken = createToken(stack.get(index++), current, reusableToken);
|
||||
return nextToken;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates and returns a token for the given synonym of the current input
|
||||
* token; Override for custom (stateless or stateful) behavior, if desired.
|
||||
*
|
||||
* @param synonym a synonym for the current token's term
|
||||
* @param current the current token from the underlying child stream
|
||||
* @param reusableToken the token to reuse
|
||||
* @return a new token, or null to indicate that the given synonym should be
|
||||
* ignored
|
||||
*/
|
||||
protected Token createToken(String synonym, Token current, final Token reusableToken) {
|
||||
reusableToken.reinit(current, synonym);
|
||||
reusableToken.setTermBuffer(synonym);
|
||||
reusableToken.setPositionIncrement(0);
|
||||
return reusableToken;
|
||||
}
|
||||
}
|
@ -0,0 +1,8 @@
|
||||
пушке А бутявка волит за напушкой Сяпала Калуша по напушке и увазила бутявку И волит Калушата калушаточки Бутявка Калушата присяпали и бутявку стрямкали И подудонились А Калуша волит Бутявка то некузявая Калушата бутявку вычучили Бутявка вздребезнулась сопритюкнулась и усяпала с напушки
|
||||
А Калуша волит:
|
||||
— Бутявок не трямкают. Бутявки дюбые и зюмо-зюмо некузявые. От бутявок дудонятся.
|
||||
А бутявка волит за напушкой:
|
||||
— Калушата подудонились! Калушата подудонились! Зюмо некузявые! Пуськи бятые!
|
||||
В условиях нарастающей пурги было сделано 4 успешных захода на посадку. "Все нормально, будем рекомендовать систему к внедрению".
|
||||
Рейсы из Кейптауна (ЮАР) на станцию "Новолазаревская" (Антарктида) совершаются
|
||||
примерно один раз в две недели. вина твоя вина мне
|
@ -0,0 +1,9 @@
|
||||
шел идти
|
||||
турестических турестический
|
||||
отзывы отзыв
|
||||
победы победа
|
||||
поэтическая поэтический
|
||||
произошло произойти
|
||||
test test
|
||||
ананасов ананас
|
||||
встовашего встовать
|
@ -0,0 +1,33 @@
|
||||
в
|
||||
условие
|
||||
нарастать
|
||||
пурга
|
||||
быть
|
||||
сделать
|
||||
4
|
||||
успешный
|
||||
заход
|
||||
на
|
||||
посадка
|
||||
весь
|
||||
нормальный
|
||||
быть
|
||||
рекомендовать
|
||||
система
|
||||
к
|
||||
внедрение
|
||||
рейс
|
||||
из
|
||||
кейптаун
|
||||
юар
|
||||
на
|
||||
станция
|
||||
новолазаревский
|
||||
антарктида
|
||||
совершаться
|
||||
примерно
|
||||
один
|
||||
раз
|
||||
в
|
||||
два
|
||||
неделя
|
Reference in New Issue
Block a user