Commit d4ddb7f8 authored by Aleksandra Tsys's avatar Aleksandra Tsys
Browse files

add class that I will change

parent 0fec80cb
public class MyFuzzySuggester extends FuzzySuggester {
@Override
public List<LookupResult> lookup(final CharSequence key, Set<BytesRef> contexts, boolean onlyMorePopular, int num) {
assert num > 0;
if (onlyMorePopular) {
throw new IllegalArgumentException("this suggester only works with onlyMorePopular=false");
}
if (contexts != null) {
throw new IllegalArgumentException("this suggester doesn't support contexts");
}
if (fst == null) {
return Collections.emptyList();
}
for (int i = 0; i < key.length(); i++) {
if (key.charAt(i) == 0x1E) {
throw new IllegalArgumentException("lookup key cannot contain HOLE character U+001E; this character is reserved");
}
if (key.charAt(i) == 0x1F) {
throw new IllegalArgumentException("lookup key cannot contain unit separator character U+001F; this character is reserved");
}
}
final BytesRef utf8Key = new BytesRef(key);
try {
Automaton lookupAutomaton = toLookupAutomaton(key);
final CharsRefBuilder spare = new CharsRefBuilder();
BytesReader bytesReader = fst.getBytesReader();
FST.Arc<Pair<Long,BytesRef>> scratchArc = new FST.Arc<>();
final List<LookupResult> results = new ArrayList<>();
List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths = FSTUtil.intersectPrefixPaths(convertAutomaton(lookupAutomaton), fst);
if (exactFirst) {
int count = 0;
for (FSTUtil.Path<Pair<Long,BytesRef>> path : prefixPaths) {
if (fst.findTargetArc(END_BYTE, path.fstNode, scratchArc, bytesReader) != null) {
count++;
}
}
Util.TopNSearcher<Pair<Long,BytesRef>> searcher;
searcher = new Util.TopNSearcher<>(fst, count * maxSurfaceFormsPerAnalyzedForm, count * maxSurfaceFormsPerAnalyzedForm, weightComparator);
for (FSTUtil.Path<Pair<Long,BytesRef>> path : prefixPaths) {
if (fst.findTargetArc(END_BYTE, path.fstNode, scratchArc, bytesReader) != null) {
searcher.addStartPaths(scratchArc, fst.outputs.add(path.output, scratchArc.output()), false, path.input);
}
}
TopResults<Pair<Long,BytesRef>> completions = searcher.search();
assert completions.isComplete;
for(Result<Pair<Long,BytesRef>> completion : completions) {
BytesRef output2 = completion.output.output2;
if (sameSurfaceForm(utf8Key, output2)) {
results.add(getLookupResult(completion.output.output1, output2, spare));
break;
}
}
if (results.size() == num) {
// That was quick:
return results;
}
}
Util.TopNSearcher<Pair<Long,BytesRef>> searcher;
searcher = new Util.TopNSearcher<Pair<Long,BytesRef>>(fst,
num - results.size(),
num * maxAnalyzedPathsForOneInput,
weightComparator) {
private final Set<BytesRef> seen = new HashSet<>();
@Override
protected boolean acceptResult(IntsRef input, Pair<Long,BytesRef> output) {
if (seen.contains(output.output2)) {
return false;
}
seen.add(output.output2);
if (!exactFirst) {
return true;
} else {
if (sameSurfaceForm(utf8Key, output.output2)) {
assert results.size() == 1;
return false;
} else {
return true;
}
}
}
};
prefixPaths = getFullPrefixPaths(prefixPaths, lookupAutomaton, fst);
for (FSTUtil.Path<Pair<Long,BytesRef>> path : prefixPaths) {
searcher.addStartPaths(path.fstNode, path.output, true, path.input);
}
TopResults<Pair<Long,BytesRef>> completions = searcher.search();
assert completions.isComplete;
for(Result<Pair<Long,BytesRef>> completion : completions) {
LookupResult result = getLookupResult(completion.output.output1, completion.output.output2, spare);
// TODO: for fuzzy case would be nice to return
// how many edits were required
//System.out.println(" result=" + result);
results.add(result);
if (results.size() == num) {
// In the exactFirst=true case the search may
// produce one extra path
break;
}
}
return results;
} catch (IOException bogus) {
throw new RuntimeException(bogus);
}
}
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment