Commit b70e25d0 authored by tssasha
Browse files

print to build

parent b0dc33b1
......@@ -388,10 +388,7 @@ public class ContextAnalyzingSuggester extends Lookup implements Accountable {
@Override
public void build(InputIterator iterator) throws IOException {
if (iterator.hasContexts()) {
throw new IllegalArgumentException("this suggester doesn't support contexts");
}
System.out.println("bpoint1");
hasPayloads = iterator.hasPayloads();
OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix, new AnalyzingComparator(hasPayloads));
......@@ -409,6 +406,7 @@ public class ContextAnalyzingSuggester extends Lookup implements Accountable {
count = 0;
byte buffer[] = new byte[8];
try {
System.out.println("bpoint2");
ByteArrayDataOutput output = new ByteArrayDataOutput(buffer);
for (BytesRef surfaceForm; (surfaceForm = iterator.next()) != null;) {
......@@ -431,9 +429,11 @@ public class ContextAnalyzingSuggester extends Lookup implements Accountable {
BytesRef payload;
if (hasPayloads) {
System.out.println("bpoint3");
if (surfaceForm.length > (Short.MAX_VALUE-2)) {
throw new IllegalArgumentException("cannot handle surface form > " + (Short.MAX_VALUE-2) + " in length (got " + surfaceForm.length + ")");
}
System.out.println("bpoint4");
payload = iterator.payload(); //----------------------------------?
// payload + surfaceLength (short)
requiredLength += payload.length + 2;
......@@ -457,6 +457,7 @@ public class ContextAnalyzingSuggester extends Lookup implements Accountable {
throw new IllegalArgumentException("surface form cannot contain unit separator character U+001F; this character is reserved");
}
}
System.out.println("bpoint5");
output.writeShort((short) surfaceForm.length);
output.writeBytes(surfaceForm.bytes, surfaceForm.offset, surfaceForm.length);
output.writeBytes(payload.bytes, payload.offset, payload.length);
......@@ -569,7 +570,7 @@ public class ContextAnalyzingSuggester extends Lookup implements Accountable {
}
}
fst = fstCompiler.finish();
System.out.println("bpoint6");
//Util.dotToFile(fst, "/tmp/suggest.dot");
} finally {
IOUtils.closeWhileHandlingException(reader, writer);
......@@ -645,18 +646,17 @@ public class ContextAnalyzingSuggester extends Lookup implements Accountable {
@Override
public List<LookupResult> lookup(final CharSequence key, Set<BytesRef> contexts, boolean onlyMorePopular, int num) {
System.out.println("Iaminlookup");
assert num > 0;
System.out.println("point-3");
if (onlyMorePopular) {
throw new IllegalArgumentException("this suggester only works with onlyMorePopular=false");
}
if (contexts != null) {
throw new IllegalArgumentException("this suggester doesn't support contexts");
}
System.out.println("point-2");
if (fst == null) {
return Collections.emptyList();
}
System.out.println("point-1");
//System.out.println("lookup key=" + key + " num=" + num);
for (int i = 0; i < key.length(); i++) {
if (key.charAt(i) == 0x1E) {
......@@ -667,7 +667,9 @@ public class ContextAnalyzingSuggester extends Lookup implements Accountable {
}
}
final BytesRef utf8Key = new BytesRef(key);
System.out.println("point0");
try {
System.out.println("point1");
Automaton lookupAutomaton = toLookupAutomaton(key);
final CharsRefBuilder spare = new CharsRefBuilder();
......@@ -689,7 +691,7 @@ public class ContextAnalyzingSuggester extends Lookup implements Accountable {
List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths = FSTUtil.intersectPrefixPaths(convertAutomaton(lookupAutomaton), fst);
if (exactFirst) {
System.out.println("point2");
int count = 0;
for (FSTUtil.Path<Pair<Long,BytesRef>> path : prefixPaths) {
if (fst.findTargetArc(END_BYTE, path.fstNode, scratchArc, bytesReader) != null) {
......@@ -702,11 +704,13 @@ public class ContextAnalyzingSuggester extends Lookup implements Accountable {
// Searcher just to find the single exact only
// match, if present:
Util.TopNSearcher<Pair<Long,BytesRef>> searcher;
System.out.println("point3");
searcher = new Util.TopNSearcher<Pair<Long,BytesRef>>(fst,
count * maxSurfaceFormsPerAnalyzedForm,
count * maxSurfaceFormsPerAnalyzedForm, weightComparator) {
@Override
protected boolean acceptResult(IntsRef input, PairOutputs.Pair<Long, BytesRef> output) {
System.out.println("point4");
LookupResult result = getLookupResult(output.output1, output.output2, spare);
BytesRef res_context = result.payload;
......@@ -727,7 +731,7 @@ public class ContextAnalyzingSuggester extends Lookup implements Accountable {
return true;
}
};
System.out.println("point5");
// NOTE: we could almost get away with only using
// the first start node. The only catch is if
// maxSurfaceFormsPerAnalyzedForm had kicked in and
......@@ -771,14 +775,15 @@ public class ContextAnalyzingSuggester extends Lookup implements Accountable {
}
Util.TopNSearcher<Pair<Long,BytesRef>> searcher;
System.out.println("point6");
searcher = new Util.TopNSearcher<Pair<Long,BytesRef>>(fst,
num - results.size(),
num * maxAnalyzedPathsForOneInput,
weightComparator) {
private final Set<BytesRef> seen = new HashSet<>();
@Override
protected boolean acceptResult(IntsRef input, Pair<Long,BytesRef> output) {
System.out.println("point7");
LookupResult result = getLookupResult(output.output1, output.output2, spare);
BytesRef res_context = result.payload;
System.out.println("res_context2 = " + res_context);
......@@ -817,7 +822,7 @@ public class ContextAnalyzingSuggester extends Lookup implements Accountable {
}
}
};
System.out.println("point8");
prefixPaths = getFullPrefixPaths(prefixPaths, lookupAutomaton, fst);
for (FSTUtil.Path<Pair<Long,BytesRef>> path : prefixPaths) {
......@@ -838,12 +843,13 @@ public class ContextAnalyzingSuggester extends Lookup implements Accountable {
results.add(result);
if (results.size() == num) {
System.out.println("point9");
// In the exactFirst=true case the search may
// produce one extra path
break;
}
}
System.out.println("point10");
return results;
} catch (IOException bogus) {
throw new RuntimeException(bogus);
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment