Commit 07fa415f authored by tssasha's avatar tssasha
Browse files

testing done

parent 0b0b25ca
......@@ -669,7 +669,7 @@ public class ContextAnalyzingSuggester extends Lookup implements Accountable {
if (fst == null) {
return Collections.emptyList();
}
//System.out.println("lookup key=" + key + " num=" + num);
System.out.println("lookup key=" + key + " num=" + num);
for (int i = 0; i < key.length(); i++) {
if (key.charAt(i) == 0x1E) {
throw new IllegalArgumentException("lookup key cannot contain HOLE character U+001E; this character is reserved");
......@@ -792,8 +792,6 @@ public class ContextAnalyzingSuggester extends Lookup implements Accountable {
}
Util.TopNSearcher<Pair<Long,BytesRef>> searcher;
// System.out.println("point6");
// System.out.format("topN: %d ", num - results.size());
searcher = new Util.TopNSearcher<Pair<Long, BytesRef>>(fst,
num - results.size(),
num * maxAnalyzedPathsForOneInput,
......@@ -801,29 +799,17 @@ public class ContextAnalyzingSuggester extends Lookup implements Accountable {
private final Set<BytesRef> seen = new HashSet<>();
@Override
protected boolean acceptResult(IntsRef input, Pair<Long,BytesRef> output) {
// System.out.println("point7");
LookupResult result = getLookupResult(output.output1, output.output2, spare);
BytesRef res_context = result.payload;
// System.out.println("res_context2 = " + res_context);
// for (BytesRef context : contexts) {
// System.out.println(context);
// }
if (contexts == null) {
System.out.println("null");
return true;
}
for (BytesRef context : contexts) {
System.out.println("context 2 = " + context);
}
System.out.println("payload = " + res_context);
if (!(contexts.contains(res_context))) {
return false;
}
System.out.println("contains 2");
if (seen.contains(output.output2)) {
return false;
}
......@@ -846,34 +832,6 @@ public class ContextAnalyzingSuggester extends Lookup implements Accountable {
}
}
};
// @Override
// protected boolean acceptResult(IntsRef input, Pair<Long, BytesRef> output) {
//
// // Dedup: when the input analyzes to a graph we
// // can get duplicate surface forms:
// if (seen.contains(output.output2)) {
// return false;
// }
// seen.add(output.output2);
//
// if (!exactFirst) {
// return true;
// } else {
// // In exactFirst mode, don't accept any paths
// // matching the surface form since that will
// // create duplicate results:
// if (sameSurfaceForm(utf8Key, output.output2)) {
// // We found exact match, which means we should
// // have already found it in the first search:
// assert results.size() == 1;
// return false;
// } else {
// return true;
// }
// }
// }
// };
// System.out.println("point8");
prefixPaths = getFullPrefixPaths(prefixPaths, lookupAutomaton, fst);
for (FSTUtil.Path<Pair<Long,BytesRef>> path : prefixPaths) {
......@@ -890,17 +848,14 @@ public class ContextAnalyzingSuggester extends Lookup implements Accountable {
// TODO: for fuzzy case would be nice to return
// how many edits were required
//System.out.println(" result=" + result);
results.add(result);
if (results.size() == num) {
// System.out.println("point9");
// In the exactFirst=true case the search may
// produce one extra path
break;
}
}
// System.out.println("point10");
return results;
} catch (IOException bogus) {
throw new RuntimeException(bogus);
......
......@@ -20,7 +20,7 @@ case "$1" in
# index all data (decompress on the fly and pass to index)
#tar xzf data/$YELP_DATA final.json -O | curl 'http://localhost:8983/solr/yelp/update?commit=true' --data-binary @- -H 'Content-type:application/json'
sleep 10
cat data/little-final.json | curl 'http://localhost:8983/solr/yelp/update?commit=true' --data-binary @- -H 'Content-type:application/json'
cat data/middle_final.json | curl 'http://localhost:8983/solr/yelp/update?commit=true' --data-binary @- -H 'Content-type:application/json'
# else
# echo "Please use ./run get-yelp to obtain yelp data"
# fi
......
This diff is collapsed.
import csv
import random
import urllib.parse
import json
import requests
test_res = [["query", "context", "my result", "my len", "my time", "fuzzy result", "fuzzy len", "fuzzy time"]]
def request(data, iterations=10):
    """Send a Solr suggest request and record the results in `test_res`.

    The request is repeated `iterations` times and the average round-trip
    time is recorded.  For `mySuggester` a new CSV row is started; for any
    other dictionary (the fuzzy run) the results are appended to the row
    created by the preceding `mySuggester` call.

    Returns True when the suggester returned at least one suggestion,
    False otherwise (in which case nothing is recorded).
    """
    url_values = urllib.parse.urlencode(data)
    # cities.txt already contains pre-encoded cfq values ('(...)', '%20'),
    # so undo the double-encoding that urlencode applies to them, and use
    # %20 instead of '+' for spaces as the suggester expects.
    url_values = url_values.replace('%28', '(')
    url_values = url_values.replace('%29', ')')
    url_values = url_values.replace('+', '%20')
    full_url = 'http://localhost:8983/solr/yelp/suggest' + '?' + url_values

    elapsed = 0.0
    response = None
    for _ in range(iterations):
        response = requests.get(full_url)
        elapsed += response.elapsed.total_seconds()
    # Average over the actual iteration count (the original hard-coded /10
    # separately from range(10), which drifts apart if one is edited).
    elapsed /= iterations

    # Only the last response body is parsed; all runs issue the same query.
    result = json.loads(response.content)['suggest'][data['suggest.dictionary']][data['suggest.q']]['suggestions']
    if len(result) == 0:
        return False
    res_list = [item['term'] for item in result]
    if data['suggest.dictionary'] == 'mySuggester':
        test_res.append([data['suggest.q'], data['suggest.cfq'], res_list, len(res_list), elapsed])
    else:
        test_res[-1].extend([res_list, len(res_list), elapsed])
    return True
# Build the benchmark: sample random (word, city) query pairs until 50 of
# them return non-empty suggestions from mySuggester; for each such hit,
# run the same query against fuzzySuggester so both result sets land on
# the same CSV row.
with open('cities.txt') as c, open('words.txt') as w:
    cities = c.read().splitlines()
    words = w.read().splitlines()

quantity = 0
while quantity < 50:
    data = dict()
    data['suggest.q'] = random.choice(words)
    data['suggest.cfq'] = random.choice(cities)
    data['suggest.count'] = '10000'
    data['suggest.dictionary'] = 'mySuggester'
    not_empty = request(data)
    if not_empty:
        data['suggest.dictionary'] = 'fuzzySuggester'
        request(data)
        quantity += 1

# newline='' is required by the csv module; without it the writer emits
# blank interleaved rows on Windows.
with open('results.csv', 'w', newline='') as myFile:
    writer = csv.writer(myFile)
    writer.writerows(test_res)
print("Writing complete")
\ No newline at end of file
import json
import random

# Generate the benchmark inputs from the Yelp dump:
#   words.txt  - one random word per business name (first 500 records),
#                used as suggest queries.
#   cities.txt - distinct cities, pre-encoded for the suggester's cfq
#                parameter (spaces -> %20, multi-word names parenthesized).
with open('../data/middle_final.json') as json_file, \
        open('words.txt', "w") as words_file, \
        open('cities.txt', "w") as cities_file:
    data = json.load(json_file)

    for item in data[:500]:
        word = random.choice(item['name'].split())
        # Strip ALL trailing non-alphabetic characters ("Bar!!" -> "Bar");
        # the original removed only one, which could leave stray punctuation
        # or even write an empty line for a purely numeric/punctuation token.
        while word and not word[-1].isalpha():
            word = word[:-1]
        if word:  # skip tokens that were pure punctuation or digits
            words_file.write(word + "\n")

    cities = set()
    for item in data:
        cities.add(item['city'])
    for item in cities:
        if item.count(' '):
            item = item.replace(" ", "%20")
            item = '(' + item + ')'
        cities_file.write(item + "\n")
\ No newline at end of file
......@@ -330,12 +330,22 @@
<str name="dictionaryImpl">DocumentDictionaryFactory</str>
<str name="field">name</str>
<str name="suggestAnalyzerFieldType">textSuggest</str>
<!--<str name="suggestAnalyzerFieldType">textSuggest</str>-->
<str name="payloadField">city</str>
<str name="contextField">city</str>
<str name="buildOnStartup">true</str>
<str name="buildOnCommit">true</str>
</lst>
<lst name="suggester">
<str name="name">fuzzySuggester</str>
<str name="lookupImpl">FuzzyLookupFactory</str>
<str name="storeDir">suggester_fuzzy_dir</str>
<str name="dictionaryImpl">DocumentDictionaryFactory</str>
<str name="field">name</str>
<str name="suggestAnalyzerFieldType">textSuggest</str>
<str name="buildOnStartup">true</str>
<str name="buildOnCommit">true</str>
</lst>
</searchComponent>
<requestHandler name="/suggest" class="solr.SearchHandler" startup="lazy" >
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment