Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Aleksandra Tsys
Suggester
Commits
07fa415f
Commit
07fa415f
authored
May 19, 2021
by
tssasha
Browse files
testing done
parent
0b0b25ca
Changes
6
Expand all
Hide whitespace changes
Inline
Side-by-side
fuzzy_gradle/src/main/java/org/apache/lucene/search/suggest/analyzing/ContextAnalyzingSuggester.java
View file @
07fa415f
...
...
@@ -669,7 +669,7 @@ public class ContextAnalyzingSuggester extends Lookup implements Accountable {
if
(
fst
==
null
)
{
return
Collections
.
emptyList
();
}
//
System.out.println("lookup key=" + key + " num=" + num);
System
.
out
.
println
(
"lookup key="
+
key
+
" num="
+
num
);
for
(
int
i
=
0
;
i
<
key
.
length
();
i
++)
{
if
(
key
.
charAt
(
i
)
==
0x1E
)
{
throw
new
IllegalArgumentException
(
"lookup key cannot contain HOLE character U+001E; this character is reserved"
);
...
...
@@ -792,8 +792,6 @@ public class ContextAnalyzingSuggester extends Lookup implements Accountable {
}
Util
.
TopNSearcher
<
Pair
<
Long
,
BytesRef
>>
searcher
;
// System.out.println("point6");
// System.out.format("topN: %d ", num - results.size());
searcher
=
new
Util
.
TopNSearcher
<
Pair
<
Long
,
BytesRef
>>(
fst
,
num
-
results
.
size
(),
num
*
maxAnalyzedPathsForOneInput
,
...
...
@@ -801,29 +799,17 @@ public class ContextAnalyzingSuggester extends Lookup implements Accountable {
private
final
Set
<
BytesRef
>
seen
=
new
HashSet
<>();
@Override
protected
boolean
acceptResult
(
IntsRef
input
,
Pair
<
Long
,
BytesRef
>
output
)
{
// System.out.println("point7");
LookupResult
result
=
getLookupResult
(
output
.
output1
,
output
.
output2
,
spare
);
BytesRef
res_context
=
result
.
payload
;
// System.out.println("res_context2 = " + res_context);
// for (BytesRef context : contexts) {
// System.out.println(context);
// }
if
(
contexts
==
null
)
{
System
.
out
.
println
(
"null"
);
return
true
;
}
for
(
BytesRef
context
:
contexts
)
{
System
.
out
.
println
(
"context 2 = "
+
context
);
}
System
.
out
.
println
(
"payload = "
+
res_context
);
if
(!(
contexts
.
contains
(
res_context
)))
{
return
false
;
}
System
.
out
.
println
(
"contains 2"
);
if
(
seen
.
contains
(
output
.
output2
))
{
return
false
;
}
...
...
@@ -846,34 +832,6 @@ public class ContextAnalyzingSuggester extends Lookup implements Accountable {
}
}
};
// @Override
// protected boolean acceptResult(IntsRef input, Pair<Long, BytesRef> output) {
//
// // Dedup: when the input analyzes to a graph we
// // can get duplicate surface forms:
// if (seen.contains(output.output2)) {
// return false;
// }
// seen.add(output.output2);
//
// if (!exactFirst) {
// return true;
// } else {
// // In exactFirst mode, don't accept any paths
// // matching the surface form since that will
// // create duplicate results:
// if (sameSurfaceForm(utf8Key, output.output2)) {
// // We found exact match, which means we should
// // have already found it in the first search:
// assert results.size() == 1;
// return false;
// } else {
// return true;
// }
// }
// }
// };
// System.out.println("point8");
prefixPaths
=
getFullPrefixPaths
(
prefixPaths
,
lookupAutomaton
,
fst
);
for
(
FSTUtil
.
Path
<
Pair
<
Long
,
BytesRef
>>
path
:
prefixPaths
)
{
...
...
@@ -890,17 +848,14 @@ public class ContextAnalyzingSuggester extends Lookup implements Accountable {
// TODO: for fuzzy case would be nice to return
// how many edits were required
//System.out.println(" result=" + result);
results
.
add
(
result
);
if
(
results
.
size
()
==
num
)
{
// System.out.println("point9");
// In the exactFirst=true case the search may
// produce one extra path
break
;
}
}
// System.out.println("point10");
return
results
;
}
catch
(
IOException
bogus
)
{
throw
new
RuntimeException
(
bogus
);
...
...
run
View file @
07fa415f
...
...
@@ -20,7 +20,7 @@ case "$1" in
# index all data (decompress on the fly and pass to index)
#tar xzf data/$YELP_DATA final.json -O | curl 'http://localhost:8983/solr/yelp/update?commit=true' --data-binary @- -H 'Content-type:application/json'
sleep
10
cat
data/
litt
le
-
final.json | curl
'http://localhost:8983/solr/yelp/update?commit=true'
--data-binary
@-
-H
'Content-type:application/json'
cat
data/
midd
le
_
final.json | curl
'http://localhost:8983/solr/yelp/update?commit=true'
--data-binary
@-
-H
'Content-type:application/json'
# else
# echo "Please use ./run get-yelp to obtain yelp data"
# fi
...
...
test/little-final.json
0 → 100644
View file @
07fa415f
This diff is collapsed.
Click to expand it.
test/request.py
0 → 100644
View file @
07fa415f
import
csv
import
random
import
urllib.parse
import
json
import
requests
# Rows destined for results.csv; the first row is the header. request() starts
# a new row for a 'mySuggester' call and extends the most recent row for a
# call against any other dictionary.
test_res = [["query", "context", "my result", "my len", "my time",
             "fuzzy result", "fuzzy len", "fuzzy time"]]


def request(data):
    """Query the local Solr suggest handler and record the outcome in ``test_res``.

    The same URL is fetched 10 times; the mean of the per-response
    ``elapsed`` times (in seconds) is recorded, and the suggestions are taken
    from the last response.

    Args:
        data: dict of query parameters. Must contain ``'suggest.dictionary'``
            and ``'suggest.q'``; ``'suggest.cfq'`` is also read when the
            dictionary is ``'mySuggester'``.

    Returns:
        ``False`` if the suggester returned no suggestions (nothing is
        recorded in that case), ``True`` otherwise.

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    # quote_via=quote encodes spaces as %20 rather than '+', and safe='()'
    # leaves parentheses literal -- the same query string that was previously
    # obtained by post-processing the default encoding with three replace()
    # calls.
    url_values = urllib.parse.urlencode(
        data, quote_via=urllib.parse.quote, safe='()')
    url = 'http://localhost:8983/solr/yelp/suggest'
    full_url = url + '?' + url_values

    attempts = 10
    total_seconds = 0
    for _ in range(attempts):
        response = requests.get(full_url)
        # Surface HTTP failures directly instead of as a KeyError / JSON
        # decoding error further down.
        response.raise_for_status()
        total_seconds += response.elapsed.total_seconds()
    mean_seconds = total_seconds / attempts

    payload = json.loads(response.content)
    result = payload['suggest'][data['suggest.dictionary']][data['suggest.q']]['suggestions']
    if len(result) == 0:
        return False

    res_list = [item['term'] for item in result]
    if data['suggest.dictionary'] == 'mySuggester':
        # First measurement for this query: open a new row.
        test_res.append([data['suggest.q'], data['suggest.cfq'],
                         res_list, len(res_list), mean_seconds])
    else:
        # Comparison measurement: extend the row opened by the preceding
        # 'mySuggester' call.
        test_res[-1].append(res_list)
        test_res[-1].append(len(res_list))
        test_res[-1].append(mean_seconds)
    return True
# Load the candidate context filters and query words, one per line.
with open('cities.txt') as c, open('words.txt') as w:
    cities = c.read().splitlines()
    # Skip blank lines so that an empty string is never sent as suggest.q.
    words = [line for line in w.read().splitlines() if line]

# Keep sampling random (word, city) pairs until 50 of them have produced a
# non-empty 'mySuggester' result; each such pair is then also run against
# 'fuzzySuggester' so both measurements land in the same row.
# NOTE(review): the nesting of `quantity += 1` under `if not_empty` is
# reconstructed from a whitespace-mangled source -- confirm against the
# original file.
quantity = 0
while quantity < 50:
    data = {
        'suggest.q': random.choice(words),
        'suggest.cfq': random.choice(cities),
        'suggest.count': '10000',
        'suggest.dictionary': 'mySuggester',
    }
    not_empty = request(data)
    if not_empty:
        data['suggest.dictionary'] = 'fuzzySuggester'
        request(data)
        quantity += 1

# newline='' is what the csv module requires of files handed to csv.writer;
# without it, extra carriage returns are written on platforms that use \r\n.
with open('results.csv', 'w', newline='') as myFile:
    writer = csv.writer(myFile)
    writer.writerows(test_res)
print("Writing complete")
\ No newline at end of file
test/words.py
0 → 100644
View file @
07fa415f
import
json
import
random
# Build the two input files for the request test: words.txt (one randomly
# chosen word from each of the first 500 business names) and cities.txt (every
# distinct city, wrapped in parentheses).
with open('../data/middle_final.json') as json_file, \
        open('words.txt', "w") as words_file, \
        open('cities.txt', "w") as cities_file:
    data = json.load(json_file)

    for item in data[:500]:
        words = item['name'].split()
        if not words:
            # Blank name: nothing to choose from (random.choice would raise).
            continue
        word = random.choice(words)
        # Drop a single trailing non-letter character (e.g. punctuation).
        if not word[-1].isalpha():
            word = word[:-1]
        # A one-character token such as "&" is now empty; do not emit a
        # blank line for it.
        if word:
            words_file.write(word + "\n")

    cities = {item['city'] for item in data}
    for city in cities:
        # NOTE(review): spaces are pre-encoded as %20 here; a consumer that
        # URL-encodes the line again will turn the '%' into '%25' -- confirm
        # this is the intended form.
        cities_file.write('(' + city.replace(" ", "%20") + ')' + "\n")
\ No newline at end of file
yelp/conf/solrconfig.xml
View file @
07fa415f
...
...
@@ -330,12 +330,22 @@
<str
name=
"dictionaryImpl"
>
DocumentDictionaryFactory
</str>
<str
name=
"field"
>
name
</str>
<str
name=
"suggestAnalyzerFieldType"
>
textSuggest
</str>
<!--<str name="suggestAnalyzerFieldType">textSuggest</str>-->
<str
name=
"payloadField"
>
city
</str>
<str
name=
"contextField"
>
city
</str>
<str
name=
"buildOnStartup"
>
true
</str>
<str
name=
"buildOnCommit"
>
true
</str>
</lst>
<lst
name=
"suggester"
>
<str
name=
"name"
>
fuzzySuggester
</str>
<str
name=
"lookupImpl"
>
FuzzyLookupFactory
</str>
<str
name=
"storeDir"
>
suggester_fuzzy_dir
</str>
<str
name=
"dictionaryImpl"
>
DocumentDictionaryFactory
</str>
<str
name=
"field"
>
name
</str>
<str
name=
"suggestAnalyzerFieldType"
>
textSuggest
</str>
<str
name=
"buildOnStartup"
>
true
</str>
<str
name=
"buildOnCommit"
>
true
</str>
</lst>
</searchComponent>
<requestHandler
name=
"/suggest"
class=
"solr.SearchHandler"
startup=
"lazy"
>
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment