Skip to content

Commit 047ec67

Browse files
authored
xmlrpc search improvements (#3827)
Harmonizes xmlrpc search with our main search. This dramatically improves its value.
1 parent 6a18349 commit 047ec67

File tree

4 files changed

+75
-47
lines changed

4 files changed

+75
-47
lines changed

tests/unit/legacy/api/xmlrpc/test_xmlrpc.py

Lines changed: 28 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -56,18 +56,20 @@ def __getitem__(self, name):
5656
def execute(self):
5757
assert self.type == "bool"
5858
assert [q.to_dict() for q in self.must] == [
59-
{"term": {"name": "foo"}},
59+
{"match": {"name": {"query": "foo", "boost": 10}}},
6060
{
6161
"bool": {
6262
"should": [
63-
{"term": {"summary": "one"}},
64-
{"term": {"summary": "two"}},
63+
{"match":
64+
{"summary": {"query": "one", "boost": 5}}},
65+
{"match":
66+
{"summary": {"query": "two", "boost": 5}}},
6567
],
6668
},
6769
},
6870
]
6971
assert self.offset is None
70-
assert self.limit == 1000
72+
assert self.limit == 100
7173
assert self.step is None
7274
return [
7375
pretend.stub(
@@ -119,12 +121,14 @@ def execute(self):
119121
assert self.type == "bool"
120122
assert [q.to_dict() for q in self.must] == [
121123
{'bool': {'should': [
122-
{'term': {'summary': 'fix code'}},
123-
{'term': {'summary': 'like this'}}
124+
{'match':
125+
{'summary': {'boost': 5, 'query': 'fix code'}}},
126+
{'match':
127+
{'summary': {'boost': 5, 'query': 'like this'}}}
124128
]}}
125129
]
126130
assert self.offset is None
127-
assert self.limit == 1000
131+
assert self.limit == 100
128132
assert self.step is None
129133
return [
130134
pretend.stub(
@@ -175,18 +179,20 @@ def __getitem__(self, name):
175179
def execute(self):
176180
assert self.type == "bool"
177181
assert [q.to_dict() for q in self.must] == [
178-
{"term": {"name": "foo"}},
182+
{"match": {"name": {"query": "foo", "boost": 10}}},
179183
{
180184
"bool": {
181185
"should": [
182-
{"term": {"summary": "one"}},
183-
{"term": {"summary": "two"}},
186+
{"match":
187+
{"summary": {"query": "one", "boost": 5}}},
188+
{"match":
189+
{"summary": {"query": "two", "boost": 5}}},
184190
],
185191
},
186192
},
187193
]
188194
assert self.offset is None
189-
assert self.limit == 1000
195+
assert self.limit == 100
190196
assert self.step is None
191197
return [
192198
pretend.stub(
@@ -238,18 +244,20 @@ def __getitem__(self, name):
238244
def execute(self):
239245
assert self.type == "bool"
240246
assert [q.to_dict() for q in self.should] == [
241-
{"term": {"name": "foo"}},
247+
{"match": {"name": {"query": "foo", "boost": 10}}},
242248
{
243249
"bool": {
244250
"should": [
245-
{"term": {"summary": "one"}},
246-
{"term": {"summary": "two"}},
251+
{"match":
252+
{"summary": {"query": "one", "boost": 5}}},
253+
{"match":
254+
{"summary": {"query": "two", "boost": 5}}},
247255
],
248256
},
249257
},
250258
]
251259
assert self.offset is None
252-
assert self.limit == 1000
260+
assert self.limit == 100
253261
assert self.step is None
254262
return [
255263
pretend.stub(
@@ -301,11 +309,11 @@ def __getitem__(self, name):
301309
def execute(self):
302310
assert self.type == "bool"
303311
assert [q.to_dict() for q in self.must] == [
304-
{"term": {"name": "foo"}},
305-
{"term": {"version": "1.0"}},
312+
{"match": {"name": {"boost": 10, "query": "foo"}}},
313+
{"match": {"version": {"query": "1.0"}}},
306314
]
307315
assert self.offset is None
308-
assert self.limit == 1000
316+
assert self.limit == 100
309317
assert self.step is None
310318
return [
311319
pretend.stub(
@@ -357,10 +365,10 @@ def __getitem__(self, name):
357365
def execute(self):
358366
assert self.type == "bool"
359367
assert [q.to_dict() for q in self.must] == [
360-
{"term": {"name": "foo"}},
368+
{"match": {"name": {"query": "foo", "boost": 10}}},
361369
]
362370
assert self.offset is None
363-
assert self.limit == 1000
371+
assert self.limit == 100
364372
assert self.step is None
365373
return [
366374
pretend.stub(

warehouse/legacy/api/xmlrpc/views.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from warehouse.packaging.models import (
3131
Role, Project, Release, File, JournalEntry, release_classifiers,
3232
)
33+
from warehouse.search.queries import SEARCH_BOOSTS
3334

3435

3536
_MAX_MULTICALLS = 20
@@ -107,18 +108,21 @@ def search(request, spec, operator="and"):
107108
for field, value in sorted(spec.items()):
108109
q = None
109110
for item in value:
111+
kw = {"query": item}
112+
if field in SEARCH_BOOSTS:
113+
kw["boost"] = SEARCH_BOOSTS[field]
110114
if q is None:
111-
q = Q("term", **{field: item})
115+
q = Q("match", **{field: kw})
112116
else:
113-
q |= Q("term", **{field: item})
117+
q |= Q("match", **{field: kw})
114118
queries.append(q)
115119

116120
if operator == "and":
117121
query = request.es.query("bool", must=queries)
118122
else:
119123
query = request.es.query("bool", should=queries)
120124

121-
results = query[:1000].execute()
125+
results = query[:100].execute()
122126

123127
request.registry.datadog.histogram('warehouse.xmlrpc.search.results',
124128
len(results))

warehouse/search/queries.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Licensed under the Apache License, Version 2.0 (the "License");
2+
# you may not use this file except in compliance with the License.
3+
# You may obtain a copy of the License at
4+
#
5+
# http://www.apache.org/licenses/LICENSE-2.0
6+
#
7+
# Unless required by applicable law or agreed to in writing, software
8+
# distributed under the License is distributed on an "AS IS" BASIS,
9+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10+
# See the License for the specific language governing permissions and
11+
# limitations under the License.
12+
13+
SEARCH_FIELDS = [
14+
"author", "author_email", "description", "download_url", "home_page",
15+
"keywords", "license", "maintainer", "maintainer_email", "normalized_name",
16+
"platform", "summary",
17+
]
18+
SEARCH_BOOSTS = {
19+
"name": 10,
20+
"normalized_name": 10,
21+
"description": 5,
22+
"keywords": 5,
23+
"summary": 5,
24+
}
25+
SEARCH_FILTER_ORDER = (
26+
"Framework",
27+
"Topic",
28+
"Development Status",
29+
"License",
30+
"Programming Language",
31+
"Operating System",
32+
"Environment",
33+
"Intended Audience",
34+
"Natural Language",
35+
)

warehouse/views.py

Lines changed: 5 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -36,34 +36,15 @@
3636
from warehouse.packaging.models import (
3737
Project, Release, File, release_classifiers,
3838
)
39+
from warehouse.search.queries import (
40+
SEARCH_BOOSTS,
41+
SEARCH_FIELDS,
42+
SEARCH_FILTER_ORDER,
43+
)
3944
from warehouse.utils.row_counter import RowCount
4045
from warehouse.utils.paginate import ElasticsearchPage, paginate_url_factory
4146

4247

43-
SEARCH_FIELDS = [
44-
"author", "author_email", "description", "download_url", "home_page",
45-
"keywords", "license", "maintainer", "maintainer_email", "normalized_name",
46-
"platform", "summary",
47-
]
48-
SEARCH_BOOSTS = {
49-
"normalized_name": 10,
50-
"description": 5,
51-
"keywords": 5,
52-
"summary": 5,
53-
}
54-
SEARCH_FILTER_ORDER = (
55-
"Framework",
56-
"Topic",
57-
"Development Status",
58-
"License",
59-
"Programming Language",
60-
"Operating System",
61-
"Environment",
62-
"Intended Audience",
63-
"Natural Language",
64-
)
65-
66-
6748
# 403, 404, 410, 500,
6849

6950

0 commit comments

Comments
 (0)