diff --git a/tests/unit/legacy/api/xmlrpc/test_xmlrpc.py b/tests/unit/legacy/api/xmlrpc/test_xmlrpc.py index aa6fdc5ec0db..5c65fe9ff426 100644 --- a/tests/unit/legacy/api/xmlrpc/test_xmlrpc.py +++ b/tests/unit/legacy/api/xmlrpc/test_xmlrpc.py @@ -56,18 +56,20 @@ def __getitem__(self, name): def execute(self): assert self.type == "bool" assert [q.to_dict() for q in self.must] == [ - {"term": {"name": "foo"}}, + {"match": {"name": {"query": "foo", "boost": 10}}}, { "bool": { "should": [ - {"term": {"summary": "one"}}, - {"term": {"summary": "two"}}, + {"match": + {"summary": {"query": "one", "boost": 5}}}, + {"match": + {"summary": {"query": "two", "boost": 5}}}, ], }, }, ] assert self.offset is None - assert self.limit == 1000 + assert self.limit == 100 assert self.step is None return [ pretend.stub( @@ -119,12 +121,14 @@ def execute(self): assert self.type == "bool" assert [q.to_dict() for q in self.must] == [ {'bool': {'should': [ - {'term': {'summary': 'fix code'}}, - {'term': {'summary': 'like this'}} + {'match': + {'summary': {'boost': 5, 'query': 'fix code'}}}, + {'match': + {'summary': {'boost': 5, 'query': 'like this'}}} ]}} ] assert self.offset is None - assert self.limit == 1000 + assert self.limit == 100 assert self.step is None return [ pretend.stub( @@ -175,18 +179,20 @@ def __getitem__(self, name): def execute(self): assert self.type == "bool" assert [q.to_dict() for q in self.must] == [ - {"term": {"name": "foo"}}, + {"match": {"name": {"query": "foo", "boost": 10}}}, { "bool": { "should": [ - {"term": {"summary": "one"}}, - {"term": {"summary": "two"}}, + {"match": + {"summary": {"query": "one", "boost": 5}}}, + {"match": + {"summary": {"query": "two", "boost": 5}}}, ], }, }, ] assert self.offset is None - assert self.limit == 1000 + assert self.limit == 100 assert self.step is None return [ pretend.stub( @@ -238,18 +244,20 @@ def __getitem__(self, name): def execute(self): assert self.type == "bool" assert [q.to_dict() for q in self.should] == [ - {"term": {"name": "foo"}}, + {"match": {"name": {"query": "foo", "boost": 10}}}, { "bool": { "should": [ - {"term": {"summary": "one"}}, - {"term": {"summary": "two"}}, + {"match": + {"summary": {"query": "one", "boost": 5}}}, + {"match": + {"summary": {"query": "two", "boost": 5}}}, ], }, }, ] assert self.offset is None - assert self.limit == 1000 + assert self.limit == 100 assert self.step is None return [ pretend.stub( @@ -301,11 +309,11 @@ def __getitem__(self, name): def execute(self): assert self.type == "bool" assert [q.to_dict() for q in self.must] == [ - {"term": {"name": "foo"}}, - {"term": {"version": "1.0"}}, + {"match": {"name": {"boost": 10, "query": "foo"}}}, + {"match": {"version": {"query": "1.0"}}}, ] assert self.offset is None - assert self.limit == 1000 + assert self.limit == 100 assert self.step is None return [ pretend.stub( @@ -357,10 +365,10 @@ def __getitem__(self, name): def execute(self): assert self.type == "bool" assert [q.to_dict() for q in self.must] == [ - {"term": {"name": "foo"}}, + {"match": {"name": {"query": "foo", "boost": 10}}}, ] assert self.offset is None - assert self.limit == 1000 + assert self.limit == 100 assert self.step is None return [ pretend.stub( diff --git a/warehouse/legacy/api/xmlrpc/views.py b/warehouse/legacy/api/xmlrpc/views.py index fb5ebc696bac..e9ae8889195c 100644 --- a/warehouse/legacy/api/xmlrpc/views.py +++ b/warehouse/legacy/api/xmlrpc/views.py @@ -30,6 +30,7 @@ from warehouse.packaging.models import ( Role, Project, Release, File, JournalEntry, release_classifiers, ) +from warehouse.search.queries import SEARCH_BOOSTS _MAX_MULTICALLS = 20 @@ -107,10 +108,13 @@ def search(request, spec, operator="and"): for field, value in sorted(spec.items()): q = None for item in value: + kw = {"query": item} + if field in SEARCH_BOOSTS: + kw["boost"] = SEARCH_BOOSTS[field] if q is None: - q = Q("term", **{field: item}) + q = Q("match", **{field: kw}) else: - q |= Q("term", **{field: item}) + q |= Q("match", **{field: kw}) queries.append(q) if operator == "and": @@ -118,7 +122,7 @@ def search(request, spec, operator="and"): else: query = request.es.query("bool", should=queries) - results = query[:1000].execute() + results = query[:100].execute() request.registry.datadog.histogram('warehouse.xmlrpc.search.results', len(results)) diff --git a/warehouse/search/queries.py b/warehouse/search/queries.py new file mode 100644 index 000000000000..917d3bf1ff90 --- /dev/null +++ b/warehouse/search/queries.py @@ -0,0 +1,35 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +SEARCH_FIELDS = [ + "author", "author_email", "description", "download_url", "home_page", + "keywords", "license", "maintainer", "maintainer_email", "normalized_name", + "platform", "summary", +] +SEARCH_BOOSTS = { + "name": 10, + "normalized_name": 10, + "description": 5, + "keywords": 5, + "summary": 5, +} +SEARCH_FILTER_ORDER = ( + "Framework", + "Topic", + "Development Status", + "License", + "Programming Language", + "Operating System", + "Environment", + "Intended Audience", + "Natural Language", +) diff --git a/warehouse/views.py b/warehouse/views.py index c17ed5024ed8..5a4f5237f23e 100644 --- a/warehouse/views.py +++ b/warehouse/views.py @@ -36,34 +36,15 @@ from warehouse.packaging.models import ( Project, Release, File, release_classifiers, ) +from warehouse.search.queries import ( + SEARCH_BOOSTS, + SEARCH_FIELDS, + SEARCH_FILTER_ORDER, +) from warehouse.utils.row_counter import RowCount from warehouse.utils.paginate import ElasticsearchPage, paginate_url_factory -SEARCH_FIELDS = [ - "author", "author_email", "description", "download_url", "home_page", - "keywords", "license", "maintainer", "maintainer_email", "normalized_name", - "platform", "summary", -] -SEARCH_BOOSTS = { - "normalized_name": 10, - "description": 5, - "keywords": 5, - "summary": 5, -} -SEARCH_FILTER_ORDER = ( - "Framework", - "Topic", - "Development Status", - "License", - "Programming Language", - "Operating System", - "Environment", - "Intended Audience", - "Natural Language", -) - - # 403, 404, 410, 500,