Skip to content
This repository was archived by the owner on Sep 12, 2018. It is now read-only.

Commit dde538b

Browse files
committed
search: Stub out a preliminary search index
This implements the GET /v1/search endpoint using a simple JSON object for storage and a global Python dict for live access. That's not going to scale well for large registries, but it's a start. Benefits of this approach: * We automatically support all the existing storage backends. * The in-memory Python dict gives fast read-only access. Drawbacks to this approach: * Searches iterate over all key/value pairs (there's no precompiled index), so they'll be slow if you have tons of repositories. * We write the whole index to the storage backend whenever a repository is created, updated, or deleted. * If you have multiple registries sharing the same storage backend (e.g. gunicorn workers), your in-memory Python indexes will go out of sync, and may end up clobbering changes that other registries made to the stored index. So this is basically a proof-of-concept for small registries. A more serious implementation would use an external SQL database to overcome all of these limitations. That should be easy to add later; for now I'm focusing on getting the internal logic right. Slotting in a better backend later should be easy. Because the search index code is only bound to registry.index by signals, I've moved it into its own registry.search module. I added a commented-out line, "#index = Index()", in the global-declaration section of the new module, to remind folks that we would be creating a global 'index' object. We can't actually declare the global at that point though, because the Index class hasn't been defined yet. There's a version stamp in the saved index, and a missing index file counts as v0, so folks who have been running earlier versions of docker-registry will have their index built automatically the next time they start the app.
1 parent 46d7577 commit dde538b

File tree

4 files changed

+122
-5
lines changed

4 files changed

+122
-5
lines changed

lib/storage/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,15 @@ class Storage(object):
1212
"""Storage is organized as follow:
1313
$ROOT/images/<image_id>/json
1414
$ROOT/images/<image_id>/layer
15+
$ROOT/index/json
1516
$ROOT/repositories/<namespace>/<repository_name>/<tag_name>
1617
"""
1718

1819
# Useful if we want to change those locations later without rewriting
1920
# the code which uses Storage
2021
repositories = 'repositories'
2122
images = 'images'
23+
index = 'index'
2224
# Set the IO buffer to 128kB
2325
buffer_size = 128 * 1024
2426
# By default no storage plugin supports it
@@ -85,6 +87,9 @@ def private_flag_path(self, namespace, repository):
8587
def is_private(self, namespace, repository):
8688
return self.exists(self.private_flag_path(namespace, repository))
8789

90+
def index_path(self):
91+
return '{0}/json'.format(self.index)
92+
8893
def get_content(self, path):
8994
raise NotImplementedError
9095

registry/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from .app import app
55
from .tags import *
66
from .images import *
7+
from . import search as search
78
from .status import *
89

910
import config

registry/index.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,3 @@ def delete_repository_images(namespace, repository):
133133
@toolkit.parse_repository_name
134134
def put_repository_auth(namespace, repository):
135135
return toolkit.response('OK')
136-
137-
138-
@app.route('/v1/search', methods=['GET'])
139-
def get_search():
140-
return toolkit.response({})

registry/search.py

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
__all__ = ['get_search']
2+
3+
import flask
4+
import simplejson as json
5+
6+
import signals
7+
import storage
8+
import toolkit
9+
10+
from .app import app
11+
12+
13+
store = storage.load()
14+
#index = Index()
15+
16+
17+
class Index(dict):
    """Dictionary-backed search index of repositories.

    Keys are '{namespace}/{repository}' strings and values are
    description strings.  For example:

      index['library/ubuntu'] = 'An ubuntu image...'

    The index is persisted through the module-level ``store`` at
    ``store.index_path()`` as a JSON object with two members:
    'version' (the index format version) and 'index' (the mapping
    itself).
    """

    def __init__(self):
        """Load the index, then subscribe to the repository signals."""
        super(Index, self).__init__()
        self.version = 1
        self.load()
        # Creation and update share a receiver: both just (re)write the
        # entry for the repository and persist the index.
        upsert = self._handler_repository_created
        signals.repository_created.connect(upsert)
        signals.repository_updated.connect(upsert)
        signals.repository_deleted.connect(self._handler_repository_deleted)

    def load(self):
        """Replace the in-memory contents with the stored index.

        If reading the stored index raises OSError or IOError, the
        index is rebuilt from the repository listing and saved back to
        the store.

        Raises NotImplementedError if the stored index carries a
        version different from ``self.version``.
        """
        rebuilt = False
        try:
            raw = store.get_content(store.index_path())
        except (OSError, IOError):
            entries = self._regenerate_index()
            rebuilt = True
        else:
            stored = json.loads(raw)
            stored_version = stored['version']
            if stored_version != self.version:
                message = 'unrecognized search index version {0}'.format(
                    stored_version)
                raise NotImplementedError(message)
            entries = stored['index']
        self.clear()
        self.update(entries)
        if rebuilt:
            # Persist the freshly built index.
            self.save()

    def save(self):
        """Write the whole index, with its version stamp, to the store."""
        target = store.index_path()
        payload = json.dumps({
            'version': self.version,
            'index': dict(self),
        })
        store.put_content(target, payload)

    @staticmethod
    def _list_or_empty(path):
        """Return the store's listing of ``path``, or [] on OSError."""
        try:
            return list(store.list_directory(path=path))
        except OSError:
            return []

    def _regenerate_index(self):
        """Build and return a fresh index mapping from the store.

        Lists the entries under ``store.repositories`` (namespaces) and
        the entries under each of those (repositories); the final path
        component of each is used as the name.  Directories whose
        listing raises OSError are treated as empty.
        """
        description = ''  # TODO(wking): store descriptions
        rebuilt = {}
        for namespace_path in self._list_or_empty(store.repositories):
            namespace = namespace_path.rsplit('/', 1)[-1]
            for repository_path in self._list_or_empty(namespace_path):
                repository = repository_path.rsplit('/', 1)[-1]
                rebuilt['{0}/{1}'.format(namespace, repository)] = description
        return rebuilt

    def _handler_repository_created(
            self, sender, namespace, repository, value):
        """Signal receiver: add or overwrite a repository entry and save."""
        description = ''  # TODO(wking): store descriptions
        self['{0}/{1}'.format(namespace, repository)] = description
        self.save()

    def _handler_repository_deleted(self, sender, namespace, repository):
        """Signal receiver: drop a repository entry, saving if it existed."""
        key = '{0}/{1}'.format(namespace, repository)
        if key in self:
            del self[key]
            self.save()
96+
97+
98+
index = Index()
99+
100+
101+
@app.route('/v1/search', methods=['GET'])
def get_search():
    """Handle GET /v1/search.

    Reads the search term from the 'q' query argument (defaulting to
    the empty string) and returns every index entry whose name or
    description contains it as a substring, together with the query
    and the number of matches.
    """
    search_term = flask.request.args.get('q', '')
    results = []
    for name, description in index.items():
        if search_term in name or search_term in description:
            results.append({
                'name': name,
                'description': description,
            })
    return toolkit.response({
        'query': search_term,
        'num_results': len(results),
        'results': results,
    })

0 commit comments

Comments
 (0)