Preload and precompress static assets in CorePlugin #708

Merged
merged 3 commits on Nov 28, 2017

Changes from 1 commit

25 changes: 17 additions & 8 deletions tensorboard/backend/http_util.py
@@ -119,18 +119,24 @@ def Respond(request,
content = tf.compat.as_bytes(content, charset)
if textual and not charset_match and mimetype not in _JSON_MIMETYPES:
content_type += '; charset=' + charset
if (not content_encoding and textual and
_ALLOWS_GZIP_PATTERN.search(request.headers.get('Accept-Encoding', ''))):
gzip_accepted = _ALLOWS_GZIP_PATTERN.search(
request.headers.get('Accept-Encoding', ''))
# Automatically gzip uncompressed text data if accepted.
if textual and not content_encoding and gzip_accepted:
orig_len = len(content)
out = six.BytesIO()
f = gzip.GzipFile(fileobj=out, mode='wb', compresslevel=3)
f.write(content)
f.close()
# Set mtime to zero to make payload for a given input deterministic.
with gzip.GzipFile(fileobj=out, mode='wb', compresslevel=3, mtime=0) as f:
f.write(content)
content = out.getvalue()
content_encoding = 'gzip'
if request.method == 'HEAD':
content = ''
headers = []
# Automatically unzip precompressed data if not accepted.
if content_encoding == 'gzip' and not gzip_accepted:
with gzip.GzipFile(fileobj=six.BytesIO(content), mode='rb') as f:

Contributor:

[optional and probably not worth doing]

If you care about shaving tens of milliseconds or so for the 0.001% of requests that don't accept gzip, you should be able to say: content = gzip.GzipFile(...); [...]; return wrappers.Response(response=content, direct_passthrough=True). Please note that in order for this to work with HTTP/1.1 you would need to get rid of the content-length and make sure Werkzeug is using chunked transfer encoding under the hood. Or say Connection: close.
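
A rough sketch of that approach, assuming http_util's existing imports (gzip, six, and werkzeug's wrappers) and a hypothetical _stream_gunzip helper; this is a sketch of the suggestion, not the code in this PR:

def _stream_gunzip(gzipped_content, content_type, chunk_size=8192):
  # Decompress lazily so the first bytes go out before the whole payload
  # has been gunzipped; direct_passthrough keeps Werkzeug from re-buffering
  # the iterable.
  def generate():
    with gzip.GzipFile(fileobj=six.BytesIO(gzipped_content), mode='rb') as f:
      while True:
        chunk = f.read(chunk_size)
        if not chunk:
          break
        yield chunk
  return wrappers.Response(
      response=generate(), content_type=content_type,
      direct_passthrough=True)

As noted above, the response would then need chunked transfer encoding or Connection: close for HTTP/1.1 clients, since Content-Length is not known up front.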

Contributor Author:

PTAL - I actually decided to implement this, thanks for the tip! Now it does a streaming gunzip, and indeed that eliminates the 20 millisecond delay in TTFB when doing on-demand decompression - it's <5 ms to the first byte returned now when decompressing on demand :)

The Content-Length hassles would have been a dealbreaker IMO, but as it so happens gzip files encode the length of the original uncompressed content in their last four bytes (well, strictly speaking they record the lower 32 bits of that length, but I think we can rely on not having pre-compressed blobs larger than 2^32 bytes = 4GB). So we can extract those and use them to send the Content-Length header before we've decompressed a single byte.
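
For reference, a sketch of reading that length from the trailer (per RFC 1952 the final four bytes are ISIZE, the uncompressed length mod 2**32, stored little-endian); the helper name is just illustrative:

import struct

def _uncompressed_length(gzipped_bytes):
  # ISIZE is the last 4 bytes of a gzip stream, immediately after the CRC32.
  return struct.unpack('<I', gzipped_bytes[-4:])[0]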

Contributor:

That's such a great detail you uncovered!

content = f.read()
content_encoding = None

headers = []
headers.append(('Content-Length', str(len(content))))
if content_encoding:
headers.append(('Content-Encoding', content_encoding))
@@ -142,5 +148,8 @@ def Respond(request,
headers.append(('Expires', '0'))
headers.append(('Cache-Control', 'no-cache, must-revalidate'))

if request.method == 'HEAD':
content = None

return wrappers.Response(
response=content, status=code, headers=headers, content_type=content_type)
28 changes: 26 additions & 2 deletions tensorboard/backend/http_util_test.py
@@ -36,14 +36,16 @@ def testHelloWorld(self):
r = http_util.Respond(q, '<b>hello world</b>', 'text/html')
self.assertEqual(r.status_code, 200)
self.assertEqual(r.response, [six.b('<b>hello world</b>')])
self.assertEqual(r.headers.get('Content-Length'), '18')

def testHeadRequest_doesNotWrite(self):
builder = wtest.EnvironBuilder(method='HEAD')
env = builder.get_environ()
request = wrappers.Request(env)
r = http_util.Respond(request, '<b>hello world</b>', 'text/html')
self.assertEqual(r.status_code, 200)
self.assertEqual(r.response, [six.b('')])
self.assertEqual(r.response, [])
self.assertEqual(r.headers.get('Content-Length'), '18')

def testPlainText_appendsUtf8ToContentType(self):
q = wrappers.Request(wtest.EnvironBuilder().get_environ())
@@ -136,6 +138,20 @@ def testAcceptGzip_compressesResponse(self):
self.assertEqual(
r.response, [fall_of_hyperion_canto1_stanza1.encode('utf-8')])

def testAcceptGzip_alreadyCompressed_sendsPrecompressedResponse(self):
gzipped_text = _gzip(six.b('hello hello hello world'))
e = wtest.EnvironBuilder(headers={'Accept-Encoding': 'gzip'}).get_environ()
q = wrappers.Request(e)
r = http_util.Respond(q, gzipped_text, 'text/plain', content_encoding='gzip')
self.assertEqual(r.response, [gzipped_text]) # Still singly zipped

def testPrecompressedResponse_noAcceptGzip_decompressesResponse(self):
orig_text = six.b('hello hello hello world')
gzipped_text = _gzip(orig_text)
q = wrappers.Request(wtest.EnvironBuilder().get_environ())
r = http_util.Respond(q, gzipped_text, 'text/plain', content_encoding='gzip')
self.assertEqual(r.response, [orig_text])

def testJson_getsAutoSerialized(self):
q = wrappers.Request(wtest.EnvironBuilder().get_environ())
r = http_util.Respond(q, [1, 2, 3], 'application/json')
@@ -147,8 +163,16 @@ def testExpires_setsCruiseControl(self):
self.assertEqual(r.headers.get('Cache-Control'), 'private, max-age=60')


def _gzip(bs):
out = six.BytesIO()
with gzip.GzipFile(fileobj=out, mode='wb') as f:
f.write(bs)
return out.getvalue()


def _gunzip(bs):
return gzip.GzipFile('', 'rb', 9, six.BytesIO(bs)).read()
with gzip.GzipFile(fileobj=six.BytesIO(bs), mode='rb') as f:
return f.read()


if __name__ == '__main__':
30 changes: 19 additions & 11 deletions tensorboard/plugins/core/core_plugin.py
@@ -19,9 +19,11 @@
from __future__ import print_function

import functools
import gzip
import mimetypes
import zipfile

import six
import tensorflow as tf
from werkzeug import utils
from werkzeug import wrappers
@@ -65,12 +67,13 @@ def get_plugin_apps(self):
'/images': self._redirect_to_index,
}
if self._assets_zip_provider:
apps['/'] = functools.partial(self._serve_asset, 'index.html')
with self._assets_zip_provider() as fp:
with zipfile.ZipFile(fp) as zip_:
for info in zip_.infolist():
path = info.filename
apps['/' + path] = functools.partial(self._serve_asset, path)
for path in zip_.namelist():
gzipped_asset_bytes = _gzip(zip_.read(path))
apps['/' + path] = functools.partial(
self._serve_asset, path, gzipped_asset_bytes)
apps['/'] = apps['/index.html']
return apps

@wrappers.Request.application
@@ -82,14 +85,11 @@ def _redirect_to_index(self, unused_request):
return utils.redirect('/')

@wrappers.Request.application
def _serve_asset(self, path, request):
"""Serves a static asset from the zip file."""
def _serve_asset(self, path, gzipped_asset_bytes, request):
"""Serves a pre-gzipped static asset from the zip file."""
mimetype = mimetypes.guess_type(path)[0] or 'application/octet-stream'
with self._assets_zip_provider() as fp:
with zipfile.ZipFile(fp) as zip_:
with zip_.open(path) as file_:
html = file_.read()
return http_util.Respond(request, html, mimetype)
return http_util.Respond(
request, gzipped_asset_bytes, mimetype, content_encoding='gzip')

@wrappers.Request.application
def _serve_logdir(self, request):
@@ -129,3 +129,11 @@ def get_first_event_timestamp(run_name):
}
run_names.sort(key=first_event_timestamps.get)
return http_util.Respond(request, run_names, 'application/json')


def _gzip(bytestring):
out = six.BytesIO()
# Set mtime to zero for deterministic results across TensorBoard launches.
with gzip.GzipFile(fileobj=out, mode='wb', compresslevel=3, mtime=0) as f:
f.write(bytestring)
return out.getvalue()