Skip to content
This repository was archived by the owner on Jan 11, 2023. It is now read-only.

Commit af33c09

Browse files
Conduitryrubenaloker
authored
in exporting, handle binary files from server routes (#1398)
- follow <img src>, <source src>, <source srcset> when crawling
- don't corrupt binary responses from server routes

Co-authored-by: ruben <[email protected]>
Co-authored-by: Andre Loker <[email protected]>
1 parent 1b4b37d commit af33c09

File tree

11 files changed

+151
-19
lines changed

11 files changed

+151
-19
lines changed

runtime/src/server/middleware/get_server_route_handler.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ export function get_server_route_handler(routes: ServerRoute[]) {
3737
method: req.method,
3838
status: res.statusCode,
3939
type: headers['content-type'],
40-
body: Buffer.concat(chunks).toString()
40+
body: Buffer.concat(chunks)
4141
});
4242
};
4343
}

src/api/export.ts

+55-11
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,29 @@ function get_href(attrs: string) {
5050
return match && (match[1] || match[2] || match[3]);
5151
}
5252

53+
/**
 * Reads the value of the first `src=` occurrence in a tag's attribute text.
 *
 * The value may be double-quoted, single-quoted or unquoted.
 *
 * @param attrs Attribute text of a tag, e.g. `src="a.png" alt="x"`.
 * @returns The attribute value; `null` when no `src=` is present, and
 *          `undefined` when the value is empty.
 */
function get_src(attrs: string) {
	const found = /src\s*=\s*(?:"(.*?)"|'(.*?)'|([^\s>]*))/.exec(attrs);
	if (!found) return found;

	// Only one of the three capture groups can hold a non-empty value.
	const [, double_quoted, single_quoted, bare] = found;
	return double_quoted || single_quoted || bare;
}
57+
58+
/**
 * Extracts every image URL listed in the `srcset` attribute found in `attrs`.
 *
 * The attribute value is tokenised along the lines of the srcset parsing
 * algorithm (https://html.spec.whatwg.org/multipage/images.html#srcset-attribute):
 * candidates are separated by commas, each candidate is a URL optionally
 * followed by whitespace and a descriptor (`640w`, `2x`, `1e+2x`, ...).
 * Descriptors are skipped without being validated, because a crawler only
 * needs the URLs.
 *
 * @param attrs Markup containing a `srcset` attribute (quoted or unquoted).
 * @returns The URLs in document order; an empty array when there is no
 *          `srcset` attribute or when it is empty.
 */
export function get_srcset_urls(attrs: string): string[] {
	const results: string[] = [];

	// Note that the srcset allows any ASCII whitespace, including newlines,
	// hence the dotAll (`s`) flag.
	const match = /srcset\s*=\s*(?:"(.*?)"|'(.*?)'|([^\s>]*))/s.exec(attrs);
	if (!match) return results;

	// `srcset=""` leaves every capture group empty or undefined. Fall back to
	// '' so that `undefined` is never parsed as the literal text "undefined".
	const attr_content = match[1] || match[2] || match[3] || '';
	const length = attr_content.length;
	const whitespace = /\s/;

	let position = 0;
	while (position < length) {
		// Skip the whitespace and commas that separate candidates.
		while (position < length) {
			const char = attr_content.charAt(position);
			if (char !== ',' && !whitespace.test(char)) break;
			position += 1;
		}
		if (position >= length) break;

		// The URL is the run of non-whitespace characters starting here.
		const start = position;
		while (position < length && !whitespace.test(attr_content.charAt(position))) {
			position += 1;
		}
		const token = attr_content.slice(start, position);

		if (token.endsWith(',')) {
			// A trailing comma ends the candidate: there is no descriptor.
			results.push(token.replace(/,+$/, ''));
			continue;
		}

		results.push(token);

		// Whatever follows up to the next comma is this candidate's descriptor.
		const next_comma = attr_content.indexOf(',', position);
		position = next_comma === -1 ? length : next_comma + 1;
	}

	return results;
}
75+
5376
export { _export as export };
5477

5578
async function _export({
@@ -109,7 +132,7 @@ async function _export({
109132
const seen = new Set();
110133
const saved = new Set();
111134

112-
function save(url: string, status: number, type: string, body: string) {
135+
function save(url: string, status: number, type: string, body: string | ArrayBuffer) {
113136
const { pathname } = resolve(origin, url);
114137
let file = decodeURIComponent(pathname.slice(1));
115138

@@ -122,20 +145,27 @@ async function _export({
122145
if (!file.endsWith('.html')) {
123146
file = file === '' ? 'index.html' : `${file}/index.html`;
124147
}
125-
body = minify_html(body);
148+
149+
if (typeof body === 'string') {
150+
body = minify_html(body);
151+
} else {
152+
oninfo({ message: `Content of {url} has content-type text/html but the content was received as a binary buffer. The HTML will not be minified.` });
153+
}
126154
}
127155

156+
const buffer = Buffer.from(body);
157+
128158
onfile({
129159
file,
130-
size: body.length,
160+
size: buffer.byteLength,
131161
status
132162
});
133163

134164
const export_file = path.join(export_dir, file);
135165
if (fs.existsSync(export_file)) return;
136166
mkdirp(path.dirname(export_file));
137167

138-
return writeFile(export_file, body);
168+
return writeFile(export_file, buffer);
139169
}
140170

141171
function handle(url: URL, fetchOpts: FetchOpts, addCallback: Function) {
@@ -184,7 +214,9 @@ async function _export({
184214

185215
let type = response.headers.get('Content-Type');
186216

187-
let body = await response.text();
217+
let body = type.startsWith('text/')
218+
? await response.text()
219+
: await response.arrayBuffer();
188220

189221
const range = ~~(response.status / 100);
190222

@@ -193,26 +225,38 @@ async function _export({
193225
const link = parseLinkHeader(response.headers.get('Link') || '');
194226
link.refs.forEach((ref: Ref) => {
195227
if (ref.rel === 'preload') {
196-
body = body.replace('</head>',
228+
body = (body as string).replace('</head>',
197229
`<link rel="preload" as=${JSON.stringify(ref.as)} href=${JSON.stringify(ref.uri)}></head>`);
198230
}
199231
});
200232

201233
if (pathname !== '/service-worker-index.html') {
202-
const cleaned = clean_html(body);
234+
const cleaned = clean_html(body as string);
203235

204236
const base_match = /<base ([\s\S]+?)>/m.exec(cleaned);
205237
const base_href = base_match && get_href(base_match[1]);
206238
const base = resolve(url.href, base_href);
207239

208240
let match;
209-
const pattern = /<a ([\s\S]+?)>/gm;
241+
const pattern = /<(a|img|source)\s+([\s\S]+?)>/gm;
210242

211243
while (match = pattern.exec(cleaned)) {
212-
const attrs = match[1];
213-
const href = get_href(attrs);
244+
let hrefs: string[] = [];
245+
const element = match[1];
246+
const attrs = match[2];
247+
248+
if (element === 'a') {
249+
hrefs.push(get_href(attrs));
250+
} else {
251+
if (element === 'img') {
252+
hrefs.push(get_src(attrs));
253+
}
254+
hrefs.push.apply(hrefs, get_srcset_urls(attrs));
255+
}
256+
257+
hrefs = hrefs.filter(Boolean);
214258

215-
if (href) {
259+
for (const href of hrefs) {
216260
const url = resolve(base.href, href);
217261

218262
if (url.protocol === protocol && url.host === host) {
5.21 KB
Loading
15 KB
Loading

test/apps/export/content/test.pdf

5.55 KB
Binary file not shown.

test/apps/export/src/routes/blog/[slug].html

+12-1
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,15 @@
1010
export let post;
1111
</script>
1212

13-
<h1>{post.title}</h1>
13+
<h1>{post.title}</h1>
14+
15+
{#if post.src}
16+
<picture>
17+
<source srcset={post.srcset}/>
18+
<img src={post.src}/>
19+
</picture>
20+
{/if}
21+
22+
{#if post.pdf}
23+
<a href={post.pdf}>{post.pdf}</a>
24+
{/if}
+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
export default [
2-
{ slug: 'foo', title: 'once upon a foo' },
2+
{ slug: 'foo', title: 'once upon a foo', src: 'img/example-512.png', srcset: 'img/example-512.png 512w, img/example-192.png 192w', pdf: 'pdfs/test.pdf' },
33
{ slug: 'bar', title: 'a bar is born' },
44
{ slug: 'baz', title: 'bazzily ever after' }
5-
];
5+
];
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
const fs = require('fs');
2+
const path = require('path');
3+
const cwd = process.cwd();
4+
5+
/**
 * Server route that streams `content/<slug>.png` (relative to the process
 * working directory) as `image/png`.
 *
 * Responds with 404 when the file cannot be read, so the request does not
 * hang and the stream's 'error' event does not go unhandled.
 */
export function get(req, res) {
	const { slug } = req.params;
	const image = path.join(cwd, `/content/${slug}.png`);

	const stream = fs.createReadStream(image);

	// Send the success headers only once the file has been opened.
	stream.on('open', () => {
		res.writeHead(200, { 'Content-Type': 'image/png' });
		stream.pipe(res);
	});

	// `pipe` does not end the response when the source fails, so do it here.
	stream.on('error', () => {
		if (!res.headersSent) res.writeHead(404);
		res.end();
	});
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
const fs = require('fs');
2+
const path = require('path');
3+
const cwd = process.cwd();
4+
5+
/**
 * Server route that streams `content/<slug>.pdf` (relative to the process
 * working directory) as `application/pdf`.
 *
 * Responds with 404 when the file cannot be read, so the request does not
 * hang and the stream's 'error' event does not go unhandled.
 */
export function get(req, res, next) {
	const { slug } = req.params;
	const image = path.join(cwd, `/content/${slug}.pdf`);

	const stream = fs.createReadStream(image);

	// Send the success headers only once the file has been opened.
	stream.on('open', () => {
		res.writeHead(200, { 'Content-Type': 'application/pdf' });
		stream.pipe(res);
	});

	// `pipe` does not end the response when the source fails, so do it here.
	stream.on('error', () => {
		if (!res.headersSent) res.writeHead(404);
		res.end();
	});
}

test/apps/export/test.ts

+21-4
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,32 @@ describe('export', function() {
4242
'service-worker-index.html',
4343
'service-worker.js',
4444
'test.pdf',
45+
'img/example-192.png',
46+
'img/example-512.png',
47+
'pdfs/test.pdf',
4548
...boom
4649
].sort());
4750
});
4851

4952
it('does not corrupt binary file links (like pdf)', () => {
50-
const input = readFileSync(`${__dirname}/static/test.pdf`)
51-
const output = readFileSync(`${__dirname}/__sapper__/export/test.pdf`)
52-
assert.ok(input.equals(output))
53-
})
53+
const input = readFileSync(`${__dirname}/static/test.pdf`);
54+
const output = readFileSync(`${__dirname}/__sapper__/export/test.pdf`);
55+
assert.ok(input.equals(output));
56+
});
57+
58+
it('does not corrupt image files from server routes', () => {
59+
for(const file of ['example-192.png', 'example-512.png']) {
60+
const input = readFileSync(`${__dirname}/content/${file}`);
61+
const output = readFileSync(`${__dirname}/__sapper__/export/img/${file}`);
62+
assert.ok(input.equals(output));
63+
}
64+
});
65+
66+
it('does not corrupt pdf files from server routes', () => {
67+
const input = readFileSync(`${__dirname}/content/test.pdf`);
68+
const output = readFileSync(`${__dirname}/__sapper__/export/pdfs/test.pdf`);
69+
assert.ok(input.equals(output));
70+
});
5471

5572
// TODO test timeout, basepath
5673
});

test/unit/srcset/test.ts

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import * as assert from "assert";
2+
import { get_srcset_urls } from "../../../src/api/export";
3+
4+
describe("get_srcset_urls", () => {
	// Table of inputs: each entry is one tag's markup and the URLs that
	// get_srcset_urls is expected to return for it.
	const cases = [
		{
			title: "should parse single entry without descriptor",
			input: "<source srcset=\"assets/image/1.jpg\"/>",
			expected: ["assets/image/1.jpg"]
		},
		{
			title: "should parse single entry with width descriptor",
			input: "<source srcset=\"assets/image/1.jpg 1234w\"/>",
			expected: ["assets/image/1.jpg"]
		},
		{
			title: "should parse single entry with density descriptor",
			input: "<source srcset=\"assets/image/1.jpg -123.456x\"/>",
			expected: ["assets/image/1.jpg"]
		},
		{
			title: "should parse multiple entries with different descriptors",
			input: "<source srcset=\" assets/image/1.jpg -1.3E-3x,assets/image/2.jpg 2560w,assets/image/3.jpg, \nassets/image/4.jpg 2.5x , assets/image/5.jpg 640w \"/>",
			expected: [
				"assets/image/1.jpg",
				"assets/image/2.jpg",
				"assets/image/3.jpg",
				"assets/image/4.jpg",
				"assets/image/5.jpg",
			]
		}
	];

	cases.forEach(({ title, input, expected }) => {
		it(title, () => {
			assert.deepEqual(get_srcset_urls(input), expected);
		});
	});
});

0 commit comments

Comments
 (0)