Skip to content

Commit 9cf8cab

Browse files
authored
Turbopack: Support data URI sources (#76865)
These are technically not entirely spec-compliant, as identical data URLs in different folders aren't deduplicated (each one generates a new module instead). But this is good enough for now.

You can now do

```js
import { bar } from "data:text/javascript,export var bar = 1234;";
import data from 'data:application/json,{ "foo": 1234 }';
import dataURLEncoded from 'data:application/json,%7B%20%22foo%22%3A%201234%20%7D';
import dataBase64Encoded from 'data:application/json;base64,eyAiZm9vIjogMTIzNCB9';
import "data:text/css,body { color: red }";

it("support data URL imports", () => {
  expect(bar).toEqual(1234);
  expect(data).toEqual({ foo: 1234 });
  expect(dataURLEncoded).toEqual({ foo: 1234 });
  expect(dataBase64Encoded).toEqual({ foo: 1234 });
});
```

Only data URLs in JS ESM imports are turned into modules. For JS, turning data URLs into separate modules is zero-cost, because they all end up in the same bundle anyway, but that is not the case for image data URLs in CSS.
1 parent bc2c5bc commit 9cf8cab

File tree

277 files changed

+528
-262
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

277 files changed

+528
-262
lines changed

Diff for: Cargo.lock

+2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Diff for: Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,7 @@ const_format = "0.2.30"
342342
criterion = "0.5.1"
343343
crossbeam-channel = "0.5.8"
344344
dashmap = "6.1.0"
345+
data-encoding = "2.3.3"
345346
dhat = { version = "0.3.2" }
346347
dialoguer = "0.10.3"
347348
dunce = "1.0.3"

Diff for: turbopack/crates/turbopack-core/Cargo.toml

+2
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ async-trait = { workspace = true }
1818
auto-hash-map = { workspace = true }
1919
browserslist-rs = { workspace = true }
2020
const_format = { workspace = true }
21+
data-encoding = { workspace = true }
2122
either = { workspace = true }
2223
futures = { workspace = true }
2324
indexmap = { workspace = true }
@@ -42,6 +43,7 @@ turbo-tasks = { workspace = true }
4243
turbo-tasks-env = { workspace = true }
4344
turbo-tasks-fs = { workspace = true }
4445
turbo-tasks-hash = { workspace = true }
46+
urlencoding = { workspace = true }
4547

4648
[build-dependencies]
4749
turbo-tasks-build = { workspace = true }
+80
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
use anyhow::{bail, Result};
2+
use turbo_rcstr::RcStr;
3+
use turbo_tasks::{ResolvedVc, Vc};
4+
use turbo_tasks_fs::{rope::Rope, File, FileContent, FileSystemPath};
5+
use turbo_tasks_hash::{encode_hex, hash_xxh3_hash64};
6+
7+
use crate::{
8+
asset::{Asset, AssetContent},
9+
ident::AssetIdent,
10+
source::Source,
11+
};
12+
13+
/// A [Source] whose content is the decoded payload of a `data:` URI rather
14+
/// than something read from a file.
15+
#[turbo_tasks::value]
16+
pub struct DataUriSource {
17+
/// Media type of the URI. Only the part before the first `;` is used as
/// the content type of the ident.
media_type: RcStr,
18+
/// Payload encoding: `"base64"`, or empty for a percent-encoded payload.
/// Any other value makes reading the content fail.
encoding: RcStr,
19+
/// The still-encoded payload of the URI.
data: ResolvedVc<RcStr>,
20+
/// Path that the generated `data:<hash>` file name is joined onto to form
/// the ident path.
lookup_path: ResolvedVc<FileSystemPath>,
21+
}
22+
23+
#[turbo_tasks::value_impl]
24+
impl DataUriSource {
25+
#[turbo_tasks::function]
26+
pub fn new(
27+
media_type: RcStr,
28+
encoding: RcStr,
29+
data: ResolvedVc<RcStr>,
30+
lookup_path: ResolvedVc<FileSystemPath>,
31+
) -> Vc<Self> {
32+
Self::cell(DataUriSource {
33+
media_type,
34+
encoding,
35+
data,
36+
lookup_path,
37+
})
38+
}
39+
}
40+
41+
#[turbo_tasks::value_impl]
42+
impl Source for DataUriSource {
43+
#[turbo_tasks::function]
44+
async fn ident(&self) -> Result<Vc<AssetIdent>> {
45+
let content_type = self.media_type.split(";").next().unwrap().into();
46+
let filename = format!(
47+
"data:{}",
48+
&encode_hex(hash_xxh3_hash64((
49+
&*self.data.await?,
50+
&self.media_type,
51+
&self.encoding
52+
)))[0..6]
53+
);
54+
Ok(
55+
AssetIdent::from_path(self.lookup_path.join(filename.into()))
56+
.with_content_type(content_type),
57+
)
58+
}
59+
}
60+
61+
#[turbo_tasks::value_impl]
62+
impl Asset for DataUriSource {
63+
#[turbo_tasks::function]
64+
async fn content(&self) -> Result<Vc<AssetContent>> {
65+
let data = self.data.await?;
66+
let rope = if self.encoding == "base64" {
67+
let decoded = data_encoding::BASE64.decode(data.as_bytes())?;
68+
// TODO this should read self.media_type and potentially use a different encoding
69+
Rope::from(decoded)
70+
} else if self.encoding.is_empty() {
71+
let decoded = urlencoding::decode(data.as_str())?.into_owned();
72+
Rope::from(decoded)
73+
} else {
74+
bail!("Unsupported data URL encoding: {}", self.encoding);
75+
};
76+
Ok(AssetContent::file(
77+
FileContent::from(File::from(rope)).cell(),
78+
))
79+
}
80+
}

Diff for: turbopack/crates/turbopack-core/src/ident.rs

+21-1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ pub struct AssetIdent {
2727
pub parts: Vec<ModulePart>,
2828
/// The asset layer the asset was created from.
2929
pub layer: Option<ResolvedVc<RcStr>>,
30+
/// The MIME content type, if this asset was created from a data URL.
31+
pub content_type: Option<RcStr>,
3032
}
3133

3234
impl AssetIdent {
@@ -98,6 +100,10 @@ impl ValueToString for AssetIdent {
98100
s.push(')');
99101
}
100102

103+
if let Some(content_type) = &self.content_type {
104+
write!(s, " <{}>", content_type)?;
105+
}
106+
101107
if !self.parts.is_empty() {
102108
for part in self.parts.iter() {
103109
if !matches!(part, ModulePart::Facade) {
@@ -130,6 +136,7 @@ impl AssetIdent {
130136
modifiers: Vec::new(),
131137
parts: Vec::new(),
132138
layer: None,
139+
content_type: None,
133140
}))
134141
}
135142

@@ -168,6 +175,13 @@ impl AssetIdent {
168175
Self::new(Value::new(this))
169176
}
170177

178+
#[turbo_tasks::function]
179+
pub fn with_content_type(&self, content_type: RcStr) -> Vc<Self> {
180+
let mut this = self.clone();
181+
this.content_type = Some(content_type);
182+
Self::new(Value::new(this))
183+
}
184+
171185
#[turbo_tasks::function]
172186
pub async fn rename_as(&self, pattern: RcStr) -> Result<Vc<Self>> {
173187
let mut this = self.clone();
@@ -230,6 +244,7 @@ impl AssetIdent {
230244
modifiers,
231245
parts,
232246
layer,
247+
content_type,
233248
} = self;
234249
let query = query.await?;
235250
if !query.is_empty() {
@@ -303,10 +318,15 @@ impl AssetIdent {
303318
has_hash = true;
304319
}
305320
if let Some(layer) = layer {
306-
1_u8.deterministic_hash(&mut hasher);
321+
5_u8.deterministic_hash(&mut hasher);
307322
layer.await?.deterministic_hash(&mut hasher);
308323
has_hash = true;
309324
}
325+
if let Some(content_type) = content_type {
326+
6_u8.deterministic_hash(&mut hasher);
327+
content_type.deterministic_hash(&mut hasher);
328+
has_hash = true;
329+
}
310330

311331
if has_hash {
312332
let hash = encode_hex(hasher.finish());

Diff for: turbopack/crates/turbopack-core/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ pub mod code_builder;
1414
pub mod compile_time_info;
1515
pub mod condition;
1616
pub mod context;
17+
pub mod data_uri_source;
1718
pub mod diagnostics;
1819
pub mod environment;
1920
pub mod error;

Diff for: turbopack/crates/turbopack-core/src/resolve/mod.rs

+36
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ use self::{
3232
};
3333
use crate::{
3434
context::AssetContext,
35+
data_uri_source::DataUriSource,
3536
file_source::FileSource,
3637
issue::{
3738
module::emit_unknown_module_type_error, resolve::ResolvingIssue, IssueExt, IssueSource,
@@ -43,6 +44,7 @@ use crate::{
4344
reference_type::ReferenceType,
4445
resolve::{
4546
node::{node_cjs_resolve_options, node_esm_resolve_options},
47+
parse::stringify_data_uri,
4648
pattern::{read_matches, PatternMatch},
4749
plugin::AfterResolvePlugin,
4850
},
@@ -1998,6 +2000,40 @@ async fn resolve_internal_inline(
19982000
)
19992001
.await?
20002002
}
2003+
Request::DataUri {
2004+
media_type,
2005+
encoding,
2006+
data,
2007+
} => {
2008+
// Behave like Request::Uri
2009+
let uri: RcStr = stringify_data_uri(media_type, encoding, *data)
2010+
.await?
2011+
.into();
2012+
if options.await?.parse_data_uris {
2013+
*ResolveResult::primary_with_key(
2014+
RequestKey::new(uri.clone()),
2015+
ResolveResultItem::Source(ResolvedVc::upcast(
2016+
DataUriSource::new(
2017+
media_type.clone(),
2018+
encoding.clone(),
2019+
**data,
2020+
lookup_path,
2021+
)
2022+
.to_resolved()
2023+
.await?,
2024+
)),
2025+
)
2026+
} else {
2027+
*ResolveResult::primary_with_key(
2028+
RequestKey::new(uri.clone()),
2029+
ResolveResultItem::External {
2030+
name: uri,
2031+
ty: ExternalType::Url,
2032+
traced: ExternalTraced::Untraced,
2033+
},
2034+
)
2035+
}
2036+
}
20012037
Request::Uri {
20022038
protocol,
20032039
remainder,

Diff for: turbopack/crates/turbopack-core/src/resolve/options.rs

+2
Original file line numberDiff line numberDiff line change
@@ -602,6 +602,8 @@ pub struct ResolveOptions {
602602
pub enable_typescript_with_output_extension: bool,
603603
/// Warn instead of error for resolve errors
604604
pub loose_errors: bool,
605+
/// Whether to parse data URIs into modules (as opposed to keeping them as externals)
606+
pub parse_data_uris: bool,
605607

606608
pub placeholder_for_future_extensions: (),
607609
}

0 commit comments

Comments
 (0)