Skip to content

Commit 90fd71b

Browse files
Rollup merge of rust-lang#139367 - GuillaumeGomez:proc-macro-values, r=Urgau
Add `*_value` methods to proc_macro lib. This is the (re-)implementation of rust-lang/libs-team#459. It makes it possible to get the actual (unescaped) value of the different string literals. It was originally done in rust-lang#136355, but that broke the artifacts build, so we decided to move the crate to crates.io to work around this limitation. Part of rust-lang#136652. Considering this is a copy-paste of the originally approved PR, there is no need to go through the whole process again. \o/ r? `@Urgau`
2 parents aa70748 + b74a032 commit 90fd71b

File tree

3 files changed

+126
-0
lines changed

3 files changed

+126
-0
lines changed

Cargo.lock

+10
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

proc_macro/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,4 @@ std = { path = "../std" }
99
# `core` when resolving doc links. Without this line a different `core` will be
1010
# loaded from sysroot causing duplicate lang items and other similar errors.
1111
core = { path = "../core" }
12+
rustc-literal-escaper = { version = "0.0.2", features = ["rustc-dep-of-std"] }

proc_macro/src/lib.rs

+115
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#![feature(panic_can_unwind)]
2828
#![feature(restricted_std)]
2929
#![feature(rustc_attrs)]
30+
#![feature(stmt_expr_attributes)]
3031
#![feature(extend_one)]
3132
#![recursion_limit = "256"]
3233
#![allow(internal_features)]
@@ -51,11 +52,24 @@ use std::{error, fmt};
5152

5253
#[unstable(feature = "proc_macro_diagnostic", issue = "54140")]
5354
pub use diagnostic::{Diagnostic, Level, MultiSpan};
55+
#[unstable(feature = "proc_macro_value", issue = "136652")]
56+
pub use rustc_literal_escaper::EscapeError;
57+
use rustc_literal_escaper::{MixedUnit, Mode, byte_from_char, unescape_mixed, unescape_unicode};
5458
#[unstable(feature = "proc_macro_totokens", issue = "130977")]
5559
pub use to_tokens::ToTokens;
5660

5761
use crate::escape::{EscapeOptions, escape_bytes};
5862

63+
/// Errors returned when trying to retrieve the unescaped value of a literal.
64+
#[unstable(feature = "proc_macro_value", issue = "136652")]
65+
#[derive(Debug, PartialEq, Eq)]
66+
pub enum ConversionErrorKind {
67+
/// The literal failed to be unescaped, take a look at [`EscapeError`] for more information.
68+
FailedToUnescape(EscapeError),
69+
/// The literal is not of the kind expected by the conversion that was requested.
70+
InvalidLiteralKind,
71+
}
72+
5973
/// Determines whether proc_macro has been made accessible to the currently
6074
/// running program.
6175
///
@@ -1451,6 +1465,107 @@ impl Literal {
14511465
}
14521466
})
14531467
}
1468+
1469+
/// Returns the unescaped string value if the current literal is a string or a raw string
/// literal.
///
/// # Errors
///
/// Returns [`ConversionErrorKind::InvalidLiteralKind`] if the literal is of any other kind, and
/// [`ConversionErrorKind::FailedToUnescape`] if unescaping reported a fatal error.
1470+
#[unstable(feature = "proc_macro_value", issue = "136652")]
1471+
pub fn str_value(&self) -> Result<String, ConversionErrorKind> {
1472+
self.0.symbol.with(|symbol| match self.0.kind {
1473+
bridge::LitKind::Str => {
1474+
// Fast path: a symbol without any backslash is returned as is (see the `else`
// branch), skipping the unescape pass entirely.
if symbol.contains('\\') {
1475+
let mut buf = String::with_capacity(symbol.len());
1476+
let mut error = None;
1477+
// Force-inlining here is aggressive but the closure is
1478+
// called on every char in the string, so it can be hot in
1479+
// programs with many long strings containing escapes.
1480+
unescape_unicode(
1481+
symbol,
1482+
Mode::Str,
1483+
&mut #[inline(always)]
1484+
|_, c| match c {
1485+
Ok(c) => buf.push(c),
1486+
Err(err) => {
1487+
// Only fatal errors are recorded; non-fatal ones are skipped. Each fatal
// error overwrites the previous one, so the last one reported is returned.
if err.is_fatal() {
1488+
error = Some(ConversionErrorKind::FailedToUnescape(err));
1489+
}
1490+
}
1491+
},
1492+
);
1493+
if let Some(error) = error { Err(error) } else { Ok(buf) }
1494+
} else {
1495+
Ok(symbol.to_string())
1496+
}
1497+
}
1498+
// Raw string symbols are copied without any unescaping.
bridge::LitKind::StrRaw(_) => Ok(symbol.to_string()),
1499+
_ => Err(ConversionErrorKind::InvalidLiteralKind),
1500+
})
1501+
}
1502+
1503+
/// Returns the unescaped bytes, followed by a terminating NUL byte, if the current literal is
/// a C string or a raw C string literal.
///
/// # Errors
///
/// Returns [`ConversionErrorKind::InvalidLiteralKind`] if the literal is of any other kind, and
/// [`ConversionErrorKind::FailedToUnescape`] if unescaping reported a fatal error.
1505+
#[unstable(feature = "proc_macro_value", issue = "136652")]
1506+
pub fn cstr_value(&self) -> Result<Vec<u8>, ConversionErrorKind> {
1507+
self.0.symbol.with(|symbol| match self.0.kind {
1508+
bridge::LitKind::CStr => {
1509+
let mut error = None;
1510+
let mut buf = Vec::with_capacity(symbol.len());
1511+
1512+
unescape_mixed(symbol, Mode::CStr, &mut |_span, c| match c {
1513+
// Chars are appended as their UTF-8 encoding.
Ok(MixedUnit::Char(c)) => {
1514+
buf.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes())
1515+
}
1516+
// High bytes are appended unchanged, as a single byte.
Ok(MixedUnit::HighByte(b)) => buf.push(b),
1517+
Err(err) => {
1518+
// Only fatal errors are recorded; non-fatal ones are skipped. Each fatal error
// overwrites the previous one, so the last one reported is returned.
if err.is_fatal() {
1519+
error = Some(ConversionErrorKind::FailedToUnescape(err));
1520+
}
1521+
}
1522+
});
1523+
if let Some(error) = error {
1524+
Err(error)
1525+
} else {
1526+
// Append the terminating NUL byte.
buf.push(0);
1527+
Ok(buf)
1528+
}
1529+
}
1530+
bridge::LitKind::CStrRaw(_) => {
1531+
// Raw strings have no escapes so we can convert the symbol
1532+
// directly to a `Vec<u8>` and then append the terminating NUL
1533+
// byte.
1534+
let mut buf = symbol.to_owned().into_bytes();
1535+
buf.push(0);
1536+
Ok(buf)
1537+
}
1538+
_ => Err(ConversionErrorKind::InvalidLiteralKind),
1539+
})
1540+
}
1541+
1542+
/// Returns the unescaped bytes if the current literal is a byte string or a raw byte string
/// literal.
///
/// # Errors
///
/// Returns [`ConversionErrorKind::InvalidLiteralKind`] if the literal is of any other kind, and
/// [`ConversionErrorKind::FailedToUnescape`] if unescaping reported a fatal error.
1544+
#[unstable(feature = "proc_macro_value", issue = "136652")]
1545+
pub fn byte_str_value(&self) -> Result<Vec<u8>, ConversionErrorKind> {
1546+
self.0.symbol.with(|symbol| match self.0.kind {
1547+
bridge::LitKind::ByteStr => {
1548+
let mut buf = Vec::with_capacity(symbol.len());
1549+
let mut error = None;
1550+
1551+
unescape_unicode(symbol, Mode::ByteStr, &mut |_, c| match c {
1552+
// Each unescaped char is converted to a single byte before being stored.
Ok(c) => buf.push(byte_from_char(c)),
1553+
Err(err) => {
1554+
// Only fatal errors are recorded; non-fatal ones are skipped. Each fatal error
// overwrites the previous one, so the last one reported is returned.
if err.is_fatal() {
1555+
error = Some(ConversionErrorKind::FailedToUnescape(err));
1556+
}
1557+
}
1558+
});
1559+
if let Some(error) = error { Err(error) } else { Ok(buf) }
1560+
}
1561+
bridge::LitKind::ByteStrRaw(_) => {
1562+
// Raw strings have no escapes so we can convert the symbol
1563+
// directly to a `Vec<u8>`.
1564+
Ok(symbol.to_owned().into_bytes())
1565+
}
1566+
_ => Err(ConversionErrorKind::InvalidLiteralKind),
1567+
})
1568+
}
14541569
}
14551570

14561571
/// Parse a single literal from its stringified representation.

0 commit comments

Comments
 (0)