Skip to content

Introduce ByteSymbol #141875

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions compiler/rustc_ast/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ use rustc_data_structures::tagged_ptr::Tag;
use rustc_macros::{Decodable, Encodable, HashStable_Generic};
pub use rustc_span::AttrId;
use rustc_span::source_map::{Spanned, respan};
use rustc_span::{DUMMY_SP, ErrorGuaranteed, Ident, Span, Symbol, kw, sym};
use rustc_span::{ByteSymbol, DUMMY_SP, ErrorGuaranteed, Ident, Span, Symbol, kw, sym};
use thin_vec::{ThinVec, thin_vec};

pub use crate::format::*;
Expand Down Expand Up @@ -1766,7 +1766,7 @@ pub enum ExprKind {
/// Added for optimization purposes to avoid the need to escape
/// large binary blobs - should always behave like [`ExprKind::Lit`]
/// with a `ByteStr` literal.
IncludedBytes(Arc<[u8]>),
IncludedBytes(Arc<[u8]>), // njn: change to ByteSymbol?

/// A `format_args!()` expression.
FormatArgs(P<FormatArgs>),
Expand Down Expand Up @@ -2024,7 +2024,8 @@ impl YieldKind {
}

/// A literal in a meta item.
#[derive(Clone, Encodable, Decodable, Debug, HashStable_Generic)]
// njn: look for clones
#[derive(Clone, Copy, Encodable, Decodable, Debug, HashStable_Generic)]
pub struct MetaItemLit {
/// The original literal as written in the source code.
pub symbol: Symbol,
Expand Down Expand Up @@ -2087,16 +2088,17 @@ pub enum LitFloatType {
/// deciding the `LitKind`. This means that float literals like `1f32` are
/// classified by this type as `Float`. This is different to `token::LitKind`
/// which does *not* consider the suffix.
#[derive(Clone, Encodable, Decodable, Debug, Hash, Eq, PartialEq, HashStable_Generic)]
#[derive(Clone, Copy, Encodable, Decodable, Debug, Hash, Eq, PartialEq, HashStable_Generic)]
// njn: look for clones
pub enum LitKind {
/// A string literal (`"foo"`). The symbol is unescaped, and so may differ
/// from the original token's symbol.
Str(Symbol, StrStyle),
/// A byte string (`b"foo"`). Stored as raw bytes rather than as a `Symbol`
/// because it might be non-UTF-8, and `Symbol` only allows UTF-8 strings.
ByteStr(Arc<[u8]>, StrStyle),
ByteStr(ByteSymbol, StrStyle),
/// A C String (`c"foo"`). Guaranteed to only have `\0` at the end.
CStr(Arc<[u8]>, StrStyle),
CStr(ByteSymbol, StrStyle),
/// A byte char (`b'f'`).
Byte(u8),
/// A character literal (`'a'`).
Expand Down
18 changes: 9 additions & 9 deletions compiler/rustc_ast/src/util/literal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use std::{ascii, fmt, str};
use rustc_literal_escaper::{
MixedUnit, Mode, byte_from_char, unescape_byte, unescape_char, unescape_mixed, unescape_unicode,
};
use rustc_span::{Span, Symbol, kw, sym};
use rustc_span::{ByteSymbol, Span, Symbol, kw, sym};
use tracing::debug;

use crate::ast::{self, LitKind, MetaItemLit, StrStyle};
Expand Down Expand Up @@ -117,13 +117,13 @@ impl LitKind {
assert!(!err.is_fatal(), "failed to unescape string literal")
}
});
LitKind::ByteStr(buf.into(), StrStyle::Cooked)
LitKind::ByteStr(ByteSymbol::intern(&buf), StrStyle::Cooked)
}
token::ByteStrRaw(n) => {
// Raw strings have no escapes, so we can convert the symbol's text
// directly to bytes.
let buf = symbol.as_str().to_owned().into_bytes();
LitKind::ByteStr(buf.into(), StrStyle::Raw(n))
LitKind::ByteStr(ByteSymbol::intern(&buf), StrStyle::Raw(n))
}
token::CStr => {
let s = symbol.as_str();
Expand All @@ -138,15 +138,15 @@ impl LitKind {
}
});
buf.push(0);
LitKind::CStr(buf.into(), StrStyle::Cooked)
LitKind::CStr(ByteSymbol::intern(&buf), StrStyle::Cooked)
}
token::CStrRaw(n) => {
// Raw strings have no escapes, so we can convert the symbol's text
// directly to bytes and then append the terminating NUL byte.
let mut buf = symbol.as_str().to_owned().into_bytes();
buf.push(0);
LitKind::CStr(buf.into(), StrStyle::Raw(n))
LitKind::CStr(ByteSymbol::intern(&buf), StrStyle::Raw(n))
}
token::Err(guar) => LitKind::Err(guar),
})
Expand All @@ -169,11 +169,11 @@ impl fmt::Display for LitKind {
string = sym
)?,
LitKind::ByteStr(ref bytes, StrStyle::Cooked) => {
write!(f, "b\"{}\"", escape_byte_str_symbol(bytes))?
write!(f, "b\"{}\"", escape_byte_str_symbol(bytes.as_byte_str()))?
}
LitKind::ByteStr(ref bytes, StrStyle::Raw(n)) => {
// Unwrap because raw byte string literals can only contain ASCII.
let symbol = str::from_utf8(bytes).unwrap();
let symbol = str::from_utf8(bytes.as_byte_str()).unwrap();
write!(
f,
"br{delim}\"{string}\"{delim}",
Expand All @@ -182,11 +182,11 @@ impl fmt::Display for LitKind {
)?;
}
LitKind::CStr(ref bytes, StrStyle::Cooked) => {
write!(f, "c\"{}\"", escape_byte_str_symbol(bytes))?
write!(f, "c\"{}\"", escape_byte_str_symbol(bytes.as_byte_str()))?
}
LitKind::CStr(ref bytes, StrStyle::Raw(n)) => {
// This can only be valid UTF-8.
let symbol = str::from_utf8(bytes).unwrap();
let symbol = str::from_utf8(bytes.as_byte_str()).unwrap();
write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize),)?;
}
LitKind::Int(n, ty) => {
Expand Down
24 changes: 9 additions & 15 deletions compiler/rustc_ast_lowering/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use rustc_middle::span_bug;
use rustc_middle::ty::TyCtxt;
use rustc_session::errors::report_lit_error;
use rustc_span::source_map::{Spanned, respan};
use rustc_span::{DUMMY_SP, DesugaringKind, Ident, Span, Symbol, sym};
use rustc_span::{ByteSymbol, DUMMY_SP, DesugaringKind, Ident, Span, Symbol, sym};
use thin_vec::{ThinVec, thin_vec};
use visit::{Visitor, walk_expr};

Expand Down Expand Up @@ -146,10 +146,10 @@ impl<'hir> LoweringContext<'_, 'hir> {
}
ExprKind::Lit(token_lit) => hir::ExprKind::Lit(self.lower_lit(token_lit, e.span)),
ExprKind::IncludedBytes(bytes) => {
let lit = self.arena.alloc(respan(
let lit = respan(
self.lower_span(e.span),
LitKind::ByteStr(Arc::clone(bytes), StrStyle::Cooked),
));
LitKind::ByteStr(ByteSymbol::intern(&bytes), StrStyle::Cooked),
);
hir::ExprKind::Lit(lit)
}
ExprKind::Cast(expr, ty) => {
Expand Down Expand Up @@ -422,19 +422,15 @@ impl<'hir> LoweringContext<'_, 'hir> {
})
}

pub(crate) fn lower_lit(
&mut self,
token_lit: &token::Lit,
span: Span,
) -> &'hir Spanned<LitKind> {
pub(crate) fn lower_lit(&mut self, token_lit: &token::Lit, span: Span) -> hir::Lit {
let lit_kind = match LitKind::from_token_lit(*token_lit) {
Ok(lit_kind) => lit_kind,
Err(err) => {
let guar = report_lit_error(&self.tcx.sess.psess, err, *token_lit, span);
LitKind::Err(guar)
}
};
self.arena.alloc(respan(self.lower_span(span), lit_kind))
respan(self.lower_span(span), lit_kind)
}

fn lower_unop(&mut self, u: UnOp) -> hir::UnOp {
Expand Down Expand Up @@ -2140,10 +2136,10 @@ impl<'hir> LoweringContext<'_, 'hir> {
}

fn expr_uint(&mut self, sp: Span, ty: ast::UintTy, value: u128) -> hir::Expr<'hir> {
let lit = self.arena.alloc(hir::Lit {
let lit = hir::Lit {
span: sp,
node: ast::LitKind::Int(value.into(), ast::LitIntType::Unsigned(ty)),
});
};
self.expr(sp, hir::ExprKind::Lit(lit))
}

Expand All @@ -2160,9 +2156,7 @@ impl<'hir> LoweringContext<'_, 'hir> {
}

pub(super) fn expr_str(&mut self, sp: Span, value: Symbol) -> hir::Expr<'hir> {
let lit = self
.arena
.alloc(hir::Lit { span: sp, node: ast::LitKind::Str(value, ast::StrStyle::Cooked) });
let lit = hir::Lit { span: sp, node: ast::LitKind::Str(value, ast::StrStyle::Cooked) };
self.expr(sp, hir::ExprKind::Lit(lit))
}

Expand Down
12 changes: 4 additions & 8 deletions compiler/rustc_ast_lowering/src/pat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use rustc_hir::def::{DefKind, Res};
use rustc_hir::{self as hir, LangItem};
use rustc_middle::span_bug;
use rustc_span::source_map::{Spanned, respan};
use rustc_span::{DesugaringKind, Ident, Span};
use rustc_span::{ByteSymbol, DesugaringKind, Ident, Span};

use super::errors::{
ArbitraryExpressionInPattern, ExtraDoubleDot, MisplacedDoubleDot, SubTupleBinding,
Expand Down Expand Up @@ -390,19 +390,15 @@ impl<'a, 'hir> LoweringContext<'a, 'hir> {
allow_paths: bool,
) -> &'hir hir::PatExpr<'hir> {
let span = self.lower_span(expr.span);
let err = |guar| hir::PatExprKind::Lit {
lit: self.arena.alloc(respan(span, LitKind::Err(guar))),
negated: false,
};
let err =
|guar| hir::PatExprKind::Lit { lit: respan(span, LitKind::Err(guar)), negated: false };
let kind = match &expr.kind {
ExprKind::Lit(lit) => {
hir::PatExprKind::Lit { lit: self.lower_lit(lit, span), negated: false }
}
ExprKind::ConstBlock(c) => hir::PatExprKind::ConstBlock(self.lower_const_block(c)),
ExprKind::IncludedBytes(bytes) => hir::PatExprKind::Lit {
lit: self
.arena
.alloc(respan(span, LitKind::ByteStr(Arc::clone(bytes), StrStyle::Cooked))),
lit: respan(span, LitKind::ByteStr(ByteSymbol::intern(bytes), StrStyle::Cooked)),
negated: false,
},
ExprKind::Err(guar) => err(*guar),
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_builtin_macros/src/concat_bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ pub(crate) fn expand_concat_bytes(
accumulator.push(val);
}
Ok(LitKind::ByteStr(ref bytes, _)) => {
accumulator.extend_from_slice(bytes);
accumulator.extend_from_slice(bytes.as_byte_str());
}
_ => {
guar.get_or_insert_with(|| invalid_type_err(cx, token_lit, e.span, false));
Expand Down
1 change: 0 additions & 1 deletion compiler/rustc_hir/src/arena.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ macro_rules! arena_types {
[] attribute: rustc_hir::Attribute,
[] owner_info: rustc_hir::OwnerInfo<'tcx>,
[] use_path: rustc_hir::UsePath<'tcx>,
[] lit: rustc_hir::Lit,
[] macro_def: rustc_ast::MacroDef,
]);
)
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_hir/src/hir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1809,7 +1809,7 @@ pub struct PatExpr<'hir> {
#[derive(Debug, Clone, Copy, HashStable_Generic)]
pub enum PatExprKind<'hir> {
Lit {
lit: &'hir Lit,
lit: Lit,
// FIXME: move this into `Lit` and handle negated literal expressions
// once instead of matching on unop neg expressions everywhere.
negated: bool,
Expand Down Expand Up @@ -2722,7 +2722,7 @@ pub enum ExprKind<'hir> {
/// A unary operation (e.g., `!x`, `*x`).
Unary(UnOp, &'hir Expr<'hir>),
/// A literal (e.g., `1`, `"foo"`).
Lit(&'hir Lit),
Lit(Lit),
/// A cast (e.g., `foo as f64`).
Cast(&'hir Expr<'hir>, &'hir Ty<'hir>),
/// A type ascription (e.g., `x: Foo`). See RFC 3307.
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_hir/src/intravisit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ pub trait Visitor<'v>: Sized {
fn visit_pat_expr(&mut self, expr: &'v PatExpr<'v>) -> Self::Result {
walk_pat_expr(self, expr)
}
fn visit_lit(&mut self, _hir_id: HirId, _lit: &'v Lit, _negated: bool) -> Self::Result {
fn visit_lit(&mut self, _hir_id: HirId, _lit: Lit, _negated: bool) -> Self::Result {
Self::Result::output()
}
fn visit_anon_const(&mut self, c: &'v AnonConst) -> Self::Result {
Expand Down Expand Up @@ -768,7 +768,7 @@ pub fn walk_pat_field<'v, V: Visitor<'v>>(visitor: &mut V, field: &'v PatField<'
pub fn walk_pat_expr<'v, V: Visitor<'v>>(visitor: &mut V, expr: &'v PatExpr<'v>) -> V::Result {
try_visit!(visitor.visit_id(expr.hir_id));
match &expr.kind {
PatExprKind::Lit { lit, negated } => visitor.visit_lit(expr.hir_id, lit, *negated),
PatExprKind::Lit { lit, negated } => visitor.visit_lit(expr.hir_id, *lit, *negated),
PatExprKind::ConstBlock(c) => visitor.visit_inline_const(c),
PatExprKind::Path(qpath) => visitor.visit_qpath(qpath, expr.hir_id, expr.span),
}
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_hir_analysis/src/hir_ty_lowering/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2433,9 +2433,9 @@ impl<'tcx> dyn HirTyLowerer<'tcx> + '_ {
};

let lit_input = match expr.kind {
hir::ExprKind::Lit(lit) => Some(LitToConstInput { lit: &lit.node, ty, neg: false }),
hir::ExprKind::Lit(lit) => Some(LitToConstInput { lit: lit.node, ty, neg: false }),
hir::ExprKind::Unary(hir::UnOp::Neg, expr) => match expr.kind {
hir::ExprKind::Lit(lit) => Some(LitToConstInput { lit: &lit.node, ty, neg: true }),
hir::ExprKind::Lit(lit) => Some(LitToConstInput { lit: lit.node, ty, neg: true }),
_ => None,
},
_ => None,
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_hir_pretty/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1480,7 +1480,7 @@ impl<'a> State<'a> {
self.print_expr_addr_of(k, m, expr);
}
hir::ExprKind::Lit(lit) => {
self.print_literal(lit);
self.print_literal(&lit);
}
hir::ExprKind::Cast(expr, ty) => {
self.print_expr_cond_paren(expr, expr.precedence() < ExprPrecedence::Cast);
Expand Down
3 changes: 2 additions & 1 deletion compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1631,10 +1631,11 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {

match lit.node {
ast::LitKind::Str(..) => Ty::new_static_str(tcx),
// njn: why is this an array, not a slice?
ast::LitKind::ByteStr(ref v, _) => Ty::new_imm_ref(
tcx,
tcx.lifetimes.re_static,
Ty::new_array(tcx, tcx.types.u8, v.len() as u64),
Ty::new_array(tcx, tcx.types.u8, v.as_byte_str().len() as u64),
),
ast::LitKind::Byte(_) => tcx.types.u8,
ast::LitKind::Char(_) => tcx.types.char,
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_hir_typeck/src/fn_ctxt/suggestions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1624,7 +1624,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
node: rustc_ast::LitKind::Int(lit, rustc_ast::LitIntType::Unsuffixed),
span,
}) => {
let Ok(snippet) = self.tcx.sess.source_map().span_to_snippet(*span) else {
let Ok(snippet) = self.tcx.sess.source_map().span_to_snippet(span) else {
return false;
};
if !(snippet.starts_with("0x") || snippet.starts_with("0X")) {
Expand Down Expand Up @@ -1683,7 +1683,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {

// We have satisfied all requirements to provide a suggestion. Emit it.
err.span_suggestion(
*span,
span,
format!("if you meant to create a null pointer, use `{null_path_str}()`"),
null_path_str + "()",
Applicability::MachineApplicable,
Expand Down
3 changes: 2 additions & 1 deletion compiler/rustc_lint/src/invalid_from_utf8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,9 @@ impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
}
match init.kind {
ExprKind::Lit(Spanned { node: lit, .. }) => {
// njn: rename bytes as byte_sym, here and elsewhere
if let LitKind::ByteStr(bytes, _) = &lit
&& let Err(utf8_error) = std::str::from_utf8(bytes)
&& let Err(utf8_error) = std::str::from_utf8(bytes.as_byte_str())
{
lint(init.span, utf8_error);
}
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_lint/src/late.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ impl<'tcx, T: LateLintPass<'tcx>> hir_visit::Visitor<'tcx> for LateContextAndPas
hir_visit::walk_pat(self, p);
}

fn visit_lit(&mut self, hir_id: HirId, lit: &'tcx hir::Lit, negated: bool) {
fn visit_lit(&mut self, hir_id: HirId, lit: hir::Lit, negated: bool) {
lint_callback!(self, check_lit, hir_id, lit, negated);
}

Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_lint/src/passes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ macro_rules! late_lint_methods {
fn check_stmt(a: &'tcx rustc_hir::Stmt<'tcx>);
fn check_arm(a: &'tcx rustc_hir::Arm<'tcx>);
fn check_pat(a: &'tcx rustc_hir::Pat<'tcx>);
fn check_lit(hir_id: rustc_hir::HirId, a: &'tcx rustc_hir::Lit, negated: bool);
fn check_lit(hir_id: rustc_hir::HirId, a: rustc_hir::Lit, negated: bool);
fn check_expr(a: &'tcx rustc_hir::Expr<'tcx>);
fn check_expr_post(a: &'tcx rustc_hir::Expr<'tcx>);
fn check_ty(a: &'tcx rustc_hir::Ty<'tcx, rustc_hir::AmbigArg>);
Expand Down
10 changes: 2 additions & 8 deletions compiler/rustc_lint/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -548,18 +548,12 @@ fn lint_fn_pointer<'tcx>(
}

impl<'tcx> LateLintPass<'tcx> for TypeLimits {
fn check_lit(
&mut self,
cx: &LateContext<'tcx>,
hir_id: HirId,
lit: &'tcx hir::Lit,
negated: bool,
) {
fn check_lit(&mut self, cx: &LateContext<'tcx>, hir_id: HirId, lit: hir::Lit, negated: bool) {
if negated {
self.negated_expr_id = Some(hir_id);
self.negated_expr_span = Some(lit.span);
}
lint_literal(cx, self, hir_id, lit.span, lit, negated);
lint_literal(cx, self, hir_id, lit.span, &lit, negated);
}

fn check_expr(&mut self, cx: &LateContext<'tcx>, e: &'tcx hir::Expr<'tcx>) {
Expand Down
Loading
Loading