Skip to content

gh-112301: Use literal format strings in unicode_fromformat_arg #124203

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Sep 25, 2024
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 36 additions & 43 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -2694,11 +2694,6 @@ unicode_fromformat_write_wcstr(_PyUnicodeWriter *writer, const wchar_t *str,
#define F_SIZE 3
#define F_PTRDIFF 4
#define F_INTMAX 5
static const char * const formats[] = {"%d", "%ld", "%lld", "%zd", "%td", "%jd"};
static const char * const formats_o[] = {"%o", "%lo", "%llo", "%zo", "%to", "%jo"};
static const char * const formats_u[] = {"%u", "%lu", "%llu", "%zu", "%tu", "%ju"};
static const char * const formats_x[] = {"%x", "%lx", "%llx", "%zx", "%tx", "%jx"};
static const char * const formats_X[] = {"%X", "%lX", "%llX", "%zX", "%tX", "%jX"};

static const char*
unicode_fromformat_arg(_PyUnicodeWriter *writer,
Expand Down Expand Up @@ -2840,47 +2835,45 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
case 'd': case 'i':
case 'o': case 'u': case 'x': case 'X':
{
/* used by sprintf */
char buffer[MAX_INTMAX_CHARS];
const char *fmt = NULL;
switch (*f) {
case 'o': fmt = formats_o[sizemod]; break;
case 'u': fmt = formats_u[sizemod]; break;
case 'x': fmt = formats_x[sizemod]; break;
case 'X': fmt = formats_X[sizemod]; break;
default: fmt = formats[sizemod]; break;
}
int issigned = (*f == 'd' || *f == 'i');

// Fill buffer using sprinf, with one of many possible format
// strings, like "%llX" for `long long` in hexadecimal.
// The type/size is in `sizemod`; the format is in `*f`.

// Use macros with nested switches to keep the sprintf format strings
// as compile-time literals, avoiding warnings and maybe allowing
// optimizations.

// `SPRINT` macro does one sprintf
// Example usage: SPRINT("l", "X", unsigned long) expands to
// sprintf(buffer, "%" "l" "X", va_arg(*vargs, unsigned long))
#define SPRINT(SIZE_SPEC, FMT_CHAR, TYPE) \
sprintf(buffer, "%" SIZE_SPEC FMT_CHAR, va_arg(*vargs, TYPE))

// One inner switch to handle all format variants
#define DO_SPRINTS(SIZE_SPEC, SIGNED_TYPE, UNSIGNED_TYPE) \
switch (*f) { \
case 'o': len = SPRINT(SIZE_SPEC, "o", UNSIGNED_TYPE); break; \
case 'u': len = SPRINT(SIZE_SPEC, "u", UNSIGNED_TYPE); break; \
case 'x': len = SPRINT(SIZE_SPEC, "x", UNSIGNED_TYPE); break; \
case 'X': len = SPRINT(SIZE_SPEC, "X", UNSIGNED_TYPE); break; \
default: len = SPRINT(SIZE_SPEC, "d", SIGNED_TYPE); break; \
} \
break;

// Outer switch to handle all the sizes/types
switch (sizemod) {
case F_LONG:
len = issigned ?
sprintf(buffer, fmt, va_arg(*vargs, long)) :
sprintf(buffer, fmt, va_arg(*vargs, unsigned long));
break;
case F_LONGLONG:
len = issigned ?
sprintf(buffer, fmt, va_arg(*vargs, long long)) :
sprintf(buffer, fmt, va_arg(*vargs, unsigned long long));
break;
case F_SIZE:
len = issigned ?
sprintf(buffer, fmt, va_arg(*vargs, Py_ssize_t)) :
sprintf(buffer, fmt, va_arg(*vargs, size_t));
break;
case F_PTRDIFF:
len = sprintf(buffer, fmt, va_arg(*vargs, ptrdiff_t));
break;
case F_INTMAX:
len = issigned ?
sprintf(buffer, fmt, va_arg(*vargs, intmax_t)) :
sprintf(buffer, fmt, va_arg(*vargs, uintmax_t));
break;
default:
len = issigned ?
sprintf(buffer, fmt, va_arg(*vargs, int)) :
sprintf(buffer, fmt, va_arg(*vargs, unsigned int));
break;
case F_LONG: DO_SPRINTS("l", long, unsigned long)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you put the break here? Something like:

Suggested change
case F_LONG: DO_SPRINTS("l", long, unsigned long)
case F_LONG: DO_SPRINTS("l", long, unsigned long); break;

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK!
One line is 85 characters now, but I think it's most readable this way.

case F_LONGLONG: DO_SPRINTS("ll", long long, unsigned long long)
case F_SIZE: DO_SPRINTS("z", Py_ssize_t, size_t)
case F_PTRDIFF: DO_SPRINTS("t", ptrdiff_t, ptrdiff_t)
case F_INTMAX: DO_SPRINTS("j", intmax_t, uintmax_t)
default: DO_SPRINTS("", int, unsigned int)
}
#undef SPRINT
#undef DO_SPRINTS

assert(len >= 0);

int sign = (buffer[0] == '-');
Expand Down
Loading