From 1ebc9eea451619374aec09c548ed7d1d4e9dcf11 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 18 Sep 2024 10:03:14 +0200 Subject: [PATCH 1/3] Generated code --- Objects/unicodeobject.c | 114 +++++++++++++++++++++++++++++----------- 1 file changed, 82 insertions(+), 32 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 2494c989544ca0..666d695be43a35 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2694,11 +2694,6 @@ unicode_fromformat_write_wcstr(_PyUnicodeWriter *writer, const wchar_t *str, #define F_SIZE 3 #define F_PTRDIFF 4 #define F_INTMAX 5 -static const char * const formats[] = {"%d", "%ld", "%lld", "%zd", "%td", "%jd"}; -static const char * const formats_o[] = {"%o", "%lo", "%llo", "%zo", "%to", "%jo"}; -static const char * const formats_u[] = {"%u", "%lu", "%llu", "%zu", "%tu", "%ju"}; -static const char * const formats_x[] = {"%x", "%lx", "%llx", "%zx", "%tx", "%jx"}; -static const char * const formats_X[] = {"%X", "%lX", "%llX", "%zX", "%tX", "%jX"}; static const char* unicode_fromformat_arg(_PyUnicodeWriter *writer, @@ -2840,46 +2835,101 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': { - /* used by sprintf */ - char buffer[MAX_INTMAX_CHARS]; - const char *fmt = NULL; - switch (*f) { - case 'o': fmt = formats_o[sizemod]; break; - case 'u': fmt = formats_u[sizemod]; break; - case 'x': fmt = formats_x[sizemod]; break; - case 'X': fmt = formats_X[sizemod]; break; - default: fmt = formats[sizemod]; break; - } - int issigned = (*f == 'd' || *f == 'i'); + char buffer[MAX_INTMAX_CHARS]; // used by sprintf switch (sizemod) { +/*[python input] + # Use generated code so that the `sprintf` format strings + # are compile-time literals, which avoids a warning and possibly + # allows optimizations. 
+if 1: + indent = ' ' * 11 + cases = [ + ('F_LONG', 'l', 'long', 'unsigned long'), + ('F_LONGLONG', 'll', 'long long', 'unsigned long long'), + ('F_SIZE', 'z', 'Py_ssize_t', 'size_t'), + ('F_PTRDIFF', 't', 'ptrdiff_t', 'ptrdiff_t'), + ('F_INTMAX', 'j', 'intmax_t', 'uintmax_t'), + (None, '', 'int', 'unsigned int'), + ] + for (size, sizechar, stype, utype) in cases: + if size is None: + print(indent, f'default:') + else: + print(indent, f'case {size}:') + print(indent, f' switch (*f) {{') + for c in 'ouxX': + # signed + print( + indent, + f" case '{c}':", + f'len = sprintf(buffer, "%{sizechar}{c}",', + f'va_arg(*vargs, {utype}));', + 'break;') + # unsigned + print( + indent, + f" default: ", + f'len = sprintf(buffer, "%{sizechar}d",', + f'va_arg(*vargs, {stype}));', + 'break;') + print(indent, f' }}') + print(indent, f' break;') +[python start generated code]*/ case F_LONG: - len = issigned ? - sprintf(buffer, fmt, va_arg(*vargs, long)) : - sprintf(buffer, fmt, va_arg(*vargs, unsigned long)); + switch (*f) { + case 'o': len = sprintf(buffer, "%lo", va_arg(*vargs, unsigned long)); break; + case 'u': len = sprintf(buffer, "%lu", va_arg(*vargs, unsigned long)); break; + case 'x': len = sprintf(buffer, "%lx", va_arg(*vargs, unsigned long)); break; + case 'X': len = sprintf(buffer, "%lX", va_arg(*vargs, unsigned long)); break; + default: len = sprintf(buffer, "%ld", va_arg(*vargs, long)); break; + } break; case F_LONGLONG: - len = issigned ? 
- sprintf(buffer, fmt, va_arg(*vargs, long long)) : - sprintf(buffer, fmt, va_arg(*vargs, unsigned long long)); + switch (*f) { + case 'o': len = sprintf(buffer, "%llo", va_arg(*vargs, unsigned long long)); break; + case 'u': len = sprintf(buffer, "%llu", va_arg(*vargs, unsigned long long)); break; + case 'x': len = sprintf(buffer, "%llx", va_arg(*vargs, unsigned long long)); break; + case 'X': len = sprintf(buffer, "%llX", va_arg(*vargs, unsigned long long)); break; + default: len = sprintf(buffer, "%lld", va_arg(*vargs, long long)); break; + } break; case F_SIZE: - len = issigned ? - sprintf(buffer, fmt, va_arg(*vargs, Py_ssize_t)) : - sprintf(buffer, fmt, va_arg(*vargs, size_t)); + switch (*f) { + case 'o': len = sprintf(buffer, "%zo", va_arg(*vargs, size_t)); break; + case 'u': len = sprintf(buffer, "%zu", va_arg(*vargs, size_t)); break; + case 'x': len = sprintf(buffer, "%zx", va_arg(*vargs, size_t)); break; + case 'X': len = sprintf(buffer, "%zX", va_arg(*vargs, size_t)); break; + default: len = sprintf(buffer, "%zd", va_arg(*vargs, Py_ssize_t)); break; + } break; case F_PTRDIFF: - len = sprintf(buffer, fmt, va_arg(*vargs, ptrdiff_t)); + switch (*f) { + case 'o': len = sprintf(buffer, "%to", va_arg(*vargs, ptrdiff_t)); break; + case 'u': len = sprintf(buffer, "%tu", va_arg(*vargs, ptrdiff_t)); break; + case 'x': len = sprintf(buffer, "%tx", va_arg(*vargs, ptrdiff_t)); break; + case 'X': len = sprintf(buffer, "%tX", va_arg(*vargs, ptrdiff_t)); break; + default: len = sprintf(buffer, "%td", va_arg(*vargs, ptrdiff_t)); break; + } break; case F_INTMAX: - len = issigned ? 
- sprintf(buffer, fmt, va_arg(*vargs, intmax_t)) : - sprintf(buffer, fmt, va_arg(*vargs, uintmax_t)); + switch (*f) { + case 'o': len = sprintf(buffer, "%jo", va_arg(*vargs, uintmax_t)); break; + case 'u': len = sprintf(buffer, "%ju", va_arg(*vargs, uintmax_t)); break; + case 'x': len = sprintf(buffer, "%jx", va_arg(*vargs, uintmax_t)); break; + case 'X': len = sprintf(buffer, "%jX", va_arg(*vargs, uintmax_t)); break; + default: len = sprintf(buffer, "%jd", va_arg(*vargs, intmax_t)); break; + } break; default: - len = issigned ? - sprintf(buffer, fmt, va_arg(*vargs, int)) : - sprintf(buffer, fmt, va_arg(*vargs, unsigned int)); + switch (*f) { + case 'o': len = sprintf(buffer, "%o", va_arg(*vargs, unsigned int)); break; + case 'u': len = sprintf(buffer, "%u", va_arg(*vargs, unsigned int)); break; + case 'x': len = sprintf(buffer, "%x", va_arg(*vargs, unsigned int)); break; + case 'X': len = sprintf(buffer, "%X", va_arg(*vargs, unsigned int)); break; + default: len = sprintf(buffer, "%d", va_arg(*vargs, int)); break; + } break; +/*[python end generated code: output=e37b56d7b69e78f3 input=b77dfc89939506ed]*/ } assert(len >= 0); From b562a43154cab877c89abbdd33b5665d8c497ca1 Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Wed, 18 Sep 2024 10:24:48 +0200 Subject: [PATCH 2/3] Macros --- Objects/unicodeobject.c | 131 ++++++++++++---------------------------- 1 file changed, 37 insertions(+), 94 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 666d695be43a35..bce5841697891c 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2835,102 +2835,45 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, case 'd': case 'i': case 'o': case 'u': case 'x': case 'X': { - char buffer[MAX_INTMAX_CHARS]; // used by sprintf + char buffer[MAX_INTMAX_CHARS]; + + // Fill buffer using sprintf, with one of many possible format + // strings, like "%llX" for `long long` in hexadecimal. + // The type/size is in `sizemod`; the format is in `*f`. 
+ + // Use macros with nested switches to keep the sprintf format strings + // as compile-time literals, avoiding warnings and maybe allowing + // optimizations. + + // `SPRINT` macro does one sprintf + // Example usage: SPRINT("l", "X", unsigned long) expands to + // sprintf(buffer, "%" "l" "X", va_arg(*vargs, unsigned long)) + #define SPRINT(SIZE_SPEC, FMT_CHAR, TYPE) \ + sprintf(buffer, "%" SIZE_SPEC FMT_CHAR, va_arg(*vargs, TYPE)) + + // One inner switch to handle all format variants + #define DO_SPRINTS(SIZE_SPEC, SIGNED_TYPE, UNSIGNED_TYPE) \ + switch (*f) { \ + case 'o': len = SPRINT(SIZE_SPEC, "o", UNSIGNED_TYPE); break; \ + case 'u': len = SPRINT(SIZE_SPEC, "u", UNSIGNED_TYPE); break; \ + case 'x': len = SPRINT(SIZE_SPEC, "x", UNSIGNED_TYPE); break; \ + case 'X': len = SPRINT(SIZE_SPEC, "X", UNSIGNED_TYPE); break; \ + default: len = SPRINT(SIZE_SPEC, "d", SIGNED_TYPE); break; \ + } \ + break; + + // Outer switch to handle all the sizes/types switch (sizemod) { -/*[python input] - # Use generated code so that the `sprintf` format strings - # are compile-time literals, which avoids a warning and possibly - # allows optimizations. 
-if 1: - indent = ' ' * 11 - cases = [ - ('F_LONG', 'l', 'long', 'unsigned long'), - ('F_LONGLONG', 'll', 'long long', 'unsigned long long'), - ('F_SIZE', 'z', 'Py_ssize_t', 'size_t'), - ('F_PTRDIFF', 't', 'ptrdiff_t', 'ptrdiff_t'), - ('F_INTMAX', 'j', 'intmax_t', 'uintmax_t'), - (None, '', 'int', 'unsigned int'), - ] - for (size, sizechar, stype, utype) in cases: - if size is None: - print(indent, f'default:') - else: - print(indent, f'case {size}:') - print(indent, f' switch (*f) {{') - for c in 'ouxX': - # signed - print( - indent, - f" case '{c}':", - f'len = sprintf(buffer, "%{sizechar}{c}",', - f'va_arg(*vargs, {utype}));', - 'break;') - # unsigned - print( - indent, - f" default: ", - f'len = sprintf(buffer, "%{sizechar}d",', - f'va_arg(*vargs, {stype}));', - 'break;') - print(indent, f' }}') - print(indent, f' break;') -[python start generated code]*/ - case F_LONG: - switch (*f) { - case 'o': len = sprintf(buffer, "%lo", va_arg(*vargs, unsigned long)); break; - case 'u': len = sprintf(buffer, "%lu", va_arg(*vargs, unsigned long)); break; - case 'x': len = sprintf(buffer, "%lx", va_arg(*vargs, unsigned long)); break; - case 'X': len = sprintf(buffer, "%lX", va_arg(*vargs, unsigned long)); break; - default: len = sprintf(buffer, "%ld", va_arg(*vargs, long)); break; - } - break; - case F_LONGLONG: - switch (*f) { - case 'o': len = sprintf(buffer, "%llo", va_arg(*vargs, unsigned long long)); break; - case 'u': len = sprintf(buffer, "%llu", va_arg(*vargs, unsigned long long)); break; - case 'x': len = sprintf(buffer, "%llx", va_arg(*vargs, unsigned long long)); break; - case 'X': len = sprintf(buffer, "%llX", va_arg(*vargs, unsigned long long)); break; - default: len = sprintf(buffer, "%lld", va_arg(*vargs, long long)); break; - } - break; - case F_SIZE: - switch (*f) { - case 'o': len = sprintf(buffer, "%zo", va_arg(*vargs, size_t)); break; - case 'u': len = sprintf(buffer, "%zu", va_arg(*vargs, size_t)); break; - case 'x': len = sprintf(buffer, "%zx", 
va_arg(*vargs, size_t)); break; - case 'X': len = sprintf(buffer, "%zX", va_arg(*vargs, size_t)); break; - default: len = sprintf(buffer, "%zd", va_arg(*vargs, Py_ssize_t)); break; - } - break; - case F_PTRDIFF: - switch (*f) { - case 'o': len = sprintf(buffer, "%to", va_arg(*vargs, ptrdiff_t)); break; - case 'u': len = sprintf(buffer, "%tu", va_arg(*vargs, ptrdiff_t)); break; - case 'x': len = sprintf(buffer, "%tx", va_arg(*vargs, ptrdiff_t)); break; - case 'X': len = sprintf(buffer, "%tX", va_arg(*vargs, ptrdiff_t)); break; - default: len = sprintf(buffer, "%td", va_arg(*vargs, ptrdiff_t)); break; - } - break; - case F_INTMAX: - switch (*f) { - case 'o': len = sprintf(buffer, "%jo", va_arg(*vargs, uintmax_t)); break; - case 'u': len = sprintf(buffer, "%ju", va_arg(*vargs, uintmax_t)); break; - case 'x': len = sprintf(buffer, "%jx", va_arg(*vargs, uintmax_t)); break; - case 'X': len = sprintf(buffer, "%jX", va_arg(*vargs, uintmax_t)); break; - default: len = sprintf(buffer, "%jd", va_arg(*vargs, intmax_t)); break; - } - break; - default: - switch (*f) { - case 'o': len = sprintf(buffer, "%o", va_arg(*vargs, unsigned int)); break; - case 'u': len = sprintf(buffer, "%u", va_arg(*vargs, unsigned int)); break; - case 'x': len = sprintf(buffer, "%x", va_arg(*vargs, unsigned int)); break; - case 'X': len = sprintf(buffer, "%X", va_arg(*vargs, unsigned int)); break; - default: len = sprintf(buffer, "%d", va_arg(*vargs, int)); break; - } - break; -/*[python end generated code: output=e37b56d7b69e78f3 input=b77dfc89939506ed]*/ + case F_LONG: DO_SPRINTS("l", long, unsigned long) + case F_LONGLONG: DO_SPRINTS("ll", long long, unsigned long long) + case F_SIZE: DO_SPRINTS("z", Py_ssize_t, size_t) + case F_PTRDIFF: DO_SPRINTS("t", ptrdiff_t, ptrdiff_t) + case F_INTMAX: DO_SPRINTS("j", intmax_t, uintmax_t) + default: DO_SPRINTS("", int, unsigned int) } + #undef SPRINT + #undef DO_SPRINTS + assert(len >= 0); int sign = (buffer[0] == '-'); From 
218929c9b197e800ddff47319afb7dddde2bef1d Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Tue, 24 Sep 2024 09:35:18 -0700 Subject: [PATCH 3/3] Put ;break; in the switch --- Objects/unicodeobject.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index bce5841697891c..e9589cfe44f3bf 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2859,17 +2859,16 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer, case 'x': len = SPRINT(SIZE_SPEC, "x", UNSIGNED_TYPE); break; \ case 'X': len = SPRINT(SIZE_SPEC, "X", UNSIGNED_TYPE); break; \ default: len = SPRINT(SIZE_SPEC, "d", SIGNED_TYPE); break; \ - } \ - break; + } // Outer switch to handle all the sizes/types switch (sizemod) { - case F_LONG: DO_SPRINTS("l", long, unsigned long) - case F_LONGLONG: DO_SPRINTS("ll", long long, unsigned long long) - case F_SIZE: DO_SPRINTS("z", Py_ssize_t, size_t) - case F_PTRDIFF: DO_SPRINTS("t", ptrdiff_t, ptrdiff_t) - case F_INTMAX: DO_SPRINTS("j", intmax_t, uintmax_t) - default: DO_SPRINTS("", int, unsigned int) + case F_LONG: DO_SPRINTS("l", long, unsigned long); break; + case F_LONGLONG: DO_SPRINTS("ll", long long, unsigned long long); break; + case F_SIZE: DO_SPRINTS("z", Py_ssize_t, size_t); break; + case F_PTRDIFF: DO_SPRINTS("t", ptrdiff_t, ptrdiff_t); break; + case F_INTMAX: DO_SPRINTS("j", intmax_t, uintmax_t); break; + default: DO_SPRINTS("", int, unsigned int); break; } #undef SPRINT #undef DO_SPRINTS