Skip to content

[libc++][format] Fixes formatting code units as integers. #73396

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Nov 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion libcxx/docs/FeatureTestMacroTable.rst
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ Status
--------------------------------------------------- -----------------
``__cpp_lib_format`` *unimplemented*
--------------------------------------------------- -----------------
``__cpp_lib_format_uchar`` *unimplemented*
``__cpp_lib_format_uchar`` ``202311L``
--------------------------------------------------- -----------------
``__cpp_lib_generic_unordered_lookup`` ``201811L``
--------------------------------------------------- -----------------
Expand Down
1 change: 1 addition & 0 deletions libcxx/docs/ReleaseNotes/18.rst
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ Implemented Papers
- P2918R2 - Runtime format strings II
- P2871R3 - Remove Deprecated Unicode Conversion Facets from C++26
- P2870R3 - Remove basic_string::reserve()
- P2909R4 - Fix formatting of code units as integers (Dude, where’s my ``char``?)


Improvements and New Features
Expand Down
2 changes: 1 addition & 1 deletion libcxx/docs/Status/Cxx2cPapers.csv
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
"`P2546R5 <https://wg21.link/P2546R5>`__","LWG","Debugging Support","Kona November 2023","","",""
"`P2905R2 <https://wg21.link/P2905R2>`__","LWG","Runtime format strings","Kona November 2023","","","|format| |DR|"
"`P2918R2 <https://wg21.link/P2918R2>`__","LWG","Runtime format strings II","Kona November 2023","|Complete|","18.0","|format|"
"`P2909R4 <https://wg21.link/P2909R4>`__","LWG","Fix formatting of code units as integers (Dude, where’s my ``char``?)","Kona November 2023","","","|format| |DR|"
"`P2909R4 <https://wg21.link/P2909R4>`__","LWG","Fix formatting of code units as integers (Dude, where’s my ``char``?)","Kona November 2023","|Complete|","18.0","|format| |DR|"
"`P0952R2 <https://wg21.link/P0952R2>`__","LWG","A new specification for ``std::generate_canonical``","Kona November 2023","","",""
"`P2447R6 <https://wg21.link/P2447R6>`__","LWG","``std::span`` over an initializer list","Kona November 2023","","",""
"`P2821R5 <https://wg21.link/P2821R5>`__","LWG","``span.at()``","Kona November 2023","","",""
Expand Down
2 changes: 1 addition & 1 deletion libcxx/docs/Status/FormatIssues.csv
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ Number,Name,Standard,Assignee,Status,First released version
"`P2637R3 <https://wg21.link/P2637R3>`__","Member ``visit``","C++26","","",
"`P2905R2 <https://wg21.link/P2905R2>`__","Runtime format strings","C++26 DR","Mark de Wever","|In Progress|"
"`P2918R2 <https://wg21.link/P2918R2>`__","Runtime format strings II","C++26","Mark de Wever","|Complete|",18.0
"`P2909R4 <https://wg21.link/P2909R4>`__","Fix formatting of code units as integers (Dude, where’s my ``char``?)","C++26 DR","Mark de Wever","|In Progress|"
"`P2909R4 <https://wg21.link/P2909R4>`__","Fix formatting of code units as integers (Dude, where’s my ``char``?)","C++26 DR","Mark de Wever","|Complete|",18.0
`P1361 <https://wg21.link/P1361>`_,"Integration of chrono with text formatting","C++20",Mark de Wever,|In Progress|,
`P2372 <https://wg21.link/P2372>`__,"Fixing locale handling in chrono formatters","C++20",Mark de Wever,|In Progress|,
"`P2419R2 <https://wg21.link/P2419R2>`__","Clarify handling of encodings in localized formatting of chrono types","C++23",
10 changes: 7 additions & 3 deletions libcxx/include/__format/format_arg_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -172,9 +172,13 @@ _LIBCPP_HIDE_FROM_ABI basic_format_arg<_Context> __create_format_arg(_Tp& __valu
// __basic_format_arg_value. First handle all types needing adjustment, the
// final else requires no adjustment.
if constexpr (__arg == __arg_t::__char_type)
// On some platforms initializing a wchar_t from a char is a narrowing
// conversion.
return basic_format_arg<_Context>{__arg, static_cast<typename _Context::char_type>(__value)};

# ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS
if constexpr (same_as<typename _Context::char_type, wchar_t> && same_as<_Dp, char>)
return basic_format_arg<_Context>{__arg, static_cast<wchar_t>(static_cast<unsigned char>(__value))};
else
# endif
return basic_format_arg<_Context>{__arg, __value};
else if constexpr (__arg == __arg_t::__int)
return basic_format_arg<_Context>{__arg, static_cast<int>(__value)};
else if constexpr (__arg == __arg_t::__long_long)
Expand Down
16 changes: 7 additions & 9 deletions libcxx/include/__format/formatter_char.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#include <__format/parser_std_format_spec.h>
#include <__format/write_escaped.h>
#include <__type_traits/conditional.h>
#include <__type_traits/is_signed.h>
#include <__type_traits/make_unsigned.h>

#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
# pragma GCC system_header
Expand Down Expand Up @@ -51,22 +51,21 @@ struct _LIBCPP_TEMPLATE_VIS __formatter_char {
return __formatter::__format_escaped_char(__value, __ctx.out(), __parser_.__get_parsed_std_specifications(__ctx));
# endif

if constexpr (sizeof(_CharT) <= sizeof(int))
// Promotes _CharT to an integral type. This reduces the number of
// instantiations of __format_integer reducing code size.
if constexpr (sizeof(_CharT) <= sizeof(unsigned))
return __formatter::__format_integer(
static_cast<conditional_t<is_signed_v<_CharT>, int, unsigned>>(__value),
static_cast<unsigned>(static_cast<make_unsigned_t<_CharT>>(__value)),
__ctx,
__parser_.__get_parsed_std_specifications(__ctx));
else
return __formatter::__format_integer(__value, __ctx, __parser_.__get_parsed_std_specifications(__ctx));
return __formatter::__format_integer(
static_cast<make_unsigned_t<_CharT>>(__value), __ctx, __parser_.__get_parsed_std_specifications(__ctx));
}

template <class _FormatContext>
_LIBCPP_HIDE_FROM_ABI typename _FormatContext::iterator format(char __value, _FormatContext& __ctx) const
requires(same_as<_CharT, wchar_t>)
{
return format(static_cast<wchar_t>(__value), __ctx);
return format(static_cast<wchar_t>(static_cast<unsigned char>(__value)), __ctx);
}

# if _LIBCPP_STD_VER >= 23
Expand All @@ -84,8 +83,7 @@ template <>
struct _LIBCPP_TEMPLATE_VIS formatter<char, wchar_t> : public __formatter_char<wchar_t> {};

template <>
struct _LIBCPP_TEMPLATE_VIS formatter<wchar_t, wchar_t> : public __formatter_char<wchar_t> {
};
struct _LIBCPP_TEMPLATE_VIS formatter<wchar_t, wchar_t> : public __formatter_char<wchar_t> {};

# endif // _LIBCPP_HAS_NO_WIDE_CHARACTERS

Expand Down
2 changes: 1 addition & 1 deletion libcxx/include/version
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ __cpp_lib_within_lifetime 202306L <type_traits>
# undef __cpp_lib_execution
// # define __cpp_lib_execution 201902L
// # define __cpp_lib_format 202106L
// # define __cpp_lib_format_uchar 202311L
# define __cpp_lib_format_uchar 202311L
# define __cpp_lib_generic_unordered_lookup 201811L
# define __cpp_lib_int_pow2 202002L
# define __cpp_lib_integer_comparison_functions 202002L
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,17 +55,11 @@

#elif TEST_STD_VER == 20

# if !defined(_LIBCPP_VERSION)
# ifndef __cpp_lib_format_uchar
# error "__cpp_lib_format_uchar should be defined in c++20"
# endif
# if __cpp_lib_format_uchar != 202311L
# error "__cpp_lib_format_uchar should have the value 202311L in c++20"
# endif
# else // _LIBCPP_VERSION
# ifdef __cpp_lib_format_uchar
# error "__cpp_lib_format_uchar should not be defined because it is unimplemented in libc++!"
# endif
# ifndef __cpp_lib_format_uchar
# error "__cpp_lib_format_uchar should be defined in c++20"
# endif
# if __cpp_lib_format_uchar != 202311L
# error "__cpp_lib_format_uchar should have the value 202311L in c++20"
# endif

# ifdef __cpp_lib_saturation_arithmetic
Expand All @@ -74,17 +68,11 @@

#elif TEST_STD_VER == 23

# if !defined(_LIBCPP_VERSION)
# ifndef __cpp_lib_format_uchar
# error "__cpp_lib_format_uchar should be defined in c++23"
# endif
# if __cpp_lib_format_uchar != 202311L
# error "__cpp_lib_format_uchar should have the value 202311L in c++23"
# endif
# else // _LIBCPP_VERSION
# ifdef __cpp_lib_format_uchar
# error "__cpp_lib_format_uchar should not be defined because it is unimplemented in libc++!"
# endif
# ifndef __cpp_lib_format_uchar
# error "__cpp_lib_format_uchar should be defined in c++23"
# endif
# if __cpp_lib_format_uchar != 202311L
# error "__cpp_lib_format_uchar should have the value 202311L in c++23"
# endif

# ifdef __cpp_lib_saturation_arithmetic
Expand All @@ -93,17 +81,11 @@

#elif TEST_STD_VER > 23

# if !defined(_LIBCPP_VERSION)
# ifndef __cpp_lib_format_uchar
# error "__cpp_lib_format_uchar should be defined in c++26"
# endif
# if __cpp_lib_format_uchar != 202311L
# error "__cpp_lib_format_uchar should have the value 202311L in c++26"
# endif
# else // _LIBCPP_VERSION
# ifdef __cpp_lib_format_uchar
# error "__cpp_lib_format_uchar should not be defined because it is unimplemented in libc++!"
# endif
# ifndef __cpp_lib_format_uchar
# error "__cpp_lib_format_uchar should be defined in c++26"
# endif
# if __cpp_lib_format_uchar != 202311L
# error "__cpp_lib_format_uchar should have the value 202311L in c++26"
# endif

# if !defined(_LIBCPP_VERSION)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3383,17 +3383,11 @@
# error "__cpp_lib_format_ranges should not be defined before c++23"
# endif

# if !defined(_LIBCPP_VERSION)
# ifndef __cpp_lib_format_uchar
# error "__cpp_lib_format_uchar should be defined in c++20"
# endif
# if __cpp_lib_format_uchar != 202311L
# error "__cpp_lib_format_uchar should have the value 202311L in c++20"
# endif
# else // _LIBCPP_VERSION
# ifdef __cpp_lib_format_uchar
# error "__cpp_lib_format_uchar should not be defined because it is unimplemented in libc++!"
# endif
# ifndef __cpp_lib_format_uchar
# error "__cpp_lib_format_uchar should be defined in c++20"
# endif
# if __cpp_lib_format_uchar != 202311L
# error "__cpp_lib_format_uchar should have the value 202311L in c++20"
# endif

# ifdef __cpp_lib_formatters
Expand Down Expand Up @@ -4778,17 +4772,11 @@
# error "__cpp_lib_format_ranges should have the value 202207L in c++23"
# endif

# if !defined(_LIBCPP_VERSION)
# ifndef __cpp_lib_format_uchar
# error "__cpp_lib_format_uchar should be defined in c++23"
# endif
# if __cpp_lib_format_uchar != 202311L
# error "__cpp_lib_format_uchar should have the value 202311L in c++23"
# endif
# else // _LIBCPP_VERSION
# ifdef __cpp_lib_format_uchar
# error "__cpp_lib_format_uchar should not be defined because it is unimplemented in libc++!"
# endif
# ifndef __cpp_lib_format_uchar
# error "__cpp_lib_format_uchar should be defined in c++23"
# endif
# if __cpp_lib_format_uchar != 202311L
# error "__cpp_lib_format_uchar should have the value 202311L in c++23"
# endif

# if !defined(_LIBCPP_VERSION)
Expand Down Expand Up @@ -6389,17 +6377,11 @@
# error "__cpp_lib_format_ranges should have the value 202207L in c++26"
# endif

# if !defined(_LIBCPP_VERSION)
# ifndef __cpp_lib_format_uchar
# error "__cpp_lib_format_uchar should be defined in c++26"
# endif
# if __cpp_lib_format_uchar != 202311L
# error "__cpp_lib_format_uchar should have the value 202311L in c++26"
# endif
# else // _LIBCPP_VERSION
# ifdef __cpp_lib_format_uchar
# error "__cpp_lib_format_uchar should not be defined because it is unimplemented in libc++!"
# endif
# ifndef __cpp_lib_format_uchar
# error "__cpp_lib_format_uchar should be defined in c++26"
# endif
# if __cpp_lib_format_uchar != 202311L
# error "__cpp_lib_format_uchar should have the value 202311L in c++26"
# endif

# if !defined(_LIBCPP_VERSION)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
//===----------------------------------------------------------------------===//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// UNSUPPORTED: c++03, c++11, c++14, c++17
// ADDITIONAL_COMPILE_FLAGS: -fsigned-char

// <format>

// In C++23 the formatter is a debug-enabled specialization.
// [format.formatter.spec]:
// Each header that declares the template `formatter` provides the following
// enabled specializations:
// template<> struct formatter<char, char>;
// template<> struct formatter<char, wchar_t>;
// template<> struct formatter<wchar_t, wchar_t>;

// P2909R4 "Fix formatting of code units as integers (Dude, where’s my char?)"
// changed the behaviour of char (and wchar_t) when their underlying type is signed.

#include <format>
#include <cassert>
#include <concepts>
#include <iterator>
#include <type_traits>

#include "test_format_context.h"
#include "test_macros.h"
#include "make_string.h"
#include "assert_macros.h"
#include "concat_macros.h"

#define STR(S) MAKE_STRING(CharT, S)
#define SV(S) MAKE_STRING_VIEW(CharT, S)

// Formats `arg` through std::formatter<ArgumentT, CharT> and requires that the
// produced text equals `expected`.
//
// `fmt` is handed directly to the parse context, so callers pass the
// format-spec followed by the closing '}' (for example "d}"), not a complete
// replacement field.
template <class StringT, class StringViewT, class ArgumentT>
void test(StringT expected, StringViewT fmt, ArgumentT arg) {
  using CharT      = typename StringT::value_type;
  using OutIt      = std::back_insert_iterator<StringT>;
  using FormatCtxT = std::basic_format_context<OutIt, CharT>;

  std::formatter<ArgumentT, CharT> formatter;
  static_assert(std::semiregular<decltype(formatter)>);

  // Let the formatter consume the format-spec.
  std::basic_format_parse_context<CharT> parse_ctx(fmt);
  formatter.parse(parse_ctx);

  // Format into a string that grows through a back_insert_iterator.
  StringT result;
  OutIt out = std::back_inserter(result);
  FormatCtxT format_ctx =
      test_format_context_create<OutIt, CharT>(out, std::make_format_args<FormatCtxT>(arg));
  formatter.format(arg, format_ctx);

  TEST_REQUIRE(result == expected,
               TEST_WRITE_CONCATENATED(
                   "\nFormat string   ", fmt, "\nExpected output ", expected, "\nActual output   ", result, '\n'));
}

// Runs every formatter<char, CharT> check for one output character type.
//
// The test is built with -fsigned-char, so the arguments below that have their
// high bit set are negative chars. Following P2909R4 the integer presentations
// are expected to show the unsigned value, e.g. char(-1) gives "255" for 'd'.
template <class CharT>
void test() {
  // Default presentation: the argument is written as a character.
  test(STR("\x00"), STR("}"), '\x00');
  test(STR("a"), STR("}"), 'a');
  test(STR("\x80"), STR("}"), '\x80');
  test(STR("\xff"), STR("}"), '\xff');

  // Explicit character presentation: same expectations as the default.
  test(STR("\x00"), STR("c}"), '\x00');
  test(STR("a"), STR("c}"), 'a');
  test(STR("\x80"), STR("c}"), '\x80');
  test(STR("\xff"), STR("c}"), '\xff');

#if TEST_STD_VER > 20
  // Debug presentation. For char output the high-bit values are expected as
  // \x{..} escapes; for wchar_t output 0x80 is expected as \u{80} and 0xff as
  // the character U+00FF itself.
  test(STR(R"('\u{0}')"), STR("?}"), '\x00');
  test(STR("'a'"), STR("?}"), 'a');
  if constexpr (std::same_as<CharT, char>) {
    test(STR(R"('\x{80}')"), STR("?}"), '\x80');
    test(STR(R"('\x{ff}')"), STR("?}"), '\xff');
  }
#  ifndef TEST_HAS_NO_WIDE_CHARACTERS
  else {
    test(STR(R"('\u{80}')"), STR("?}"), '\x80');
    test(STR("'\u00ff'"), STR("?}"), '\xff');
  }
#  endif // TEST_HAS_NO_WIDE_CHARACTERS
#endif   // TEST_STD_VER > 20

  // Integer presentations. Every presentation is checked with the same five
  // arguments; the expected strings are passed in the order
  // char(-128), char(-1), char(0), char(1), char(127).
  const auto check_integer = [](const auto& fmt,
                                const auto& for_minus_128,
                                const auto& for_minus_1,
                                const auto& for_0,
                                const auto& for_1,
                                const auto& for_127) {
    test(for_minus_128, fmt, char(-128));
    test(for_minus_1, fmt, char(-1));
    test(for_0, fmt, char(0));
    test(for_1, fmt, char(1));
    test(for_127, fmt, char(127));
  };

  check_integer(STR("b}"), STR("10000000"), STR("11111111"), STR("0"), STR("1"), STR("1111111"));
  check_integer(STR("B}"), STR("10000000"), STR("11111111"), STR("0"), STR("1"), STR("1111111"));
  check_integer(STR("d}"), STR("128"), STR("255"), STR("0"), STR("1"), STR("127"));
  check_integer(STR("o}"), STR("200"), STR("377"), STR("0"), STR("1"), STR("177"));
  check_integer(STR("x}"), STR("80"), STR("ff"), STR("0"), STR("1"), STR("7f"));
  check_integer(STR("X}"), STR("80"), STR("FF"), STR("0"), STR("1"), STR("7F"));
}

// Entry point: runs the checks for char output and, unless the configuration
// has no wide characters, for wchar_t output as well.
int main(int, char**) {
  test<char>();
#if !defined(TEST_HAS_NO_WIDE_CHARACTERS)
  test<wchar_t>();
#endif // !defined(TEST_HAS_NO_WIDE_CHARACTERS)

  return 0;
}
Loading