Skip to content

DO-NOT-MERGE: bpo-34595: Add %t format to PyUnicode_FromFormatV() #9122

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions Doc/c-api/unicode.rst
Original file line number Diff line number Diff line change
Expand Up @@ -519,8 +519,12 @@ APIs:
| :attr:`%R` | PyObject\* | The result of calling |
| | | :c:func:`PyObject_Repr`. |
+-------------------+---------------------+--------------------------------+
| :attr:`%T` | PyObject\* | Object type name, equivalent |
| | | to ``Py_TYPE(op)->tp_name``. |
| :attr:`%t` | PyObject\* | The object type name, |
| | | equivalent to: |
| | | ``type(op).__name__``. |
+-------------------+---------------------+--------------------------------+
| :attr:`%T` | PyObject\* | The object type fully |
| | | qualified name [2]_. |
+-------------------+---------------------+--------------------------------+

An unrecognized format character causes all the rest of the format string to be
Expand All @@ -536,6 +540,9 @@ APIs:
.. [1] For integer specifiers (d, u, ld, li, lu, lld, lli, llu, zd, zi,
zu, i, x): the 0-conversion flag has effect even when a precision is given.

.. [2] The object type fully qualified name is equivalent to:
``f"{type(obj).__module__}.{type(obj).__qualname__}"``, except that the
module name is omitted for types defined in the ``builtins`` module.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Except that the module name is omitted for types in the builtins module (and for non-heap extension types that don't specify the module, but this can be considered as a bug).


.. versionchanged:: 3.2
Support for ``"%lld"`` and ``"%llu"`` added.

Expand All @@ -547,7 +554,8 @@ APIs:
``"%V"``, ``"%S"``, ``"%R"`` added.

.. versionchanged:: 3.7
Support for ``"%T"`` (object type name) added.
Support for ``"%t"`` (object type name) and ``"%T"`` (object type fully
qualified name) added.


.. c:function:: PyObject* PyUnicode_FromFormatV(const char *format, va_list vargs)
Expand Down
1 change: 1 addition & 0 deletions Include/object.h
Original file line number Diff line number Diff line change
Expand Up @@ -502,6 +502,7 @@ PyAPI_FUNC(PyObject *) PyType_GenericNew(PyTypeObject *,
PyObject *, PyObject *);
#ifndef Py_LIMITED_API
PyAPI_FUNC(const char *) _PyType_Name(PyTypeObject *);
PyAPI_FUNC(PyObject*) _PyType_FullName(PyTypeObject *);
PyAPI_FUNC(PyObject *) _PyType_Lookup(PyTypeObject *, PyObject *);
PyAPI_FUNC(PyObject *) _PyType_LookupId(PyTypeObject *, _Py_Identifier *);
PyAPI_FUNC(PyObject *) _PyObject_LookupSpecial(PyObject *, _Py_Identifier *);
Expand Down
25 changes: 20 additions & 5 deletions Lib/test/test_unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -2454,8 +2454,7 @@ def test_from_format(self):
pythonapi, py_object, sizeof,
c_int, c_long, c_longlong, c_ssize_t,
c_uint, c_ulong, c_ulonglong, c_size_t, c_void_p)
name = "PyUnicode_FromFormat"
_PyUnicode_FromFormat = getattr(pythonapi, name)
_PyUnicode_FromFormat = pythonapi.PyUnicode_FromFormat
_PyUnicode_FromFormat.restype = py_object

def PyUnicode_FromFormat(format, *args):
Expand Down Expand Up @@ -2655,9 +2654,25 @@ def check_format(expected, format, *args):
check_format(r"%A:'abc\xe9\uabcd\U0010ffff'",
b'%%A:%A', 'abc\xe9\uabcd\U0010ffff')

# test %T (object type name)
check_format(r"type name: str",
b'type name: %T', 'text')
# test %t (object type name) and %T (object type fully qualified name)
# test builtin type, builtin function, heap type
class HeapType:
pass

for obj in ("string", format, HeapType()):
obj_type = type(obj)
type_name = obj_type.__name__
check_format(r"type name: %s" % type_name,
b'type name: %t', py_object(obj))

type_fqn = obj_type.__qualname__
module = obj_type.__module__
if module != "builtins":
type_fqn = "%s.%s" % (module, type_fqn)
# str fully qualified name is formatted as 'str',
# not as 'builtins.str'
check_format(r"type name: %s" % type_fqn,
b'type name: %T', py_object(obj))

# test %V
check_format('repr=abc',
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
:c:func:`PyUnicode_FromFormatV`: add ``%T`` format to
:c:func:`PyUnicode_FromFormatV`: add ``%t`` and ``%T`` formats to
:c:func:`PyUnicode_FromFormatV`, and so to :c:func:`PyUnicode_FromFormat`
and :c:func:`PyErr_Format`, to format an object type name: equivalent to
"%s" with ``Py_TYPE(obj)->tp_name``.
and :c:func:`PyErr_Format`, to format an object type name. ``%t`` is equivalent to
``type(obj).__name__`` and ``%T`` is equivalent to
``f"{type(obj).__module__}.{type(obj).__qualname__}"``.
43 changes: 40 additions & 3 deletions Objects/typeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,7 @@ check_set_special_type_attr(PyTypeObject *type, PyObject *value, const char *nam
return 1;
}


const char *
_PyType_Name(PyTypeObject *type)
{
Expand Down Expand Up @@ -417,7 +418,7 @@ type_name(PyTypeObject *type, void *context)
}

static PyObject *
type_qualname(PyTypeObject *type, void *context)
_PyType_QualName(PyTypeObject *type)
{
if (type->tp_flags & Py_TPFLAGS_HEAPTYPE) {
PyHeapTypeObject* et = (PyHeapTypeObject*)type;
Expand All @@ -429,6 +430,12 @@ type_qualname(PyTypeObject *type, void *context)
}
}

/* Thin wrapper around _PyType_QualName(): returns whatever that helper
   returns for `type`.  The `context` argument is accepted but not used. */
static PyObject *
type_qualname(PyTypeObject *type, void *context)
{
return _PyType_QualName(type);
}

static int
type_set_name(PyTypeObject *type, PyObject *value, void *context)
{
Expand Down Expand Up @@ -480,8 +487,8 @@ type_set_qualname(PyTypeObject *type, PyObject *value, void *context)
return 0;
}

static PyObject *
type_module(PyTypeObject *type, void *context)
static PyObject*
_PyType_Module(PyTypeObject *type)
{
PyObject *mod;

Expand Down Expand Up @@ -509,6 +516,12 @@ type_module(PyTypeObject *type, void *context)
return mod;
}

/* Thin wrapper around _PyType_Module(): returns whatever that helper
   returns for `type`.  The `context` argument is accepted but not used. */
static PyObject *
type_module(PyTypeObject *type, void *context)
{
return _PyType_Module(type);
}

static int
type_set_module(PyTypeObject *type, PyObject *value, void *context)
{
Expand All @@ -520,6 +533,30 @@ type_set_module(PyTypeObject *type, PyObject *value, void *context)
return _PyDict_SetItemId(type->tp_dict, &PyId___module__, value);
}


/* Return the fully qualified name of `type` as a new str object.

   - Non-heap types (Py_TPFLAGS_HEAPTYPE not set): tp_name is returned
     unchanged.
   - Heap types: the result is "<module>.<qualname>", built from
     _PyType_Module() and _PyType_QualName().  If the module value is not
     a str, or is equal to "builtins", only the qualified name is returned.

   Returns a new reference, or NULL on failure. */
PyObject *
_PyType_FullName(PyTypeObject *type)
{
    if (!(type->tp_flags & Py_TPFLAGS_HEAPTYPE)) {
        return PyUnicode_FromString(type->tp_name);
    }

    PyObject *module = _PyType_Module(type);
    if (module == NULL) {
        return NULL;
    }
    PyObject *qualname = _PyType_QualName(type);
    if (qualname == NULL) {
        Py_DECREF(module);
        return NULL;
    }

    PyObject *fullname;
    if (PyUnicode_Check(module)
        && PyUnicode_CompareWithASCIIString(module, "builtins") != 0)
    {
        fullname = PyUnicode_FromFormat("%U.%U", module, qualname);
    }
    else {
        /* "%U" requires a str argument, but the __module__ value stored on
           a type may not be a str; passing it to "%U" unchecked would be
           invalid.  The "builtins" prefix is dropped so that heap types
           behave like static builtin types, whose tp_name carries no
           module prefix. */
        fullname = qualname;
        Py_INCREF(fullname);
    }
    Py_DECREF(module);
    Py_DECREF(qualname);
    return fullname;
}


static PyObject *
type_abstractmethods(PyTypeObject *type, void *context)
{
Expand Down
64 changes: 40 additions & 24 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -768,7 +768,7 @@ ensure_unicode(PyObject *obj)
{
if (!PyUnicode_Check(obj)) {
PyErr_Format(PyExc_TypeError,
"must be str, not %T", obj);
"must be str, not %t", obj);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What was used instead of %t/%T before?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Python 3.7 code:

        PyErr_Format(PyExc_TypeError,
                     "must be str, not %.100s",
                     Py_TYPE(obj)->tp_name);

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thus it was closer to %T. In error messages it is better to use fully qualified names.

return -1;
}
return PyUnicode_READY(obj);
Expand Down Expand Up @@ -2826,17 +2826,33 @@ unicode_fromformat_arg(_PyUnicodeWriter *writer,
break;
}

case 'T':
case 't':
{
/* Object type name (tp_name) */
/* Object type name: type(obj).__name__ */
PyObject *obj = va_arg(*vargs, PyObject *);
PyTypeObject *type = Py_TYPE(obj);
const char *type_name = type->tp_name;
const char *type_name = _PyType_Name(type);
if (unicode_fromformat_write_utf8(writer, type_name, -1, -1) < 0) {
return NULL;
}
break;
}

case 'T':
{
    /* Object type fully qualified name:
       f"{type(obj).__module__}.{type(obj).__qualname__}". */
    PyObject *obj = va_arg(*vargs, PyObject *);
    PyTypeObject *type = Py_TYPE(obj);
    PyObject *name = _PyType_FullName(type);
    if (name == NULL) {
        /* _PyType_FullName() can fail and return NULL; the result must
           not be handed to the writer or to Py_DECREF() in that case. */
        return NULL;
    }
    /* Release the temporary name on both the success and error paths
       before inspecting the write result. */
    int res = _PyUnicodeWriter_WriteStr(writer, name);
    Py_DECREF(name);
    if (res < 0) {
        return NULL;
    }
    break;
}

case '%':
if (_PyUnicodeWriter_WriteCharInline(writer, '%') < 0)
return NULL;
Expand Down Expand Up @@ -3034,7 +3050,7 @@ PyUnicode_FromObject(PyObject *obj)
return _PyUnicode_Copy(obj);
}
PyErr_Format(PyExc_TypeError,
"Can't convert '%T' object to str implicitly", obj);
"Can't convert '%t' object to str implicitly", obj);
return NULL;
}

Expand Down Expand Up @@ -3070,7 +3086,7 @@ PyUnicode_FromEncodedObject(PyObject *obj,
/* Retrieve a bytes buffer view through the PEP 3118 buffer interface */
if (PyObject_GetBuffer(obj, &buffer, PyBUF_SIMPLE) < 0) {
PyErr_Format(PyExc_TypeError,
"decoding to str: need a bytes-like object, %T found",
"decoding to str: need a bytes-like object, %t found",
obj);
return NULL;
}
Expand Down Expand Up @@ -3201,7 +3217,7 @@ PyUnicode_Decode(const char *s,
goto onError;
if (!PyUnicode_Check(unicode)) {
PyErr_Format(PyExc_TypeError,
"'%.400s' decoder returned '%T' instead of 'str'; "
"'%.400s' decoder returned '%t' instead of 'str'; "
"use codecs.decode() to decode to arbitrary types",
encoding, unicode);
Py_DECREF(unicode);
Expand Down Expand Up @@ -3263,7 +3279,7 @@ PyUnicode_AsDecodedUnicode(PyObject *unicode,
goto onError;
if (!PyUnicode_Check(v)) {
PyErr_Format(PyExc_TypeError,
"'%.400s' decoder returned '%T' instead of 'str'; "
"'%.400s' decoder returned '%t' instead of 'str'; "
"use codecs.decode() to decode to arbitrary types",
encoding, unicode);
Py_DECREF(v);
Expand Down Expand Up @@ -3496,7 +3512,7 @@ PyUnicode_AsEncodedString(PyObject *unicode,
}

PyErr_Format(PyExc_TypeError,
"'%.400s' encoder returned '%T' instead of 'bytes'; "
"'%.400s' encoder returned '%t' instead of 'bytes'; "
"use codecs.encode() to encode to arbitrary types",
encoding, v);
Py_DECREF(v);
Expand Down Expand Up @@ -3529,7 +3545,7 @@ PyUnicode_AsEncodedUnicode(PyObject *unicode,
goto onError;
if (!PyUnicode_Check(v)) {
PyErr_Format(PyExc_TypeError,
"'%.400s' encoder returned '%T' instead of 'str'; "
"'%.400s' encoder returned '%t' instead of 'str'; "
"use codecs.encode() to encode to arbitrary types",
encoding, v);
Py_DECREF(v);
Expand Down Expand Up @@ -3704,7 +3720,7 @@ PyUnicode_FSDecoder(PyObject* arg, void* addr)
if (!PyBytes_Check(path) &&
PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
"path should be string, bytes, "
"or os.PathLike, not %T",
"or os.PathLike, not %t",
arg))
{
Py_DECREF(path);
Expand All @@ -3724,7 +3740,7 @@ PyUnicode_FSDecoder(PyObject* arg, void* addr)
}
else {
PyErr_Format(PyExc_TypeError,
"path should be string, bytes, or os.PathLike, not %T",
"path should be string, bytes, or os.PathLike, not %t",
arg);
Py_DECREF(path);
return 0;
Expand Down Expand Up @@ -9893,7 +9909,7 @@ _PyUnicode_JoinArray(PyObject *separator, PyObject *const *items, Py_ssize_t seq
else {
if (!PyUnicode_Check(separator)) {
PyErr_Format(PyExc_TypeError,
"separator: expected str instance, %T found",
"separator: expected str instance, %t found",
separator);
goto onError;
}
Expand Down Expand Up @@ -9925,7 +9941,7 @@ _PyUnicode_JoinArray(PyObject *separator, PyObject *const *items, Py_ssize_t seq
item = items[i];
if (!PyUnicode_Check(item)) {
PyErr_Format(PyExc_TypeError,
"sequence item %zd: expected str instance, %T found",
"sequence item %zd: expected str instance, %t found",
i, item);
goto onError;
}
Expand Down Expand Up @@ -10741,7 +10757,7 @@ convert_uc(PyObject *obj, void *addr)
if (!PyUnicode_Check(obj)) {
PyErr_Format(PyExc_TypeError,
"The fill character must be a unicode character, "
"not %T", obj);
"not %t", obj);
return 0;
}
if (PyUnicode_READY(obj) < 0)
Expand Down Expand Up @@ -11147,7 +11163,7 @@ PyUnicode_Contains(PyObject *str, PyObject *substr)

if (!PyUnicode_Check(substr)) {
PyErr_Format(PyExc_TypeError,
"'in <string>' requires string as left operand, not %T",
"'in <string>' requires string as left operand, not %t",
substr);
return -1;
}
Expand Down Expand Up @@ -12853,7 +12869,7 @@ unicode_split_impl(PyObject *self, PyObject *sep, Py_ssize_t maxsplit)
if (PyUnicode_Check(sep))
return split(self, sep, maxsplit);

PyErr_Format(PyExc_TypeError, "must be str or None, not %T", sep);
PyErr_Format(PyExc_TypeError, "must be str or None, not %t", sep);
return NULL;
}

Expand Down Expand Up @@ -13039,7 +13055,7 @@ unicode_rsplit_impl(PyObject *self, PyObject *sep, Py_ssize_t maxsplit)
if (PyUnicode_Check(sep))
return rsplit(self, sep, maxsplit);

PyErr_Format(PyExc_TypeError, "must be str or None, not %T", sep);
PyErr_Format(PyExc_TypeError, "must be str or None, not %t", sep);
return NULL;
}

Expand Down Expand Up @@ -13334,7 +13350,7 @@ unicode_startswith(PyObject *self,
if (!PyUnicode_Check(substring)) {
PyErr_Format(PyExc_TypeError,
"tuple for startswith must only contain str, "
"not %T",
"not %t",
substring);
return NULL;
}
Expand All @@ -13351,7 +13367,7 @@ unicode_startswith(PyObject *self,
if (!PyUnicode_Check(subobj)) {
PyErr_Format(PyExc_TypeError,
"startswith first arg must be str or "
"a tuple of str, not %T", subobj);
"a tuple of str, not %t", subobj);
return NULL;
}
result = tailmatch(self, subobj, start, end, -1);
Expand Down Expand Up @@ -13388,7 +13404,7 @@ unicode_endswith(PyObject *self,
if (!PyUnicode_Check(substring)) {
PyErr_Format(PyExc_TypeError,
"tuple for endswith must only contain str, "
"not %T",
"not %t",
substring);
return NULL;
}
Expand All @@ -13404,7 +13420,7 @@ unicode_endswith(PyObject *self,
if (!PyUnicode_Check(subobj)) {
PyErr_Format(PyExc_TypeError,
"endswith first arg must be str or "
"a tuple of str, not %T", subobj);
"a tuple of str, not %t", subobj);
return NULL;
}
result = tailmatch(self, subobj, start, end, +1);
Expand Down Expand Up @@ -14314,12 +14330,12 @@ mainformatlong(PyObject *v,
case 'x':
case 'X':
PyErr_Format(PyExc_TypeError,
"%%%c format: an integer is required, not %T",
"%%%c format: an integer is required, not %t",
type, v);
break;
default:
PyErr_Format(PyExc_TypeError,
"%%%c format: a number is required, not %T",
"%%%c format: a number is required, not %t",
type, v);
break;
}
Expand Down