From 6f699b565626556aaf255884cbe20871fc27e760 Mon Sep 17 00:00:00 2001 From: Ben Hsing Date: Tue, 24 Dec 2024 15:38:15 +0800 Subject: [PATCH 01/12] gh-128213: fast path for bytes creation from list and tuple --- Objects/bytesobject.c | 89 +++++++++++-------------------------------- 1 file changed, 22 insertions(+), 67 deletions(-) diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 533089d25cd73a..1d6775d275bd99 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2810,79 +2810,33 @@ _PyBytes_FromBuffer(PyObject *x) } static PyObject* -_PyBytes_FromList(PyObject *x) +_PyBytes_FromSequence(PyObject *x) { - Py_ssize_t i, size = PyList_GET_SIZE(x); - Py_ssize_t value; - char *str; - PyObject *item; - _PyBytesWriter writer; - - _PyBytesWriter_Init(&writer); - str = _PyBytesWriter_Alloc(&writer, size); - if (str == NULL) - return NULL; - writer.overallocate = 1; - size = writer.allocated; - - for (i = 0; i < PyList_GET_SIZE(x); i++) { - item = PyList_GET_ITEM(x, i); - Py_INCREF(item); - value = PyNumber_AsSsize_t(item, NULL); - Py_DECREF(item); - if (value == -1 && PyErr_Occurred()) - goto error; - - if (value < 0 || value >= 256) { - PyErr_SetString(PyExc_ValueError, - "bytes must be in range(0, 256)"); - goto error; - } - - if (i >= size) { - str = _PyBytesWriter_Resize(&writer, str, size+1); - if (str == NULL) - return NULL; - size = writer.allocated; - } - *str++ = (char) value; - } - return _PyBytesWriter_Finish(&writer, str); - - error: - _PyBytesWriter_Dealloc(&writer); - return NULL; -} - -static PyObject* -_PyBytes_FromTuple(PyObject *x) -{ - PyObject *bytes; - Py_ssize_t i, size = PyTuple_GET_SIZE(x); - Py_ssize_t value; - char *str; - PyObject *item; - - bytes = PyBytes_FromStringAndSize(NULL, size); + Py_ssize_t size = PySequence_Fast_GET_SIZE(x); + PyObject *bytes = PyBytes_FromStringAndSize(NULL, size); if (bytes == NULL) return NULL; - str = ((PyBytesObject *)bytes)->ob_sval; - - for (i = 0; i < size; i++) { - item = 
PyTuple_GET_ITEM(x, i); - value = PyNumber_AsSsize_t(item, NULL); - if (value == -1 && PyErr_Occurred()) + char *s = PyBytes_AS_STRING(bytes); + PyObject **items = PySequence_Fast_ITEMS(x); + for (Py_ssize_t i = 0; i < size; i++) { + if (!PyLong_CheckExact(items[i])) { + Py_DECREF(bytes); + return Py_None; // None as fallback sentinel to the slow path + } + int overflow; + long value = PyLong_AsLongAndOverflow(items[i], &overflow); + if (value == -1 && PyErr_Occurred()) { goto error; - + } if (value < 0 || value >= 256) { + /* this includes an overflow in converting to C long */ PyErr_SetString(PyExc_ValueError, "bytes must be in range(0, 256)"); goto error; } - *str++ = (char) value; + s[i] = value; } return bytes; - error: Py_DECREF(bytes); return NULL; @@ -2968,11 +2922,12 @@ PyBytes_FromObject(PyObject *x) if (PyObject_CheckBuffer(x)) return _PyBytes_FromBuffer(x); - if (PyList_CheckExact(x)) - return _PyBytes_FromList(x); - - if (PyTuple_CheckExact(x)) - return _PyBytes_FromTuple(x); + if (PyList_CheckExact(x) || PyTuple_CheckExact(x)) { + PyObject *bytes = _PyBytes_FromSequence(x); + if (bytes != Py_None) { + return bytes; + } + } if (!PyUnicode_Check(x)) { it = PyObject_GetIter(x); From 18c8e4af961b88a337f7979e798faaf9d45dce8d Mon Sep 17 00:00:00 2001 From: Ben Hsing Date: Tue, 24 Dec 2024 16:03:45 +0800 Subject: [PATCH 02/12] coerce long to char after validation of integer in byte range --- Objects/bytesobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 1d6775d275bd99..746635004db97a 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2834,7 +2834,7 @@ _PyBytes_FromSequence(PyObject *x) "bytes must be in range(0, 256)"); goto error; } - s[i] = value; + s[i] = (char)value; } return bytes; error: From 406fbdbec7cc75ca18711922626dd058acbd3e55 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Tue, 24 Dec 2024 08:44:56 
+0000 Subject: [PATCH 03/12] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2024-12-24-08-44-49.gh-issue-128213.Y71jDi.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-12-24-08-44-49.gh-issue-128213.Y71jDi.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-12-24-08-44-49.gh-issue-128213.Y71jDi.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-24-08-44-49.gh-issue-128213.Y71jDi.rst new file mode 100644 index 00000000000000..80677a8afdc47a --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-24-08-44-49.gh-issue-128213.Y71jDi.rst @@ -0,0 +1,2 @@ +Speed up :class:`bytes` creation from :class:`list` and :class:`tuple` by around 62%. +Patch by Ben Hsing From 4912a05220f54df82e3ac7488c1fc0cf1af31876 Mon Sep 17 00:00:00 2001 From: blhsing Date: Tue, 24 Dec 2024 16:54:37 +0800 Subject: [PATCH 04/12] Update 2024-12-24-08-44-49.gh-issue-128213.Y71jDi.rst --- .../2024-12-24-08-44-49.gh-issue-128213.Y71jDi.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-12-24-08-44-49.gh-issue-128213.Y71jDi.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-24-08-44-49.gh-issue-128213.Y71jDi.rst index 80677a8afdc47a..85614b8f473dad 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2024-12-24-08-44-49.gh-issue-128213.Y71jDi.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-24-08-44-49.gh-issue-128213.Y71jDi.rst @@ -1,2 +1,2 @@ -Speed up :class:`bytes` creation from :class:`list` and :class:`tuple` by around 62%. -Patch by Ben Hsing +Speed up :class:`bytes` creation from :class:`list` and :class:`tuple` by around 81%. 
+Patch by Ben Hsing From 56f802e86688dcf134f6d2adfd50ea62d5c4cf50 Mon Sep 17 00:00:00 2001 From: Ben Hsing Date: Wed, 25 Dec 2024 16:53:03 +0800 Subject: [PATCH 05/12] updated for thread-safety, style choices, function choices and benchmark notes --- ...-12-24-08-44-49.gh-issue-128213.Y71jDi.rst | 3 +- Objects/bytesobject.c | 31 ++++++++++++------- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-12-24-08-44-49.gh-issue-128213.Y71jDi.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-24-08-44-49.gh-issue-128213.Y71jDi.rst index 85614b8f473dad..e425258d3615aa 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2024-12-24-08-44-49.gh-issue-128213.Y71jDi.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-24-08-44-49.gh-issue-128213.Y71jDi.rst @@ -1,2 +1,3 @@ -Speed up :class:`bytes` creation from :class:`list` and :class:`tuple` by around 81%. +Speed up :class:`bytes` creation from :class:`list` and :class:`tuple` of integers. Benchmarks show that from a list with 1000000 random numbers the time to create a bytes object is reduced by around 31%, or 30% with 10000 numbers, or 27% with 100 numbers. 
+ Patch by Ben Hsing diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 746635004db97a..c0e3a7921640c3 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -6,6 +6,7 @@ #include "pycore_bytesobject.h" // _PyBytes_Find(), _PyBytes_Repeat() #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_ceval.h" // _PyEval_GetBuiltin() +#include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST() #include "pycore_format.h" // F_LJUST #include "pycore_global_objects.h"// _Py_GET_GLOBAL_OBJECT() #include "pycore_initconfig.h" // _PyStatus_OK() @@ -2813,18 +2814,21 @@ static PyObject* _PyBytes_FromSequence(PyObject *x) { Py_ssize_t size = PySequence_Fast_GET_SIZE(x); - PyObject *bytes = PyBytes_FromStringAndSize(NULL, size); - if (bytes == NULL) + PyObject *bytes = _PyBytes_FromSize(size, 0); + if (bytes == NULL) { return NULL; - char *s = PyBytes_AS_STRING(bytes); - PyObject **items = PySequence_Fast_ITEMS(x); + } + char *str = PyBytes_AS_STRING(bytes); + PyObject *const *items = PySequence_Fast_ITEMS(x); + Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(x); for (Py_ssize_t i = 0; i < size; i++) { - if (!PyLong_CheckExact(items[i])) { + if (!PyLong_Check(items[i])) { Py_DECREF(bytes); - return Py_None; // None as fallback sentinel to the slow path + /* Py_None as a fallback sentinel to the slow path */ + bytes = Py_None; + goto done; } - int overflow; - long value = PyLong_AsLongAndOverflow(items[i], &overflow); + int value = PyLong_AsInt(items[i]); if (value == -1 && PyErr_Occurred()) { goto error; } @@ -2834,12 +2838,16 @@ _PyBytes_FromSequence(PyObject *x) "bytes must be in range(0, 256)"); goto error; } - s[i] = (char)value; + *str++ = (char) value; } - return bytes; + goto done; error: Py_DECREF(bytes); - return NULL; + bytes = NULL; + done: + /* both success and failure need to end the critical section */ + Py_END_CRITICAL_SECTION_SEQUENCE_FAST(); + return bytes; } static PyObject * @@ -2924,6 +2932,7 @@ 
PyBytes_FromObject(PyObject *x) if (PyList_CheckExact(x) || PyTuple_CheckExact(x)) { PyObject *bytes = _PyBytes_FromSequence(x); + /* Py_None as a fallback sentinel to the slow path */ if (bytes != Py_None) { return bytes; } From 4e1e3e6bb9c3d2ce6c565070a3fc1cfc769fa9f8 Mon Sep 17 00:00:00 2001 From: Ben Hsing Date: Wed, 25 Dec 2024 17:28:33 +0800 Subject: [PATCH 06/12] revert to PyLong_AsLongAndOverflow for easier overflow handling --- Objects/bytesobject.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index c0e3a7921640c3..4e8de734457c69 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2828,7 +2828,8 @@ _PyBytes_FromSequence(PyObject *x) bytes = Py_None; goto done; } - int value = PyLong_AsInt(items[i]); + int overflow; + long value = PyLong_AsLongAndOverflow(items[i], &overflow); if (value == -1 && PyErr_Occurred()) { goto error; } From bf96d06894c79d79cbaebab2b94862346113f296 Mon Sep 17 00:00:00 2001 From: Ben Hsing Date: Thu, 26 Dec 2024 16:50:36 +0800 Subject: [PATCH 07/12] fixed issue of a label at the end of a compound statement; revert to using PyNumber_AsSsize_t; fixed indentation --- Objects/bytesobject.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 4e8de734457c69..858aa3b6459540 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2826,15 +2826,13 @@ _PyBytes_FromSequence(PyObject *x) Py_DECREF(bytes); /* Py_None as a fallback sentinel to the slow path */ bytes = Py_None; - goto done; + goto done; } - int overflow; - long value = PyLong_AsLongAndOverflow(items[i], &overflow); + Py_ssize_t value = PyNumber_AsSsize_t(items[i], NULL); if (value == -1 && PyErr_Occurred()) { goto error; } if (value < 0 || value >= 256) { - /* this includes an overflow in converting to C long */ PyErr_SetString(PyExc_ValueError, "bytes must be in range(0, 256)"); goto error; @@ -2846,6 +2844,10 
@@ _PyBytes_FromSequence(PyObject *x) Py_DECREF(bytes); bytes = NULL; done: + /* some C parsers require a label not to be at the end of a compound + statement, which the ending macro of a critical section introduces, so + we need an empty statement here to satisfy that syntax rule */ + ; /* both success and failure need to end the critical section */ Py_END_CRITICAL_SECTION_SEQUENCE_FAST(); return bytes; From f3a94235cbc1efec3a0a0d5be861ff72f575e244 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Wed, 26 Mar 2025 10:22:12 +0100 Subject: [PATCH 08/12] Add FT test for bytes from list --- .../test_free_threading/test_bytes_object.py | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 Lib/test/test_free_threading/test_bytes_object.py diff --git a/Lib/test/test_free_threading/test_bytes_object.py b/Lib/test/test_free_threading/test_bytes_object.py new file mode 100644 index 00000000000000..ee3bd42f61c175 --- /dev/null +++ b/Lib/test/test_free_threading/test_bytes_object.py @@ -0,0 +1,37 @@ +import unittest +import sys +from threading import Thread, Barrier +from test.support import threading_helper + +threading_helper.requires_working_threading(module=True) + +class BytesThreading(unittest.TestCase): + + @threading_helper.reap_threads + def test_conversion_from_list(self): + number_of_threads = 10 + number_of_iterations = 10 + barrier = Barrier(number_of_threads) + + x = [1, 2, 3, 4, 5] + e = [ (ii,)*(2+4*ii) for ii in range(number_of_threads)] # range of sizes to extend + def work(ii): + barrier.wait() + for _ in range(1000): + bytes(x) + x.extend(e[ii]) + if len(x) > 10: + x[:] = [0] + + for it in range(number_of_iterations): + worker_threads = [] + for ii in range(number_of_threads): + worker_threads.append( + Thread(target=work, args=[ii])) + with threading_helper.start_threads(worker_threads): + pass + + barrier.reset() + +if __name__ == "__main__": + unittest.main() From bc6f8f20b5ea8d914f58933d470741e05cf56c31 Mon Sep 17 00:00:00 
2001 From: Pieter Eendebak Date: Wed, 16 Apr 2025 14:58:55 +0200 Subject: [PATCH 09/12] refactor --- ...-12-24-08-44-49.gh-issue-128213.Y71jDi.rst | 2 +- Objects/bytesobject.c | 27 +++++++------------ 2 files changed, 11 insertions(+), 18 deletions(-) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-12-24-08-44-49.gh-issue-128213.Y71jDi.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-24-08-44-49.gh-issue-128213.Y71jDi.rst index e425258d3615aa..ef3ab9f04d20ac 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2024-12-24-08-44-49.gh-issue-128213.Y71jDi.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-24-08-44-49.gh-issue-128213.Y71jDi.rst @@ -1,3 +1,3 @@ Speed up :class:`bytes` creation from :class:`list` and :class:`tuple` of integers. Benchmarks show that from a list with 1000000 random numbers the time to create a bytes object is reduced by around 31%, or 30% with 10000 numbers, or 27% with 100 numbers. -Patch by Ben Hsing +Patch by Ben Hsing and Pieter Eendebak diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index bb9c13e4d31696..063c7260e04d75 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2855,7 +2855,7 @@ _PyBytes_FromBuffer(PyObject *x) } static PyObject* -_PyBytes_FromSequence(PyObject *x) +_PyBytes_FromSequence_lock_held(PyObject *x) { Py_ssize_t size = PySequence_Fast_GET_SIZE(x); PyObject *bytes = _PyBytes_FromSize(size, 0); @@ -2864,13 +2864,11 @@ _PyBytes_FromSequence(PyObject *x) } char *str = PyBytes_AS_STRING(bytes); PyObject *const *items = PySequence_Fast_ITEMS(x); - Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(x); for (Py_ssize_t i = 0; i < size; i++) { if (!PyLong_Check(items[i])) { Py_DECREF(bytes); /* Py_None as a fallback sentinel to the slow path */ - bytes = Py_None; - goto done; + Py_RETURN_NONE; } Py_ssize_t value = PyNumber_AsSsize_t(items[i], NULL); if (value == -1 && PyErr_Occurred()) { @@ -2883,18 +2881,11 @@ _PyBytes_FromSequence(PyObject *x) } *str++ = (char) value; } - goto done; + return bytes; + 
error: Py_DECREF(bytes); - bytes = NULL; - done: - /* some C parsers require a label not to be at the end of a compound - statement, which the ending macro of a critical section introduces, so - we need an empty statement here to satisfy that syntax rule */ - ; - /* both success and failure need to end the critical section */ - Py_END_CRITICAL_SECTION_SEQUENCE_FAST(); - return bytes; + return NULL; } static PyObject * @@ -2978,10 +2969,12 @@ PyBytes_FromObject(PyObject *x) return _PyBytes_FromBuffer(x); if (PyList_CheckExact(x) || PyTuple_CheckExact(x)) { - PyObject *bytes = _PyBytes_FromSequence(x); + Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(x); + result = _PyBytes_FromSequence_lock_held(x); + Py_END_CRITICAL_SECTION_SEQUENCE_FAST(); /* Py_None as a fallback sentinel to the slow path */ - if (bytes != Py_None) { - return bytes; + if (result != Py_None) { + return result; } } From 970c10b2ac0b15cf296cbeb9795209c737d45189 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Wed, 16 Apr 2025 15:00:13 +0200 Subject: [PATCH 10/12] refactor --- Objects/bytesobject.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 063c7260e04d75..99f6c614b04139 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2872,20 +2872,18 @@ _PyBytes_FromSequence_lock_held(PyObject *x) } Py_ssize_t value = PyNumber_AsSsize_t(items[i], NULL); if (value == -1 && PyErr_Occurred()) { - goto error; + Py_DECREF(bytes); + return NULL } if (value < 0 || value >= 256) { PyErr_SetString(PyExc_ValueError, "bytes must be in range(0, 256)"); - goto error; + Py_DECREF(bytes); + return NULL } *str++ = (char) value; } return bytes; - - error: - Py_DECREF(bytes); - return NULL; } static PyObject * From cb664fe8c7b5b8bfb7e017c200cddcf35da272cc Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Wed, 16 Apr 2025 15:00:27 +0200 Subject: [PATCH 11/12] refactor --- Objects/bytesobject.c | 4 ++-- 1 file changed, 2 
insertions(+), 2 deletions(-) diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c index 99f6c614b04139..fa04b116086aa0 100644 --- a/Objects/bytesobject.c +++ b/Objects/bytesobject.c @@ -2873,13 +2873,13 @@ _PyBytes_FromSequence_lock_held(PyObject *x) Py_ssize_t value = PyNumber_AsSsize_t(items[i], NULL); if (value == -1 && PyErr_Occurred()) { Py_DECREF(bytes); - return NULL + return NULL; } if (value < 0 || value >= 256) { PyErr_SetString(PyExc_ValueError, "bytes must be in range(0, 256)"); Py_DECREF(bytes); - return NULL + return NULL; } *str++ = (char) value; } From c3572179c2f98f745c03fc497d915e00e4e59011 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Wed, 16 Apr 2025 15:01:26 +0200 Subject: [PATCH 12/12] Update Misc/NEWS.d/next/Core_and_Builtins/2024-12-24-08-44-49.gh-issue-128213.Y71jDi.rst Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> --- .../2024-12-24-08-44-49.gh-issue-128213.Y71jDi.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-12-24-08-44-49.gh-issue-128213.Y71jDi.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-24-08-44-49.gh-issue-128213.Y71jDi.rst index ef3ab9f04d20ac..ca47f0cfc3522a 100644 --- a/Misc/NEWS.d/next/Core_and_Builtins/2024-12-24-08-44-49.gh-issue-128213.Y71jDi.rst +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-12-24-08-44-49.gh-issue-128213.Y71jDi.rst @@ -1,3 +1,3 @@ -Speed up :class:`bytes` creation from :class:`list` and :class:`tuple` of integers. Benchmarks show that from a list with 1000000 random numbers the time to create a bytes object is reduced by around 31%, or 30% with 10000 numbers, or 27% with 100 numbers. +Speed up :class:`bytes` creation from :class:`list` and :class:`tuple` of integers by 27-31%. Patch by Ben Hsing and Pieter Eendebak