@@ -629,6 +629,8 @@ init_interpreter(PyInterpreterState *interp,
     assert(next != NULL || (interp == runtime->interpreters.main));
     interp->next = next;
 
+    interp->threads.preallocated = &interp->_initial_thread;
+
     // We would call _PyObject_InitState() at this point
     // if interp->feature_flags were alredy set.
 
@@ -766,7 +768,6 @@ PyInterpreterState_New(void)
     return interp;
 }
 
-
 static void
 interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
 {
@@ -910,6 +911,9 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate)
     // XXX Once we have one allocator per interpreter (i.e.
     // per-interpreter GC) we must ensure that all of the interpreter's
     // objects have been cleaned up at the point.
+
+    // We could clear interp->threads.freelist here
+    // if it held more than just the initial thread state.
 }
 
 
@@ -1386,22 +1390,45 @@ allocate_chunk(int size_in_bytes, _PyStackChunk* previous)
     return res;
 }
 
+static void
+reset_threadstate(_PyThreadStateImpl *tstate)
+{
+    // Set to _PyThreadState_INIT directly?
+    memcpy(tstate,
+           &initial._main_interpreter._initial_thread,
+           sizeof(*tstate));
+}
+
 static _PyThreadStateImpl *
-alloc_threadstate(void)
+alloc_threadstate(PyInterpreterState *interp)
 {
-    return PyMem_RawCalloc(1, sizeof(_PyThreadStateImpl));
+    _PyThreadStateImpl *tstate;
+
+    // Try the preallocated tstate first.
+    tstate = _Py_atomic_exchange_ptr(&interp->threads.preallocated, NULL);
+
+    // Fall back to the allocator.
+    if (tstate == NULL) {
+        tstate = PyMem_RawCalloc(1, sizeof(_PyThreadStateImpl));
+        if (tstate == NULL) {
+            return NULL;
+        }
+        reset_threadstate(tstate);
+    }
+    return tstate;
 }
 
 static void
 free_threadstate(_PyThreadStateImpl *tstate)
 {
+    PyInterpreterState *interp = tstate->base.interp;
     // The initial thread state of the interpreter is allocated
     // as part of the interpreter state so should not be freed.
-    if (tstate == &tstate->base.interp->_initial_thread) {
-        // Restore to _PyThreadState_INIT.
-        memcpy(tstate,
-               &initial._main_interpreter._initial_thread,
-               sizeof(*tstate));
+    if (tstate == &interp->_initial_thread) {
+        // Make it available again.
+        reset_threadstate(tstate);
+        assert(interp->threads.preallocated == NULL);
+        _Py_atomic_store_ptr(&interp->threads.preallocated, tstate);
     }
     else {
         PyMem_RawFree(tstate);
@@ -1492,66 +1519,38 @@ add_threadstate(PyInterpreterState *interp, PyThreadState *tstate,
 static PyThreadState *
 new_threadstate(PyInterpreterState *interp, int whence)
 {
-    _PyThreadStateImpl *tstate;
-    _PyRuntimeState *runtime = interp->runtime;
-    // We don't need to allocate a thread state for the main interpreter
-    // (the common case), but doing it later for the other case revealed a
-    // reentrancy problem (deadlock). So for now we always allocate before
-    // taking the interpreters lock. See GH-96071.
-    _PyThreadStateImpl *new_tstate = alloc_threadstate();
-    int used_newtstate;
-    if (new_tstate == NULL) {
+    // Allocate the thread state.
+    _PyThreadStateImpl *tstate = alloc_threadstate(interp);
+    if (tstate == NULL) {
         return NULL;
     }
+
 #ifdef Py_GIL_DISABLED
     Py_ssize_t qsbr_idx = _Py_qsbr_reserve(interp);
     if (qsbr_idx < 0) {
-        PyMem_RawFree(new_tstate);
+        free_threadstate(tstate);
         return NULL;
     }
     int32_t tlbc_idx = _Py_ReserveTLBCIndex(interp);
     if (tlbc_idx < 0) {
-        PyMem_RawFree(new_tstate);
+        free_threadstate(tstate);
         return NULL;
     }
 #endif
 
     /* We serialize concurrent creation to protect global state. */
-    HEAD_LOCK(runtime);
+    HEAD_LOCK(interp->runtime);
 
+    // Initialize the new thread state.
     interp->threads.next_unique_id += 1;
     uint64_t id = interp->threads.next_unique_id;
+    init_threadstate(tstate, interp, id, whence);
 
-    // Allocate the thread state and add it to the interpreter.
+    // Add the new thread state to the interpreter.
     PyThreadState *old_head = interp->threads.head;
-    if (old_head == NULL) {
-        // It's the interpreter's initial thread state.
-        used_newtstate = 0;
-        tstate = &interp->_initial_thread;
-    }
-    // XXX Re-use interp->_initial_thread if not in use?
-    else {
-        // Every valid interpreter must have at least one thread.
-        assert(id > 1);
-        assert(old_head->prev == NULL);
-        used_newtstate = 1;
-        tstate = new_tstate;
-        // Set to _PyThreadState_INIT.
-        memcpy(tstate,
-               &initial._main_interpreter._initial_thread,
-               sizeof(*tstate));
-    }
-
-    init_threadstate(tstate, interp, id, whence);
     add_threadstate(interp, (PyThreadState *)tstate, old_head);
 
-    HEAD_UNLOCK(runtime);
-    if (!used_newtstate) {
-        // Must be called with lock unlocked to avoid re-entrancy deadlock.
-        PyMem_RawFree(new_tstate);
-    }
-    else {
-    }
+    HEAD_UNLOCK(interp->runtime);
 
 #ifdef Py_GIL_DISABLED
     // Must be called with lock unlocked to avoid lock ordering deadlocks.
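The change above amounts to a single-slot cache: each interpreter embeds one thread-state object, publishes its address in threads.preallocated, alloc_threadstate() claims it with an atomic exchange (falling back to the heap once the slot is empty), and free_threadstate() resets the embedded storage and publishes it back instead of freeing it. Below is a minimal, self-contained sketch of that pattern using C11 atomics. The type and function names (thread_state, interp_state, and the two-argument free_threadstate) are stand-ins invented for illustration; the real code uses _PyThreadStateImpl, PyInterpreterState, _Py_atomic_exchange_ptr, and _Py_atomic_store_ptr as shown in the diff.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical stand-ins, reduced to the fields the pattern needs. */
typedef struct thread_state {
    int id;
} thread_state;

typedef struct interp_state {
    _Atomic(thread_state *) preallocated;  /* single-slot cache */
    thread_state initial_thread;           /* storage embedded in the interpreter */
} interp_state;

static void
reset_threadstate(thread_state *ts)
{
    /* Restore to a known initial value (CPython copies _PyThreadState_INIT). */
    memset(ts, 0, sizeof(*ts));
}

static thread_state *
alloc_threadstate(interp_state *interp)
{
    /* Claim the preallocated slot; the exchange leaves NULL behind,
     * so at most one caller wins the embedded storage. */
    thread_state *ts = atomic_exchange(&interp->preallocated, NULL);
    if (ts == NULL) {
        /* Slot already taken: fall back to the heap. */
        ts = calloc(1, sizeof(*ts));
        if (ts == NULL) {
            return NULL;
        }
        reset_threadstate(ts);
    }
    return ts;
}

static void
free_threadstate(interp_state *interp, thread_state *ts)
{
    if (ts == &interp->initial_thread) {
        /* Embedded storage is never freed; reset it and publish it
         * back into the slot for the next thread. */
        reset_threadstate(ts);
        atomic_store(&interp->preallocated, ts);
    }
    else {
        free(ts);
    }
}

int
main(void)
{
    interp_state interp;
    memset(&interp.initial_thread, 0, sizeof(interp.initial_thread));
    atomic_init(&interp.preallocated, &interp.initial_thread);

    thread_state *first = alloc_threadstate(&interp);   /* gets the embedded slot */
    thread_state *second = alloc_threadstate(&interp);  /* falls back to the heap */
    printf("first uses embedded storage: %d\n", first == &interp.initial_thread);
    printf("second uses embedded storage: %d\n", second == &interp.initial_thread);

    free_threadstate(&interp, second);
    free_threadstate(&interp, first);    /* slot becomes available again */
    return 0;
}

The key property is that the exchange-and-fall-back sequence needs no lock: only one allocator can observe the non-NULL slot, and the slot is only republished after the embedded state has been reset, which is why the diff can move the allocation entirely out of the HEAD_LOCK critical section.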