@@ -948,6 +948,196 @@ _PyMem_Strdup(const char *str)
     return copy;
 }
 
+/***********************************************/
+/* Delayed freeing support for Py_GIL_DISABLED */
+/***********************************************/
+
+// So that sizeof(struct _mem_work_chunk) is 4096 bytes on 64-bit platforms.
+#define WORK_ITEMS_PER_CHUNK 254
+
+// A pointer to be freed once the QSBR read sequence reaches qsbr_goal.
+struct _mem_work_item {
+    void *ptr;
+    uint64_t qsbr_goal;
+};
+
+// A fixed-size buffer of pointers to be freed
+struct _mem_work_chunk {
+    // Linked list node of chunks in queue
+    struct llist_node node;
+
+    Py_ssize_t rd_idx;  // index of next item to read
+    Py_ssize_t wr_idx;  // index of next item to write
+    struct _mem_work_item array[WORK_ITEMS_PER_CHUNK];
+};
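+
+// Size check for the constant above, assuming 8-byte pointers and 8-byte
+// Py_ssize_t: sizeof(node) = 16, rd_idx + wr_idx = 16, and
+// 254 * sizeof(struct _mem_work_item) = 254 * 16 = 4064, for a total of
+// 4096 bytes, i.e. one typical page.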
+
+void
+_PyMem_FreeDelayed(void *ptr)
+{
+#ifndef Py_GIL_DISABLED
+    PyMem_Free(ptr);
+#else
+    if (_PyRuntime.stoptheworld.world_stopped) {
+        // Free immediately if the world is stopped, including during
+        // interpreter shutdown.
+        PyMem_Free(ptr);
+        return;
+    }
+
+    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
+    struct llist_node *head = &tstate->mem_free_queue;
+
+    struct _mem_work_chunk *buf = NULL;
+    if (!llist_empty(head)) {
+        // Try to re-use the last buffer
+        buf = llist_data(head->prev, struct _mem_work_chunk, node);
+        if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
+            // already full
+            buf = NULL;
+        }
+    }
+
+    if (buf == NULL) {
+        buf = PyMem_Calloc(1, sizeof(*buf));
+        if (buf != NULL) {
+            llist_insert_tail(head, &buf->node);
+        }
+    }
+
+    if (buf == NULL) {
+        // failed to allocate a buffer, free immediately
+        _PyEval_StopTheWorld(tstate->base.interp);
+        PyMem_Free(ptr);
+        _PyEval_StartTheWorld(tstate->base.interp);
+        return;
+    }
+
+    assert(buf != NULL && buf->wr_idx < WORK_ITEMS_PER_CHUNK);
+    uint64_t seq = _Py_qsbr_deferred_advance(tstate->qsbr);
+    buf->array[buf->wr_idx].ptr = ptr;
+    buf->array[buf->wr_idx].qsbr_goal = seq;
+    buf->wr_idx++;
+
+    if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
+        _PyMem_ProcessDelayed((PyThreadState *)tstate);
+    }
+#endif
+}
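+
+// Illustrative usage (not part of this change): a writer that replaces a
+// structure readable by other threads without locks should not call
+// PyMem_Free() on the old memory directly, since a concurrent reader may
+// still hold the old pointer. The names table and table->entries below are
+// hypothetical:
+//
+//     void *old = table->entries;
+//     table->entries = new_entries;   // publish the replacement
+//     _PyMem_FreeDelayed(old);        // freed once all threads quiesce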
+
+static struct _mem_work_chunk *
+work_queue_first(struct llist_node *head)
+{
+    return llist_data(head->next, struct _mem_work_chunk, node);
+}
+
+static void
+process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr,
+              bool keep_empty)
+{
+    while (!llist_empty(head)) {
+        struct _mem_work_chunk *buf = work_queue_first(head);
+
+        while (buf->rd_idx < buf->wr_idx) {
+            struct _mem_work_item *item = &buf->array[buf->rd_idx];
+            if (!_Py_qsbr_poll(qsbr, item->qsbr_goal)) {
+                return;
+            }
+
+            PyMem_Free(item->ptr);
+            buf->rd_idx++;
+        }
+
+        assert(buf->rd_idx == buf->wr_idx);
+        if (keep_empty && buf->node.next == head) {
+            // Keep the last buffer in the queue to reduce re-allocations
+            buf->rd_idx = buf->wr_idx = 0;
+            return;
+        }
+
+        llist_remove(&buf->node);
+        PyMem_Free(buf);
+    }
+}
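+
+// Note: the early return when _Py_qsbr_poll() fails is deliberately
+// conservative. If the oldest unfreed item's goal has not yet been reached,
+// process_queue() gives up for now; the remaining items stay queued and are
+// retried on a later call, so nothing is leaked, only deferred.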
+
+static void
+process_interp_queue(struct _Py_mem_interp_free_queue *queue,
+                     struct _qsbr_thread_state *qsbr)
+{
+    if (!_Py_atomic_load_int_relaxed(&queue->has_work)) {
+        return;
+    }
+
+    // Try to acquire the lock, but don't block if it's already held.
+    if (_PyMutex_LockTimed(&queue->mutex, 0, 0) == PY_LOCK_ACQUIRED) {
+        process_queue(&queue->head, qsbr, false);
+
+        int more_work = !llist_empty(&queue->head);
+        _Py_atomic_store_int_relaxed(&queue->has_work, more_work);
+
+        PyMutex_Unlock(&queue->mutex);
+    }
+}
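+
+// Design note: the atomic has_work flag lets the common no-work case skip
+// the mutex entirely, and the zero-timeout lock attempt means a thread
+// never blocks here; if another thread already holds the lock, that thread
+// is draining the queue on our behalf.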
+
+void
+_PyMem_ProcessDelayed(PyThreadState *tstate)
+{
+    PyInterpreterState *interp = tstate->interp;
+    _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
+
+    // Process thread-local work
+    process_queue(&tstate_impl->mem_free_queue, tstate_impl->qsbr, true);
+
+    // Process shared interpreter work
+    process_interp_queue(&interp->mem_free_queue, tstate_impl->qsbr);
+}
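+
+// _PyMem_ProcessDelayed() runs when a chunk fills up (see
+// _PyMem_FreeDelayed() above) and is also expected to be invoked
+// periodically by the owning thread, so queued memory is eventually freed
+// even if the queue never fills a whole chunk.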
+
+void
+_PyMem_AbandonDelayed(PyThreadState *tstate)
+{
+    PyInterpreterState *interp = tstate->interp;
+    struct llist_node *queue = &((_PyThreadStateImpl *)tstate)->mem_free_queue;
+
+    if (llist_empty(queue)) {
+        return;
+    }
+
+    // Check if the queue contains one empty buffer
+    struct _mem_work_chunk *buf = work_queue_first(queue);
+    if (buf->rd_idx == buf->wr_idx) {
+        llist_remove(&buf->node);
+        PyMem_Free(buf);
+        assert(llist_empty(queue));
+        return;
+    }
+
+    // Merge the thread's work queue into the interpreter's work queue.
+    PyMutex_Lock(&interp->mem_free_queue.mutex);
+    llist_concat(&interp->mem_free_queue.head, queue);
+    _Py_atomic_store_int_relaxed(&interp->mem_free_queue.has_work, 1);
+    PyMutex_Unlock(&interp->mem_free_queue.mutex);
+
+    assert(llist_empty(queue));  // the thread's queue is now empty
+}
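+
+// Meant for when a thread state goes away (e.g. is detached or destroyed):
+// any work the thread could not free locally is handed to the
+// interpreter-wide queue, where some other thread's call to
+// _PyMem_ProcessDelayed() will eventually free it.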
+
+void
+_PyMem_FiniDelayed(PyInterpreterState *interp)
+{
+    struct llist_node *head = &interp->mem_free_queue.head;
+    while (!llist_empty(head)) {
+        struct _mem_work_chunk *buf = work_queue_first(head);
+
+        while (buf->rd_idx < buf->wr_idx) {
+            // Free the remaining items immediately. There should be no other
+            // threads accessing the memory at this point during shutdown.
+            struct _mem_work_item *item = &buf->array[buf->rd_idx];
+            PyMem_Free(item->ptr);
+            buf->rd_idx++;
+        }
+
+        llist_remove(&buf->node);
+        PyMem_Free(buf);
+    }
+}
 
 /**************************/
 /* the "object" allocator */