Skip to content
This repository was archived by the owner on Feb 13, 2025. It is now read-only.

Commit a8cf08d

Browse files
author
Anselm Kruis
committed
Stackless issue #181: Replace slp_dont_optimize... vars
by more appropriate compiler specific code. The new code should work with whole program optimisation. (cherry picked from commit 464ef17bcbf634ae33b37672a5b803f0ce940086)
1 parent 49e3ede commit a8cf08d

File tree

5 files changed

+66
-22
lines changed

5 files changed

+66
-22
lines changed

Python/ceval.c

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4048,9 +4048,6 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
40484048

40494049

40504050
#ifdef STACKLESS
4051-
/* a global write only dummy variable */
4052-
char _dont_optimise_away_slp_eval_frame_functions;
4053-
40544051
PyObject *
40554052
slp_eval_frame_noval(PyFrameObject *f, int throwflag, PyObject *retval)
40564053
{
@@ -4060,7 +4057,8 @@ slp_eval_frame_noval(PyFrameObject *f, int throwflag, PyObject *retval)
40604057
* it serves as a marker whether we expect a value or
40614058
* not, and it makes debugging a little easier.
40624059
*/
4063-
_dont_optimise_away_slp_eval_frame_functions = 1;
4060+
SLP_DO_NOT_OPTIMIZE_AWAY((char *)1);
4061+
40644062
r = slp_eval_frame_value(f, throwflag, retval);
40654063
return r;
40664064
}
@@ -4075,7 +4073,7 @@ slp_eval_frame_iter(PyFrameObject *f, int throwflag, PyObject *retval)
40754073
* for_iter operation. In this case we need to handle
40764074
* null without error as valid result.
40774075
*/
4078-
_dont_optimise_away_slp_eval_frame_functions = 2;
4076+
SLP_DO_NOT_OPTIMIZE_AWAY((char *)2);
40794077
r = slp_eval_frame_value(f, throwflag, retval);
40804078
return r;
40814079
}
@@ -4090,7 +4088,7 @@ slp_eval_frame_setup_with(PyFrameObject *f, int throwflag, PyObject *retval)
40904088
* SETUP_WITH operation.
40914089
* NOTE / XXX: see above.
40924090
*/
4093-
_dont_optimise_away_slp_eval_frame_functions = 3;
4091+
SLP_DO_NOT_OPTIMIZE_AWAY((char *)3);
40944092
r = slp_eval_frame_value(f, throwflag, retval);
40954093
return r;
40964094
}
@@ -4105,7 +4103,7 @@ slp_eval_frame_with_cleanup(PyFrameObject *f, int throwflag, PyObject *retval)
41054103
* WITH_CLEANUP operation.
41064104
* NOTE / XXX: see above.
41074105
*/
4108-
_dont_optimise_away_slp_eval_frame_functions = 4;
4106+
SLP_DO_NOT_OPTIMIZE_AWAY((char *)4);
41094107
r = slp_eval_frame_value(f, throwflag, retval);
41104108
return r;
41114109
}

Stackless/core/slp_transfer.c

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ static PyTaskletObject *_prev;
4545
#define SLP_EVAL
4646
#include "platf/slp_platformselect.h"
4747

48+
SLP_DO_NOT_OPTIMIZE_AWAY_DEFINITIONS
49+
4850
#ifdef EXTERNAL_ASM
4951
/* CCP addition: Make these functions, to be called from assembler.
5052
* The token include file for the given platform should enable the
@@ -80,8 +82,6 @@ extern int slp_switch(void);
8082

8183
#endif
8284

83-
/* a write only variable used to prevent overly optimisation */
84-
intptr_t *global_goobledigoobs;
8585
static int
8686
climb_stack_and_transfer(PyCStackObject **cstprev, PyCStackObject *cst,
8787
PyTaskletObject *prev)
@@ -96,15 +96,14 @@ climb_stack_and_transfer(PyCStackObject **cstprev, PyCStackObject *cst,
9696
intptr_t probe;
9797
register ptrdiff_t needed = &probe - ts->st.cstack_base;
9898
/* in rare cases, the need might have vanished due to the recursion */
99-
register intptr_t *goobledigoobs;
10099
if (needed > 0) {
101-
goobledigoobs = alloca(needed * sizeof(intptr_t));
102-
if (goobledigoobs == NULL)
100+
register void * stack_ptr_tmp = alloca(needed * sizeof(intptr_t));
101+
if (stack_ptr_tmp == NULL)
103102
return -1;
104-
/* hinder the compiler to optimise away
105-
goobledigoobs and the alloca call.
103+
/* prevent the compiler from optimising away
104+
stack_ptr_tmp and the alloca call.
106105
This happens with gcc 4.7.x and -O2 */
107-
global_goobledigoobs = goobledigoobs;
106+
SLP_DO_NOT_OPTIMIZE_AWAY(stack_ptr_tmp);
108107
}
109108
return slp_transfer(cstprev, cst, prev);
110109
}

Stackless/core/stacklesseval.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -281,11 +281,14 @@ climb_stack_and_eval_frame(PyFrameObject *f)
281281
intptr_t probe;
282282
ptrdiff_t needed = &probe - ts->st.cstack_base;
283283
/* in rare cases, the need might have vanished due to the recursion */
284-
intptr_t *goobledigoobs;
285284
if (needed > 0) {
286-
goobledigoobs = alloca(needed * sizeof(intptr_t));
287-
if (goobledigoobs == NULL)
285+
register void * stack_ptr_tmp = alloca(needed * sizeof(intptr_t));
286+
if (stack_ptr_tmp == NULL)
288287
return NULL;
288+
/* prevent the compiler from optimising away
289+
stack_ptr_tmp and the alloca call.
290+
This happens with gcc 4.7.x and -O2 */
291+
SLP_DO_NOT_OPTIMIZE_AWAY(stack_ptr_tmp);
289292
}
290293
return slp_eval_frame(f);
291294
}

Stackless/module/scheduling.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -361,7 +361,6 @@ typedef struct {
361361
/* not a valid ptr and not a common integer */
362362
#define SAVED_TSTATE_MAGIC1 (((intptr_t)transfer_with_exc)+1)
363363
#define SAVED_TSTATE_MAGIC2 (-1*((intptr_t)transfer_with_exc))
364-
saved_tstat_with_magic_t * _dont_optimise_away_saved_tstat_with_magic;
365364

366365
static int
367366
transfer_with_exc(PyCStackObject **cstprev, PyCStackObject *cst, PyTaskletObject *prev)
@@ -376,7 +375,7 @@ transfer_with_exc(PyCStackObject **cstprev, PyCStackObject *cst, PyTaskletObject
376375
/* prevent overly aggressive compiler optimisation.
377376
We pass the address of sm to SLP_DO_NOT_OPTIMIZE_AWAY.
378377
This way the optimizer can't change the layout of the structure. */
379-
_dont_optimise_away_saved_tstat_with_magic = &sm;
378+
SLP_DO_NOT_OPTIMIZE_AWAY(&sm);
380379

381380
sm.s.tracing = ts->tracing;
382381
sm.s.c_profilefunc = ts->c_profilefunc;

Stackless/platf/slp_platformselect.h

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@
1717
#elif defined(__GNUC__) && defined(sparc) && defined(sun)
1818
#include "switch_sparc_sun_gcc.h" /* SunOS sparc with gcc */
1919
#elif defined(__GNUC__) && defined(__s390__) && defined(__linux__)
20-
#include "switch_s390_unix.h" /* Linux/S390 */
20+
#include "switch_s390_unix.h" /* Linux/S390 */
2121
#elif defined(__GNUC__) && defined(__s390x__) && defined(__linux__)
22-
#include "switch_s390_unix.h" /* Linux/S390 zSeries (identical) */
22+
#include "switch_s390_unix.h" /* Linux/S390 zSeries (identical) */
2323
#elif defined(__GNUC__) && defined(__arm__) && defined(__thumb__)
2424
#include "switch_arm_thumb_gcc.h" /* gcc using arm thumb */
2525
#elif defined(__GNUC__) && defined(__arm32__)
@@ -32,6 +32,51 @@
3232

3333
/* default definitions if not defined in above files */
3434

35+
/*
36+
* Call SLP_DO_NOT_OPTIMIZE_AWAY(pointer) to ensure that pointer will be
37+
* computed even post-optimization. Use it for pointers that are computed but
38+
* otherwise are useless. The compiler tends to do a good job at eliminating
39+
* unused variables, and this macro fools it into thinking the pointer is in fact
40+
* needed.
41+
*/
42+
43+
#ifndef SLP_DO_NOT_OPTIMIZE_AWAY
44+
45+
/* Code is based on Facebook folly
46+
* https://github.com/facebook/folly/blob/master/folly/Benchmark.h,
47+
* which has an Apache 2 license.
48+
*/
49+
#ifdef _MSC_VER
50+
51+
#pragma optimize("", off)
52+
53+
static inline void doNotOptimizeDependencySink(const void* p) {}
54+
55+
#pragma optimize("", on)
56+
57+
#define SLP_DO_NOT_OPTIMIZE_AWAY(pointer) doNotOptimizeDependencySink(pointer)
58+
#define SLP_DO_NOT_OPTIMIZE_AWAY_DEFINITIONS /* empty */
59+
60+
#elif (defined(__GNUC__) || defined(__clang__))
61+
/*
62+
* The "r" constraint forces the compiler to make the pointer available
63+
* in a register to the asm block, which means that it must have
64+
* computed/loaded it.
65+
*/
66+
#define SLP_DO_NOT_OPTIMIZE_AWAY(pointer) \
67+
do {__asm__ volatile("" ::"r"(pointer));} while(0)
68+
#define SLP_DO_NOT_OPTIMIZE_AWAY_DEFINITIONS /* empty */
69+
#else
70+
/*
71+
* Unknown compiler
72+
*/
73+
#define SLP_DO_NOT_OPTIMIZE_AWAY(pointer) \
74+
do { slp_do_not_opimize_away_sink = ((void*)(pointer)); } while(0)
75+
extern uint8_t* volatile slp_do_not_opimize_away_sink;
76+
#define SLP_DO_NOT_OPTIMIZE_AWAY_DEFINITIONS uint8_t* volatile slp_do_not_opimize_away_sink;
77+
#endif
78+
#endif
79+
3580
/* adjust slots to typical size of a few recursions on your system */
3681

3782
#ifndef CSTACK_SLOTS

0 commit comments

Comments
 (0)