Skip to content

Commit 8c10f0a

Browse files
authored
Merge pull request #3794 from bartoldeman/benchmark-align-malloc
Benchmarks: align malloc'ed buffers.
2 parents ad424fc + 9e6b060 commit 8c10f0a

File tree

1 file changed

+18
-0
lines changed

1 file changed

+18
-0
lines changed

benchmark/bench.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,24 @@ static void *huge_malloc(BLASLONG size){
7474

7575
#endif
7676

77+
/* Benchmarks should allocate with cacheline (often 64 bytes) alignment
78+
to avoid unreliable results. This technique, storing the allocated
79+
pointer value just before the aligned memory, doesn't require
80+
C11's aligned_alloc for compatibility with older compilers. */
81+
static void *aligned_alloc_cacheline(size_t n)
82+
{
83+
void *p = malloc((size_t)(void *) + n + L1_DATA_LINESIZE - 1);
84+
if (p) {
85+
void **newp = (void **)
86+
(((uintptr_t)p + L1_DATA_LINESIZE) & (uintptr_t)-L1_DATA_LINESIZE);
87+
newp[-1] = p;
88+
p = newp;
89+
}
90+
return p;
91+
}
92+
/* Release a buffer obtained from aligned_alloc_cacheline. The pointer
   originally returned by malloc is stored in the void * slot just before
   the aligned address, so that saved pointer is what gets freed. A null
   pointer is accepted and ignored, matching free(NULL).
   This helper must stay above the free macro below so that the call
   inside it still refers to the library free. */
static void aligned_free_cacheline(void *p)
{
  if (p)
    free(((void **)p)[-1]);
}

/* From this point on, malloc/free calls in the benchmarks go through the
   cacheline-aligned allocator. Only pointers produced by
   aligned_alloc_cacheline (or NULL) may be passed to free after these
   definitions, because the saved pointer is read from just before the
   buffer. Using a function rather than a conditional expression ensures
   the argument of free() is evaluated exactly once. */
#define malloc aligned_alloc_cacheline
#define free(p) aligned_free_cacheline(p)
94+
7795
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
7896
struct timeval start, stop;
7997
#elif defined(__APPLE__)

0 commit comments

Comments
 (0)