Skip to content

Commit 7145ff0

Browse files
authored
Merge pull request #66567 from al45tair/eng/PR-110653167
[Backtracing][Linux] Fix Ubuntu 20.04-aarch64 CI failures.
2 parents 15e6c6f + 7e632c0 commit 7145ff0

File tree

1 file changed

+161
-16
lines changed

1 file changed

+161
-16
lines changed

stdlib/public/runtime/CrashHandlerLinux.cpp

+161-16
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,12 @@ void resume_other_threads();
6262
void take_thread_lock();
6363
void release_thread_lock();
6464
void notify_paused();
65+
uint32_t currently_paused();
6566
void wait_paused(uint32_t expected, const struct timespec *timeout);
6667
int memserver_start();
6768
int memserver_entry(void *);
6869
bool run_backtracer(int fd);
70+
void format_unsigned(unsigned u, char buffer[22]);
6971

7072
ssize_t safe_read(int fd, void *buf, size_t len) {
7173
uint8_t *ptr = (uint8_t *)buf;
@@ -88,7 +90,7 @@ ssize_t safe_read(int fd, void *buf, size_t len) {
8890
}
8991

9092
ssize_t safe_write(int fd, const void *buf, size_t len) {
91-
const uint8_t *ptr = (uint8_t *)buf;
93+
const uint8_t *ptr = (const uint8_t *)buf;
9294
const uint8_t *end = ptr + len;
9395
ssize_t total = 0;
9496

@@ -308,6 +310,124 @@ getdents(int fd, void *buf, size_t bufsiz)
308310
return syscall(SYS_getdents64, fd, buf, bufsiz);
309311
}
310312

313+
/* Find the signal to use to suspend the given thread.
314+
315+
Sadly, libdispatch blocks SIGUSR1, so we can't just use that everywhere;
316+
and on Ubuntu 20.04 *something* is starting a thread with SIGPROF blocked,
317+
so we can't just use that either.
318+
319+
We also can't modify the signal mask for another thread, since there is
320+
no syscall to do that.
321+
322+
As a workaround, read /proc/<pid>/task/<tid>/status to find the signal
323+
mask so that we can decide which signal to try and send. */
324+
int
325+
signal_for_suspend(int pid, int tid)
326+
{
327+
char pid_buffer[22];
328+
char tid_buffer[22];
329+
330+
format_unsigned((unsigned)pid, pid_buffer);
331+
format_unsigned((unsigned)tid, tid_buffer);
332+
333+
char status_file[6 + 22 + 6 + 22 + 7 + 1];
334+
335+
strcpy(status_file, "/proc/"); // 6
336+
strcat(status_file, pid_buffer); // 22
337+
strcat(status_file, "/task/"); // 6
338+
strcat(status_file, tid_buffer); // 22
339+
strcat(status_file, "/status"); // 7 + 1 for NUL
340+
341+
int fd = open(status_file, O_RDONLY);
342+
if (fd < 0)
343+
return -1;
344+
345+
enum match_state {
346+
Matching,
347+
EatLine,
348+
AfterMatch,
349+
InHex,
350+
351+
// states after this terminate the loop
352+
Done,
353+
Bad
354+
};
355+
356+
enum match_state state = Matching;
357+
const char *toMatch = "SigBlk:";
358+
const char *matchPtr = toMatch;
359+
char buffer[256];
360+
uint64_t mask = 0;
361+
ssize_t count;
362+
while (state < Done && (count = read(fd, buffer, sizeof(buffer))) > 0) {
363+
char *ptr = buffer;
364+
char *end = buffer + count;
365+
366+
while (state < Done && ptr < end) {
367+
int ch = *ptr++;
368+
369+
switch (state) {
370+
case Matching:
371+
if (ch != *matchPtr) {
372+
state = EatLine;
373+
matchPtr = toMatch;
374+
} else if (!*++matchPtr) {
375+
state = AfterMatch;
376+
}
377+
break;
378+
case EatLine:
379+
if (ch == '\n')
380+
state = Matching;
381+
break;
382+
case AfterMatch:
383+
if (ch == ' ' || ch == '\t') {
384+
break;
385+
}
386+
state = InHex;
387+
SWIFT_FALLTHROUGH;
388+
case InHex:
389+
if (ch >= '0' && ch <= '9') {
390+
mask = (mask << 4) | (ch - '0');
391+
} else if (ch >= 'a' && ch <= 'f') {
392+
mask = (mask << 4) | (ch - 'a' + 10);
393+
} else if (ch >= 'A' && ch <= 'F') {
394+
mask = (mask << 4) | (ch - 'A' + 10);
395+
} else if (ch == '\n') {
396+
state = Done;
397+
break;
398+
} else {
399+
state = Bad;
400+
}
401+
break;
402+
case Done:
403+
case Bad:
404+
break;
405+
}
406+
}
407+
}
408+
409+
close(fd);
410+
411+
if (state == Done) {
412+
if (!(mask & (1 << (SIGUSR1 - 1))))
413+
return SIGUSR1;
414+
else if (!(mask & (1 << (SIGUSR2 - 1))))
415+
return SIGUSR2;
416+
else if (!(mask & (1 << (SIGPROF - 1))))
417+
return SIGPROF;
418+
else
419+
return -1;
420+
}
421+
422+
return -1;
423+
}
424+
425+
// Write a string to stderr.
//
// A single write() may accept only part of the buffer, so keep writing
// until all of `str` has gone out.  This is best-effort: if write() fails
// or makes no progress, the remainder of the message is dropped and no
// error is reported to the caller.
void
warn(const char *str) {
  size_t remaining = strlen(str);

  while (remaining > 0) {
    ssize_t written = write(STDERR_FILENO, str, remaining);

    if (written <= 0)
      break;

    str += written;
    remaining -= (size_t)written;
  }
}
430+
311431
/* Stop all other threads in this process; we do this by establishing a
312432
signal handler for SIGPROF, SIGUSR1 and SIGUSR2, then iterating through
313433
the threads sending each one whichever of those signals it has not blocked.
@@ -321,21 +441,23 @@ getdents(int fd, void *buf, size_t bufsiz)
321441
void
322442
suspend_other_threads(struct thread *self)
323443
{
324-
struct sigaction sa, sa_old;
444+
struct sigaction sa, sa_old_prof, sa_old_usr1, sa_old_usr2;
325445

326446
// Take the lock
327447
take_thread_lock();
328448

329449
// Start the thread list with this thread
330450
reset_threads(self);
331451

332-
// Swap out the SIGPROF signal handler first
452+
// Swap out the signal handlers first
333453
sigfillset(&sa.sa_mask);
334-
sa.sa_flags = SA_NODEFER;
454+
sa.sa_flags = 0;
335455
sa.sa_handler = NULL;
336456
sa.sa_sigaction = pause_thread;
337457

338-
sigaction(SIGPROF, &sa, &sa_old);
458+
sigaction(SIGPROF, &sa, &sa_old_prof);
459+
sigaction(SIGUSR1, &sa, &sa_old_usr1);
460+
sigaction(SIGUSR2, &sa, &sa_old_usr2);
339461

340462
/* Now scan /proc/self/task to get the tids of the threads in this
341463
process. We need to ignore our own thread. */
@@ -346,11 +468,14 @@ suspend_other_threads(struct thread *self)
346468
size_t offset = 0;
347469
size_t count = 0;
348470

349-
uint32_t thread_count = 0;
350-
uint32_t old_thread_count;
471+
unsigned max_loops = 15;
472+
uint32_t pending = 0;
351473

352474
do {
353-
old_thread_count = thread_count;
475+
uint32_t paused = currently_paused();
476+
477+
pending = 0;
478+
354479
lseek(fd, 0, SEEK_SET);
355480

356481
for (;;) {
@@ -372,21 +497,35 @@ suspend_other_threads(struct thread *self)
372497
int tid = atoi(dp->d_name);
373498

374499
if ((int64_t)tid != self->tid && !seen_thread(tid)) {
375-
tgkill(our_pid, tid, SIGPROF);
376-
++thread_count;
500+
int sig_to_use = signal_for_suspend(our_pid, tid);
501+
502+
if (sig_to_use > 0) {
503+
tgkill(our_pid, tid, sig_to_use);
504+
++pending;
505+
} else {
506+
warn("swift-runtime: unable to suspend thread ");
507+
warn(dp->d_name);
508+
warn("\n");
509+
}
377510
}
378511
}
379512

380-
// Wait up to 5 seconds for the threads to pause
381-
struct timespec timeout = { 5, 0 };
382-
wait_paused(thread_count, &timeout);
383-
} while (old_thread_count != thread_count);
513+
// If we find no new threads, we're done
514+
if (!pending)
515+
break;
516+
517+
// Wait for the threads to suspend
518+
struct timespec timeout = { 2, 0 };
519+
wait_paused(paused + pending, &timeout);
520+
} while (max_loops--);
384521

385522
// Close the directory
386523
close(fd);
387524

388-
// Finally, reset the signal handler
389-
sigaction(SIGPROF, &sa_old, NULL);
525+
// Finally, reset the signal handlers
526+
sigaction(SIGPROF, &sa_old_prof, NULL);
527+
sigaction(SIGUSR1, &sa_old_usr1, NULL);
528+
sigaction(SIGUSR2, &sa_old_usr2, NULL);
390529
}
391530

392531
void
@@ -441,6 +580,12 @@ notify_paused()
441580
futex(&threads_paused, FUTEX_WAKE, 1, NULL, NULL, 0);
442581
}
443582

583+
uint32_t
584+
currently_paused()
585+
{
586+
return __atomic_load_n(&threads_paused, __ATOMIC_ACQUIRE);
587+
}
588+
444589
void
445590
wait_paused(uint32_t expected, const struct timespec *timeout)
446591
{

0 commit comments

Comments
 (0)