diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d69976342fcbd..b284378a5fb29 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -142,11 +142,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setLibcallName(RTLIB::POWI_F64, nullptr); } - // If we don't have cmpxchg8b(meaing this is a 386/486), limit atomic size to - // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b. - // FIXME: Should we be limiting the atomic size on other configs? Default is - // 1024. - if (!Subtarget.canUseCMPXCHG8B()) + if (Subtarget.canUseCMPXCHG16B()) + setMaxAtomicSizeInBitsSupported(128); + else if (Subtarget.canUseCMPXCHG8B()) + setMaxAtomicSizeInBitsSupported(64); + else setMaxAtomicSizeInBitsSupported(32); setMaxDivRemBitWidthSupported(Subtarget.is64Bit() ? 128 : 64); diff --git a/llvm/test/CodeGen/X86/atomic-idempotent.ll b/llvm/test/CodeGen/X86/atomic-idempotent.ll index 3a9648bd1fbb5..d5c46485068a6 100644 --- a/llvm/test/CodeGen/X86/atomic-idempotent.ll +++ b/llvm/test/CodeGen/X86/atomic-idempotent.ll @@ -170,117 +170,130 @@ define i128 @or128(ptr %p) { ; X64-NEXT: .cfi_def_cfa_offset 16 ; X64-NEXT: xorl %esi, %esi ; X64-NEXT: xorl %edx, %edx -; X64-NEXT: callq __sync_fetch_and_or_16@PLT +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: callq __atomic_fetch_or_16@PLT ; X64-NEXT: popq %rcx ; X64-NEXT: .cfi_def_cfa_offset 8 ; X64-NEXT: retq ; -; X86-SSE2-LABEL: or128: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: pushl %ebp -; X86-SSE2-NEXT: .cfi_def_cfa_offset 8 -; X86-SSE2-NEXT: .cfi_offset %ebp, -8 -; X86-SSE2-NEXT: movl %esp, %ebp -; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp -; X86-SSE2-NEXT: pushl %esi -; X86-SSE2-NEXT: andl $-16, %esp -; X86-SSE2-NEXT: subl $32, %esp -; X86-SSE2-NEXT: .cfi_offset %esi, -12 -; X86-SSE2-NEXT: movl 8(%ebp), %esi -; X86-SSE2-NEXT: movl %esp, %eax -; X86-SSE2-NEXT: pushl $0 -; X86-SSE2-NEXT: pushl $0 -; X86-SSE2-NEXT: pushl $0 -; X86-SSE2-NEXT: pushl $0 -; X86-SSE2-NEXT: pushl 12(%ebp) -; X86-SSE2-NEXT: pushl %eax -; X86-SSE2-NEXT: calll __sync_fetch_and_or_16 -; X86-SSE2-NEXT: addl $20, %esp -; X86-SSE2-NEXT: movaps (%esp), %xmm0 -; X86-SSE2-NEXT: movaps %xmm0, (%esi) -; X86-SSE2-NEXT: movl %esi, %eax -; X86-SSE2-NEXT: leal -4(%ebp), %esp -; X86-SSE2-NEXT: popl %esi -; X86-SSE2-NEXT: popl %ebp -; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4 -; X86-SSE2-NEXT: retl $4 -; -; X86-SLM-LABEL: or128: -; X86-SLM: # %bb.0: -; X86-SLM-NEXT: pushl %ebp -; X86-SLM-NEXT: .cfi_def_cfa_offset 8 -; X86-SLM-NEXT: .cfi_offset %ebp, -8 -; X86-SLM-NEXT: movl %esp, %ebp -; X86-SLM-NEXT: .cfi_def_cfa_register %ebp -; X86-SLM-NEXT: pushl %edi -; X86-SLM-NEXT: pushl %esi -; X86-SLM-NEXT: andl $-16, %esp -; X86-SLM-NEXT: subl $16, %esp -; X86-SLM-NEXT: .cfi_offset %esi, -16 -; X86-SLM-NEXT: .cfi_offset %edi, -12 -; X86-SLM-NEXT: movl 8(%ebp), %esi -; X86-SLM-NEXT: movl 12(%ebp), %eax -; X86-SLM-NEXT: movl %esp, %ecx -; X86-SLM-NEXT: pushl $0 -; X86-SLM-NEXT: pushl $0 -; X86-SLM-NEXT: pushl $0 -; X86-SLM-NEXT: pushl $0 -; X86-SLM-NEXT: pushl %eax -; X86-SLM-NEXT: pushl %ecx -; X86-SLM-NEXT: calll __sync_fetch_and_or_16 -; X86-SLM-NEXT: addl $20, %esp -; X86-SLM-NEXT: movl (%esp), %eax -; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-SLM-NEXT: movl %edi, 8(%esi) -; X86-SLM-NEXT: movl %edx, 12(%esi) -; X86-SLM-NEXT: movl %eax, (%esi) -; X86-SLM-NEXT: movl %ecx, 4(%esi) -; X86-SLM-NEXT: movl %esi, %eax -; X86-SLM-NEXT: leal -8(%ebp), %esp -; X86-SLM-NEXT: popl %esi -; X86-SLM-NEXT: popl %edi -; X86-SLM-NEXT: popl %ebp -; X86-SLM-NEXT: .cfi_def_cfa %esp, 4 -; X86-SLM-NEXT: retl $4 +; X86-GENERIC-LABEL: or128: +; X86-GENERIC: # %bb.0: +; X86-GENERIC-NEXT: pushl %ebp +; X86-GENERIC-NEXT: .cfi_def_cfa_offset 8 +; X86-GENERIC-NEXT: .cfi_offset %ebp, -8 +; X86-GENERIC-NEXT: movl %esp, %ebp +; X86-GENERIC-NEXT: .cfi_def_cfa_register %ebp +; X86-GENERIC-NEXT: pushl %ebx +; X86-GENERIC-NEXT: pushl %edi +; X86-GENERIC-NEXT: pushl %esi +; X86-GENERIC-NEXT: andl $-16, %esp +; X86-GENERIC-NEXT: subl $48, %esp +; X86-GENERIC-NEXT: .cfi_offset %esi, -20 +; X86-GENERIC-NEXT: .cfi_offset %edi, -16 +; X86-GENERIC-NEXT: .cfi_offset %ebx, -12 +; X86-GENERIC-NEXT: movl 12(%ebp), %edi +; X86-GENERIC-NEXT: movl 12(%edi), %ecx +; X86-GENERIC-NEXT: movl 8(%edi), %edx +; X86-GENERIC-NEXT: movl (%edi), %ebx +; X86-GENERIC-NEXT: movl 4(%edi), %esi +; X86-GENERIC-NEXT: .p2align 4, 0x90 +; X86-GENERIC-NEXT: .LBB4_1: # %atomicrmw.start +; X86-GENERIC-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-GENERIC-NEXT: movl %ebx, (%esp) +; X86-GENERIC-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-GENERIC-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-GENERIC-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-GENERIC-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-GENERIC-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-GENERIC-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-GENERIC-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-GENERIC-NEXT: pushl $0 +; X86-GENERIC-NEXT: pushl $0 +; X86-GENERIC-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-GENERIC-NEXT: pushl %eax +; X86-GENERIC-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-GENERIC-NEXT: pushl %eax +; X86-GENERIC-NEXT: pushl %edi +; X86-GENERIC-NEXT: pushl $16 +; X86-GENERIC-NEXT: calll __atomic_compare_exchange@PLT +; X86-GENERIC-NEXT: addl $24, %esp +; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-GENERIC-NEXT: movl (%esp), %ebx +; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-GENERIC-NEXT: testb %al, %al +; X86-GENERIC-NEXT: je .LBB4_1 +; X86-GENERIC-NEXT: # %bb.2: # %atomicrmw.end +; X86-GENERIC-NEXT: movl 8(%ebp), %eax +; X86-GENERIC-NEXT: movl %ebx, (%eax) +; X86-GENERIC-NEXT: movl %esi, 4(%eax) +; X86-GENERIC-NEXT: movl %edx, 8(%eax) +; X86-GENERIC-NEXT: movl %ecx, 12(%eax) +; X86-GENERIC-NEXT: leal -12(%ebp), %esp +; X86-GENERIC-NEXT: popl %esi +; X86-GENERIC-NEXT: popl %edi +; X86-GENERIC-NEXT: popl %ebx +; X86-GENERIC-NEXT: popl %ebp +; X86-GENERIC-NEXT: .cfi_def_cfa %esp, 4 +; X86-GENERIC-NEXT: retl $4 ; ; X86-ATOM-LABEL: or128: ; X86-ATOM: # %bb.0: ; X86-ATOM-NEXT: pushl %ebp ; X86-ATOM-NEXT: .cfi_def_cfa_offset 8 ; X86-ATOM-NEXT: .cfi_offset %ebp, -8 -; X86-ATOM-NEXT: leal (%esp), %ebp +; X86-ATOM-NEXT: movl %esp, %ebp ; X86-ATOM-NEXT: .cfi_def_cfa_register %ebp +; X86-ATOM-NEXT: pushl %ebx ; X86-ATOM-NEXT: pushl %edi ; X86-ATOM-NEXT: pushl %esi ; X86-ATOM-NEXT: andl $-16, %esp ; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp -; X86-ATOM-NEXT: .cfi_offset %esi, -16 -; X86-ATOM-NEXT: .cfi_offset %edi, -12 -; X86-ATOM-NEXT: movl 8(%ebp), %esi -; X86-ATOM-NEXT: movl 12(%ebp), %eax -; X86-ATOM-NEXT: movl %esp, %ecx -; X86-ATOM-NEXT: pushl $0 -; X86-ATOM-NEXT: pushl $0 +; X86-ATOM-NEXT: .cfi_offset %esi, -20 +; X86-ATOM-NEXT: .cfi_offset %edi, -16 +; X86-ATOM-NEXT: .cfi_offset %ebx, -12 +; X86-ATOM-NEXT: movl 12(%ebp), %edi +; X86-ATOM-NEXT: movl 12(%edi), %ecx +; X86-ATOM-NEXT: movl 8(%edi), %edx +; X86-ATOM-NEXT: movl (%edi), %esi +; X86-ATOM-NEXT: movl 4(%edi), %ebx +; X86-ATOM-NEXT: .p2align 4, 0x90 +; X86-ATOM-NEXT: .LBB4_1: # %atomicrmw.start +; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-ATOM-NEXT: movl %esi, (%esp) +; X86-ATOM-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-ATOM-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-ATOM-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-ATOM-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-ATOM-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-ATOM-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-ATOM-NEXT: movl %esi, {{[0-9]+}}(%esp) ; X86-ATOM-NEXT: pushl $0 ; X86-ATOM-NEXT: pushl $0 +; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-ATOM-NEXT: pushl %eax +; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %eax ; X86-ATOM-NEXT: pushl %eax -; X86-ATOM-NEXT: pushl %ecx -; X86-ATOM-NEXT: calll __sync_fetch_and_or_16 +; X86-ATOM-NEXT: pushl %edi +; X86-ATOM-NEXT: pushl $16 +; X86-ATOM-NEXT: calll __atomic_compare_exchange@PLT ; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %esp -; X86-ATOM-NEXT: movl (%esp), %ecx +; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-ATOM-NEXT: movl %eax, 8(%esi) -; X86-ATOM-NEXT: movl %edi, 12(%esi) -; X86-ATOM-NEXT: movl %ecx, (%esi) -; X86-ATOM-NEXT: movl %esi, %eax -; X86-ATOM-NEXT: movl %edx, 4(%esi) -; X86-ATOM-NEXT: leal -8(%ebp), %esp +; X86-ATOM-NEXT: testb %al, %al +; X86-ATOM-NEXT: movl (%esp), %esi +; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-ATOM-NEXT: je .LBB4_1 +; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end +; X86-ATOM-NEXT: movl 8(%ebp), %eax +; X86-ATOM-NEXT: movl %esi, (%eax) +; X86-ATOM-NEXT: movl %ebx, 4(%eax) +; X86-ATOM-NEXT: movl %edx, 8(%eax) +; X86-ATOM-NEXT: movl %ecx, 12(%eax) +; X86-ATOM-NEXT: leal -12(%ebp), %esp ; X86-ATOM-NEXT: popl %esi ; X86-ATOM-NEXT: popl %edi +; X86-ATOM-NEXT: popl %ebx ; X86-ATOM-NEXT: popl %ebp ; X86-ATOM-NEXT: .cfi_def_cfa %esp, 4 ; X86-ATOM-NEXT: retl $4 @@ -507,78 +520,120 @@ define void @or128_nouse_seq_cst(ptr %p) { ; X64-NEXT: .cfi_def_cfa_offset 16 ; X64-NEXT: xorl %esi, %esi ; X64-NEXT: xorl %edx, %edx -; X64-NEXT: callq __sync_fetch_and_or_16@PLT +; X64-NEXT: movl $5, %ecx +; X64-NEXT: callq __atomic_fetch_or_16@PLT ; X64-NEXT: popq %rax ; X64-NEXT: .cfi_def_cfa_offset 8 ; X64-NEXT: retq ; -; X86-SSE2-LABEL: or128_nouse_seq_cst: -; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: pushl %ebp -; X86-SSE2-NEXT: .cfi_def_cfa_offset 8 -; X86-SSE2-NEXT: .cfi_offset %ebp, -8 -; X86-SSE2-NEXT: movl %esp, %ebp -; X86-SSE2-NEXT: .cfi_def_cfa_register %ebp -; X86-SSE2-NEXT: andl $-16, %esp -; X86-SSE2-NEXT: subl $32, %esp -; X86-SSE2-NEXT: movl %esp, %eax -; X86-SSE2-NEXT: pushl $0 -; X86-SSE2-NEXT: pushl $0 -; X86-SSE2-NEXT: pushl $0 -; X86-SSE2-NEXT: pushl $0 -; X86-SSE2-NEXT: pushl 8(%ebp) -; X86-SSE2-NEXT: pushl %eax -; X86-SSE2-NEXT: calll __sync_fetch_and_or_16 -; X86-SSE2-NEXT: addl $20, %esp -; X86-SSE2-NEXT: movl %ebp, %esp -; X86-SSE2-NEXT: popl %ebp -; X86-SSE2-NEXT: .cfi_def_cfa %esp, 4 -; X86-SSE2-NEXT: retl -; -; X86-SLM-LABEL: or128_nouse_seq_cst: -; X86-SLM: # %bb.0: -; X86-SLM-NEXT: pushl %ebp -; X86-SLM-NEXT: .cfi_def_cfa_offset 8 -; X86-SLM-NEXT: .cfi_offset %ebp, -8 -; X86-SLM-NEXT: movl %esp, %ebp -; X86-SLM-NEXT: .cfi_def_cfa_register %ebp -; X86-SLM-NEXT: andl $-16, %esp -; X86-SLM-NEXT: subl $32, %esp -; X86-SLM-NEXT: movl 8(%ebp), %eax -; X86-SLM-NEXT: movl %esp, %ecx -; X86-SLM-NEXT: pushl $0 -; X86-SLM-NEXT: pushl $0 -; X86-SLM-NEXT: pushl $0 -; X86-SLM-NEXT: pushl $0 -; X86-SLM-NEXT: pushl %eax -; X86-SLM-NEXT: pushl %ecx -; X86-SLM-NEXT: calll __sync_fetch_and_or_16 -; X86-SLM-NEXT: addl $20, %esp -; X86-SLM-NEXT: movl %ebp, %esp -; X86-SLM-NEXT: popl %ebp -; X86-SLM-NEXT: .cfi_def_cfa %esp, 4 -; X86-SLM-NEXT: retl +; X86-GENERIC-LABEL: or128_nouse_seq_cst: +; X86-GENERIC: # %bb.0: +; X86-GENERIC-NEXT: pushl %ebp +; X86-GENERIC-NEXT: .cfi_def_cfa_offset 8 +; X86-GENERIC-NEXT: .cfi_offset %ebp, -8 +; X86-GENERIC-NEXT: movl %esp, %ebp +; X86-GENERIC-NEXT: .cfi_def_cfa_register %ebp +; X86-GENERIC-NEXT: pushl %ebx +; X86-GENERIC-NEXT: pushl %edi +; X86-GENERIC-NEXT: pushl %esi +; X86-GENERIC-NEXT: andl $-16, %esp +; X86-GENERIC-NEXT: subl $48, %esp +; X86-GENERIC-NEXT: .cfi_offset %esi, -20 +; X86-GENERIC-NEXT: .cfi_offset %edi, -16 +; X86-GENERIC-NEXT: .cfi_offset %ebx, -12 +; X86-GENERIC-NEXT: movl 8(%ebp), %esi +; X86-GENERIC-NEXT: movl 12(%esi), %ecx +; X86-GENERIC-NEXT: movl 8(%esi), %edi +; X86-GENERIC-NEXT: movl (%esi), %edx +; X86-GENERIC-NEXT: movl 4(%esi), %ebx +; X86-GENERIC-NEXT: .p2align 4, 0x90 +; X86-GENERIC-NEXT: .LBB12_1: # %atomicrmw.start +; X86-GENERIC-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-GENERIC-NEXT: movl %edx, (%esp) +; X86-GENERIC-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-GENERIC-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-GENERIC-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-GENERIC-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-GENERIC-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-GENERIC-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; X86-GENERIC-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-GENERIC-NEXT: pushl $5 +; X86-GENERIC-NEXT: pushl $5 +; X86-GENERIC-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-GENERIC-NEXT: pushl %eax +; X86-GENERIC-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-GENERIC-NEXT: pushl %eax +; X86-GENERIC-NEXT: pushl %esi +; X86-GENERIC-NEXT: pushl $16 +; X86-GENERIC-NEXT: calll __atomic_compare_exchange@PLT +; X86-GENERIC-NEXT: addl $24, %esp +; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-GENERIC-NEXT: movl (%esp), %edx +; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-GENERIC-NEXT: testb %al, %al +; X86-GENERIC-NEXT: je .LBB12_1 +; X86-GENERIC-NEXT: # %bb.2: # %atomicrmw.end +; X86-GENERIC-NEXT: leal -12(%ebp), %esp +; X86-GENERIC-NEXT: popl %esi +; X86-GENERIC-NEXT: popl %edi +; X86-GENERIC-NEXT: popl %ebx +; X86-GENERIC-NEXT: popl %ebp +; X86-GENERIC-NEXT: .cfi_def_cfa %esp, 4 +; X86-GENERIC-NEXT: retl ; ; X86-ATOM-LABEL: or128_nouse_seq_cst: ; X86-ATOM: # %bb.0: ; X86-ATOM-NEXT: pushl %ebp ; X86-ATOM-NEXT: .cfi_def_cfa_offset 8 ; X86-ATOM-NEXT: .cfi_offset %ebp, -8 -; X86-ATOM-NEXT: leal (%esp), %ebp +; X86-ATOM-NEXT: movl %esp, %ebp ; X86-ATOM-NEXT: .cfi_def_cfa_register %ebp +; X86-ATOM-NEXT: pushl %ebx +; X86-ATOM-NEXT: pushl %edi +; X86-ATOM-NEXT: pushl %esi ; X86-ATOM-NEXT: andl $-16, %esp ; X86-ATOM-NEXT: leal -{{[0-9]+}}(%esp), %esp -; X86-ATOM-NEXT: movl 8(%ebp), %eax -; X86-ATOM-NEXT: movl %esp, %ecx -; X86-ATOM-NEXT: pushl $0 -; X86-ATOM-NEXT: pushl $0 -; X86-ATOM-NEXT: pushl $0 -; X86-ATOM-NEXT: pushl $0 +; X86-ATOM-NEXT: .cfi_offset %esi, -20 +; X86-ATOM-NEXT: .cfi_offset %edi, -16 +; X86-ATOM-NEXT: .cfi_offset %ebx, -12 +; X86-ATOM-NEXT: movl 8(%ebp), %esi +; X86-ATOM-NEXT: movl %esp, %ebx +; X86-ATOM-NEXT: movl 12(%esi), %ecx +; X86-ATOM-NEXT: movl 8(%esi), %edx +; X86-ATOM-NEXT: movl (%esi), %eax +; X86-ATOM-NEXT: movl 4(%esi), %edi +; X86-ATOM-NEXT: .p2align 4, 0x90 +; X86-ATOM-NEXT: .LBB12_1: # %atomicrmw.start +; X86-ATOM-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-ATOM-NEXT: movl %eax, (%esp) +; X86-ATOM-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-ATOM-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-ATOM-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-ATOM-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-ATOM-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-ATOM-NEXT: movl %edi, {{[0-9]+}}(%esp) +; X86-ATOM-NEXT: movl %eax, {{[0-9]+}}(%esp) +; X86-ATOM-NEXT: pushl $5 +; X86-ATOM-NEXT: pushl $5 +; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %eax ; X86-ATOM-NEXT: pushl %eax -; X86-ATOM-NEXT: pushl %ecx -; X86-ATOM-NEXT: calll __sync_fetch_and_or_16 +; X86-ATOM-NEXT: pushl %ebx +; X86-ATOM-NEXT: pushl %esi +; X86-ATOM-NEXT: pushl $16 +; X86-ATOM-NEXT: calll __atomic_compare_exchange@PLT ; X86-ATOM-NEXT: leal {{[0-9]+}}(%esp), %esp -; X86-ATOM-NEXT: movl %ebp, %esp +; X86-ATOM-NEXT: testb %al, %al +; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-ATOM-NEXT: movl (%esp), %eax +; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-ATOM-NEXT: je .LBB12_1 +; X86-ATOM-NEXT: # %bb.2: # %atomicrmw.end +; X86-ATOM-NEXT: leal -12(%ebp), %esp +; X86-ATOM-NEXT: popl %esi +; X86-ATOM-NEXT: popl %edi +; X86-ATOM-NEXT: popl %ebx ; X86-ATOM-NEXT: popl %ebp ; X86-ATOM-NEXT: .cfi_def_cfa %esp, 4 ; X86-ATOM-NEXT: retl diff --git a/llvm/test/CodeGen/X86/atomic-nocx16.ll b/llvm/test/CodeGen/X86/atomic-nocx16.ll index 5677541242a24..a014da80f189b 100644 --- a/llvm/test/CodeGen/X86/atomic-nocx16.ll +++ b/llvm/test/CodeGen/X86/atomic-nocx16.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs -mcpu=corei7 -mattr=-cx16 | FileCheck %s -; RUN: llc < %s -mtriple=i386-linux-gnu -verify-machineinstrs -mattr=cx16 | FileCheck -check-prefix=CHECK %s +; RUN: llc < %s -mtriple=i386-linux-gnu -verify-machineinstrs -mattr=cx16 | FileCheck -check-prefix=CHECK32 %s ;; Verify that 128-bit atomics emit a libcall without cx16 ;; available. @@ -10,25 +10,35 @@ ; CHECK-LABEL: test: define void @test(ptr %a) nounwind { entry: -; CHECK: __sync_val_compare_and_swap_16 +; CHECK: __atomic_compare_exchange_16 +; CHECK32: __atomic_compare_exchange %0 = cmpxchg ptr %a, i128 1, i128 1 seq_cst seq_cst -; CHECK: __sync_lock_test_and_set_16 +; CHECK: __atomic_exchange_16 +; CHECK32: __atomic_exchange %1 = atomicrmw xchg ptr %a, i128 1 seq_cst -; CHECK: __sync_fetch_and_add_16 +; CHECK: __atomic_fetch_add_16 +; CHECK32: __atomic_compare_exchange %2 = atomicrmw add ptr %a, i128 1 seq_cst -; CHECK: __sync_fetch_and_sub_16 +; CHECK: __atomic_fetch_sub_16 +; CHECK32: __atomic_compare_exchange %3 = atomicrmw sub ptr %a, i128 1 seq_cst -; CHECK: __sync_fetch_and_and_16 +; CHECK: __atomic_fetch_and_16 +; CHECK32: __atomic_compare_exchange %4 = atomicrmw and ptr %a, i128 1 seq_cst -; CHECK: __sync_fetch_and_nand_16 +; CHECK: __atomic_fetch_nand_16 +; CHECK32: __atomic_compare_exchange %5 = atomicrmw nand ptr %a, i128 1 seq_cst -; CHECK: __sync_fetch_and_or_16 +; CHECK: __atomic_fetch_or_16 +; CHECK32: __atomic_compare_exchange %6 = atomicrmw or ptr %a, i128 1 seq_cst -; CHECK: __sync_fetch_and_xor_16 +; CHECK: __atomic_fetch_xor_16 +; CHECK32: __atomic_compare_exchange %7 = atomicrmw xor ptr %a, i128 1 seq_cst -; CHECK: __sync_val_compare_and_swap_16 +; CHECK: __atomic_load_16 +; CHECK32: __atomic_load %8 = load atomic i128, ptr %a seq_cst, align 16 -; CHECK: __sync_lock_test_and_set_16 +; CHECK: __atomic_store_16 +; CHECK32: __atomic_store store atomic i128 %8, ptr %a seq_cst, align 16 ret void } @@ -36,14 +46,20 @@ entry: ; CHECK-LABEL: test_fp: define void @test_fp(fp128* %a) nounwind { entry: -; CHECK: __sync_lock_test_and_set_16 +; CHECK: __atomic_exchange_16 +; CHECK32: __atomic_exchange %0 = atomicrmw xchg fp128* %a, fp128 0xL00000000000000004000900000000000 seq_cst -; Currently fails to compile: -; %1 = atomicrmw fadd fp128* %a, fp128 0xL00000000000000004000900000000000 seq_cst -; %2 = atomicrmw fsub fp128* %a, fp128 0xL00000000000000004000900000000000 seq_cst -; CHECK: __sync_val_compare_and_swap_16 - %1 = load atomic fp128, fp128* %a seq_cst, align 16 -; CHECK: __sync_lock_test_and_set_16 - store atomic fp128 %1, fp128* %a seq_cst, align 16 +; CHECK: __atomic_compare_exchange_16 +; CHECK32: __atomic_compare_exchange + %1 = atomicrmw fadd fp128* %a, fp128 0xL00000000000000004000900000000000 seq_cst +; CHECK: __atomic_compare_exchange_16 +; CHECK32: __atomic_compare_exchange + %2 = atomicrmw fsub fp128* %a, fp128 0xL00000000000000004000900000000000 seq_cst +; CHECK: __atomic_load_16 +; CHECK32: __atomic_load + %3 = load atomic fp128, fp128* %a seq_cst, align 16 +; CHECK: __atomic_store_16 +; CHECK32: __atomic_store + store atomic fp128 %3, fp128* %a seq_cst, align 16 ret void } diff --git a/llvm/test/CodeGen/X86/atomic-ops-ancient-64.ll b/llvm/test/CodeGen/X86/atomic-ops-ancient-64.ll index 493c9a897f06b..bc99caeea12b6 100644 --- a/llvm/test/CodeGen/X86/atomic-ops-ancient-64.ll +++ b/llvm/test/CodeGen/X86/atomic-ops-ancient-64.ll @@ -1,44 +1,43 @@ -; RUN: llc -mtriple=i386-linux-gnu %s -o - | FileCheck %s -; XFAIL: * +; RUN: llc -mtriple=i386-linux-gnu -mcpu=i386 %s -o - | FileCheck %s define i64 @test_add(ptr %addr, i64 %inc) { ; CHECK-LABEL: test_add: -; CHECK: calll __sync_fetch_and_add_8 +; CHECK: calll __atomic_fetch_add_8 %old = atomicrmw add ptr %addr, i64 %inc seq_cst ret i64 %old } define i64 @test_sub(ptr %addr, i64 %inc) { ; CHECK-LABEL: test_sub: -; CHECK: calll __sync_fetch_and_sub_8 +; CHECK: calll __atomic_fetch_sub_8 %old = atomicrmw sub ptr %addr, i64 %inc seq_cst ret i64 %old } define i64 @test_and(ptr %andr, i64 %inc) { ; CHECK-LABEL: test_and: -; CHECK: calll __sync_fetch_and_and_8 +; CHECK: calll __atomic_fetch_and_8 %old = atomicrmw and ptr %andr, i64 %inc seq_cst ret i64 %old } define i64 @test_or(ptr %orr, i64 %inc) { ; CHECK-LABEL: test_or: -; CHECK: calll __sync_fetch_and_or_8 +; CHECK: calll __atomic_fetch_or_8 %old = atomicrmw or ptr %orr, i64 %inc seq_cst ret i64 %old } define i64 @test_xor(ptr %xorr, i64 %inc) { ; CHECK-LABEL: test_xor: -; CHECK: calll __sync_fetch_and_xor_8 +; CHECK: calll __atomic_fetch_xor_8 %old = atomicrmw xor ptr %xorr, i64 %inc seq_cst ret i64 %old } define i64 @test_nand(ptr %nandr, i64 %inc) { ; CHECK-LABEL: test_nand: -; CHECK: calll __sync_fetch_and_nand_8 +; CHECK: calll __atomic_fetch_nand_8 %old = atomicrmw nand ptr %nandr, i64 %inc seq_cst ret i64 %old } diff --git a/llvm/test/CodeGen/X86/atomic-oversize.ll b/llvm/test/CodeGen/X86/atomic-oversize.ll new file mode 100644 index 0000000000000..93213ebc06674 --- /dev/null +++ b/llvm/test/CodeGen/X86/atomic-oversize.ll @@ -0,0 +1,11 @@ +; RUN: llc -mtriple=x86_64 -mattr=cx16 < %s | FileCheck %s + +; Atomics larger than 128-bit are unsupported, and emit libcalls. +define void @test(ptr %a) nounwind { +; CHECK-LABEL: test: +; CHECK: callq __atomic_load +; CHECK: callq __atomic_store + %1 = load atomic i256, ptr %a seq_cst, align 32 + store atomic i256 %1, ptr %a seq_cst, align 32 + ret void +} diff --git a/llvm/test/CodeGen/X86/atomic-xor.ll b/llvm/test/CodeGen/X86/atomic-xor.ll index 97fa908f1b714..930286c8e5fb3 100644 --- a/llvm/test/CodeGen/X86/atomic-xor.ll +++ b/llvm/test/CodeGen/X86/atomic-xor.ll @@ -22,32 +22,54 @@ define i128 @xor128_signbit_used(ptr %p) nounwind { ; X86: # %bb.0: ; X86-NEXT: pushl %ebp ; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: andl $-16, %esp -; X86-NEXT: subl $16, %esp -; X86-NEXT: movl 8(%ebp), %esi -; X86-NEXT: movl %esp, %eax -; X86-NEXT: pushl $-2147483648 # imm = 0x80000000 -; X86-NEXT: pushl $0 +; X86-NEXT: subl $48, %esp +; X86-NEXT: movl 12(%ebp), %edi +; X86-NEXT: movl 12(%edi), %ecx +; X86-NEXT: movl 8(%edi), %edx +; X86-NEXT: movl (%edi), %ebx +; X86-NEXT: movl 4(%edi), %esi +; X86-NEXT: .p2align 4, 0x90 +; X86-NEXT: .LBB1_1: # %atomicrmw.start +; X86-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NEXT: movl %ebx, (%esp) +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: addl $-2147483648, %ecx # imm = 0x80000000 +; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %edx, {{[0-9]+}}(%esp) +; X86-NEXT: movl %esi, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ebx, {{[0-9]+}}(%esp) ; X86-NEXT: pushl $0 ; X86-NEXT: pushl $0 -; X86-NEXT: pushl 12(%ebp) +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl %eax +; X86-NEXT: leal {{[0-9]+}}(%esp), %eax ; X86-NEXT: pushl %eax -; X86-NEXT: calll __sync_fetch_and_xor_16 -; X86-NEXT: addl $20, %esp -; X86-NEXT: movl (%esp), %eax +; X86-NEXT: pushl %edi +; X86-NEXT: pushl $16 +; X86-NEXT: calll __atomic_compare_exchange@PLT +; X86-NEXT: addl $24, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edi, 8(%esi) -; X86-NEXT: movl %edx, 12(%esi) -; X86-NEXT: movl %eax, (%esi) -; X86-NEXT: movl %ecx, 4(%esi) -; X86-NEXT: movl %esi, %eax -; X86-NEXT: leal -8(%ebp), %esp +; X86-NEXT: movl (%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: testb %al, %al +; X86-NEXT: je .LBB1_1 +; X86-NEXT: # %bb.2: # %atomicrmw.end +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl %ebx, (%eax) +; X86-NEXT: movl %esi, 4(%eax) +; X86-NEXT: movl %edx, 8(%eax) +; X86-NEXT: movl %ecx, 12(%eax) +; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx ; X86-NEXT: popl %ebp ; X86-NEXT: retl $4 ; @@ -56,7 +78,8 @@ define i128 @xor128_signbit_used(ptr %p) nounwind { ; X64-NEXT: pushq %rax ; X64-NEXT: movabsq $-9223372036854775808, %rdx # imm = 0x8000000000000000 ; X64-NEXT: xorl %esi, %esi -; X64-NEXT: callq __sync_fetch_and_xor_16@PLT +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: callq __atomic_fetch_xor_16@PLT ; X64-NEXT: popq %rcx ; X64-NEXT: retq %r = atomicrmw xor ptr %p, i128 170141183460469231731687303715884105728 monotonic