diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1322,7 +1322,6 @@
   }
 
   if (shouldInlineQuadwordAtomics()) {
-    setMaxAtomicSizeInBitsSupported(128);
     setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
     setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
     setOperationAction(ISD::INTRINSIC_VOID, MVT::i128, Custom);
@@ -1347,6 +1346,10 @@
 
   if (!isPPC64)
     setMaxAtomicSizeInBitsSupported(32);
+  else if (shouldInlineQuadwordAtomics())
+    setMaxAtomicSizeInBitsSupported(128);
+  else
+    setMaxAtomicSizeInBitsSupported(64);
 
   setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
 
diff --git a/llvm/test/CodeGen/PowerPC/atomics-i128.ll b/llvm/test/CodeGen/PowerPC/atomics-i128.ll
--- a/llvm/test/CodeGen/PowerPC/atomics-i128.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics-i128.ll
@@ -47,10 +47,9 @@
 ; PWR7-NEXT: stdu r1, -112(r1)
 ; PWR7-NEXT: .cfi_def_cfa_offset 112
 ; PWR7-NEXT: .cfi_offset lr, 16
-; PWR7-NEXT: sync
-; PWR7-NEXT: bl __sync_lock_test_and_set_16
+; PWR7-NEXT: li r6, 5
+; PWR7-NEXT: bl __atomic_exchange_16
 ; PWR7-NEXT: nop
-; PWR7-NEXT: lwsync
 ; PWR7-NEXT: addi r1, r1, 112
 ; PWR7-NEXT: ld r0, 16(r1)
 ; PWR7-NEXT: mtlr r0
@@ -77,10 +76,9 @@
 ; AIX64-PWR8-NEXT: mflr r0
 ; AIX64-PWR8-NEXT: std r0, 16(r1)
 ; AIX64-PWR8-NEXT: stdu r1, -112(r1)
-; AIX64-PWR8-NEXT: sync
-; AIX64-PWR8-NEXT: bl .__sync_lock_test_and_set_16[PR]
+; AIX64-PWR8-NEXT: li r6, 5
+; AIX64-PWR8-NEXT: bl .__atomic_exchange_16[PR]
 ; AIX64-PWR8-NEXT: nop
-; AIX64-PWR8-NEXT: lwsync
 ; AIX64-PWR8-NEXT: addi r1, r1, 112
 ; AIX64-PWR8-NEXT: ld r0, 16(r1)
 ; AIX64-PWR8-NEXT: mtlr r0
@@ -140,10 +138,9 @@
 ; PWR7-NEXT: stdu r1, -112(r1)
 ; PWR7-NEXT: .cfi_def_cfa_offset 112
 ; PWR7-NEXT: .cfi_offset lr, 16
-; PWR7-NEXT: sync
-; PWR7-NEXT: bl __sync_fetch_and_add_16
+; PWR7-NEXT: li r6, 5
+; PWR7-NEXT: bl __atomic_fetch_add_16
 ; PWR7-NEXT: nop
-; PWR7-NEXT: lwsync
 ; PWR7-NEXT: addi r1, r1, 112
 ; PWR7-NEXT: ld r0, 16(r1)
 ; PWR7-NEXT: mtlr r0
@@ -170,10 +167,9 @@
 ; AIX64-PWR8-NEXT: mflr r0
 ; AIX64-PWR8-NEXT: std r0, 16(r1)
 ; AIX64-PWR8-NEXT: stdu r1, -112(r1)
-; AIX64-PWR8-NEXT: sync
-; AIX64-PWR8-NEXT: bl .__sync_fetch_and_add_16[PR]
+; AIX64-PWR8-NEXT: li r6, 5
+; AIX64-PWR8-NEXT: bl .__atomic_fetch_add_16[PR]
 ; AIX64-PWR8-NEXT: nop
-; AIX64-PWR8-NEXT: lwsync
 ; AIX64-PWR8-NEXT: addi r1, r1, 112
 ; AIX64-PWR8-NEXT: ld r0, 16(r1)
 ; AIX64-PWR8-NEXT: mtlr r0
@@ -280,10 +276,9 @@
 ; PWR7-NEXT: stdu r1, -112(r1)
 ; PWR7-NEXT: .cfi_def_cfa_offset 112
 ; PWR7-NEXT: .cfi_offset lr, 16
-; PWR7-NEXT: sync
-; PWR7-NEXT: bl __sync_fetch_and_sub_16
+; PWR7-NEXT: li r6, 5
+; PWR7-NEXT: bl __atomic_fetch_sub_16
 ; PWR7-NEXT: nop
-; PWR7-NEXT: lwsync
 ; PWR7-NEXT: addi r1, r1, 112
 ; PWR7-NEXT: ld r0, 16(r1)
 ; PWR7-NEXT: mtlr r0
@@ -310,10 +305,9 @@
 ; AIX64-PWR8-NEXT: mflr r0
 ; AIX64-PWR8-NEXT: std r0, 16(r1)
 ; AIX64-PWR8-NEXT: stdu r1, -112(r1)
-; AIX64-PWR8-NEXT: sync
-; AIX64-PWR8-NEXT: bl .__sync_fetch_and_sub_16[PR]
+; AIX64-PWR8-NEXT: li r6, 5
+; AIX64-PWR8-NEXT: bl .__atomic_fetch_sub_16[PR]
 ; AIX64-PWR8-NEXT: nop
-; AIX64-PWR8-NEXT: lwsync
 ; AIX64-PWR8-NEXT: addi r1, r1, 112
 ; AIX64-PWR8-NEXT: ld r0, 16(r1)
 ; AIX64-PWR8-NEXT: mtlr r0
@@ -420,10 +414,9 @@
 ; PWR7-NEXT: stdu r1, -112(r1)
 ; PWR7-NEXT: .cfi_def_cfa_offset 112
 ; PWR7-NEXT: .cfi_offset lr, 16
-; PWR7-NEXT: sync
-; PWR7-NEXT: bl __sync_fetch_and_and_16
+; PWR7-NEXT: li r6, 5
+; PWR7-NEXT: bl __atomic_fetch_and_16
 ; PWR7-NEXT: nop
-; PWR7-NEXT: lwsync
 ; PWR7-NEXT: addi r1, r1, 112
 ; PWR7-NEXT: ld r0, 16(r1)
 ; PWR7-NEXT: mtlr r0
@@ -450,10 +443,9 @@
 ; AIX64-PWR8-NEXT: mflr r0
 ; AIX64-PWR8-NEXT: std r0, 16(r1)
 ; AIX64-PWR8-NEXT: stdu r1, -112(r1)
-; AIX64-PWR8-NEXT: sync
-; AIX64-PWR8-NEXT: bl .__sync_fetch_and_and_16[PR]
+; AIX64-PWR8-NEXT: li r6, 5
+; AIX64-PWR8-NEXT: bl .__atomic_fetch_and_16[PR]
 ; AIX64-PWR8-NEXT: nop
-; AIX64-PWR8-NEXT: lwsync
 ; AIX64-PWR8-NEXT: addi r1, r1, 112
 ; AIX64-PWR8-NEXT: ld r0, 16(r1)
 ; AIX64-PWR8-NEXT: mtlr r0
@@ -560,10 +552,9 @@
 ; PWR7-NEXT: stdu r1, -112(r1)
 ; PWR7-NEXT: .cfi_def_cfa_offset 112
 ; PWR7-NEXT: .cfi_offset lr, 16
-; PWR7-NEXT: sync
-; PWR7-NEXT: bl __sync_fetch_and_or_16
+; PWR7-NEXT: li r6, 5
+; PWR7-NEXT: bl __atomic_fetch_or_16
 ; PWR7-NEXT: nop
-; PWR7-NEXT: lwsync
 ; PWR7-NEXT: addi r1, r1, 112
 ; PWR7-NEXT: ld r0, 16(r1)
 ; PWR7-NEXT: mtlr r0
@@ -590,10 +581,9 @@
 ; AIX64-PWR8-NEXT: mflr r0
 ; AIX64-PWR8-NEXT: std r0, 16(r1)
 ; AIX64-PWR8-NEXT: stdu r1, -112(r1)
-; AIX64-PWR8-NEXT: sync
-; AIX64-PWR8-NEXT: bl .__sync_fetch_and_or_16[PR]
+; AIX64-PWR8-NEXT: li r6, 5
+; AIX64-PWR8-NEXT: bl .__atomic_fetch_or_16[PR]
 ; AIX64-PWR8-NEXT: nop
-; AIX64-PWR8-NEXT: lwsync
 ; AIX64-PWR8-NEXT: addi r1, r1, 112
 ; AIX64-PWR8-NEXT: ld r0, 16(r1)
 ; AIX64-PWR8-NEXT: mtlr r0
@@ -700,10 +690,9 @@
 ; PWR7-NEXT: stdu r1, -112(r1)
 ; PWR7-NEXT: .cfi_def_cfa_offset 112
 ; PWR7-NEXT: .cfi_offset lr, 16
-; PWR7-NEXT: sync
-; PWR7-NEXT: bl __sync_fetch_and_xor_16
+; PWR7-NEXT: li r6, 5
+; PWR7-NEXT: bl __atomic_fetch_xor_16
 ; PWR7-NEXT: nop
-; PWR7-NEXT: lwsync
 ; PWR7-NEXT: addi r1, r1, 112
 ; PWR7-NEXT: ld r0, 16(r1)
 ; PWR7-NEXT: mtlr r0
@@ -730,10 +719,9 @@
 ; AIX64-PWR8-NEXT: mflr r0
 ; AIX64-PWR8-NEXT: std r0, 16(r1)
 ; AIX64-PWR8-NEXT: stdu r1, -112(r1)
-; AIX64-PWR8-NEXT: sync
-; AIX64-PWR8-NEXT: bl .__sync_fetch_and_xor_16[PR]
+; AIX64-PWR8-NEXT: li r6, 5
+; AIX64-PWR8-NEXT: bl .__atomic_fetch_xor_16[PR]
 ; AIX64-PWR8-NEXT: nop
-; AIX64-PWR8-NEXT: lwsync
 ; AIX64-PWR8-NEXT: addi r1, r1, 112
 ; AIX64-PWR8-NEXT: ld r0, 16(r1)
 ; AIX64-PWR8-NEXT: mtlr r0
@@ -840,10 +828,9 @@
 ; PWR7-NEXT: stdu r1, -112(r1)
 ; PWR7-NEXT: .cfi_def_cfa_offset 112
 ; PWR7-NEXT: .cfi_offset lr, 16
-; PWR7-NEXT: sync
-; PWR7-NEXT: bl __sync_fetch_and_nand_16
+; PWR7-NEXT: li r6, 5
+; PWR7-NEXT: bl __atomic_fetch_nand_16
 ; PWR7-NEXT: nop
-; PWR7-NEXT: lwsync
 ; PWR7-NEXT: addi r1, r1, 112
 ; PWR7-NEXT: ld r0, 16(r1)
 ; PWR7-NEXT: mtlr r0
@@ -870,10 +857,9 @@
 ; AIX64-PWR8-NEXT: mflr r0
 ; AIX64-PWR8-NEXT: std r0, 16(r1)
 ; AIX64-PWR8-NEXT: stdu r1, -112(r1)
-; AIX64-PWR8-NEXT: sync
-; AIX64-PWR8-NEXT: bl .__sync_fetch_and_nand_16[PR]
+; AIX64-PWR8-NEXT: li r6, 5
+; AIX64-PWR8-NEXT: bl .__atomic_fetch_nand_16[PR]
 ; AIX64-PWR8-NEXT: nop
-; AIX64-PWR8-NEXT: lwsync
 ; AIX64-PWR8-NEXT: addi r1, r1, 112
 ; AIX64-PWR8-NEXT: ld r0, 16(r1)
 ; AIX64-PWR8-NEXT: mtlr r0
@@ -986,13 +972,21 @@
 ; PWR7: # %bb.0: # %entry
 ; PWR7-NEXT: mflr r0
 ; PWR7-NEXT: std r0, 16(r1)
-; PWR7-NEXT: stdu r1, -112(r1)
-; PWR7-NEXT: .cfi_def_cfa_offset 112
+; PWR7-NEXT: stdu r1, -128(r1)
+; PWR7-NEXT: .cfi_def_cfa_offset 128
 ; PWR7-NEXT: .cfi_offset lr, 16
-; PWR7-NEXT: bl __sync_val_compare_and_swap_16
+; PWR7-NEXT: std r5, 120(r1)
+; PWR7-NEXT: std r4, 112(r1)
+; PWR7-NEXT: addi r4, r1, 112
+; PWR7-NEXT: mr r5, r6
+; PWR7-NEXT: mr r6, r7
+; PWR7-NEXT: li r7, 2
+; PWR7-NEXT: li r8, 2
+; PWR7-NEXT: bl __atomic_compare_exchange_16
 ; PWR7-NEXT: nop
-; PWR7-NEXT: lwsync
-; PWR7-NEXT: addi r1, r1, 112
+; PWR7-NEXT: ld r4, 120(r1)
+; PWR7-NEXT: ld r3, 112(r1)
+; PWR7-NEXT: addi r1, r1, 128
 ; PWR7-NEXT: ld r0, 16(r1)
 ; PWR7-NEXT: mtlr r0
 ; PWR7-NEXT: blr
@@ -1025,11 +1019,19 @@
 ; AIX64-PWR8: # %bb.0: # %entry
 ; AIX64-PWR8-NEXT: mflr r0
 ; AIX64-PWR8-NEXT: std r0, 16(r1)
-; AIX64-PWR8-NEXT: stdu r1, -112(r1)
-; AIX64-PWR8-NEXT: bl .__sync_val_compare_and_swap_16[PR]
+; AIX64-PWR8-NEXT: stdu r1, -128(r1)
+; AIX64-PWR8-NEXT: std r5, 120(r1)
+; AIX64-PWR8-NEXT: std r4, 112(r1)
+; AIX64-PWR8-NEXT: addi r4, r1, 112
+; AIX64-PWR8-NEXT: mr r5, r6
+; AIX64-PWR8-NEXT: mr r6, r7
+; AIX64-PWR8-NEXT: li r7, 2
+; AIX64-PWR8-NEXT: li r8, 2
+; AIX64-PWR8-NEXT: bl .__atomic_compare_exchange_16[PR]
 ; AIX64-PWR8-NEXT: nop
-; AIX64-PWR8-NEXT: lwsync
-; AIX64-PWR8-NEXT: addi r1, r1, 112
+; AIX64-PWR8-NEXT: ld r4, 120(r1)
+; AIX64-PWR8-NEXT: ld r3, 112(r1)
+; AIX64-PWR8-NEXT: addi r1, r1, 128
 ; AIX64-PWR8-NEXT: ld r0, 16(r1)
 ; AIX64-PWR8-NEXT: mtlr r0
 ; AIX64-PWR8-NEXT: blr
@@ -1101,13 +1103,21 @@
 ; PWR7: # %bb.0: # %entry
 ; PWR7-NEXT: mflr r0
 ; PWR7-NEXT: std r0, 16(r1)
-; PWR7-NEXT: stdu r1, -112(r1)
-; PWR7-NEXT: .cfi_def_cfa_offset 112
+; PWR7-NEXT: stdu r1, -128(r1)
+; PWR7-NEXT: .cfi_def_cfa_offset 128
 ; PWR7-NEXT: .cfi_offset lr, 16
-; PWR7-NEXT: lwsync
-; PWR7-NEXT: bl __sync_val_compare_and_swap_16
+; PWR7-NEXT: std r5, 120(r1)
+; PWR7-NEXT: std r4, 112(r1)
+; PWR7-NEXT: addi r4, r1, 112
+; PWR7-NEXT: mr r5, r6
+; PWR7-NEXT: mr r6, r7
+; PWR7-NEXT: li r7, 3
+; PWR7-NEXT: li r8, 0
+; PWR7-NEXT: bl __atomic_compare_exchange_16
 ; PWR7-NEXT: nop
-; PWR7-NEXT: addi r1, r1, 112
+; PWR7-NEXT: ld r4, 120(r1)
+; PWR7-NEXT: ld r3, 112(r1)
+; PWR7-NEXT: addi r1, r1, 128
 ; PWR7-NEXT: ld r0, 16(r1)
 ; PWR7-NEXT: mtlr r0
 ; PWR7-NEXT: blr
@@ -1140,11 +1150,19 @@
 ; AIX64-PWR8: # %bb.0: # %entry
 ; AIX64-PWR8-NEXT: mflr r0
 ; AIX64-PWR8-NEXT: std r0, 16(r1)
-; AIX64-PWR8-NEXT: stdu r1, -112(r1)
-; AIX64-PWR8-NEXT: lwsync
-; AIX64-PWR8-NEXT: bl .__sync_val_compare_and_swap_16[PR]
+; AIX64-PWR8-NEXT: stdu r1, -128(r1)
+; AIX64-PWR8-NEXT: std r5, 120(r1)
+; AIX64-PWR8-NEXT: std r4, 112(r1)
+; AIX64-PWR8-NEXT: addi r4, r1, 112
+; AIX64-PWR8-NEXT: mr r5, r6
+; AIX64-PWR8-NEXT: mr r6, r7
+; AIX64-PWR8-NEXT: li r7, 3
+; AIX64-PWR8-NEXT: li r8, 0
+; AIX64-PWR8-NEXT: bl .__atomic_compare_exchange_16[PR]
 ; AIX64-PWR8-NEXT: nop
-; AIX64-PWR8-NEXT: addi r1, r1, 112
+; AIX64-PWR8-NEXT: ld r4, 120(r1)
+; AIX64-PWR8-NEXT: ld r3, 112(r1)
+; AIX64-PWR8-NEXT: addi r1, r1, 128
 ; AIX64-PWR8-NEXT: ld r0, 16(r1)
 ; AIX64-PWR8-NEXT: mtlr r0
 ; AIX64-PWR8-NEXT: blr
@@ -1217,14 +1235,21 @@
 ; PWR7: # %bb.0: # %entry
 ; PWR7-NEXT: mflr r0
 ; PWR7-NEXT: std r0, 16(r1)
-; PWR7-NEXT: stdu r1, -112(r1)
-; PWR7-NEXT: .cfi_def_cfa_offset 112
+; PWR7-NEXT: stdu r1, -128(r1)
+; PWR7-NEXT: .cfi_def_cfa_offset 128
 ; PWR7-NEXT: .cfi_offset lr, 16
-; PWR7-NEXT: sync
-; PWR7-NEXT: bl __sync_val_compare_and_swap_16
+; PWR7-NEXT: std r5, 120(r1)
+; PWR7-NEXT: std r4, 112(r1)
+; PWR7-NEXT: addi r4, r1, 112
+; PWR7-NEXT: mr r5, r6
+; PWR7-NEXT: mr r6, r7
+; PWR7-NEXT: li r7, 5
+; PWR7-NEXT: li r8, 5
+; PWR7-NEXT: bl __atomic_compare_exchange_16
 ; PWR7-NEXT: nop
-; PWR7-NEXT: lwsync
-; PWR7-NEXT: addi r1, r1, 112
+; PWR7-NEXT: ld r4, 120(r1)
+; PWR7-NEXT: ld r3, 112(r1)
+; PWR7-NEXT: addi r1, r1, 128
 ; PWR7-NEXT: ld r0, 16(r1)
 ; PWR7-NEXT: mtlr r0
 ; PWR7-NEXT: blr
@@ -1258,12 +1283,19 @@
 ; AIX64-PWR8: # %bb.0: # %entry
 ; AIX64-PWR8-NEXT: mflr r0
 ; AIX64-PWR8-NEXT: std r0, 16(r1)
-; AIX64-PWR8-NEXT: stdu r1, -112(r1)
-; AIX64-PWR8-NEXT: sync
-; AIX64-PWR8-NEXT: bl .__sync_val_compare_and_swap_16[PR]
+; AIX64-PWR8-NEXT: stdu r1, -128(r1)
+; AIX64-PWR8-NEXT: std r5, 120(r1)
+; AIX64-PWR8-NEXT: std r4, 112(r1)
+; AIX64-PWR8-NEXT: addi r4, r1, 112
+; AIX64-PWR8-NEXT: mr r5, r6
+; AIX64-PWR8-NEXT: mr r6, r7
+; AIX64-PWR8-NEXT: li r7, 5
+; AIX64-PWR8-NEXT: li r8, 5
+; AIX64-PWR8-NEXT: bl .__atomic_compare_exchange_16[PR]
 ; AIX64-PWR8-NEXT: nop
-; AIX64-PWR8-NEXT: lwsync
-; AIX64-PWR8-NEXT: addi r1, r1, 112
+; AIX64-PWR8-NEXT: ld r4, 120(r1)
+; AIX64-PWR8-NEXT: ld r3, 112(r1)
+; AIX64-PWR8-NEXT: addi r1, r1, 128
 ; AIX64-PWR8-NEXT: ld r0, 16(r1)
 ; AIX64-PWR8-NEXT: mtlr r0
 ; AIX64-PWR8-NEXT: blr
@@ -1336,14 +1368,21 @@
 ; PWR7: # %bb.0: # %entry
 ; PWR7-NEXT: mflr r0
 ; PWR7-NEXT: std r0, 16(r1)
-; PWR7-NEXT: stdu r1, -112(r1)
-; PWR7-NEXT: .cfi_def_cfa_offset 112
+; PWR7-NEXT: stdu r1, -128(r1)
+; PWR7-NEXT: .cfi_def_cfa_offset 128
 ; PWR7-NEXT: .cfi_offset lr, 16
-; PWR7-NEXT: lwsync
-; PWR7-NEXT: bl __sync_val_compare_and_swap_16
+; PWR7-NEXT: std r5, 120(r1)
+; PWR7-NEXT: std r4, 112(r1)
+; PWR7-NEXT: addi r4, r1, 112
+; PWR7-NEXT: mr r5, r6
+; PWR7-NEXT: mr r6, r7
+; PWR7-NEXT: li r7, 4
+; PWR7-NEXT: li r8, 2
+; PWR7-NEXT: bl __atomic_compare_exchange_16
 ; PWR7-NEXT: nop
-; PWR7-NEXT: lwsync
-; PWR7-NEXT: addi r1, r1, 112
+; PWR7-NEXT: ld r4, 120(r1)
+; PWR7-NEXT: ld r3, 112(r1)
+; PWR7-NEXT: addi r1, r1, 128
 ; PWR7-NEXT: ld r0, 16(r1)
 ; PWR7-NEXT: mtlr r0
 ; PWR7-NEXT: blr
@@ -1377,12 +1416,19 @@
 ; AIX64-PWR8: # %bb.0: # %entry
 ; AIX64-PWR8-NEXT: mflr r0
 ; AIX64-PWR8-NEXT: std r0, 16(r1)
-; AIX64-PWR8-NEXT: stdu r1, -112(r1)
-; AIX64-PWR8-NEXT: lwsync
-; AIX64-PWR8-NEXT: bl .__sync_val_compare_and_swap_16[PR]
+; AIX64-PWR8-NEXT: stdu r1, -128(r1)
+; AIX64-PWR8-NEXT: std r5, 120(r1)
+; AIX64-PWR8-NEXT: std r4, 112(r1)
+; AIX64-PWR8-NEXT: addi r4, r1, 112
+; AIX64-PWR8-NEXT: mr r5, r6
+; AIX64-PWR8-NEXT: mr r6, r7
+; AIX64-PWR8-NEXT: li r7, 4
+; AIX64-PWR8-NEXT: li r8, 2
+; AIX64-PWR8-NEXT: bl .__atomic_compare_exchange_16[PR]
 ; AIX64-PWR8-NEXT: nop
-; AIX64-PWR8-NEXT: lwsync
-; AIX64-PWR8-NEXT: addi r1, r1, 112
+; AIX64-PWR8-NEXT: ld r4, 120(r1)
+; AIX64-PWR8-NEXT: ld r3, 112(r1)
+; AIX64-PWR8-NEXT: addi r1, r1, 128
 ; AIX64-PWR8-NEXT: ld r0, 16(r1)
 ; AIX64-PWR8-NEXT: mtlr r0
 ; AIX64-PWR8-NEXT: blr
@@ -1458,27 +1504,19 @@
 ; PWR7: # %bb.0: # %entry
 ; PWR7-NEXT: mflr r0
 ; PWR7-NEXT: std r0, 16(r1)
-; PWR7-NEXT: stdu r1, -144(r1)
-; PWR7-NEXT: .cfi_def_cfa_offset 144
+; PWR7-NEXT: stdu r1, -128(r1)
+; PWR7-NEXT: .cfi_def_cfa_offset 128
 ; PWR7-NEXT: .cfi_offset lr, 16
-; PWR7-NEXT: .cfi_offset r29, -24
-; PWR7-NEXT: .cfi_offset r30, -16
-; PWR7-NEXT: std r29, 120(r1) # 8-byte Folded Spill
-; PWR7-NEXT: std r30, 128(r1) # 8-byte Folded Spill
-; PWR7-NEXT: mr r30, r5
-; PWR7-NEXT: mr r29, r4
-; PWR7-NEXT: lwsync
-; PWR7-NEXT: bl __sync_val_compare_and_swap_16
+; PWR7-NEXT: std r5, 120(r1)
+; PWR7-NEXT: std r4, 112(r1)
+; PWR7-NEXT: addi r4, r1, 112
+; PWR7-NEXT: mr r5, r6
+; PWR7-NEXT: mr r6, r7
+; PWR7-NEXT: li r7, 4
+; PWR7-NEXT: li r8, 2
+; PWR7-NEXT: bl __atomic_compare_exchange_16
 ; PWR7-NEXT: nop
-; PWR7-NEXT: xor r3, r3, r29
-; PWR7-NEXT: xor r4, r4, r30
-; PWR7-NEXT: lwsync
-; PWR7-NEXT: or r3, r4, r3
-; PWR7-NEXT: ld r30, 128(r1) # 8-byte Folded Reload
-; PWR7-NEXT: ld r29, 120(r1) # 8-byte Folded Reload
-; PWR7-NEXT: cntlzd r3, r3
-; PWR7-NEXT: rldicl r3, r3, 58, 63
-; PWR7-NEXT: addi r1, r1, 144
+; PWR7-NEXT: addi r1, r1, 128
 ; PWR7-NEXT: ld r0, 16(r1)
 ; PWR7-NEXT: mtlr r0
 ; PWR7-NEXT: blr
@@ -1516,21 +1554,15 @@
 ; AIX64-PWR8-NEXT: mflr r0
 ; AIX64-PWR8-NEXT: std r0, 16(r1)
 ; AIX64-PWR8-NEXT: stdu r1, -128(r1)
-; AIX64-PWR8-NEXT: std r30, 112(r1) # 8-byte Folded Spill
-; AIX64-PWR8-NEXT: std r31, 120(r1) # 8-byte Folded Spill
-; AIX64-PWR8-NEXT: mr r31, r5
-; AIX64-PWR8-NEXT: mr r30, r4
-; AIX64-PWR8-NEXT: lwsync
-; AIX64-PWR8-NEXT: bl .__sync_val_compare_and_swap_16[PR]
+; AIX64-PWR8-NEXT: std r5, 120(r1)
+; AIX64-PWR8-NEXT: std r4, 112(r1)
+; AIX64-PWR8-NEXT: addi r4, r1, 112
+; AIX64-PWR8-NEXT: mr r5, r6
+; AIX64-PWR8-NEXT: mr r6, r7
+; AIX64-PWR8-NEXT: li r7, 4
+; AIX64-PWR8-NEXT: li r8, 2
+; AIX64-PWR8-NEXT: bl .__atomic_compare_exchange_16[PR]
 ; AIX64-PWR8-NEXT: nop
-; AIX64-PWR8-NEXT: xor r3, r3, r30
-; AIX64-PWR8-NEXT: xor r4, r4, r31
-; AIX64-PWR8-NEXT: lwsync
-; AIX64-PWR8-NEXT: or r3, r4, r3
-; AIX64-PWR8-NEXT: ld r31, 120(r1) # 8-byte Folded Reload
-; AIX64-PWR8-NEXT: ld r30, 112(r1) # 8-byte Folded Reload
-; AIX64-PWR8-NEXT: cntlzd r3, r3
-; AIX64-PWR8-NEXT: rldicl r3, r3, 58, 63
 ; AIX64-PWR8-NEXT: addi r1, r1, 128
 ; AIX64-PWR8-NEXT: ld r0, 16(r1)
 ; AIX64-PWR8-NEXT: mtlr r0
diff --git a/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll b/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll
--- a/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll
+++ b/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll
@@ -34,10 +34,22 @@
 ;
 ; PWR7-LABEL: @test_cmpxchg_seq_cst(
 ; PWR7-NEXT: entry:
-; PWR7-NEXT: call void @llvm.ppc.sync()
-; PWR7-NEXT: [[PAIR:%.*]] = cmpxchg weak i128* [[ADDR:%.*]], i128 [[DESIRE:%.*]], i128 [[NEW:%.*]] monotonic monotonic, align 16
-; PWR7-NEXT: call void @llvm.ppc.lwsync()
-; PWR7-NEXT: [[SUCC:%.*]] = extractvalue { i128, i1 } [[PAIR]], 1
+; PWR7-NEXT: [[TMP0:%.*]] = bitcast i128* [[ADDR:%.*]] to i8*
+; PWR7-NEXT: [[TMP1:%.*]] = alloca i128, align 8
+; PWR7-NEXT: [[TMP2:%.*]] = bitcast i128* [[TMP1]] to i8*
+; PWR7-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP2]])
+; PWR7-NEXT: store i128 [[DESIRE:%.*]], i128* [[TMP1]], align 8
+; PWR7-NEXT: [[TMP3:%.*]] = alloca i128, align 8
+; PWR7-NEXT: [[TMP4:%.*]] = bitcast i128* [[TMP3]] to i8*
+; PWR7-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP4]])
+; PWR7-NEXT: store i128 [[NEW:%.*]], i128* [[TMP3]], align 8
+; PWR7-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 16, i8* [[TMP0]], i8* [[TMP2]], i8* [[TMP4]], i32 5, i32 5)
+; PWR7-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP4]])
+; PWR7-NEXT: [[TMP6:%.*]] = load i128, i128* [[TMP1]], align 8
+; PWR7-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP2]])
+; PWR7-NEXT: [[TMP7:%.*]] = insertvalue { i128, i1 } undef, i128 [[TMP6]], 0
+; PWR7-NEXT: [[TMP8:%.*]] = insertvalue { i128, i1 } [[TMP7]], i1 [[TMP5]], 1
+; PWR7-NEXT: [[SUCC:%.*]] = extractvalue { i128, i1 } [[TMP8]], 1
 ; PWR7-NEXT: ret i1 [[SUCC]]
 ;
 entry:
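
Context for the hunks above: PPCISelLowering.cpp now picks the supported atomic width in one place (32 bits for 32-bit targets, 128 bits only when shouldInlineQuadwordAtomics() holds, otherwise 64 bits), so on subtargets that cannot inline quadword atomics (the pwr7 and AIX pwr8 configurations checked here) AtomicExpand emits __atomic_* libcalls carrying an explicit memory-order argument instead of __sync_* libcalls bracketed by sync/lwsync. A minimal IR sketch of an input that exercises this path is below; the function name is illustrative and not taken from the tests:

define i1 @cas_i128_seq_cst(i128* %addr, i128 %desire, i128 %new) {
entry:
  ; With a 64-bit maximum atomic width this cmpxchg is expected to become an
  ; __atomic_compare_exchange libcall (memory-order arguments 5/5 for
  ; seq_cst), in line with the updated CHECK lines above.
  %pair = cmpxchg weak i128* %addr, i128 %desire, i128 %new seq_cst seq_cst
  %succ = extractvalue { i128, i1 } %pair, 1
  ret i1 %succ
}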