diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1322,7 +1322,6 @@ } if (shouldInlineQuadwordAtomics()) { - setMaxAtomicSizeInBitsSupported(128); setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom); setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::i128, Custom); @@ -1347,6 +1346,10 @@ if (!isPPC64) setMaxAtomicSizeInBitsSupported(32); + else if (shouldInlineQuadwordAtomics()) + setMaxAtomicSizeInBitsSupported(128); + else + setMaxAtomicSizeInBitsSupported(64); setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1); diff --git a/llvm/test/CodeGen/PowerPC/atomics-i128.ll b/llvm/test/CodeGen/PowerPC/atomics-i128.ll --- a/llvm/test/CodeGen/PowerPC/atomics-i128.ll +++ b/llvm/test/CodeGen/PowerPC/atomics-i128.ll @@ -43,10 +43,9 @@ ; PWR7-NEXT: stdu r1, -112(r1) ; PWR7-NEXT: .cfi_def_cfa_offset 112 ; PWR7-NEXT: .cfi_offset lr, 16 -; PWR7-NEXT: sync -; PWR7-NEXT: bl __sync_lock_test_and_set_16 +; PWR7-NEXT: li r6, 5 +; PWR7-NEXT: bl __atomic_exchange_16 ; PWR7-NEXT: nop -; PWR7-NEXT: lwsync ; PWR7-NEXT: addi r1, r1, 112 ; PWR7-NEXT: ld r0, 16(r1) ; PWR7-NEXT: mtlr r0 @@ -73,10 +72,9 @@ ; AIX64-PWR8-NEXT: mflr r0 ; AIX64-PWR8-NEXT: std r0, 16(r1) ; AIX64-PWR8-NEXT: stdu r1, -112(r1) -; AIX64-PWR8-NEXT: sync -; AIX64-PWR8-NEXT: bl .__sync_lock_test_and_set_16[PR] +; AIX64-PWR8-NEXT: li r6, 5 +; AIX64-PWR8-NEXT: bl .__atomic_exchange_16[PR] ; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: lwsync ; AIX64-PWR8-NEXT: addi r1, r1, 112 ; AIX64-PWR8-NEXT: ld r0, 16(r1) ; AIX64-PWR8-NEXT: mtlr r0 @@ -136,10 +134,9 @@ ; PWR7-NEXT: stdu r1, -112(r1) ; PWR7-NEXT: .cfi_def_cfa_offset 112 ; PWR7-NEXT: .cfi_offset lr, 16 -; PWR7-NEXT: sync -; PWR7-NEXT: bl __sync_fetch_and_add_16 +; PWR7-NEXT: li r6, 5 +; PWR7-NEXT: bl __atomic_fetch_add_16 ; PWR7-NEXT: nop -; PWR7-NEXT: lwsync ; PWR7-NEXT: addi r1, r1, 112 ; PWR7-NEXT: ld r0, 16(r1) ; PWR7-NEXT: mtlr r0 @@ -166,10 +163,9 @@ ; AIX64-PWR8-NEXT: mflr r0 ; AIX64-PWR8-NEXT: std r0, 16(r1) ; AIX64-PWR8-NEXT: stdu r1, -112(r1) -; AIX64-PWR8-NEXT: sync -; AIX64-PWR8-NEXT: bl .__sync_fetch_and_add_16[PR] +; AIX64-PWR8-NEXT: li r6, 5 +; AIX64-PWR8-NEXT: bl .__atomic_fetch_add_16[PR] ; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: lwsync ; AIX64-PWR8-NEXT: addi r1, r1, 112 ; AIX64-PWR8-NEXT: ld r0, 16(r1) ; AIX64-PWR8-NEXT: mtlr r0 @@ -276,10 +272,9 @@ ; PWR7-NEXT: stdu r1, -112(r1) ; PWR7-NEXT: .cfi_def_cfa_offset 112 ; PWR7-NEXT: .cfi_offset lr, 16 -; PWR7-NEXT: sync -; PWR7-NEXT: bl __sync_fetch_and_sub_16 +; PWR7-NEXT: li r6, 5 +; PWR7-NEXT: bl __atomic_fetch_sub_16 ; PWR7-NEXT: nop -; PWR7-NEXT: lwsync ; PWR7-NEXT: addi r1, r1, 112 ; PWR7-NEXT: ld r0, 16(r1) ; PWR7-NEXT: mtlr r0 @@ -306,10 +301,9 @@ ; AIX64-PWR8-NEXT: mflr r0 ; AIX64-PWR8-NEXT: std r0, 16(r1) ; AIX64-PWR8-NEXT: stdu r1, -112(r1) -; AIX64-PWR8-NEXT: sync -; AIX64-PWR8-NEXT: bl .__sync_fetch_and_sub_16[PR] +; AIX64-PWR8-NEXT: li r6, 5 +; AIX64-PWR8-NEXT: bl .__atomic_fetch_sub_16[PR] ; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: lwsync ; AIX64-PWR8-NEXT: addi r1, r1, 112 ; AIX64-PWR8-NEXT: ld r0, 16(r1) ; AIX64-PWR8-NEXT: mtlr r0 @@ -416,10 +410,9 @@ ; PWR7-NEXT: stdu r1, -112(r1) ; PWR7-NEXT: .cfi_def_cfa_offset 112 ; PWR7-NEXT: .cfi_offset lr, 16 -; PWR7-NEXT: sync -; PWR7-NEXT: bl __sync_fetch_and_and_16 +; PWR7-NEXT: li r6, 5 +; PWR7-NEXT: bl __atomic_fetch_and_16 ; PWR7-NEXT: nop -; PWR7-NEXT: lwsync ; PWR7-NEXT: addi r1, r1, 112 ; PWR7-NEXT: ld r0, 16(r1) ; PWR7-NEXT: mtlr r0 @@ -446,10 +439,9 @@ ; AIX64-PWR8-NEXT: mflr r0 ; AIX64-PWR8-NEXT: std r0, 16(r1) ; AIX64-PWR8-NEXT: stdu r1, -112(r1) -; AIX64-PWR8-NEXT: sync -; AIX64-PWR8-NEXT: bl .__sync_fetch_and_and_16[PR] +; AIX64-PWR8-NEXT: li r6, 5 +; AIX64-PWR8-NEXT: bl .__atomic_fetch_and_16[PR] ; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: lwsync ; AIX64-PWR8-NEXT: addi r1, r1, 112 ; AIX64-PWR8-NEXT: ld r0, 16(r1) ; AIX64-PWR8-NEXT: mtlr r0 @@ -556,10 +548,9 @@ ; PWR7-NEXT: stdu r1, -112(r1) ; PWR7-NEXT: .cfi_def_cfa_offset 112 ; PWR7-NEXT: .cfi_offset lr, 16 -; PWR7-NEXT: sync -; PWR7-NEXT: bl __sync_fetch_and_or_16 +; PWR7-NEXT: li r6, 5 +; PWR7-NEXT: bl __atomic_fetch_or_16 ; PWR7-NEXT: nop -; PWR7-NEXT: lwsync ; PWR7-NEXT: addi r1, r1, 112 ; PWR7-NEXT: ld r0, 16(r1) ; PWR7-NEXT: mtlr r0 @@ -586,10 +577,9 @@ ; AIX64-PWR8-NEXT: mflr r0 ; AIX64-PWR8-NEXT: std r0, 16(r1) ; AIX64-PWR8-NEXT: stdu r1, -112(r1) -; AIX64-PWR8-NEXT: sync -; AIX64-PWR8-NEXT: bl .__sync_fetch_and_or_16[PR] +; AIX64-PWR8-NEXT: li r6, 5 +; AIX64-PWR8-NEXT: bl .__atomic_fetch_or_16[PR] ; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: lwsync ; AIX64-PWR8-NEXT: addi r1, r1, 112 ; AIX64-PWR8-NEXT: ld r0, 16(r1) ; AIX64-PWR8-NEXT: mtlr r0 @@ -696,10 +686,9 @@ ; PWR7-NEXT: stdu r1, -112(r1) ; PWR7-NEXT: .cfi_def_cfa_offset 112 ; PWR7-NEXT: .cfi_offset lr, 16 -; PWR7-NEXT: sync -; PWR7-NEXT: bl __sync_fetch_and_xor_16 +; PWR7-NEXT: li r6, 5 +; PWR7-NEXT: bl __atomic_fetch_xor_16 ; PWR7-NEXT: nop -; PWR7-NEXT: lwsync ; PWR7-NEXT: addi r1, r1, 112 ; PWR7-NEXT: ld r0, 16(r1) ; PWR7-NEXT: mtlr r0 @@ -726,10 +715,9 @@ ; AIX64-PWR8-NEXT: mflr r0 ; AIX64-PWR8-NEXT: std r0, 16(r1) ; AIX64-PWR8-NEXT: stdu r1, -112(r1) -; AIX64-PWR8-NEXT: sync -; AIX64-PWR8-NEXT: bl .__sync_fetch_and_xor_16[PR] +; AIX64-PWR8-NEXT: li r6, 5 +; AIX64-PWR8-NEXT: bl .__atomic_fetch_xor_16[PR] ; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: lwsync ; AIX64-PWR8-NEXT: addi r1, r1, 112 ; AIX64-PWR8-NEXT: ld r0, 16(r1) ; AIX64-PWR8-NEXT: mtlr r0 @@ -836,10 +824,9 @@ ; PWR7-NEXT: stdu r1, -112(r1) ; PWR7-NEXT: .cfi_def_cfa_offset 112 ; PWR7-NEXT: .cfi_offset lr, 16 -; PWR7-NEXT: sync -; PWR7-NEXT: bl __sync_fetch_and_nand_16 +; PWR7-NEXT: li r6, 5 +; PWR7-NEXT: bl __atomic_fetch_nand_16 ; PWR7-NEXT: nop -; PWR7-NEXT: lwsync ; PWR7-NEXT: addi r1, r1, 112 ; PWR7-NEXT: ld r0, 16(r1) ; PWR7-NEXT: mtlr r0 @@ -866,10 +853,9 @@ ; AIX64-PWR8-NEXT: mflr r0 ; AIX64-PWR8-NEXT: std r0, 16(r1) ; AIX64-PWR8-NEXT: stdu r1, -112(r1) -; AIX64-PWR8-NEXT: sync -; AIX64-PWR8-NEXT: bl .__sync_fetch_and_nand_16[PR] +; AIX64-PWR8-NEXT: li r6, 5 +; AIX64-PWR8-NEXT: bl .__atomic_fetch_nand_16[PR] ; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: lwsync ; AIX64-PWR8-NEXT: addi r1, r1, 112 ; AIX64-PWR8-NEXT: ld r0, 16(r1) ; AIX64-PWR8-NEXT: mtlr r0 @@ -982,13 +968,21 @@ ; PWR7: # %bb.0: # %entry ; PWR7-NEXT: mflr r0 ; PWR7-NEXT: std r0, 16(r1) -; PWR7-NEXT: stdu r1, -112(r1) -; PWR7-NEXT: .cfi_def_cfa_offset 112 +; PWR7-NEXT: stdu r1, -128(r1) +; PWR7-NEXT: .cfi_def_cfa_offset 128 ; PWR7-NEXT: .cfi_offset lr, 16 -; PWR7-NEXT: bl __sync_val_compare_and_swap_16 +; PWR7-NEXT: std r5, 120(r1) +; PWR7-NEXT: std r4, 112(r1) +; PWR7-NEXT: addi r4, r1, 112 +; PWR7-NEXT: mr r5, r6 +; PWR7-NEXT: mr r6, r7 +; PWR7-NEXT: li r7, 2 +; PWR7-NEXT: li r8, 2 +; PWR7-NEXT: bl __atomic_compare_exchange_16 ; PWR7-NEXT: nop -; PWR7-NEXT: lwsync -; PWR7-NEXT: addi r1, r1, 112 +; PWR7-NEXT: ld r4, 120(r1) +; PWR7-NEXT: ld r3, 112(r1) +; PWR7-NEXT: addi r1, r1, 128 ; PWR7-NEXT: ld r0, 16(r1) ; PWR7-NEXT: mtlr r0 ; PWR7-NEXT: blr @@ -1021,11 +1015,19 @@ ; AIX64-PWR8: # %bb.0: # %entry ; AIX64-PWR8-NEXT: mflr r0 ; AIX64-PWR8-NEXT: std r0, 16(r1) -; AIX64-PWR8-NEXT: stdu r1, -112(r1) -; AIX64-PWR8-NEXT: bl .__sync_val_compare_and_swap_16[PR] +; AIX64-PWR8-NEXT: stdu r1, -128(r1) +; AIX64-PWR8-NEXT: std r5, 120(r1) +; AIX64-PWR8-NEXT: std r4, 112(r1) +; AIX64-PWR8-NEXT: addi r4, r1, 112 +; AIX64-PWR8-NEXT: mr r5, r6 +; AIX64-PWR8-NEXT: mr r6, r7 +; AIX64-PWR8-NEXT: li r7, 2 +; AIX64-PWR8-NEXT: li r8, 2 +; AIX64-PWR8-NEXT: bl .__atomic_compare_exchange_16[PR] ; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: lwsync -; AIX64-PWR8-NEXT: addi r1, r1, 112 +; AIX64-PWR8-NEXT: ld r4, 120(r1) +; AIX64-PWR8-NEXT: ld r3, 112(r1) +; AIX64-PWR8-NEXT: addi r1, r1, 128 ; AIX64-PWR8-NEXT: ld r0, 16(r1) ; AIX64-PWR8-NEXT: mtlr r0 ; AIX64-PWR8-NEXT: blr @@ -1097,13 +1099,21 @@ ; PWR7: # %bb.0: # %entry ; PWR7-NEXT: mflr r0 ; PWR7-NEXT: std r0, 16(r1) -; PWR7-NEXT: stdu r1, -112(r1) -; PWR7-NEXT: .cfi_def_cfa_offset 112 +; PWR7-NEXT: stdu r1, -128(r1) +; PWR7-NEXT: .cfi_def_cfa_offset 128 ; PWR7-NEXT: .cfi_offset lr, 16 -; PWR7-NEXT: lwsync -; PWR7-NEXT: bl __sync_val_compare_and_swap_16 +; PWR7-NEXT: std r5, 120(r1) +; PWR7-NEXT: std r4, 112(r1) +; PWR7-NEXT: addi r4, r1, 112 +; PWR7-NEXT: mr r5, r6 +; PWR7-NEXT: mr r6, r7 +; PWR7-NEXT: li r7, 3 +; PWR7-NEXT: li r8, 0 +; PWR7-NEXT: bl __atomic_compare_exchange_16 ; PWR7-NEXT: nop -; PWR7-NEXT: addi r1, r1, 112 +; PWR7-NEXT: ld r4, 120(r1) +; PWR7-NEXT: ld r3, 112(r1) +; PWR7-NEXT: addi r1, r1, 128 ; PWR7-NEXT: ld r0, 16(r1) ; PWR7-NEXT: mtlr r0 ; PWR7-NEXT: blr @@ -1136,11 +1146,19 @@ ; AIX64-PWR8: # %bb.0: # %entry ; AIX64-PWR8-NEXT: mflr r0 ; AIX64-PWR8-NEXT: std r0, 16(r1) -; AIX64-PWR8-NEXT: stdu r1, -112(r1) -; AIX64-PWR8-NEXT: lwsync -; AIX64-PWR8-NEXT: bl .__sync_val_compare_and_swap_16[PR] +; AIX64-PWR8-NEXT: stdu r1, -128(r1) +; AIX64-PWR8-NEXT: std r5, 120(r1) +; AIX64-PWR8-NEXT: std r4, 112(r1) +; AIX64-PWR8-NEXT: addi r4, r1, 112 +; AIX64-PWR8-NEXT: mr r5, r6 +; AIX64-PWR8-NEXT: mr r6, r7 +; AIX64-PWR8-NEXT: li r7, 3 +; AIX64-PWR8-NEXT: li r8, 0 +; AIX64-PWR8-NEXT: bl .__atomic_compare_exchange_16[PR] ; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: addi r1, r1, 112 +; AIX64-PWR8-NEXT: ld r4, 120(r1) +; AIX64-PWR8-NEXT: ld r3, 112(r1) +; AIX64-PWR8-NEXT: addi r1, r1, 128 ; AIX64-PWR8-NEXT: ld r0, 16(r1) ; AIX64-PWR8-NEXT: mtlr r0 ; AIX64-PWR8-NEXT: blr @@ -1213,14 +1231,21 @@ ; PWR7: # %bb.0: # %entry ; PWR7-NEXT: mflr r0 ; PWR7-NEXT: std r0, 16(r1) -; PWR7-NEXT: stdu r1, -112(r1) -; PWR7-NEXT: .cfi_def_cfa_offset 112 +; PWR7-NEXT: stdu r1, -128(r1) +; PWR7-NEXT: .cfi_def_cfa_offset 128 ; PWR7-NEXT: .cfi_offset lr, 16 -; PWR7-NEXT: sync -; PWR7-NEXT: bl __sync_val_compare_and_swap_16 +; PWR7-NEXT: std r5, 120(r1) +; PWR7-NEXT: std r4, 112(r1) +; PWR7-NEXT: addi r4, r1, 112 +; PWR7-NEXT: mr r5, r6 +; PWR7-NEXT: mr r6, r7 +; PWR7-NEXT: li r7, 5 +; PWR7-NEXT: li r8, 5 +; PWR7-NEXT: bl __atomic_compare_exchange_16 ; PWR7-NEXT: nop -; PWR7-NEXT: lwsync -; PWR7-NEXT: addi r1, r1, 112 +; PWR7-NEXT: ld r4, 120(r1) +; PWR7-NEXT: ld r3, 112(r1) +; PWR7-NEXT: addi r1, r1, 128 ; PWR7-NEXT: ld r0, 16(r1) ; PWR7-NEXT: mtlr r0 ; PWR7-NEXT: blr @@ -1254,12 +1279,19 @@ ; AIX64-PWR8: # %bb.0: # %entry ; AIX64-PWR8-NEXT: mflr r0 ; AIX64-PWR8-NEXT: std r0, 16(r1) -; AIX64-PWR8-NEXT: stdu r1, -112(r1) -; AIX64-PWR8-NEXT: sync -; AIX64-PWR8-NEXT: bl .__sync_val_compare_and_swap_16[PR] +; AIX64-PWR8-NEXT: stdu r1, -128(r1) +; AIX64-PWR8-NEXT: std r5, 120(r1) +; AIX64-PWR8-NEXT: std r4, 112(r1) +; AIX64-PWR8-NEXT: addi r4, r1, 112 +; AIX64-PWR8-NEXT: mr r5, r6 +; AIX64-PWR8-NEXT: mr r6, r7 +; AIX64-PWR8-NEXT: li r7, 5 +; AIX64-PWR8-NEXT: li r8, 5 +; AIX64-PWR8-NEXT: bl .__atomic_compare_exchange_16[PR] ; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: lwsync -; AIX64-PWR8-NEXT: addi r1, r1, 112 +; AIX64-PWR8-NEXT: ld r4, 120(r1) +; AIX64-PWR8-NEXT: ld r3, 112(r1) +; AIX64-PWR8-NEXT: addi r1, r1, 128 ; AIX64-PWR8-NEXT: ld r0, 16(r1) ; AIX64-PWR8-NEXT: mtlr r0 ; AIX64-PWR8-NEXT: blr @@ -1332,14 +1364,21 @@ ; PWR7: # %bb.0: # %entry ; PWR7-NEXT: mflr r0 ; PWR7-NEXT: std r0, 16(r1) -; PWR7-NEXT: stdu r1, -112(r1) -; PWR7-NEXT: .cfi_def_cfa_offset 112 +; PWR7-NEXT: stdu r1, -128(r1) +; PWR7-NEXT: .cfi_def_cfa_offset 128 ; PWR7-NEXT: .cfi_offset lr, 16 -; PWR7-NEXT: lwsync -; PWR7-NEXT: bl __sync_val_compare_and_swap_16 +; PWR7-NEXT: std r5, 120(r1) +; PWR7-NEXT: std r4, 112(r1) +; PWR7-NEXT: addi r4, r1, 112 +; PWR7-NEXT: mr r5, r6 +; PWR7-NEXT: mr r6, r7 +; PWR7-NEXT: li r7, 4 +; PWR7-NEXT: li r8, 2 +; PWR7-NEXT: bl __atomic_compare_exchange_16 ; PWR7-NEXT: nop -; PWR7-NEXT: lwsync -; PWR7-NEXT: addi r1, r1, 112 +; PWR7-NEXT: ld r4, 120(r1) +; PWR7-NEXT: ld r3, 112(r1) +; PWR7-NEXT: addi r1, r1, 128 ; PWR7-NEXT: ld r0, 16(r1) ; PWR7-NEXT: mtlr r0 ; PWR7-NEXT: blr @@ -1373,12 +1412,19 @@ ; AIX64-PWR8: # %bb.0: # %entry ; AIX64-PWR8-NEXT: mflr r0 ; AIX64-PWR8-NEXT: std r0, 16(r1) -; AIX64-PWR8-NEXT: stdu r1, -112(r1) -; AIX64-PWR8-NEXT: lwsync -; AIX64-PWR8-NEXT: bl .__sync_val_compare_and_swap_16[PR] +; AIX64-PWR8-NEXT: stdu r1, -128(r1) +; AIX64-PWR8-NEXT: std r5, 120(r1) +; AIX64-PWR8-NEXT: std r4, 112(r1) +; AIX64-PWR8-NEXT: addi r4, r1, 112 +; AIX64-PWR8-NEXT: mr r5, r6 +; AIX64-PWR8-NEXT: mr r6, r7 +; AIX64-PWR8-NEXT: li r7, 4 +; AIX64-PWR8-NEXT: li r8, 2 +; AIX64-PWR8-NEXT: bl .__atomic_compare_exchange_16[PR] ; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: lwsync -; AIX64-PWR8-NEXT: addi r1, r1, 112 +; AIX64-PWR8-NEXT: ld r4, 120(r1) +; AIX64-PWR8-NEXT: ld r3, 112(r1) +; AIX64-PWR8-NEXT: addi r1, r1, 128 ; AIX64-PWR8-NEXT: ld r0, 16(r1) ; AIX64-PWR8-NEXT: mtlr r0 ; AIX64-PWR8-NEXT: blr @@ -1454,27 +1500,19 @@ ; PWR7: # %bb.0: # %entry ; PWR7-NEXT: mflr r0 ; PWR7-NEXT: std r0, 16(r1) -; PWR7-NEXT: stdu r1, -144(r1) -; PWR7-NEXT: .cfi_def_cfa_offset 144 +; PWR7-NEXT: stdu r1, -128(r1) +; PWR7-NEXT: .cfi_def_cfa_offset 128 ; PWR7-NEXT: .cfi_offset lr, 16 -; PWR7-NEXT: .cfi_offset r29, -24 -; PWR7-NEXT: .cfi_offset r30, -16 -; PWR7-NEXT: std r29, 120(r1) # 8-byte Folded Spill -; PWR7-NEXT: std r30, 128(r1) # 8-byte Folded Spill -; PWR7-NEXT: mr r30, r5 -; PWR7-NEXT: mr r29, r4 -; PWR7-NEXT: lwsync -; PWR7-NEXT: bl __sync_val_compare_and_swap_16 +; PWR7-NEXT: std r5, 120(r1) +; PWR7-NEXT: std r4, 112(r1) +; PWR7-NEXT: addi r4, r1, 112 +; PWR7-NEXT: mr r5, r6 +; PWR7-NEXT: mr r6, r7 +; PWR7-NEXT: li r7, 4 +; PWR7-NEXT: li r8, 2 +; PWR7-NEXT: bl __atomic_compare_exchange_16 ; PWR7-NEXT: nop -; PWR7-NEXT: xor r3, r3, r29 -; PWR7-NEXT: xor r4, r4, r30 -; PWR7-NEXT: lwsync -; PWR7-NEXT: or r3, r4, r3 -; PWR7-NEXT: ld r30, 128(r1) # 8-byte Folded Reload -; PWR7-NEXT: ld r29, 120(r1) # 8-byte Folded Reload -; PWR7-NEXT: cntlzd r3, r3 -; PWR7-NEXT: rldicl r3, r3, 58, 63 -; PWR7-NEXT: addi r1, r1, 144 +; PWR7-NEXT: addi r1, r1, 128 ; PWR7-NEXT: ld r0, 16(r1) ; PWR7-NEXT: mtlr r0 ; PWR7-NEXT: blr @@ -1512,21 +1550,15 @@ ; AIX64-PWR8-NEXT: mflr r0 ; AIX64-PWR8-NEXT: std r0, 16(r1) ; AIX64-PWR8-NEXT: stdu r1, -128(r1) -; AIX64-PWR8-NEXT: std r30, 112(r1) # 8-byte Folded Spill -; AIX64-PWR8-NEXT: std r31, 120(r1) # 8-byte Folded Spill -; AIX64-PWR8-NEXT: mr r31, r5 -; AIX64-PWR8-NEXT: mr r30, r4 -; AIX64-PWR8-NEXT: lwsync -; AIX64-PWR8-NEXT: bl .__sync_val_compare_and_swap_16[PR] +; AIX64-PWR8-NEXT: std r5, 120(r1) +; AIX64-PWR8-NEXT: std r4, 112(r1) +; AIX64-PWR8-NEXT: addi r4, r1, 112 +; AIX64-PWR8-NEXT: mr r5, r6 +; AIX64-PWR8-NEXT: mr r6, r7 +; AIX64-PWR8-NEXT: li r7, 4 +; AIX64-PWR8-NEXT: li r8, 2 +; AIX64-PWR8-NEXT: bl .__atomic_compare_exchange_16[PR] ; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: xor r3, r3, r30 -; AIX64-PWR8-NEXT: xor r4, r4, r31 -; AIX64-PWR8-NEXT: lwsync -; AIX64-PWR8-NEXT: or r3, r4, r3 -; AIX64-PWR8-NEXT: ld r31, 120(r1) # 8-byte Folded Reload -; AIX64-PWR8-NEXT: ld r30, 112(r1) # 8-byte Folded Reload -; AIX64-PWR8-NEXT: cntlzd r3, r3 -; AIX64-PWR8-NEXT: rldicl r3, r3, 58, 63 ; AIX64-PWR8-NEXT: addi r1, r1, 128 ; AIX64-PWR8-NEXT: ld r0, 16(r1) ; AIX64-PWR8-NEXT: mtlr r0 diff --git a/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll b/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll --- a/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll +++ b/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll @@ -34,10 +34,22 @@ ; ; PWR7-LABEL: @test_cmpxchg_seq_cst( ; PWR7-NEXT: entry: -; PWR7-NEXT: call void @llvm.ppc.sync() -; PWR7-NEXT: [[PAIR:%.*]] = cmpxchg weak i128* [[ADDR:%.*]], i128 [[DESIRE:%.*]], i128 [[NEW:%.*]] monotonic monotonic, align 16 -; PWR7-NEXT: call void @llvm.ppc.lwsync() -; PWR7-NEXT: [[SUCC:%.*]] = extractvalue { i128, i1 } [[PAIR]], 1 +; PWR7-NEXT: [[TMP0:%.*]] = bitcast i128* [[ADDR:%.*]] to i8* +; PWR7-NEXT: [[TMP1:%.*]] = alloca i128, align 8 +; PWR7-NEXT: [[TMP2:%.*]] = bitcast i128* [[TMP1]] to i8* +; PWR7-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP2]]) +; PWR7-NEXT: store i128 [[DESIRE:%.*]], i128* [[TMP1]], align 8 +; PWR7-NEXT: [[TMP3:%.*]] = alloca i128, align 8 +; PWR7-NEXT: [[TMP4:%.*]] = bitcast i128* [[TMP3]] to i8* +; PWR7-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP4]]) +; PWR7-NEXT: store i128 [[NEW:%.*]], i128* [[TMP3]], align 8 +; PWR7-NEXT: [[TMP5:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 16, i8* [[TMP0]], i8* [[TMP2]], i8* [[TMP4]], i32 5, i32 5) +; PWR7-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP4]]) +; PWR7-NEXT: [[TMP6:%.*]] = load i128, i128* [[TMP1]], align 8 +; PWR7-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP2]]) +; PWR7-NEXT: [[TMP7:%.*]] = insertvalue { i128, i1 } undef, i128 [[TMP6]], 0 +; PWR7-NEXT: [[TMP8:%.*]] = insertvalue { i128, i1 } [[TMP7]], i1 [[TMP5]], 1 +; PWR7-NEXT: [[SUCC:%.*]] = extractvalue { i128, i1 } [[TMP8]], 1 ; PWR7-NEXT: ret i1 [[SUCC]] ; entry: