diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -199,6 +199,11 @@ only supported on AIX. * On AIX, teach the profile runtime to check for a build-id string; such string can be created by the -mxcoff-build-id option. +* Removed ``-ppc-quadword-atomics``, which only affected lock-free quadword + atomics on AIX. The backend now generates lock-free quadword atomics code on + AIX by default. To support lock-free quadword atomics in libatomic, the OS + level must be at least AIX 7.2 TL5 SP3 with libc++.rte version 17.1.1 or + above installed. Changes to the RISC-V Backend ----------------------------- diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -122,11 +122,6 @@ static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden); -static cl::opt<bool> EnableQuadwordAtomics( - "ppc-quadword-atomics", - cl::desc("enable quadword lock-free atomic operations"), cl::init(false), - cl::Hidden); - static cl::opt<bool> DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), @@ -18441,11 +18436,9 @@ } bool PPCTargetLowering::shouldInlineQuadwordAtomics() const { - // TODO: 16-byte atomic type support for AIX is in progress; we should be able - // to inline 16-byte atomic ops on AIX too in the future. - return Subtarget.isPPC64() && - (EnableQuadwordAtomics || !Subtarget.getTargetTriple().isOSAIX()) && - Subtarget.hasQuadwordAtomics(); + // Quadword atomics used to be opt-in on AIX during the libatomic transition. + // They are now inlined wherever the 64-bit subtarget supports them. 
+ return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics(); } TargetLowering::AtomicExpansionKind diff --git a/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll b/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll --- a/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll +++ b/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \ ; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-track-subreg-liveness \ -; RUN: -ppc-quadword-atomics < %s | FileCheck --check-prefix=P8 %s +; RUN: < %s | FileCheck --check-prefix=P8 %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown -mcpu=pwr7 \ -; RUN: -ppc-asm-full-reg-names -ppc-quadword-atomics \ +; RUN: -ppc-asm-full-reg-names \ ; RUN: -ppc-track-subreg-liveness < %s | FileCheck --check-prefix=PWR7 %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 \ ; RUN: -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s | FileCheck \ @@ -15,7 +15,7 @@ ; RUN: -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s | FileCheck \ ; RUN: --check-prefix=AIX64-PWR8 %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-unknown -mcpu=pwr8 \ -; RUN: -ppc-quadword-atomics -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s \ +; RUN: -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s \ ; RUN: | FileCheck --check-prefix=PPC-PWR8 %s define dso_local i128 @lq_unordered(ptr %src) { @@ -49,15 +49,9 @@ ; ; AIX64-PWR8-LABEL: lq_unordered: ; AIX64-PWR8: # %bb.0: # %entry -; AIX64-PWR8-NEXT: mflr r0 -; AIX64-PWR8-NEXT: stdu r1, -112(r1) -; AIX64-PWR8-NEXT: li r4, 0 -; AIX64-PWR8-NEXT: std r0, 128(r1) -; AIX64-PWR8-NEXT: bl .__atomic_load_16[PR] -; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: addi r1, r1, 112 -; AIX64-PWR8-NEXT: ld r0, 16(r1) -; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: lq r4, 0(r3) +; AIX64-PWR8-NEXT: mr r3, r4 +; AIX64-PWR8-NEXT: 
mr r4, r5 ; AIX64-PWR8-NEXT: blr ; ; PPC-PWR8-LABEL: lq_unordered: @@ -124,17 +118,11 @@ ; ; AIX64-PWR8-LABEL: lqx_unordered: ; AIX64-PWR8: # %bb.0: # %entry -; AIX64-PWR8-NEXT: mflr r0 -; AIX64-PWR8-NEXT: stdu r1, -112(r1) ; AIX64-PWR8-NEXT: sldi r4, r4, 4 -; AIX64-PWR8-NEXT: std r0, 128(r1) ; AIX64-PWR8-NEXT: add r3, r3, r4 -; AIX64-PWR8-NEXT: li r4, 0 -; AIX64-PWR8-NEXT: bl .__atomic_load_16[PR] -; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: addi r1, r1, 112 -; AIX64-PWR8-NEXT: ld r0, 16(r1) -; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: lq r4, 0(r3) +; AIX64-PWR8-NEXT: mr r3, r4 +; AIX64-PWR8-NEXT: mr r4, r5 ; AIX64-PWR8-NEXT: blr ; ; PPC-PWR8-LABEL: lqx_unordered: @@ -202,16 +190,11 @@ ; ; AIX64-PWR8-LABEL: lq_big_offset_unordered: ; AIX64-PWR8: # %bb.0: # %entry -; AIX64-PWR8-NEXT: mflr r0 -; AIX64-PWR8-NEXT: stdu r1, -112(r1) -; AIX64-PWR8-NEXT: addis r3, r3, 32 -; AIX64-PWR8-NEXT: li r4, 0 -; AIX64-PWR8-NEXT: std r0, 128(r1) -; AIX64-PWR8-NEXT: bl .__atomic_load_16[PR] -; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: addi r1, r1, 112 -; AIX64-PWR8-NEXT: ld r0, 16(r1) -; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: lis r4, 32 +; AIX64-PWR8-NEXT: add r3, r3, r4 +; AIX64-PWR8-NEXT: lq r4, 0(r3) +; AIX64-PWR8-NEXT: mr r3, r4 +; AIX64-PWR8-NEXT: mr r4, r5 ; AIX64-PWR8-NEXT: blr ; ; PPC-PWR8-LABEL: lq_big_offset_unordered: @@ -271,15 +254,9 @@ ; ; AIX64-PWR8-LABEL: lq_monotonic: ; AIX64-PWR8: # %bb.0: # %entry -; AIX64-PWR8-NEXT: mflr r0 -; AIX64-PWR8-NEXT: stdu r1, -112(r1) -; AIX64-PWR8-NEXT: li r4, 0 -; AIX64-PWR8-NEXT: std r0, 128(r1) -; AIX64-PWR8-NEXT: bl .__atomic_load_16[PR] -; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: addi r1, r1, 112 -; AIX64-PWR8-NEXT: ld r0, 16(r1) -; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: lq r4, 0(r3) +; AIX64-PWR8-NEXT: mr r3, r4 +; AIX64-PWR8-NEXT: mr r4, r5 ; AIX64-PWR8-NEXT: blr ; ; PPC-PWR8-LABEL: lq_monotonic: @@ -344,15 +321,12 @@ ; ; AIX64-PWR8-LABEL: lq_acquire: ; AIX64-PWR8: # %bb.0: # %entry -; AIX64-PWR8-NEXT: mflr r0 -; 
AIX64-PWR8-NEXT: stdu r1, -112(r1) -; AIX64-PWR8-NEXT: li r4, 2 -; AIX64-PWR8-NEXT: std r0, 128(r1) -; AIX64-PWR8-NEXT: bl .__atomic_load_16[PR] -; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: addi r1, r1, 112 -; AIX64-PWR8-NEXT: ld r0, 16(r1) -; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: lq r4, 0(r3) +; AIX64-PWR8-NEXT: cmpd cr7, r5, r5 +; AIX64-PWR8-NEXT: mr r3, r4 +; AIX64-PWR8-NEXT: mr r4, r5 +; AIX64-PWR8-NEXT: bne- cr7, $+4 +; AIX64-PWR8-NEXT: isync ; AIX64-PWR8-NEXT: blr ; ; PPC-PWR8-LABEL: lq_acquire: @@ -419,15 +393,13 @@ ; ; AIX64-PWR8-LABEL: lq_seqcst: ; AIX64-PWR8: # %bb.0: # %entry -; AIX64-PWR8-NEXT: mflr r0 -; AIX64-PWR8-NEXT: stdu r1, -112(r1) -; AIX64-PWR8-NEXT: li r4, 5 -; AIX64-PWR8-NEXT: std r0, 128(r1) -; AIX64-PWR8-NEXT: bl .__atomic_load_16[PR] -; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: addi r1, r1, 112 -; AIX64-PWR8-NEXT: ld r0, 16(r1) -; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: sync +; AIX64-PWR8-NEXT: lq r4, 0(r3) +; AIX64-PWR8-NEXT: cmpd cr7, r5, r5 +; AIX64-PWR8-NEXT: mr r3, r4 +; AIX64-PWR8-NEXT: mr r4, r5 +; AIX64-PWR8-NEXT: bne- cr7, $+4 +; AIX64-PWR8-NEXT: isync ; AIX64-PWR8-NEXT: blr ; ; PPC-PWR8-LABEL: lq_seqcst: @@ -491,19 +463,9 @@ ; ; AIX64-PWR8-LABEL: stq_unordered: ; AIX64-PWR8: # %bb.0: # %entry -; AIX64-PWR8-NEXT: mflr r0 -; AIX64-PWR8-NEXT: stdu r1, -112(r1) -; AIX64-PWR8-NEXT: mr r6, r4 -; AIX64-PWR8-NEXT: mr r4, r3 -; AIX64-PWR8-NEXT: mr r3, r5 -; AIX64-PWR8-NEXT: std r0, 128(r1) -; AIX64-PWR8-NEXT: mr r5, r6 -; AIX64-PWR8-NEXT: li r6, 0 -; AIX64-PWR8-NEXT: bl .__atomic_store_16[PR] -; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: addi r1, r1, 112 -; AIX64-PWR8-NEXT: ld r0, 16(r1) -; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: mr r7, r4 +; AIX64-PWR8-NEXT: mr r6, r3 +; AIX64-PWR8-NEXT: stq r6, 0(r5) ; AIX64-PWR8-NEXT: blr ; ; PPC-PWR8-LABEL: stq_unordered: @@ -572,20 +534,11 @@ ; ; AIX64-PWR8-LABEL: stqx_unordered: ; AIX64-PWR8: # %bb.0: # %entry -; AIX64-PWR8-NEXT: mflr r0 -; AIX64-PWR8-NEXT: stdu r1, -112(r1) -; 
AIX64-PWR8-NEXT: mr r7, r4 -; AIX64-PWR8-NEXT: mr r4, r3 -; AIX64-PWR8-NEXT: std r0, 128(r1) -; AIX64-PWR8-NEXT: sldi r3, r6, 4 -; AIX64-PWR8-NEXT: li r6, 0 -; AIX64-PWR8-NEXT: add r3, r5, r3 -; AIX64-PWR8-NEXT: mr r5, r7 -; AIX64-PWR8-NEXT: bl .__atomic_store_16[PR] -; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: addi r1, r1, 112 -; AIX64-PWR8-NEXT: ld r0, 16(r1) -; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: sldi r6, r6, 4 +; AIX64-PWR8-NEXT: mr r9, r4 +; AIX64-PWR8-NEXT: mr r8, r3 +; AIX64-PWR8-NEXT: add r3, r5, r6 +; AIX64-PWR8-NEXT: stq r8, 0(r3) ; AIX64-PWR8-NEXT: blr ; ; PPC-PWR8-LABEL: stqx_unordered: @@ -656,19 +609,11 @@ ; ; AIX64-PWR8-LABEL: stq_big_offset_unordered: ; AIX64-PWR8: # %bb.0: # %entry -; AIX64-PWR8-NEXT: mflr r0 -; AIX64-PWR8-NEXT: stdu r1, -112(r1) -; AIX64-PWR8-NEXT: mr r6, r4 -; AIX64-PWR8-NEXT: mr r4, r3 -; AIX64-PWR8-NEXT: addis r3, r5, 32 -; AIX64-PWR8-NEXT: std r0, 128(r1) -; AIX64-PWR8-NEXT: mr r5, r6 -; AIX64-PWR8-NEXT: li r6, 0 -; AIX64-PWR8-NEXT: bl .__atomic_store_16[PR] -; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: addi r1, r1, 112 -; AIX64-PWR8-NEXT: ld r0, 16(r1) -; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: lis r6, 32 +; AIX64-PWR8-NEXT: mr r9, r4 +; AIX64-PWR8-NEXT: mr r8, r3 +; AIX64-PWR8-NEXT: add r3, r5, r6 +; AIX64-PWR8-NEXT: stq r8, 0(r3) ; AIX64-PWR8-NEXT: blr ; ; PPC-PWR8-LABEL: stq_big_offset_unordered: @@ -734,19 +679,9 @@ ; ; AIX64-PWR8-LABEL: stq_monotonic: ; AIX64-PWR8: # %bb.0: # %entry -; AIX64-PWR8-NEXT: mflr r0 -; AIX64-PWR8-NEXT: stdu r1, -112(r1) -; AIX64-PWR8-NEXT: mr r6, r4 -; AIX64-PWR8-NEXT: mr r4, r3 -; AIX64-PWR8-NEXT: mr r3, r5 -; AIX64-PWR8-NEXT: std r0, 128(r1) -; AIX64-PWR8-NEXT: mr r5, r6 -; AIX64-PWR8-NEXT: li r6, 0 -; AIX64-PWR8-NEXT: bl .__atomic_store_16[PR] -; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: addi r1, r1, 112 -; AIX64-PWR8-NEXT: ld r0, 16(r1) -; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: mr r7, r4 +; AIX64-PWR8-NEXT: mr r6, r3 +; AIX64-PWR8-NEXT: stq r6, 0(r5) ; AIX64-PWR8-NEXT: blr 
; ; PPC-PWR8-LABEL: stq_monotonic: @@ -812,19 +747,10 @@ ; ; AIX64-PWR8-LABEL: stq_release: ; AIX64-PWR8: # %bb.0: # %entry -; AIX64-PWR8-NEXT: mflr r0 -; AIX64-PWR8-NEXT: stdu r1, -112(r1) -; AIX64-PWR8-NEXT: mr r6, r4 -; AIX64-PWR8-NEXT: mr r4, r3 -; AIX64-PWR8-NEXT: mr r3, r5 -; AIX64-PWR8-NEXT: std r0, 128(r1) -; AIX64-PWR8-NEXT: mr r5, r6 -; AIX64-PWR8-NEXT: li r6, 3 -; AIX64-PWR8-NEXT: bl .__atomic_store_16[PR] -; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: addi r1, r1, 112 -; AIX64-PWR8-NEXT: ld r0, 16(r1) -; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: lwsync +; AIX64-PWR8-NEXT: mr r7, r4 +; AIX64-PWR8-NEXT: mr r6, r3 +; AIX64-PWR8-NEXT: stq r6, 0(r5) ; AIX64-PWR8-NEXT: blr ; ; PPC-PWR8-LABEL: stq_release: @@ -890,19 +816,10 @@ ; ; AIX64-PWR8-LABEL: stq_seqcst: ; AIX64-PWR8: # %bb.0: # %entry -; AIX64-PWR8-NEXT: mflr r0 -; AIX64-PWR8-NEXT: stdu r1, -112(r1) -; AIX64-PWR8-NEXT: mr r6, r4 -; AIX64-PWR8-NEXT: mr r4, r3 -; AIX64-PWR8-NEXT: mr r3, r5 -; AIX64-PWR8-NEXT: std r0, 128(r1) -; AIX64-PWR8-NEXT: mr r5, r6 -; AIX64-PWR8-NEXT: li r6, 5 -; AIX64-PWR8-NEXT: bl .__atomic_store_16[PR] -; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: addi r1, r1, 112 -; AIX64-PWR8-NEXT: ld r0, 16(r1) -; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: sync +; AIX64-PWR8-NEXT: mr r7, r4 +; AIX64-PWR8-NEXT: mr r6, r3 +; AIX64-PWR8-NEXT: stq r6, 0(r5) ; AIX64-PWR8-NEXT: blr ; ; PPC-PWR8-LABEL: stq_seqcst: diff --git a/llvm/test/CodeGen/PowerPC/atomics-i128.ll b/llvm/test/CodeGen/PowerPC/atomics-i128.ll --- a/llvm/test/CodeGen/PowerPC/atomics-i128.ll +++ b/llvm/test/CodeGen/PowerPC/atomics-i128.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown -mcpu=pwr8 \ -; RUN: -ppc-asm-full-reg-names -ppc-quadword-atomics \ +; RUN: -ppc-asm-full-reg-names \ ; RUN: -ppc-track-subreg-liveness < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown -mcpu=pwr7 \ 
-; RUN: -ppc-asm-full-reg-names -ppc-quadword-atomics \ +; RUN: -ppc-asm-full-reg-names \ ; RUN: -ppc-track-subreg-liveness < %s | FileCheck --check-prefix=PWR7 %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 \ ; RUN: -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s | FileCheck \ @@ -19,7 +19,7 @@ -; it's expected not to generate inlined lock-free code on such platforms, even arch level -; is pwr8+ and `-ppc-quadword-atomics` is on. +; it's expected not to generate inlined lock-free code on such platforms, even if the +; arch level is pwr8+. ; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-unknown -mcpu=pwr8 \ -; RUN: -ppc-quadword-atomics -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s \ +; RUN: -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s \ ; RUN: | FileCheck --check-prefix=PPC-PWR8 %s @@ -73,15 +73,18 @@ ; ; AIX64-PWR8-LABEL: swap: ; AIX64-PWR8: # %bb.0: # %entry -; AIX64-PWR8-NEXT: mflr r0 -; AIX64-PWR8-NEXT: stdu r1, -112(r1) -; AIX64-PWR8-NEXT: li r6, 5 -; AIX64-PWR8-NEXT: std r0, 128(r1) -; AIX64-PWR8-NEXT: bl .__atomic_exchange_16[PR] -; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: addi r1, r1, 112 -; AIX64-PWR8-NEXT: ld r0, 16(r1) -; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: sync +; AIX64-PWR8-NEXT: L..BB0_1: # %entry +; AIX64-PWR8-NEXT: # +; AIX64-PWR8-NEXT: lqarx r6, 0, r3 +; AIX64-PWR8-NEXT: mr r9, r5 +; AIX64-PWR8-NEXT: mr r8, r4 +; AIX64-PWR8-NEXT: stqcx. 
r8, 0, r3 +; AIX64-PWR8-NEXT: bne cr0, L..BB0_1 +; AIX64-PWR8-NEXT: # %bb.2: # %entry +; AIX64-PWR8-NEXT: lwsync +; AIX64-PWR8-NEXT: mr r3, r6 +; AIX64-PWR8-NEXT: mr r4, r7 ; AIX64-PWR8-NEXT: blr ; ; PPC-PWR8-LABEL: swap: @@ -164,15 +167,18 @@ ; ; AIX64-PWR8-LABEL: add: ; AIX64-PWR8: # %bb.0: # %entry -; AIX64-PWR8-NEXT: mflr r0 -; AIX64-PWR8-NEXT: stdu r1, -112(r1) -; AIX64-PWR8-NEXT: li r6, 5 -; AIX64-PWR8-NEXT: std r0, 128(r1) -; AIX64-PWR8-NEXT: bl .__atomic_fetch_add_16[PR] -; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: addi r1, r1, 112 -; AIX64-PWR8-NEXT: ld r0, 16(r1) -; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: sync +; AIX64-PWR8-NEXT: L..BB1_1: # %entry +; AIX64-PWR8-NEXT: # +; AIX64-PWR8-NEXT: lqarx r6, 0, r3 +; AIX64-PWR8-NEXT: addc r9, r5, r7 +; AIX64-PWR8-NEXT: adde r8, r4, r6 +; AIX64-PWR8-NEXT: stqcx. r8, 0, r3 +; AIX64-PWR8-NEXT: bne cr0, L..BB1_1 +; AIX64-PWR8-NEXT: # %bb.2: # %entry +; AIX64-PWR8-NEXT: lwsync +; AIX64-PWR8-NEXT: mr r3, r6 +; AIX64-PWR8-NEXT: mr r4, r7 ; AIX64-PWR8-NEXT: blr ; ; PPC-PWR8-LABEL: add: @@ -302,15 +308,18 @@ ; ; AIX64-PWR8-LABEL: sub: ; AIX64-PWR8: # %bb.0: # %entry -; AIX64-PWR8-NEXT: mflr r0 -; AIX64-PWR8-NEXT: stdu r1, -112(r1) -; AIX64-PWR8-NEXT: li r6, 5 -; AIX64-PWR8-NEXT: std r0, 128(r1) -; AIX64-PWR8-NEXT: bl .__atomic_fetch_sub_16[PR] -; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: addi r1, r1, 112 -; AIX64-PWR8-NEXT: ld r0, 16(r1) -; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: sync +; AIX64-PWR8-NEXT: L..BB2_1: # %entry +; AIX64-PWR8-NEXT: # +; AIX64-PWR8-NEXT: lqarx r6, 0, r3 +; AIX64-PWR8-NEXT: subc r9, r7, r5 +; AIX64-PWR8-NEXT: subfe r8, r4, r6 +; AIX64-PWR8-NEXT: stqcx. 
r8, 0, r3 +; AIX64-PWR8-NEXT: bne cr0, L..BB2_1 +; AIX64-PWR8-NEXT: # %bb.2: # %entry +; AIX64-PWR8-NEXT: lwsync +; AIX64-PWR8-NEXT: mr r3, r6 +; AIX64-PWR8-NEXT: mr r4, r7 ; AIX64-PWR8-NEXT: blr ; ; PPC-PWR8-LABEL: sub: @@ -440,15 +449,18 @@ ; ; AIX64-PWR8-LABEL: and: ; AIX64-PWR8: # %bb.0: # %entry -; AIX64-PWR8-NEXT: mflr r0 -; AIX64-PWR8-NEXT: stdu r1, -112(r1) -; AIX64-PWR8-NEXT: li r6, 5 -; AIX64-PWR8-NEXT: std r0, 128(r1) -; AIX64-PWR8-NEXT: bl .__atomic_fetch_and_16[PR] -; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: addi r1, r1, 112 -; AIX64-PWR8-NEXT: ld r0, 16(r1) -; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: sync +; AIX64-PWR8-NEXT: L..BB3_1: # %entry +; AIX64-PWR8-NEXT: # +; AIX64-PWR8-NEXT: lqarx r6, 0, r3 +; AIX64-PWR8-NEXT: and r9, r5, r7 +; AIX64-PWR8-NEXT: and r8, r4, r6 +; AIX64-PWR8-NEXT: stqcx. r8, 0, r3 +; AIX64-PWR8-NEXT: bne cr0, L..BB3_1 +; AIX64-PWR8-NEXT: # %bb.2: # %entry +; AIX64-PWR8-NEXT: lwsync +; AIX64-PWR8-NEXT: mr r3, r6 +; AIX64-PWR8-NEXT: mr r4, r7 ; AIX64-PWR8-NEXT: blr ; ; PPC-PWR8-LABEL: and: @@ -578,15 +590,18 @@ ; ; AIX64-PWR8-LABEL: or: ; AIX64-PWR8: # %bb.0: # %entry -; AIX64-PWR8-NEXT: mflr r0 -; AIX64-PWR8-NEXT: stdu r1, -112(r1) -; AIX64-PWR8-NEXT: li r6, 5 -; AIX64-PWR8-NEXT: std r0, 128(r1) -; AIX64-PWR8-NEXT: bl .__atomic_fetch_or_16[PR] -; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: addi r1, r1, 112 -; AIX64-PWR8-NEXT: ld r0, 16(r1) -; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: sync +; AIX64-PWR8-NEXT: L..BB4_1: # %entry +; AIX64-PWR8-NEXT: # +; AIX64-PWR8-NEXT: lqarx r6, 0, r3 +; AIX64-PWR8-NEXT: or r9, r5, r7 +; AIX64-PWR8-NEXT: or r8, r4, r6 +; AIX64-PWR8-NEXT: stqcx. 
r8, 0, r3 +; AIX64-PWR8-NEXT: bne cr0, L..BB4_1 +; AIX64-PWR8-NEXT: # %bb.2: # %entry +; AIX64-PWR8-NEXT: lwsync +; AIX64-PWR8-NEXT: mr r3, r6 +; AIX64-PWR8-NEXT: mr r4, r7 ; AIX64-PWR8-NEXT: blr ; ; PPC-PWR8-LABEL: or: @@ -716,15 +731,18 @@ ; ; AIX64-PWR8-LABEL: xor: ; AIX64-PWR8: # %bb.0: # %entry -; AIX64-PWR8-NEXT: mflr r0 -; AIX64-PWR8-NEXT: stdu r1, -112(r1) -; AIX64-PWR8-NEXT: li r6, 5 -; AIX64-PWR8-NEXT: std r0, 128(r1) -; AIX64-PWR8-NEXT: bl .__atomic_fetch_xor_16[PR] -; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: addi r1, r1, 112 -; AIX64-PWR8-NEXT: ld r0, 16(r1) -; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: sync +; AIX64-PWR8-NEXT: L..BB5_1: # %entry +; AIX64-PWR8-NEXT: # +; AIX64-PWR8-NEXT: lqarx r6, 0, r3 +; AIX64-PWR8-NEXT: xor r9, r5, r7 +; AIX64-PWR8-NEXT: xor r8, r4, r6 +; AIX64-PWR8-NEXT: stqcx. r8, 0, r3 +; AIX64-PWR8-NEXT: bne cr0, L..BB5_1 +; AIX64-PWR8-NEXT: # %bb.2: # %entry +; AIX64-PWR8-NEXT: lwsync +; AIX64-PWR8-NEXT: mr r3, r6 +; AIX64-PWR8-NEXT: mr r4, r7 ; AIX64-PWR8-NEXT: blr ; ; PPC-PWR8-LABEL: xor: @@ -854,15 +872,18 @@ ; ; AIX64-PWR8-LABEL: nand: ; AIX64-PWR8: # %bb.0: # %entry -; AIX64-PWR8-NEXT: mflr r0 -; AIX64-PWR8-NEXT: stdu r1, -112(r1) -; AIX64-PWR8-NEXT: li r6, 5 -; AIX64-PWR8-NEXT: std r0, 128(r1) -; AIX64-PWR8-NEXT: bl .__atomic_fetch_nand_16[PR] -; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: addi r1, r1, 112 -; AIX64-PWR8-NEXT: ld r0, 16(r1) -; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: sync +; AIX64-PWR8-NEXT: L..BB6_1: # %entry +; AIX64-PWR8-NEXT: # +; AIX64-PWR8-NEXT: lqarx r6, 0, r3 +; AIX64-PWR8-NEXT: nand r9, r5, r7 +; AIX64-PWR8-NEXT: nand r8, r4, r6 +; AIX64-PWR8-NEXT: stqcx. 
r8, 0, r3 +; AIX64-PWR8-NEXT: bne cr0, L..BB6_1 +; AIX64-PWR8-NEXT: # %bb.2: # %entry +; AIX64-PWR8-NEXT: lwsync +; AIX64-PWR8-NEXT: mr r3, r6 +; AIX64-PWR8-NEXT: mr r4, r7 ; AIX64-PWR8-NEXT: blr ; ; PPC-PWR8-LABEL: nand: @@ -1017,23 +1038,26 @@ ; ; AIX64-PWR8-LABEL: cas_weak_acquire_acquire: ; AIX64-PWR8: # %bb.0: # %entry -; AIX64-PWR8-NEXT: mflr r0 -; AIX64-PWR8-NEXT: stdu r1, -128(r1) -; AIX64-PWR8-NEXT: std r0, 144(r1) -; AIX64-PWR8-NEXT: std r5, 120(r1) -; AIX64-PWR8-NEXT: std r4, 112(r1) -; AIX64-PWR8-NEXT: addi r4, r1, 112 -; AIX64-PWR8-NEXT: mr r5, r6 -; AIX64-PWR8-NEXT: mr r6, r7 -; AIX64-PWR8-NEXT: li r7, 2 -; AIX64-PWR8-NEXT: li r8, 2 -; AIX64-PWR8-NEXT: bl .__atomic_compare_exchange_16[PR] -; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: ld r4, 120(r1) -; AIX64-PWR8-NEXT: ld r3, 112(r1) -; AIX64-PWR8-NEXT: addi r1, r1, 128 -; AIX64-PWR8-NEXT: ld r0, 16(r1) -; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: L..BB7_1: # %entry +; AIX64-PWR8-NEXT: # +; AIX64-PWR8-NEXT: lqarx r8, 0, r3 +; AIX64-PWR8-NEXT: xor r11, r9, r5 +; AIX64-PWR8-NEXT: xor r10, r8, r4 +; AIX64-PWR8-NEXT: or. r11, r11, r10 +; AIX64-PWR8-NEXT: bne cr0, L..BB7_3 +; AIX64-PWR8-NEXT: # %bb.2: # %entry +; AIX64-PWR8-NEXT: # +; AIX64-PWR8-NEXT: mr r11, r7 +; AIX64-PWR8-NEXT: mr r10, r6 +; AIX64-PWR8-NEXT: stqcx. r10, 0, r3 +; AIX64-PWR8-NEXT: bne cr0, L..BB7_1 +; AIX64-PWR8-NEXT: b L..BB7_4 +; AIX64-PWR8-NEXT: L..BB7_3: # %entry +; AIX64-PWR8-NEXT: stqcx. 
r8, 0, r3 +; AIX64-PWR8-NEXT: L..BB7_4: # %entry +; AIX64-PWR8-NEXT: lwsync +; AIX64-PWR8-NEXT: mr r3, r8 +; AIX64-PWR8-NEXT: mr r4, r9 ; AIX64-PWR8-NEXT: blr ; ; PPC-PWR8-LABEL: cas_weak_acquire_acquire: @@ -1148,23 +1172,26 @@ ; ; AIX64-PWR8-LABEL: cas_weak_release_monotonic: ; AIX64-PWR8: # %bb.0: # %entry -; AIX64-PWR8-NEXT: mflr r0 -; AIX64-PWR8-NEXT: stdu r1, -128(r1) -; AIX64-PWR8-NEXT: std r0, 144(r1) -; AIX64-PWR8-NEXT: std r5, 120(r1) -; AIX64-PWR8-NEXT: std r4, 112(r1) -; AIX64-PWR8-NEXT: addi r4, r1, 112 -; AIX64-PWR8-NEXT: mr r5, r6 -; AIX64-PWR8-NEXT: mr r6, r7 -; AIX64-PWR8-NEXT: li r7, 3 -; AIX64-PWR8-NEXT: li r8, 0 -; AIX64-PWR8-NEXT: bl .__atomic_compare_exchange_16[PR] -; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: ld r4, 120(r1) -; AIX64-PWR8-NEXT: ld r3, 112(r1) -; AIX64-PWR8-NEXT: addi r1, r1, 128 -; AIX64-PWR8-NEXT: ld r0, 16(r1) -; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: lwsync +; AIX64-PWR8-NEXT: L..BB8_1: # %entry +; AIX64-PWR8-NEXT: # +; AIX64-PWR8-NEXT: lqarx r8, 0, r3 +; AIX64-PWR8-NEXT: xor r11, r9, r5 +; AIX64-PWR8-NEXT: xor r10, r8, r4 +; AIX64-PWR8-NEXT: or. r11, r11, r10 +; AIX64-PWR8-NEXT: bne cr0, L..BB8_3 +; AIX64-PWR8-NEXT: # %bb.2: # %entry +; AIX64-PWR8-NEXT: # +; AIX64-PWR8-NEXT: mr r11, r7 +; AIX64-PWR8-NEXT: mr r10, r6 +; AIX64-PWR8-NEXT: stqcx. r10, 0, r3 +; AIX64-PWR8-NEXT: bne cr0, L..BB8_1 +; AIX64-PWR8-NEXT: b L..BB8_4 +; AIX64-PWR8-NEXT: L..BB8_3: # %entry +; AIX64-PWR8-NEXT: stqcx. 
r8, 0, r3 +; AIX64-PWR8-NEXT: L..BB8_4: # %entry +; AIX64-PWR8-NEXT: mr r3, r8 +; AIX64-PWR8-NEXT: mr r4, r9 ; AIX64-PWR8-NEXT: blr ; ; PPC-PWR8-LABEL: cas_weak_release_monotonic: @@ -1281,23 +1308,27 @@ ; ; AIX64-PWR8-LABEL: cas_sc_sc: ; AIX64-PWR8: # %bb.0: # %entry -; AIX64-PWR8-NEXT: mflr r0 -; AIX64-PWR8-NEXT: stdu r1, -128(r1) -; AIX64-PWR8-NEXT: std r0, 144(r1) -; AIX64-PWR8-NEXT: std r5, 120(r1) -; AIX64-PWR8-NEXT: std r4, 112(r1) -; AIX64-PWR8-NEXT: addi r4, r1, 112 -; AIX64-PWR8-NEXT: mr r5, r6 -; AIX64-PWR8-NEXT: mr r6, r7 -; AIX64-PWR8-NEXT: li r7, 5 -; AIX64-PWR8-NEXT: li r8, 5 -; AIX64-PWR8-NEXT: bl .__atomic_compare_exchange_16[PR] -; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: ld r4, 120(r1) -; AIX64-PWR8-NEXT: ld r3, 112(r1) -; AIX64-PWR8-NEXT: addi r1, r1, 128 -; AIX64-PWR8-NEXT: ld r0, 16(r1) -; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: sync +; AIX64-PWR8-NEXT: L..BB9_1: # %entry +; AIX64-PWR8-NEXT: # +; AIX64-PWR8-NEXT: lqarx r8, 0, r3 +; AIX64-PWR8-NEXT: xor r11, r9, r5 +; AIX64-PWR8-NEXT: xor r10, r8, r4 +; AIX64-PWR8-NEXT: or. r11, r11, r10 +; AIX64-PWR8-NEXT: bne cr0, L..BB9_3 +; AIX64-PWR8-NEXT: # %bb.2: # %entry +; AIX64-PWR8-NEXT: # +; AIX64-PWR8-NEXT: mr r11, r7 +; AIX64-PWR8-NEXT: mr r10, r6 +; AIX64-PWR8-NEXT: stqcx. r10, 0, r3 +; AIX64-PWR8-NEXT: bne cr0, L..BB9_1 +; AIX64-PWR8-NEXT: b L..BB9_4 +; AIX64-PWR8-NEXT: L..BB9_3: # %entry +; AIX64-PWR8-NEXT: stqcx. 
r8, 0, r3 +; AIX64-PWR8-NEXT: L..BB9_4: # %entry +; AIX64-PWR8-NEXT: lwsync +; AIX64-PWR8-NEXT: mr r3, r8 +; AIX64-PWR8-NEXT: mr r4, r9 ; AIX64-PWR8-NEXT: blr ; ; PPC-PWR8-LABEL: cas_sc_sc: @@ -1414,23 +1445,27 @@ ; ; AIX64-PWR8-LABEL: cas_acqrel_acquire: ; AIX64-PWR8: # %bb.0: # %entry -; AIX64-PWR8-NEXT: mflr r0 -; AIX64-PWR8-NEXT: stdu r1, -128(r1) -; AIX64-PWR8-NEXT: std r0, 144(r1) -; AIX64-PWR8-NEXT: std r5, 120(r1) -; AIX64-PWR8-NEXT: std r4, 112(r1) -; AIX64-PWR8-NEXT: addi r4, r1, 112 -; AIX64-PWR8-NEXT: mr r5, r6 -; AIX64-PWR8-NEXT: mr r6, r7 -; AIX64-PWR8-NEXT: li r7, 4 -; AIX64-PWR8-NEXT: li r8, 2 -; AIX64-PWR8-NEXT: bl .__atomic_compare_exchange_16[PR] -; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: ld r4, 120(r1) -; AIX64-PWR8-NEXT: ld r3, 112(r1) -; AIX64-PWR8-NEXT: addi r1, r1, 128 -; AIX64-PWR8-NEXT: ld r0, 16(r1) -; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: lwsync +; AIX64-PWR8-NEXT: L..BB10_1: # %entry +; AIX64-PWR8-NEXT: # +; AIX64-PWR8-NEXT: lqarx r8, 0, r3 +; AIX64-PWR8-NEXT: xor r11, r9, r5 +; AIX64-PWR8-NEXT: xor r10, r8, r4 +; AIX64-PWR8-NEXT: or. r11, r11, r10 +; AIX64-PWR8-NEXT: bne cr0, L..BB10_3 +; AIX64-PWR8-NEXT: # %bb.2: # %entry +; AIX64-PWR8-NEXT: # +; AIX64-PWR8-NEXT: mr r11, r7 +; AIX64-PWR8-NEXT: mr r10, r6 +; AIX64-PWR8-NEXT: stqcx. r10, 0, r3 +; AIX64-PWR8-NEXT: bne cr0, L..BB10_1 +; AIX64-PWR8-NEXT: b L..BB10_4 +; AIX64-PWR8-NEXT: L..BB10_3: # %entry +; AIX64-PWR8-NEXT: stqcx. 
r8, 0, r3 +; AIX64-PWR8-NEXT: L..BB10_4: # %entry +; AIX64-PWR8-NEXT: lwsync +; AIX64-PWR8-NEXT: mr r3, r8 +; AIX64-PWR8-NEXT: mr r4, r9 ; AIX64-PWR8-NEXT: blr ; ; PPC-PWR8-LABEL: cas_acqrel_acquire: @@ -1551,21 +1586,30 @@ ; ; AIX64-PWR8-LABEL: cas_acqrel_acquire_check_succ: ; AIX64-PWR8: # %bb.0: # %entry -; AIX64-PWR8-NEXT: mflr r0 -; AIX64-PWR8-NEXT: stdu r1, -128(r1) -; AIX64-PWR8-NEXT: std r0, 144(r1) -; AIX64-PWR8-NEXT: std r5, 120(r1) -; AIX64-PWR8-NEXT: std r4, 112(r1) -; AIX64-PWR8-NEXT: addi r4, r1, 112 -; AIX64-PWR8-NEXT: mr r5, r6 -; AIX64-PWR8-NEXT: mr r6, r7 -; AIX64-PWR8-NEXT: li r7, 4 -; AIX64-PWR8-NEXT: li r8, 2 -; AIX64-PWR8-NEXT: bl .__atomic_compare_exchange_16[PR] -; AIX64-PWR8-NEXT: nop -; AIX64-PWR8-NEXT: addi r1, r1, 128 -; AIX64-PWR8-NEXT: ld r0, 16(r1) -; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: lwsync +; AIX64-PWR8-NEXT: L..BB11_1: # %entry +; AIX64-PWR8-NEXT: # +; AIX64-PWR8-NEXT: lqarx r8, 0, r3 +; AIX64-PWR8-NEXT: xor r11, r9, r5 +; AIX64-PWR8-NEXT: xor r10, r8, r4 +; AIX64-PWR8-NEXT: or. r11, r11, r10 +; AIX64-PWR8-NEXT: bne cr0, L..BB11_3 +; AIX64-PWR8-NEXT: # %bb.2: # %entry +; AIX64-PWR8-NEXT: # +; AIX64-PWR8-NEXT: mr r11, r7 +; AIX64-PWR8-NEXT: mr r10, r6 +; AIX64-PWR8-NEXT: stqcx. r10, 0, r3 +; AIX64-PWR8-NEXT: bne cr0, L..BB11_1 +; AIX64-PWR8-NEXT: b L..BB11_4 +; AIX64-PWR8-NEXT: L..BB11_3: # %entry +; AIX64-PWR8-NEXT: stqcx. 
r8, 0, r3 +; AIX64-PWR8-NEXT: L..BB11_4: # %entry +; AIX64-PWR8-NEXT: lwsync +; AIX64-PWR8-NEXT: xor r3, r4, r8 +; AIX64-PWR8-NEXT: xor r4, r5, r9 +; AIX64-PWR8-NEXT: or r3, r4, r3 +; AIX64-PWR8-NEXT: cntlzd r3, r3 +; AIX64-PWR8-NEXT: rldicl r3, r3, 58, 63 ; AIX64-PWR8-NEXT: blr ; ; PPC-PWR8-LABEL: cas_acqrel_acquire_check_succ: diff --git a/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll b/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll --- a/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll +++ b/llvm/test/Transforms/AtomicExpand/PowerPC/cmpxchg.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -atomic-expand -S -mtriple=powerpc64-unknown-unknown \ -; RUN: -ppc-quadword-atomics -mcpu=pwr8 %s | FileCheck %s +; RUN: -mcpu=pwr8 %s | FileCheck %s ; RUN: opt -atomic-expand -S -mtriple=powerpc64-unknown-unknown \ ; RUN: -mcpu=pwr7 %s | FileCheck --check-prefix=PWR7 %s