diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h
--- a/clang/lib/Basic/Targets/PPC.h
+++ b/clang/lib/Basic/Targets/PPC.h
@@ -78,6 +78,7 @@
   bool IsISA2_07 = false;
   bool IsISA3_0 = false;
   bool IsISA3_1 = false;
+  bool HasQuadwordAtomics = false;
 
 protected:
   std::string ABI;
@@ -439,8 +440,18 @@
       DataLayout += "-S128-v256:256:256-v512:512:512";
     resetDataLayout(DataLayout);
 
-    // PPC64 supports atomics up to 8 bytes.
-    MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
+    // Newer PPC64 instruction sets support atomics up to 16 bytes.
+    MaxAtomicPromoteWidth = 128;
+    // Baseline PPC64 supports inlining atomics up to 8 bytes.
+    MaxAtomicInlineWidth = 64;
+  }
+
+  void setMaxAtomicWidth() override {
+    // For power8 and up, the backend is able to inline 16-byte lock-free
+    // atomic code.
+    // TODO: We should allow AIX to inline quadword atomics in the future.
+    if (!getTriple().isOSAIX() && hasFeature("quadword-atomics"))
+      MaxAtomicInlineWidth = 128;
   }
 
   BuiltinVaListKind getBuiltinVaListKind() const override {
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -81,6 +81,8 @@
       IsISA3_0 = true;
     } else if (Feature == "+isa-v31-instructions") {
       IsISA3_1 = true;
+    } else if (Feature == "+quadword-atomics") {
+      HasQuadwordAtomics = true;
     }
     // TODO: Finish this list and add an assert that we've handled them
     // all.
@@ -550,6 +552,12 @@
   Features["isa-v30-instructions"] =
       llvm::StringSwitch<bool>(CPU).Case("pwr9", true).Default(false);
 
+  Features["quadword-atomics"] =
+      getTriple().isArch64Bit() && llvm::StringSwitch<bool>(CPU)
+                                       .Case("pwr9", true)
+                                       .Case("pwr8", true)
+                                       .Default(false);
+
   // Power10 includes all the same features as Power9 plus any features specific
   // to the Power10 core.
if (CPU == "pwr10" || CPU == "power10") { @@ -660,6 +668,7 @@ .Case("isa-v207-instructions", IsISA2_07) .Case("isa-v30-instructions", IsISA3_0) .Case("isa-v31-instructions", IsISA3_1) + .Case("quadword-atomics", HasQuadwordAtomics) .Default(false); } diff --git a/clang/test/CodeGen/PowerPC/atomic-alignment.c b/clang/test/CodeGen/PowerPC/atomic-alignment.c --- a/clang/test/CodeGen/PowerPC/atomic-alignment.c +++ b/clang/test/CodeGen/PowerPC/atomic-alignment.c @@ -1,25 +1,30 @@ -// RUN: %clang_cc1 -verify -triple powerpc-unknown-unknown -emit-llvm -o - %s | \ +// RUN: %clang_cc1 -Werror -triple powerpc-unknown-unknown -emit-llvm -o - %s | \ // RUN: FileCheck %s --check-prefixes=PPC,PPC32 -// RUN: %clang_cc1 -verify -triple powerpc64le-unknown-linux -emit-llvm -o - %s | \ -// RUN: FileCheck %s --check-prefixes=PPC,PPC64 -// RUN: %clang_cc1 -verify -triple powerpc64-unknown-aix -emit-llvm -o - %s | \ +// RUN: %clang_cc1 -Werror -triple powerpc64le-unknown-linux -emit-llvm -o - %s | \ // RUN: FileCheck %s --check-prefixes=PPC,PPC64 +// RUN: %clang_cc1 -Werror -triple powerpc64le-unknown-linux -emit-llvm -o - %s \ +// RUN: -target-cpu pwr8 | FileCheck %s --check-prefixes=PPC,PPC64 +// RUN: %clang_cc1 -Werror -triple powerpc64-unknown-aix -emit-llvm -o - %s | \ +// RUN: FileCheck %s --check-prefixes=PPC,AIX64 +// RUN: %clang_cc1 -Werror -triple powerpc64-unknown-aix -emit-llvm -o - %s \ +// RUN: -target-cpu pwr8 | FileCheck %s --check-prefixes=PPC,AIX64 // PPC: @c = global i8 0, align 1{{$}} -_Atomic(char) c; // expected-no-diagnostics +_Atomic(char) c; // PPC: @s = global i16 0, align 2{{$}} -_Atomic(short) s; // expected-no-diagnostics +_Atomic(short) s; // PPC: @i = global i32 0, align 4{{$}} -_Atomic(int) i; // expected-no-diagnostics +_Atomic(int) i; // PPC32: @l = global i32 0, align 4{{$}} // PPC64: @l = global i64 0, align 8{{$}} -_Atomic(long) l; // expected-no-diagnostics +// AIX64: @l = global i64 0, align 8{{$}} +_Atomic(long) l; // PPC: @ll = global i64 0, align 8{{$}} -_Atomic(long long) ll; // expected-no-diagnostics +_Atomic(long long) ll; typedef struct { char x[8]; @@ -27,11 +32,14 @@ // PPC32: @o = global %struct.O zeroinitializer, align 1{{$}} // PPC64: @o = global %struct.O zeroinitializer, align 8{{$}} -_Atomic(O) o; // expected-no-diagnostics +// AIX64: @o = global %struct.O zeroinitializer, align 8{{$}} +_Atomic(O) o; typedef struct { char x[16]; } Q; -// PPC: @q = global %struct.Q zeroinitializer, align 1{{$}} -_Atomic(Q) q; // expected-no-diagnostics +// PPC32: @q = global %struct.Q zeroinitializer, align 1{{$}} +// PPC64: @q = global %struct.Q zeroinitializer, align 16{{$}} +// AIX64: @q = global %struct.Q zeroinitializer, align 16{{$}} +_Atomic(Q) q; diff --git a/clang/test/CodeGen/PowerPC/quadword-atomics.c b/clang/test/CodeGen/PowerPC/quadword-atomics.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/PowerPC/quadword-atomics.c @@ -0,0 +1,92 @@ +// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64le-linux-gnu \ +// RUN: -target-cpu pwr8 -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC64-PWR8 +// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64le-linux-gnu \ +// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC64 +// RUN: %clang_cc1 -Werror -Wno-atomic-alignment -triple powerpc64-unknown-aix \ +// RUN: -target-cpu pwr7 -emit-llvm -o - %s | FileCheck %s --check-prefix=PPC64 + +typedef struct { + char x[16]; +} Q; + +typedef _Atomic(Q) AtomicQ; + +typedef __int128_t int128_t; + +// PPC64-PWR8-LABEL: @test_load( +// 
PPC64-PWR8: [[TMP3:%.*]] = load atomic i128, i128* [[TMP1:%.*]] acquire, align 16 +// +// PPC64-LABEL: @test_load( +// PPC64: call void @__atomic_load(i64 noundef 16, i8* noundef [[TMP3:%.*]], i8* noundef [[TMP4:%.*]], i32 noundef signext 2) +// +Q test_load(AtomicQ *ptr) { + // expected-no-diagnostics + return __c11_atomic_load(ptr, __ATOMIC_ACQUIRE); +} + +// PPC64-PWR8-LABEL: @test_store( +// PPC64-PWR8: store atomic i128 [[TMP6:%.*]], i128* [[TMP4:%.*]] release, align 16 +// +// PPC64-LABEL: @test_store( +// PPC64: call void @__atomic_store(i64 noundef 16, i8* noundef [[TMP6:%.*]], i8* noundef [[TMP7:%.*]], i32 noundef signext 3) +// +void test_store(Q val, AtomicQ *ptr) { + // expected-no-diagnostics + __c11_atomic_store(ptr, val, __ATOMIC_RELEASE); +} + +// PPC64-PWR8-LABEL: @test_add( +// PPC64-PWR8: [[TMP3:%.*]] = atomicrmw add i128* [[TMP0:%.*]], i128 [[TMP2:%.*]] monotonic, align 16 +// +// PPC64-LABEL: @test_add( +// PPC64: [[CALL:%.*]] = call i128 @__atomic_fetch_add_16(i8* noundef [[TMP2:%.*]], i128 noundef [[TMP3:%.*]], i32 noundef signext 0) +// +void test_add(_Atomic(int128_t) *ptr, int128_t x) { + // expected-no-diagnostics + __c11_atomic_fetch_add(ptr, x, __ATOMIC_RELAXED); +} + +// PPC64-PWR8-LABEL: @test_xchg( +// PPC64-PWR8: [[TMP8:%.*]] = atomicrmw xchg i128* [[TMP4:%.*]], i128 [[TMP7:%.*]] seq_cst, align 16 +// +// PPC64-LABEL: @test_xchg( +// PPC64: call void @__atomic_exchange(i64 noundef 16, i8* noundef [[TMP7:%.*]], i8* noundef [[TMP8:%.*]], i8* noundef [[TMP9:%.*]], i32 noundef signext 5) +// +Q test_xchg(AtomicQ *ptr, Q new) { + // expected-no-diagnostics + return __c11_atomic_exchange(ptr, new, __ATOMIC_SEQ_CST); +} + +// PPC64-PWR8-LABEL: @test_cmpxchg( +// PPC64-PWR8: [[TMP10:%.*]] = cmpxchg i128* [[TMP5:%.*]], i128 [[TMP8:%.*]], i128 [[TMP9:%.*]] seq_cst monotonic, align 16 +// +// PPC64-LABEL: @test_cmpxchg( +// PPC64: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 noundef 16, i8* noundef [[TMP8:%.*]], i8* noundef [[TMP9:%.*]], i8* noundef [[TMP10:%.*]], i32 noundef signext 5, i32 noundef signext 0) +// +int test_cmpxchg(AtomicQ *ptr, Q *cmp, Q new) { + // expected-no-diagnostics + return __c11_atomic_compare_exchange_strong(ptr, cmp, new, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED); +} + +// PPC64-PWR8-LABEL: @test_cmpxchg_weak( +// PPC64-PWR8: [[TMP10:%.*]] = cmpxchg weak i128* [[TMP5:%.*]], i128 [[TMP8:%.*]], i128 [[TMP9:%.*]] seq_cst monotonic, align 16 +// +// PPC64-LABEL: @test_cmpxchg_weak( +// PPC64: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange(i64 noundef 16, i8* noundef [[TMP8:%.*]], i8* noundef [[TMP9:%.*]], i8* noundef [[TMP10:%.*]], i32 noundef signext 5, i32 noundef signext 0) +// +int test_cmpxchg_weak(AtomicQ *ptr, Q *cmp, Q new) { + // expected-no-diagnostics + return __c11_atomic_compare_exchange_weak(ptr, cmp, new, __ATOMIC_SEQ_CST, __ATOMIC_RELAXED); +} + +// PPC64-PWR8-LABEL: @is_lock_free( +// PPC64-PWR8: ret i32 1 +// +// PPC64-LABEL: @is_lock_free( +// PPC64: [[CALL:%.*]] = call zeroext i1 @__atomic_is_lock_free(i64 noundef 16, i8* noundef null) +// +int is_lock_free() { + AtomicQ q; + // expected-no-diagnostics + return __c11_atomic_is_lock_free(sizeof(q)); +} diff --git a/clang/test/Sema/atomic-ops.c b/clang/test/Sema/atomic-ops.c --- a/clang/test/Sema/atomic-ops.c +++ b/clang/test/Sema/atomic-ops.c @@ -9,7 +9,7 @@ // RUN: -target-cpu pwr7 // RUN: %clang_cc1 %s -verify -fgnuc-version=4.2.1 -ffreestanding \ // RUN: -fsyntax-only -triple=powerpc64le-linux-gnu -std=c11 \ -// RUN: -target-cpu pwr8 +// RUN: 
-target-cpu pwr8
+// RUN:   -target-cpu pwr8 -DPPC64_PWR8
 
 // Basic parsing/Sema tests for __c11_atomic_*
 
@@ -47,7 +47,11 @@
 _Static_assert(__c11_atomic_is_lock_free(3), ""); // expected-error {{not an integral constant expression}}
 _Static_assert(__c11_atomic_is_lock_free(4), "");
 _Static_assert(__c11_atomic_is_lock_free(8), "");
+#ifndef PPC64_PWR8
 _Static_assert(__c11_atomic_is_lock_free(16), ""); // expected-error {{not an integral constant expression}}
+#else
+_Static_assert(__c11_atomic_is_lock_free(16), ""); // expected-no-error
+#endif
 _Static_assert(__c11_atomic_is_lock_free(17), ""); // expected-error {{not an integral constant expression}}
 
 _Static_assert(__atomic_is_lock_free(1, 0), "");
@@ -55,15 +59,23 @@
 _Static_assert(__atomic_is_lock_free(3, 0), ""); // expected-error {{not an integral constant expression}}
 _Static_assert(__atomic_is_lock_free(4, 0), "");
 _Static_assert(__atomic_is_lock_free(8, 0), "");
+#ifndef PPC64_PWR8
 _Static_assert(__atomic_is_lock_free(16, 0), ""); // expected-error {{not an integral constant expression}}
+#else
+_Static_assert(__atomic_is_lock_free(16, 0), ""); // expected-no-error
+#endif
 _Static_assert(__atomic_is_lock_free(17, 0), ""); // expected-error {{not an integral constant expression}}
 
 _Static_assert(atomic_is_lock_free((atomic_char*)0), "");
 _Static_assert(atomic_is_lock_free((atomic_short*)0), "");
 _Static_assert(atomic_is_lock_free((atomic_int*)0), "");
 _Static_assert(atomic_is_lock_free((atomic_long*)0), "");
+#ifndef PPC64_PWR8
 // noi128-error@+1 {{__int128 is not supported on this target}}
 _Static_assert(atomic_is_lock_free((_Atomic(__int128)*)0), ""); // expected-error {{not an integral constant expression}}
+#else
+_Static_assert(atomic_is_lock_free((_Atomic(__int128)*)0), ""); // expected-no-error
+#endif
 _Static_assert(atomic_is_lock_free(0 + (atomic_char*)0), "");
 
 char i8;
@@ -88,7 +100,11 @@
 _Static_assert(!__atomic_always_lock_free(3, 0), "");
 _Static_assert(__atomic_always_lock_free(4, 0), "");
 _Static_assert(__atomic_always_lock_free(8, 0), "");
+#ifndef PPC64_PWR8
 _Static_assert(!__atomic_always_lock_free(16, 0), "");
+#else
+_Static_assert(__atomic_always_lock_free(16, 0), "");
+#endif
 _Static_assert(!__atomic_always_lock_free(17, 0), "");
 
 _Static_assert(__atomic_always_lock_free(1, incomplete), "");
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -910,6 +910,8 @@
     Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
                                    AtomicOrdering Ord) const override;
 
+    bool shouldInlineQuadwordAtomics() const;
+
     TargetLowering::AtomicExpansionKind
     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1321,7 +1321,7 @@
     setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
   }
 
-  if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics()) {
+  if (shouldInlineQuadwordAtomics()) {
     setMaxAtomicSizeInBitsSupported(128);
     setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
     setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
@@ -18053,10 +18053,18 @@
   }
 }
 
+bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
+  // TODO: 16-byte atomic type support for AIX is in progress; we should be able
+  // to inline 16-byte atomic ops on AIX too in the future.
+  return Subtarget.isPPC64() &&
+         (EnableQuadwordAtomics || !Subtarget.getTargetTriple().isOSAIX()) &&
+         Subtarget.hasQuadwordAtomics();
+}
+
 TargetLowering::AtomicExpansionKind
 PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
-  if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() && Size == 128)
+  if (shouldInlineQuadwordAtomics() && Size == 128)
     return AtomicExpansionKind::MaskedIntrinsic;
   return TargetLowering::shouldExpandAtomicRMWInIR(AI);
 }
@@ -18064,7 +18072,7 @@
 TargetLowering::AtomicExpansionKind
 PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
   unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
-  if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() && Size == 128)
+  if (shouldInlineQuadwordAtomics() && Size == 128)
     return AtomicExpansionKind::MaskedIntrinsic;
   return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
 }
@@ -18094,8 +18102,7 @@
 Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
     IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
     Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
-  assert(EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() &&
-         "Only support quadword now");
+  assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
   Type *ValTy = Incr->getType();
   assert(ValTy->getPrimitiveSizeInBits() == 128);
@@ -18119,8 +18126,7 @@
 Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
     IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
     Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
-  assert(EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() &&
-         "Only support quadword now");
+  assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
   Type *ValTy = CmpVal->getType();
   assert(ValTy->getPrimitiveSizeInBits() == 128);
diff --git a/llvm/test/CodeGen/PowerPC/atomics-i128.ll b/llvm/test/CodeGen/PowerPC/atomics-i128.ll
--- a/llvm/test/CodeGen/PowerPC/atomics-i128.ll
+++ b/llvm/test/CodeGen/PowerPC/atomics-i128.ll
@@ -5,6 +5,22 @@
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown -mcpu=pwr7 \
 ; RUN:   -ppc-asm-full-reg-names -ppc-quadword-atomics \
 ; RUN:   -ppc-track-subreg-liveness < %s | FileCheck --check-prefix=PWR7 %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 \
+; RUN:   -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s | FileCheck \
+; RUN:   --check-prefix=LE-PWR8 %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-freebsd -mcpu=pwr8 \
+; RUN:   -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s | FileCheck \
+; RUN:   --check-prefix=LE-PWR8 %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix -mcpu=pwr8 \
+; RUN:   -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s | FileCheck \
+; RUN:   --check-prefix=AIX64-PWR8 %s
+
+; On 32-bit PPC platforms, 16-byte lock-free atomic instructions are not
+; available, so no inlined lock-free code is expected on such platforms, even
+; when the arch level is pwr8+ and `-ppc-quadword-atomics` is on.
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-unknown -mcpu=pwr8 \ +; RUN: -ppc-quadword-atomics -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s \ +; RUN: | FileCheck --check-prefix=PPC-PWR8 %s define i128 @swap(i128* %a, i128 %x) { @@ -39,6 +55,62 @@ ; PWR7-NEXT: ld r0, 16(r1) ; PWR7-NEXT: mtlr r0 ; PWR7-NEXT: blr +; +; LE-PWR8-LABEL: swap: +; LE-PWR8: # %bb.0: # %entry +; LE-PWR8-NEXT: sync +; LE-PWR8-NEXT: .LBB0_1: # %entry +; LE-PWR8-NEXT: # +; LE-PWR8-NEXT: lqarx r6, 0, r3 +; LE-PWR8-NEXT: mr r9, r4 +; LE-PWR8-NEXT: mr r8, r5 +; LE-PWR8-NEXT: stqcx. r8, 0, r3 +; LE-PWR8-NEXT: bne cr0, .LBB0_1 +; LE-PWR8-NEXT: # %bb.2: # %entry +; LE-PWR8-NEXT: lwsync +; LE-PWR8-NEXT: mr r3, r7 +; LE-PWR8-NEXT: mr r4, r6 +; LE-PWR8-NEXT: blr +; +; AIX64-PWR8-LABEL: swap: +; AIX64-PWR8: # %bb.0: # %entry +; AIX64-PWR8-NEXT: mflr r0 +; AIX64-PWR8-NEXT: std r0, 16(r1) +; AIX64-PWR8-NEXT: stdu r1, -112(r1) +; AIX64-PWR8-NEXT: sync +; AIX64-PWR8-NEXT: bl .__sync_lock_test_and_set_16[PR] +; AIX64-PWR8-NEXT: nop +; AIX64-PWR8-NEXT: lwsync +; AIX64-PWR8-NEXT: addi r1, r1, 112 +; AIX64-PWR8-NEXT: ld r0, 16(r1) +; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: blr +; +; PPC-PWR8-LABEL: swap: +; PPC-PWR8: # %bb.0: # %entry +; PPC-PWR8-NEXT: mflr r0 +; PPC-PWR8-NEXT: stw r0, 4(r1) +; PPC-PWR8-NEXT: stwu r1, -48(r1) +; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48 +; PPC-PWR8-NEXT: .cfi_offset lr, 4 +; PPC-PWR8-NEXT: mr r4, r3 +; PPC-PWR8-NEXT: stw r7, 40(r1) +; PPC-PWR8-NEXT: stw r6, 36(r1) +; PPC-PWR8-NEXT: addi r6, r1, 16 +; PPC-PWR8-NEXT: li r3, 16 +; PPC-PWR8-NEXT: li r7, 5 +; PPC-PWR8-NEXT: stw r5, 32(r1) +; PPC-PWR8-NEXT: addi r5, r1, 32 +; PPC-PWR8-NEXT: stw r8, 44(r1) +; PPC-PWR8-NEXT: bl __atomic_exchange +; PPC-PWR8-NEXT: lwz r6, 28(r1) +; PPC-PWR8-NEXT: lwz r5, 24(r1) +; PPC-PWR8-NEXT: lwz r4, 20(r1) +; PPC-PWR8-NEXT: lwz r3, 16(r1) +; PPC-PWR8-NEXT: lwz r0, 52(r1) +; PPC-PWR8-NEXT: addi r1, r1, 48 +; PPC-PWR8-NEXT: mtlr r0 +; PPC-PWR8-NEXT: blr entry: %0 = atomicrmw xchg i128* %a, i128 %x seq_cst, align 16 ret i128 %0 @@ -76,6 +148,109 @@ ; PWR7-NEXT: ld r0, 16(r1) ; PWR7-NEXT: mtlr r0 ; PWR7-NEXT: blr +; +; LE-PWR8-LABEL: add: +; LE-PWR8: # %bb.0: # %entry +; LE-PWR8-NEXT: sync +; LE-PWR8-NEXT: .LBB1_1: # %entry +; LE-PWR8-NEXT: # +; LE-PWR8-NEXT: lqarx r6, 0, r3 +; LE-PWR8-NEXT: addc r9, r4, r7 +; LE-PWR8-NEXT: adde r8, r5, r6 +; LE-PWR8-NEXT: stqcx. 
r8, 0, r3 +; LE-PWR8-NEXT: bne cr0, .LBB1_1 +; LE-PWR8-NEXT: # %bb.2: # %entry +; LE-PWR8-NEXT: lwsync +; LE-PWR8-NEXT: mr r3, r7 +; LE-PWR8-NEXT: mr r4, r6 +; LE-PWR8-NEXT: blr +; +; AIX64-PWR8-LABEL: add: +; AIX64-PWR8: # %bb.0: # %entry +; AIX64-PWR8-NEXT: mflr r0 +; AIX64-PWR8-NEXT: std r0, 16(r1) +; AIX64-PWR8-NEXT: stdu r1, -112(r1) +; AIX64-PWR8-NEXT: sync +; AIX64-PWR8-NEXT: bl .__sync_fetch_and_add_16[PR] +; AIX64-PWR8-NEXT: nop +; AIX64-PWR8-NEXT: lwsync +; AIX64-PWR8-NEXT: addi r1, r1, 112 +; AIX64-PWR8-NEXT: ld r0, 16(r1) +; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: blr +; +; PPC-PWR8-LABEL: add: +; PPC-PWR8: # %bb.0: # %entry +; PPC-PWR8-NEXT: mflr r0 +; PPC-PWR8-NEXT: stw r0, 4(r1) +; PPC-PWR8-NEXT: stwu r1, -80(r1) +; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80 +; PPC-PWR8-NEXT: .cfi_offset lr, 4 +; PPC-PWR8-NEXT: .cfi_offset r24, -32 +; PPC-PWR8-NEXT: .cfi_offset r25, -28 +; PPC-PWR8-NEXT: .cfi_offset r26, -24 +; PPC-PWR8-NEXT: .cfi_offset r27, -20 +; PPC-PWR8-NEXT: .cfi_offset r28, -16 +; PPC-PWR8-NEXT: .cfi_offset r29, -12 +; PPC-PWR8-NEXT: .cfi_offset r30, -8 +; PPC-PWR8-NEXT: stw r26, 56(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: stw r27, 60(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: mr r27, r5 +; PPC-PWR8-NEXT: mr r26, r3 +; PPC-PWR8-NEXT: lwz r5, 8(r3) +; PPC-PWR8-NEXT: lwz r4, 4(r3) +; PPC-PWR8-NEXT: stw r28, 64(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: mr r28, r6 +; PPC-PWR8-NEXT: lwz r6, 12(r3) +; PPC-PWR8-NEXT: lwz r3, 0(r3) +; PPC-PWR8-NEXT: stw r24, 48(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: addi r24, r1, 16 +; PPC-PWR8-NEXT: stw r25, 52(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: stw r29, 68(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: mr r29, r7 +; PPC-PWR8-NEXT: addi r25, r1, 32 +; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: mr r30, r8 +; PPC-PWR8-NEXT: .p2align 4 +; PPC-PWR8-NEXT: .LBB1_1: # %atomicrmw.start +; PPC-PWR8-NEXT: # +; PPC-PWR8-NEXT: addc r7, r6, r30 +; PPC-PWR8-NEXT: stw r4, 36(r1) +; PPC-PWR8-NEXT: stw r3, 32(r1) +; PPC-PWR8-NEXT: adde r8, r5, r29 +; PPC-PWR8-NEXT: stw r5, 40(r1) +; PPC-PWR8-NEXT: stw r6, 44(r1) +; PPC-PWR8-NEXT: mr r5, r25 +; PPC-PWR8-NEXT: adde r4, r4, r28 +; PPC-PWR8-NEXT: stw r7, 28(r1) +; PPC-PWR8-NEXT: stw r8, 24(r1) +; PPC-PWR8-NEXT: mr r6, r24 +; PPC-PWR8-NEXT: adde r3, r3, r27 +; PPC-PWR8-NEXT: stw r4, 20(r1) +; PPC-PWR8-NEXT: mr r4, r26 +; PPC-PWR8-NEXT: li r7, 5 +; PPC-PWR8-NEXT: stw r3, 16(r1) +; PPC-PWR8-NEXT: li r3, 16 +; PPC-PWR8-NEXT: li r8, 5 +; PPC-PWR8-NEXT: bl __atomic_compare_exchange +; PPC-PWR8-NEXT: cmplwi r3, 0 +; PPC-PWR8-NEXT: lwz r6, 44(r1) +; PPC-PWR8-NEXT: lwz r5, 40(r1) +; PPC-PWR8-NEXT: lwz r4, 36(r1) +; PPC-PWR8-NEXT: lwz r3, 32(r1) +; PPC-PWR8-NEXT: beq cr0, .LBB1_1 +; PPC-PWR8-NEXT: # %bb.2: # %atomicrmw.end +; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r26, 56(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r25, 52(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r24, 48(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r0, 84(r1) +; PPC-PWR8-NEXT: addi r1, r1, 80 +; PPC-PWR8-NEXT: mtlr r0 +; PPC-PWR8-NEXT: blr entry: %0 = atomicrmw add i128* %a, i128 %x seq_cst, align 16 ret i128 %0 @@ -113,6 +288,109 @@ ; PWR7-NEXT: ld r0, 16(r1) ; PWR7-NEXT: mtlr r0 ; PWR7-NEXT: blr +; +; LE-PWR8-LABEL: sub: +; LE-PWR8: # %bb.0: # %entry +; LE-PWR8-NEXT: sync +; LE-PWR8-NEXT: 
.LBB2_1: # %entry +; LE-PWR8-NEXT: # +; LE-PWR8-NEXT: lqarx r6, 0, r3 +; LE-PWR8-NEXT: subc r9, r7, r4 +; LE-PWR8-NEXT: subfe r8, r5, r6 +; LE-PWR8-NEXT: stqcx. r8, 0, r3 +; LE-PWR8-NEXT: bne cr0, .LBB2_1 +; LE-PWR8-NEXT: # %bb.2: # %entry +; LE-PWR8-NEXT: lwsync +; LE-PWR8-NEXT: mr r3, r7 +; LE-PWR8-NEXT: mr r4, r6 +; LE-PWR8-NEXT: blr +; +; AIX64-PWR8-LABEL: sub: +; AIX64-PWR8: # %bb.0: # %entry +; AIX64-PWR8-NEXT: mflr r0 +; AIX64-PWR8-NEXT: std r0, 16(r1) +; AIX64-PWR8-NEXT: stdu r1, -112(r1) +; AIX64-PWR8-NEXT: sync +; AIX64-PWR8-NEXT: bl .__sync_fetch_and_sub_16[PR] +; AIX64-PWR8-NEXT: nop +; AIX64-PWR8-NEXT: lwsync +; AIX64-PWR8-NEXT: addi r1, r1, 112 +; AIX64-PWR8-NEXT: ld r0, 16(r1) +; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: blr +; +; PPC-PWR8-LABEL: sub: +; PPC-PWR8: # %bb.0: # %entry +; PPC-PWR8-NEXT: mflr r0 +; PPC-PWR8-NEXT: stw r0, 4(r1) +; PPC-PWR8-NEXT: stwu r1, -80(r1) +; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80 +; PPC-PWR8-NEXT: .cfi_offset lr, 4 +; PPC-PWR8-NEXT: .cfi_offset r24, -32 +; PPC-PWR8-NEXT: .cfi_offset r25, -28 +; PPC-PWR8-NEXT: .cfi_offset r26, -24 +; PPC-PWR8-NEXT: .cfi_offset r27, -20 +; PPC-PWR8-NEXT: .cfi_offset r28, -16 +; PPC-PWR8-NEXT: .cfi_offset r29, -12 +; PPC-PWR8-NEXT: .cfi_offset r30, -8 +; PPC-PWR8-NEXT: stw r26, 56(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: stw r27, 60(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: mr r27, r5 +; PPC-PWR8-NEXT: mr r26, r3 +; PPC-PWR8-NEXT: lwz r5, 8(r3) +; PPC-PWR8-NEXT: lwz r4, 4(r3) +; PPC-PWR8-NEXT: stw r28, 64(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: mr r28, r6 +; PPC-PWR8-NEXT: lwz r6, 12(r3) +; PPC-PWR8-NEXT: lwz r3, 0(r3) +; PPC-PWR8-NEXT: stw r24, 48(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: addi r24, r1, 16 +; PPC-PWR8-NEXT: stw r25, 52(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: stw r29, 68(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: mr r29, r7 +; PPC-PWR8-NEXT: addi r25, r1, 32 +; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: mr r30, r8 +; PPC-PWR8-NEXT: .p2align 4 +; PPC-PWR8-NEXT: .LBB2_1: # %atomicrmw.start +; PPC-PWR8-NEXT: # +; PPC-PWR8-NEXT: subc r7, r6, r30 +; PPC-PWR8-NEXT: stw r4, 36(r1) +; PPC-PWR8-NEXT: stw r3, 32(r1) +; PPC-PWR8-NEXT: subfe r8, r29, r5 +; PPC-PWR8-NEXT: stw r5, 40(r1) +; PPC-PWR8-NEXT: stw r6, 44(r1) +; PPC-PWR8-NEXT: mr r5, r25 +; PPC-PWR8-NEXT: subfe r4, r28, r4 +; PPC-PWR8-NEXT: stw r7, 28(r1) +; PPC-PWR8-NEXT: stw r8, 24(r1) +; PPC-PWR8-NEXT: mr r6, r24 +; PPC-PWR8-NEXT: subfe r3, r27, r3 +; PPC-PWR8-NEXT: stw r4, 20(r1) +; PPC-PWR8-NEXT: mr r4, r26 +; PPC-PWR8-NEXT: li r7, 5 +; PPC-PWR8-NEXT: stw r3, 16(r1) +; PPC-PWR8-NEXT: li r3, 16 +; PPC-PWR8-NEXT: li r8, 5 +; PPC-PWR8-NEXT: bl __atomic_compare_exchange +; PPC-PWR8-NEXT: cmplwi r3, 0 +; PPC-PWR8-NEXT: lwz r6, 44(r1) +; PPC-PWR8-NEXT: lwz r5, 40(r1) +; PPC-PWR8-NEXT: lwz r4, 36(r1) +; PPC-PWR8-NEXT: lwz r3, 32(r1) +; PPC-PWR8-NEXT: beq cr0, .LBB2_1 +; PPC-PWR8-NEXT: # %bb.2: # %atomicrmw.end +; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r26, 56(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r25, 52(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r24, 48(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r0, 84(r1) +; PPC-PWR8-NEXT: addi r1, r1, 80 +; PPC-PWR8-NEXT: mtlr r0 +; PPC-PWR8-NEXT: blr entry: %0 = atomicrmw sub i128* %a, i128 %x seq_cst, align 16 ret i128 %0 @@ -150,6 
+428,109 @@ ; PWR7-NEXT: ld r0, 16(r1) ; PWR7-NEXT: mtlr r0 ; PWR7-NEXT: blr +; +; LE-PWR8-LABEL: and: +; LE-PWR8: # %bb.0: # %entry +; LE-PWR8-NEXT: sync +; LE-PWR8-NEXT: .LBB3_1: # %entry +; LE-PWR8-NEXT: # +; LE-PWR8-NEXT: lqarx r6, 0, r3 +; LE-PWR8-NEXT: and r9, r4, r7 +; LE-PWR8-NEXT: and r8, r5, r6 +; LE-PWR8-NEXT: stqcx. r8, 0, r3 +; LE-PWR8-NEXT: bne cr0, .LBB3_1 +; LE-PWR8-NEXT: # %bb.2: # %entry +; LE-PWR8-NEXT: lwsync +; LE-PWR8-NEXT: mr r3, r7 +; LE-PWR8-NEXT: mr r4, r6 +; LE-PWR8-NEXT: blr +; +; AIX64-PWR8-LABEL: and: +; AIX64-PWR8: # %bb.0: # %entry +; AIX64-PWR8-NEXT: mflr r0 +; AIX64-PWR8-NEXT: std r0, 16(r1) +; AIX64-PWR8-NEXT: stdu r1, -112(r1) +; AIX64-PWR8-NEXT: sync +; AIX64-PWR8-NEXT: bl .__sync_fetch_and_and_16[PR] +; AIX64-PWR8-NEXT: nop +; AIX64-PWR8-NEXT: lwsync +; AIX64-PWR8-NEXT: addi r1, r1, 112 +; AIX64-PWR8-NEXT: ld r0, 16(r1) +; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: blr +; +; PPC-PWR8-LABEL: and: +; PPC-PWR8: # %bb.0: # %entry +; PPC-PWR8-NEXT: mflr r0 +; PPC-PWR8-NEXT: stw r0, 4(r1) +; PPC-PWR8-NEXT: stwu r1, -80(r1) +; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80 +; PPC-PWR8-NEXT: .cfi_offset lr, 4 +; PPC-PWR8-NEXT: .cfi_offset r24, -32 +; PPC-PWR8-NEXT: .cfi_offset r25, -28 +; PPC-PWR8-NEXT: .cfi_offset r26, -24 +; PPC-PWR8-NEXT: .cfi_offset r27, -20 +; PPC-PWR8-NEXT: .cfi_offset r28, -16 +; PPC-PWR8-NEXT: .cfi_offset r29, -12 +; PPC-PWR8-NEXT: .cfi_offset r30, -8 +; PPC-PWR8-NEXT: stw r26, 56(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: stw r27, 60(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: mr r27, r5 +; PPC-PWR8-NEXT: mr r26, r3 +; PPC-PWR8-NEXT: lwz r5, 8(r3) +; PPC-PWR8-NEXT: lwz r4, 4(r3) +; PPC-PWR8-NEXT: stw r28, 64(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: mr r28, r6 +; PPC-PWR8-NEXT: lwz r6, 12(r3) +; PPC-PWR8-NEXT: lwz r3, 0(r3) +; PPC-PWR8-NEXT: stw r24, 48(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: addi r24, r1, 16 +; PPC-PWR8-NEXT: stw r25, 52(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: stw r29, 68(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: mr r29, r7 +; PPC-PWR8-NEXT: addi r25, r1, 32 +; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: mr r30, r8 +; PPC-PWR8-NEXT: .p2align 4 +; PPC-PWR8-NEXT: .LBB3_1: # %atomicrmw.start +; PPC-PWR8-NEXT: # +; PPC-PWR8-NEXT: stw r3, 32(r1) +; PPC-PWR8-NEXT: stw r4, 36(r1) +; PPC-PWR8-NEXT: and r7, r5, r29 +; PPC-PWR8-NEXT: and r8, r6, r30 +; PPC-PWR8-NEXT: and r3, r3, r27 +; PPC-PWR8-NEXT: and r4, r4, r28 +; PPC-PWR8-NEXT: stw r5, 40(r1) +; PPC-PWR8-NEXT: stw r6, 44(r1) +; PPC-PWR8-NEXT: mr r5, r25 +; PPC-PWR8-NEXT: mr r6, r24 +; PPC-PWR8-NEXT: stw r8, 28(r1) +; PPC-PWR8-NEXT: stw r7, 24(r1) +; PPC-PWR8-NEXT: li r7, 5 +; PPC-PWR8-NEXT: li r8, 5 +; PPC-PWR8-NEXT: stw r4, 20(r1) +; PPC-PWR8-NEXT: stw r3, 16(r1) +; PPC-PWR8-NEXT: li r3, 16 +; PPC-PWR8-NEXT: mr r4, r26 +; PPC-PWR8-NEXT: bl __atomic_compare_exchange +; PPC-PWR8-NEXT: cmplwi r3, 0 +; PPC-PWR8-NEXT: lwz r6, 44(r1) +; PPC-PWR8-NEXT: lwz r5, 40(r1) +; PPC-PWR8-NEXT: lwz r4, 36(r1) +; PPC-PWR8-NEXT: lwz r3, 32(r1) +; PPC-PWR8-NEXT: beq cr0, .LBB3_1 +; PPC-PWR8-NEXT: # %bb.2: # %atomicrmw.end +; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r26, 56(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r25, 52(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r24, 48(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r0, 84(r1) +; 
PPC-PWR8-NEXT: addi r1, r1, 80 +; PPC-PWR8-NEXT: mtlr r0 +; PPC-PWR8-NEXT: blr entry: %0 = atomicrmw and i128* %a, i128 %x seq_cst, align 16 ret i128 %0 @@ -187,6 +568,109 @@ ; PWR7-NEXT: ld r0, 16(r1) ; PWR7-NEXT: mtlr r0 ; PWR7-NEXT: blr +; +; LE-PWR8-LABEL: or: +; LE-PWR8: # %bb.0: # %entry +; LE-PWR8-NEXT: sync +; LE-PWR8-NEXT: .LBB4_1: # %entry +; LE-PWR8-NEXT: # +; LE-PWR8-NEXT: lqarx r6, 0, r3 +; LE-PWR8-NEXT: or r9, r4, r7 +; LE-PWR8-NEXT: or r8, r5, r6 +; LE-PWR8-NEXT: stqcx. r8, 0, r3 +; LE-PWR8-NEXT: bne cr0, .LBB4_1 +; LE-PWR8-NEXT: # %bb.2: # %entry +; LE-PWR8-NEXT: lwsync +; LE-PWR8-NEXT: mr r3, r7 +; LE-PWR8-NEXT: mr r4, r6 +; LE-PWR8-NEXT: blr +; +; AIX64-PWR8-LABEL: or: +; AIX64-PWR8: # %bb.0: # %entry +; AIX64-PWR8-NEXT: mflr r0 +; AIX64-PWR8-NEXT: std r0, 16(r1) +; AIX64-PWR8-NEXT: stdu r1, -112(r1) +; AIX64-PWR8-NEXT: sync +; AIX64-PWR8-NEXT: bl .__sync_fetch_and_or_16[PR] +; AIX64-PWR8-NEXT: nop +; AIX64-PWR8-NEXT: lwsync +; AIX64-PWR8-NEXT: addi r1, r1, 112 +; AIX64-PWR8-NEXT: ld r0, 16(r1) +; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: blr +; +; PPC-PWR8-LABEL: or: +; PPC-PWR8: # %bb.0: # %entry +; PPC-PWR8-NEXT: mflr r0 +; PPC-PWR8-NEXT: stw r0, 4(r1) +; PPC-PWR8-NEXT: stwu r1, -80(r1) +; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80 +; PPC-PWR8-NEXT: .cfi_offset lr, 4 +; PPC-PWR8-NEXT: .cfi_offset r24, -32 +; PPC-PWR8-NEXT: .cfi_offset r25, -28 +; PPC-PWR8-NEXT: .cfi_offset r26, -24 +; PPC-PWR8-NEXT: .cfi_offset r27, -20 +; PPC-PWR8-NEXT: .cfi_offset r28, -16 +; PPC-PWR8-NEXT: .cfi_offset r29, -12 +; PPC-PWR8-NEXT: .cfi_offset r30, -8 +; PPC-PWR8-NEXT: stw r26, 56(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: stw r27, 60(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: mr r27, r5 +; PPC-PWR8-NEXT: mr r26, r3 +; PPC-PWR8-NEXT: lwz r5, 8(r3) +; PPC-PWR8-NEXT: lwz r4, 4(r3) +; PPC-PWR8-NEXT: stw r28, 64(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: mr r28, r6 +; PPC-PWR8-NEXT: lwz r6, 12(r3) +; PPC-PWR8-NEXT: lwz r3, 0(r3) +; PPC-PWR8-NEXT: stw r24, 48(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: addi r24, r1, 16 +; PPC-PWR8-NEXT: stw r25, 52(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: stw r29, 68(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: mr r29, r7 +; PPC-PWR8-NEXT: addi r25, r1, 32 +; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: mr r30, r8 +; PPC-PWR8-NEXT: .p2align 4 +; PPC-PWR8-NEXT: .LBB4_1: # %atomicrmw.start +; PPC-PWR8-NEXT: # +; PPC-PWR8-NEXT: stw r3, 32(r1) +; PPC-PWR8-NEXT: stw r4, 36(r1) +; PPC-PWR8-NEXT: or r7, r5, r29 +; PPC-PWR8-NEXT: or r8, r6, r30 +; PPC-PWR8-NEXT: or r3, r3, r27 +; PPC-PWR8-NEXT: or r4, r4, r28 +; PPC-PWR8-NEXT: stw r5, 40(r1) +; PPC-PWR8-NEXT: stw r6, 44(r1) +; PPC-PWR8-NEXT: mr r5, r25 +; PPC-PWR8-NEXT: mr r6, r24 +; PPC-PWR8-NEXT: stw r8, 28(r1) +; PPC-PWR8-NEXT: stw r7, 24(r1) +; PPC-PWR8-NEXT: li r7, 5 +; PPC-PWR8-NEXT: li r8, 5 +; PPC-PWR8-NEXT: stw r4, 20(r1) +; PPC-PWR8-NEXT: stw r3, 16(r1) +; PPC-PWR8-NEXT: li r3, 16 +; PPC-PWR8-NEXT: mr r4, r26 +; PPC-PWR8-NEXT: bl __atomic_compare_exchange +; PPC-PWR8-NEXT: cmplwi r3, 0 +; PPC-PWR8-NEXT: lwz r6, 44(r1) +; PPC-PWR8-NEXT: lwz r5, 40(r1) +; PPC-PWR8-NEXT: lwz r4, 36(r1) +; PPC-PWR8-NEXT: lwz r3, 32(r1) +; PPC-PWR8-NEXT: beq cr0, .LBB4_1 +; PPC-PWR8-NEXT: # %bb.2: # %atomicrmw.end +; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r26, 56(r1) # 4-byte Folded Reload +; 
PPC-PWR8-NEXT: lwz r25, 52(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r24, 48(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r0, 84(r1) +; PPC-PWR8-NEXT: addi r1, r1, 80 +; PPC-PWR8-NEXT: mtlr r0 +; PPC-PWR8-NEXT: blr entry: %0 = atomicrmw or i128* %a, i128 %x seq_cst, align 16 ret i128 %0 @@ -224,6 +708,109 @@ ; PWR7-NEXT: ld r0, 16(r1) ; PWR7-NEXT: mtlr r0 ; PWR7-NEXT: blr +; +; LE-PWR8-LABEL: xor: +; LE-PWR8: # %bb.0: # %entry +; LE-PWR8-NEXT: sync +; LE-PWR8-NEXT: .LBB5_1: # %entry +; LE-PWR8-NEXT: # +; LE-PWR8-NEXT: lqarx r6, 0, r3 +; LE-PWR8-NEXT: xor r9, r4, r7 +; LE-PWR8-NEXT: xor r8, r5, r6 +; LE-PWR8-NEXT: stqcx. r8, 0, r3 +; LE-PWR8-NEXT: bne cr0, .LBB5_1 +; LE-PWR8-NEXT: # %bb.2: # %entry +; LE-PWR8-NEXT: lwsync +; LE-PWR8-NEXT: mr r3, r7 +; LE-PWR8-NEXT: mr r4, r6 +; LE-PWR8-NEXT: blr +; +; AIX64-PWR8-LABEL: xor: +; AIX64-PWR8: # %bb.0: # %entry +; AIX64-PWR8-NEXT: mflr r0 +; AIX64-PWR8-NEXT: std r0, 16(r1) +; AIX64-PWR8-NEXT: stdu r1, -112(r1) +; AIX64-PWR8-NEXT: sync +; AIX64-PWR8-NEXT: bl .__sync_fetch_and_xor_16[PR] +; AIX64-PWR8-NEXT: nop +; AIX64-PWR8-NEXT: lwsync +; AIX64-PWR8-NEXT: addi r1, r1, 112 +; AIX64-PWR8-NEXT: ld r0, 16(r1) +; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: blr +; +; PPC-PWR8-LABEL: xor: +; PPC-PWR8: # %bb.0: # %entry +; PPC-PWR8-NEXT: mflr r0 +; PPC-PWR8-NEXT: stw r0, 4(r1) +; PPC-PWR8-NEXT: stwu r1, -80(r1) +; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80 +; PPC-PWR8-NEXT: .cfi_offset lr, 4 +; PPC-PWR8-NEXT: .cfi_offset r24, -32 +; PPC-PWR8-NEXT: .cfi_offset r25, -28 +; PPC-PWR8-NEXT: .cfi_offset r26, -24 +; PPC-PWR8-NEXT: .cfi_offset r27, -20 +; PPC-PWR8-NEXT: .cfi_offset r28, -16 +; PPC-PWR8-NEXT: .cfi_offset r29, -12 +; PPC-PWR8-NEXT: .cfi_offset r30, -8 +; PPC-PWR8-NEXT: stw r26, 56(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: stw r27, 60(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: mr r27, r5 +; PPC-PWR8-NEXT: mr r26, r3 +; PPC-PWR8-NEXT: lwz r5, 8(r3) +; PPC-PWR8-NEXT: lwz r4, 4(r3) +; PPC-PWR8-NEXT: stw r28, 64(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: mr r28, r6 +; PPC-PWR8-NEXT: lwz r6, 12(r3) +; PPC-PWR8-NEXT: lwz r3, 0(r3) +; PPC-PWR8-NEXT: stw r24, 48(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: addi r24, r1, 16 +; PPC-PWR8-NEXT: stw r25, 52(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: stw r29, 68(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: mr r29, r7 +; PPC-PWR8-NEXT: addi r25, r1, 32 +; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: mr r30, r8 +; PPC-PWR8-NEXT: .p2align 4 +; PPC-PWR8-NEXT: .LBB5_1: # %atomicrmw.start +; PPC-PWR8-NEXT: # +; PPC-PWR8-NEXT: stw r3, 32(r1) +; PPC-PWR8-NEXT: stw r4, 36(r1) +; PPC-PWR8-NEXT: xor r7, r5, r29 +; PPC-PWR8-NEXT: xor r8, r6, r30 +; PPC-PWR8-NEXT: xor r3, r3, r27 +; PPC-PWR8-NEXT: xor r4, r4, r28 +; PPC-PWR8-NEXT: stw r5, 40(r1) +; PPC-PWR8-NEXT: stw r6, 44(r1) +; PPC-PWR8-NEXT: mr r5, r25 +; PPC-PWR8-NEXT: mr r6, r24 +; PPC-PWR8-NEXT: stw r8, 28(r1) +; PPC-PWR8-NEXT: stw r7, 24(r1) +; PPC-PWR8-NEXT: li r7, 5 +; PPC-PWR8-NEXT: li r8, 5 +; PPC-PWR8-NEXT: stw r4, 20(r1) +; PPC-PWR8-NEXT: stw r3, 16(r1) +; PPC-PWR8-NEXT: li r3, 16 +; PPC-PWR8-NEXT: mr r4, r26 +; PPC-PWR8-NEXT: bl __atomic_compare_exchange +; PPC-PWR8-NEXT: cmplwi r3, 0 +; PPC-PWR8-NEXT: lwz r6, 44(r1) +; PPC-PWR8-NEXT: lwz r5, 40(r1) +; PPC-PWR8-NEXT: lwz r4, 36(r1) +; PPC-PWR8-NEXT: lwz r3, 32(r1) +; PPC-PWR8-NEXT: beq cr0, .LBB5_1 +; PPC-PWR8-NEXT: # %bb.2: # %atomicrmw.end +; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: 
lwz r28, 64(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r26, 56(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r25, 52(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r24, 48(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r0, 84(r1) +; PPC-PWR8-NEXT: addi r1, r1, 80 +; PPC-PWR8-NEXT: mtlr r0 +; PPC-PWR8-NEXT: blr entry: %0 = atomicrmw xor i128* %a, i128 %x seq_cst, align 16 ret i128 %0 @@ -261,6 +848,109 @@ ; PWR7-NEXT: ld r0, 16(r1) ; PWR7-NEXT: mtlr r0 ; PWR7-NEXT: blr +; +; LE-PWR8-LABEL: nand: +; LE-PWR8: # %bb.0: # %entry +; LE-PWR8-NEXT: sync +; LE-PWR8-NEXT: .LBB6_1: # %entry +; LE-PWR8-NEXT: # +; LE-PWR8-NEXT: lqarx r6, 0, r3 +; LE-PWR8-NEXT: nand r9, r4, r7 +; LE-PWR8-NEXT: nand r8, r5, r6 +; LE-PWR8-NEXT: stqcx. r8, 0, r3 +; LE-PWR8-NEXT: bne cr0, .LBB6_1 +; LE-PWR8-NEXT: # %bb.2: # %entry +; LE-PWR8-NEXT: lwsync +; LE-PWR8-NEXT: mr r3, r7 +; LE-PWR8-NEXT: mr r4, r6 +; LE-PWR8-NEXT: blr +; +; AIX64-PWR8-LABEL: nand: +; AIX64-PWR8: # %bb.0: # %entry +; AIX64-PWR8-NEXT: mflr r0 +; AIX64-PWR8-NEXT: std r0, 16(r1) +; AIX64-PWR8-NEXT: stdu r1, -112(r1) +; AIX64-PWR8-NEXT: sync +; AIX64-PWR8-NEXT: bl .__sync_fetch_and_nand_16[PR] +; AIX64-PWR8-NEXT: nop +; AIX64-PWR8-NEXT: lwsync +; AIX64-PWR8-NEXT: addi r1, r1, 112 +; AIX64-PWR8-NEXT: ld r0, 16(r1) +; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: blr +; +; PPC-PWR8-LABEL: nand: +; PPC-PWR8: # %bb.0: # %entry +; PPC-PWR8-NEXT: mflr r0 +; PPC-PWR8-NEXT: stw r0, 4(r1) +; PPC-PWR8-NEXT: stwu r1, -80(r1) +; PPC-PWR8-NEXT: .cfi_def_cfa_offset 80 +; PPC-PWR8-NEXT: .cfi_offset lr, 4 +; PPC-PWR8-NEXT: .cfi_offset r24, -32 +; PPC-PWR8-NEXT: .cfi_offset r25, -28 +; PPC-PWR8-NEXT: .cfi_offset r26, -24 +; PPC-PWR8-NEXT: .cfi_offset r27, -20 +; PPC-PWR8-NEXT: .cfi_offset r28, -16 +; PPC-PWR8-NEXT: .cfi_offset r29, -12 +; PPC-PWR8-NEXT: .cfi_offset r30, -8 +; PPC-PWR8-NEXT: stw r26, 56(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: stw r27, 60(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: mr r27, r5 +; PPC-PWR8-NEXT: mr r26, r3 +; PPC-PWR8-NEXT: lwz r5, 8(r3) +; PPC-PWR8-NEXT: lwz r4, 4(r3) +; PPC-PWR8-NEXT: stw r28, 64(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: mr r28, r6 +; PPC-PWR8-NEXT: lwz r6, 12(r3) +; PPC-PWR8-NEXT: lwz r3, 0(r3) +; PPC-PWR8-NEXT: stw r24, 48(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: addi r24, r1, 16 +; PPC-PWR8-NEXT: stw r25, 52(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: stw r29, 68(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: mr r29, r7 +; PPC-PWR8-NEXT: addi r25, r1, 32 +; PPC-PWR8-NEXT: stw r30, 72(r1) # 4-byte Folded Spill +; PPC-PWR8-NEXT: mr r30, r8 +; PPC-PWR8-NEXT: .p2align 4 +; PPC-PWR8-NEXT: .LBB6_1: # %atomicrmw.start +; PPC-PWR8-NEXT: # +; PPC-PWR8-NEXT: stw r3, 32(r1) +; PPC-PWR8-NEXT: stw r4, 36(r1) +; PPC-PWR8-NEXT: nand r7, r5, r29 +; PPC-PWR8-NEXT: nand r8, r6, r30 +; PPC-PWR8-NEXT: nand r3, r3, r27 +; PPC-PWR8-NEXT: nand r4, r4, r28 +; PPC-PWR8-NEXT: stw r5, 40(r1) +; PPC-PWR8-NEXT: stw r6, 44(r1) +; PPC-PWR8-NEXT: mr r5, r25 +; PPC-PWR8-NEXT: mr r6, r24 +; PPC-PWR8-NEXT: stw r8, 28(r1) +; PPC-PWR8-NEXT: stw r7, 24(r1) +; PPC-PWR8-NEXT: li r7, 5 +; PPC-PWR8-NEXT: li r8, 5 +; PPC-PWR8-NEXT: stw r4, 20(r1) +; PPC-PWR8-NEXT: stw r3, 16(r1) +; PPC-PWR8-NEXT: li r3, 16 +; PPC-PWR8-NEXT: mr r4, r26 +; PPC-PWR8-NEXT: bl __atomic_compare_exchange +; PPC-PWR8-NEXT: cmplwi r3, 0 +; PPC-PWR8-NEXT: lwz r6, 44(r1) +; PPC-PWR8-NEXT: lwz r5, 40(r1) +; PPC-PWR8-NEXT: lwz r4, 36(r1) +; PPC-PWR8-NEXT: lwz r3, 32(r1) +; PPC-PWR8-NEXT: beq cr0, .LBB6_1 +; 
PPC-PWR8-NEXT: # %bb.2: # %atomicrmw.end +; PPC-PWR8-NEXT: lwz r30, 72(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r29, 68(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r28, 64(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r27, 60(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r26, 56(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r25, 52(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r24, 48(r1) # 4-byte Folded Reload +; PPC-PWR8-NEXT: lwz r0, 84(r1) +; PPC-PWR8-NEXT: addi r1, r1, 80 +; PPC-PWR8-NEXT: mtlr r0 +; PPC-PWR8-NEXT: blr entry: %0 = atomicrmw nand i128* %a, i128 %x seq_cst, align 16 ret i128 %0 @@ -306,6 +996,76 @@ ; PWR7-NEXT: ld r0, 16(r1) ; PWR7-NEXT: mtlr r0 ; PWR7-NEXT: blr +; +; LE-PWR8-LABEL: cas_weak_acquire_acquire: +; LE-PWR8: # %bb.0: # %entry +; LE-PWR8-NEXT: .LBB7_1: # %entry +; LE-PWR8-NEXT: # +; LE-PWR8-NEXT: lqarx r8, 0, r3 +; LE-PWR8-NEXT: xor r11, r9, r4 +; LE-PWR8-NEXT: xor r10, r8, r5 +; LE-PWR8-NEXT: or. r11, r11, r10 +; LE-PWR8-NEXT: bne cr0, .LBB7_3 +; LE-PWR8-NEXT: # %bb.2: # %entry +; LE-PWR8-NEXT: # +; LE-PWR8-NEXT: mr r11, r6 +; LE-PWR8-NEXT: mr r10, r7 +; LE-PWR8-NEXT: stqcx. r10, 0, r3 +; LE-PWR8-NEXT: bne cr0, .LBB7_1 +; LE-PWR8-NEXT: b .LBB7_4 +; LE-PWR8-NEXT: .LBB7_3: # %entry +; LE-PWR8-NEXT: stqcx. r8, 0, r3 +; LE-PWR8-NEXT: .LBB7_4: # %entry +; LE-PWR8-NEXT: lwsync +; LE-PWR8-NEXT: mr r3, r9 +; LE-PWR8-NEXT: mr r4, r8 +; LE-PWR8-NEXT: blr +; +; AIX64-PWR8-LABEL: cas_weak_acquire_acquire: +; AIX64-PWR8: # %bb.0: # %entry +; AIX64-PWR8-NEXT: mflr r0 +; AIX64-PWR8-NEXT: std r0, 16(r1) +; AIX64-PWR8-NEXT: stdu r1, -112(r1) +; AIX64-PWR8-NEXT: bl .__sync_val_compare_and_swap_16[PR] +; AIX64-PWR8-NEXT: nop +; AIX64-PWR8-NEXT: lwsync +; AIX64-PWR8-NEXT: addi r1, r1, 112 +; AIX64-PWR8-NEXT: ld r0, 16(r1) +; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: blr +; +; PPC-PWR8-LABEL: cas_weak_acquire_acquire: +; PPC-PWR8: # %bb.0: # %entry +; PPC-PWR8-NEXT: mflr r0 +; PPC-PWR8-NEXT: stw r0, 4(r1) +; PPC-PWR8-NEXT: stwu r1, -48(r1) +; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48 +; PPC-PWR8-NEXT: .cfi_offset lr, 4 +; PPC-PWR8-NEXT: mr r4, r3 +; PPC-PWR8-NEXT: lwz r3, 56(r1) +; PPC-PWR8-NEXT: lwz r11, 60(r1) +; PPC-PWR8-NEXT: stw r8, 44(r1) +; PPC-PWR8-NEXT: stw r7, 40(r1) +; PPC-PWR8-NEXT: li r7, 2 +; PPC-PWR8-NEXT: li r8, 2 +; PPC-PWR8-NEXT: stw r6, 36(r1) +; PPC-PWR8-NEXT: stw r5, 32(r1) +; PPC-PWR8-NEXT: addi r5, r1, 32 +; PPC-PWR8-NEXT: addi r6, r1, 16 +; PPC-PWR8-NEXT: stw r3, 24(r1) +; PPC-PWR8-NEXT: li r3, 16 +; PPC-PWR8-NEXT: stw r11, 28(r1) +; PPC-PWR8-NEXT: stw r10, 20(r1) +; PPC-PWR8-NEXT: stw r9, 16(r1) +; PPC-PWR8-NEXT: bl __atomic_compare_exchange +; PPC-PWR8-NEXT: lwz r6, 44(r1) +; PPC-PWR8-NEXT: lwz r5, 40(r1) +; PPC-PWR8-NEXT: lwz r4, 36(r1) +; PPC-PWR8-NEXT: lwz r3, 32(r1) +; PPC-PWR8-NEXT: lwz r0, 52(r1) +; PPC-PWR8-NEXT: addi r1, r1, 48 +; PPC-PWR8-NEXT: mtlr r0 +; PPC-PWR8-NEXT: blr entry: %0 = cmpxchg weak i128* %a, i128 %cmp, i128 %new acquire acquire %1 = extractvalue { i128, i1 } %0, 0 @@ -351,6 +1111,76 @@ ; PWR7-NEXT: ld r0, 16(r1) ; PWR7-NEXT: mtlr r0 ; PWR7-NEXT: blr +; +; LE-PWR8-LABEL: cas_weak_release_monotonic: +; LE-PWR8: # %bb.0: # %entry +; LE-PWR8-NEXT: lwsync +; LE-PWR8-NEXT: .LBB8_1: # %entry +; LE-PWR8-NEXT: # +; LE-PWR8-NEXT: lqarx r8, 0, r3 +; LE-PWR8-NEXT: xor r11, r9, r4 +; LE-PWR8-NEXT: xor r10, r8, r5 +; LE-PWR8-NEXT: or. r11, r11, r10 +; LE-PWR8-NEXT: bne cr0, .LBB8_3 +; LE-PWR8-NEXT: # %bb.2: # %entry +; LE-PWR8-NEXT: # +; LE-PWR8-NEXT: mr r11, r6 +; LE-PWR8-NEXT: mr r10, r7 +; LE-PWR8-NEXT: stqcx. 
r10, 0, r3 +; LE-PWR8-NEXT: bne cr0, .LBB8_1 +; LE-PWR8-NEXT: b .LBB8_4 +; LE-PWR8-NEXT: .LBB8_3: # %entry +; LE-PWR8-NEXT: stqcx. r8, 0, r3 +; LE-PWR8-NEXT: .LBB8_4: # %entry +; LE-PWR8-NEXT: mr r3, r9 +; LE-PWR8-NEXT: mr r4, r8 +; LE-PWR8-NEXT: blr +; +; AIX64-PWR8-LABEL: cas_weak_release_monotonic: +; AIX64-PWR8: # %bb.0: # %entry +; AIX64-PWR8-NEXT: mflr r0 +; AIX64-PWR8-NEXT: std r0, 16(r1) +; AIX64-PWR8-NEXT: stdu r1, -112(r1) +; AIX64-PWR8-NEXT: lwsync +; AIX64-PWR8-NEXT: bl .__sync_val_compare_and_swap_16[PR] +; AIX64-PWR8-NEXT: nop +; AIX64-PWR8-NEXT: addi r1, r1, 112 +; AIX64-PWR8-NEXT: ld r0, 16(r1) +; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: blr +; +; PPC-PWR8-LABEL: cas_weak_release_monotonic: +; PPC-PWR8: # %bb.0: # %entry +; PPC-PWR8-NEXT: mflr r0 +; PPC-PWR8-NEXT: stw r0, 4(r1) +; PPC-PWR8-NEXT: stwu r1, -48(r1) +; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48 +; PPC-PWR8-NEXT: .cfi_offset lr, 4 +; PPC-PWR8-NEXT: mr r4, r3 +; PPC-PWR8-NEXT: lwz r3, 56(r1) +; PPC-PWR8-NEXT: lwz r11, 60(r1) +; PPC-PWR8-NEXT: stw r8, 44(r1) +; PPC-PWR8-NEXT: stw r7, 40(r1) +; PPC-PWR8-NEXT: li r7, 3 +; PPC-PWR8-NEXT: li r8, 0 +; PPC-PWR8-NEXT: stw r6, 36(r1) +; PPC-PWR8-NEXT: stw r5, 32(r1) +; PPC-PWR8-NEXT: addi r5, r1, 32 +; PPC-PWR8-NEXT: addi r6, r1, 16 +; PPC-PWR8-NEXT: stw r3, 24(r1) +; PPC-PWR8-NEXT: li r3, 16 +; PPC-PWR8-NEXT: stw r11, 28(r1) +; PPC-PWR8-NEXT: stw r10, 20(r1) +; PPC-PWR8-NEXT: stw r9, 16(r1) +; PPC-PWR8-NEXT: bl __atomic_compare_exchange +; PPC-PWR8-NEXT: lwz r6, 44(r1) +; PPC-PWR8-NEXT: lwz r5, 40(r1) +; PPC-PWR8-NEXT: lwz r4, 36(r1) +; PPC-PWR8-NEXT: lwz r3, 32(r1) +; PPC-PWR8-NEXT: lwz r0, 52(r1) +; PPC-PWR8-NEXT: addi r1, r1, 48 +; PPC-PWR8-NEXT: mtlr r0 +; PPC-PWR8-NEXT: blr entry: %0 = cmpxchg weak i128* %a, i128 %cmp, i128 %new release monotonic %1 = extractvalue { i128, i1 } %0, 0 @@ -398,6 +1228,78 @@ ; PWR7-NEXT: ld r0, 16(r1) ; PWR7-NEXT: mtlr r0 ; PWR7-NEXT: blr +; +; LE-PWR8-LABEL: cas_sc_sc: +; LE-PWR8: # %bb.0: # %entry +; LE-PWR8-NEXT: sync +; LE-PWR8-NEXT: .LBB9_1: # %entry +; LE-PWR8-NEXT: # +; LE-PWR8-NEXT: lqarx r8, 0, r3 +; LE-PWR8-NEXT: xor r11, r9, r4 +; LE-PWR8-NEXT: xor r10, r8, r5 +; LE-PWR8-NEXT: or. r11, r11, r10 +; LE-PWR8-NEXT: bne cr0, .LBB9_3 +; LE-PWR8-NEXT: # %bb.2: # %entry +; LE-PWR8-NEXT: # +; LE-PWR8-NEXT: mr r11, r6 +; LE-PWR8-NEXT: mr r10, r7 +; LE-PWR8-NEXT: stqcx. r10, 0, r3 +; LE-PWR8-NEXT: bne cr0, .LBB9_1 +; LE-PWR8-NEXT: b .LBB9_4 +; LE-PWR8-NEXT: .LBB9_3: # %entry +; LE-PWR8-NEXT: stqcx. 
r8, 0, r3 +; LE-PWR8-NEXT: .LBB9_4: # %entry +; LE-PWR8-NEXT: lwsync +; LE-PWR8-NEXT: mr r3, r9 +; LE-PWR8-NEXT: mr r4, r8 +; LE-PWR8-NEXT: blr +; +; AIX64-PWR8-LABEL: cas_sc_sc: +; AIX64-PWR8: # %bb.0: # %entry +; AIX64-PWR8-NEXT: mflr r0 +; AIX64-PWR8-NEXT: std r0, 16(r1) +; AIX64-PWR8-NEXT: stdu r1, -112(r1) +; AIX64-PWR8-NEXT: sync +; AIX64-PWR8-NEXT: bl .__sync_val_compare_and_swap_16[PR] +; AIX64-PWR8-NEXT: nop +; AIX64-PWR8-NEXT: lwsync +; AIX64-PWR8-NEXT: addi r1, r1, 112 +; AIX64-PWR8-NEXT: ld r0, 16(r1) +; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: blr +; +; PPC-PWR8-LABEL: cas_sc_sc: +; PPC-PWR8: # %bb.0: # %entry +; PPC-PWR8-NEXT: mflr r0 +; PPC-PWR8-NEXT: stw r0, 4(r1) +; PPC-PWR8-NEXT: stwu r1, -48(r1) +; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48 +; PPC-PWR8-NEXT: .cfi_offset lr, 4 +; PPC-PWR8-NEXT: mr r4, r3 +; PPC-PWR8-NEXT: lwz r3, 56(r1) +; PPC-PWR8-NEXT: lwz r11, 60(r1) +; PPC-PWR8-NEXT: stw r8, 44(r1) +; PPC-PWR8-NEXT: stw r7, 40(r1) +; PPC-PWR8-NEXT: li r7, 5 +; PPC-PWR8-NEXT: li r8, 5 +; PPC-PWR8-NEXT: stw r6, 36(r1) +; PPC-PWR8-NEXT: stw r5, 32(r1) +; PPC-PWR8-NEXT: addi r5, r1, 32 +; PPC-PWR8-NEXT: addi r6, r1, 16 +; PPC-PWR8-NEXT: stw r3, 24(r1) +; PPC-PWR8-NEXT: li r3, 16 +; PPC-PWR8-NEXT: stw r11, 28(r1) +; PPC-PWR8-NEXT: stw r10, 20(r1) +; PPC-PWR8-NEXT: stw r9, 16(r1) +; PPC-PWR8-NEXT: bl __atomic_compare_exchange +; PPC-PWR8-NEXT: lwz r6, 44(r1) +; PPC-PWR8-NEXT: lwz r5, 40(r1) +; PPC-PWR8-NEXT: lwz r4, 36(r1) +; PPC-PWR8-NEXT: lwz r3, 32(r1) +; PPC-PWR8-NEXT: lwz r0, 52(r1) +; PPC-PWR8-NEXT: addi r1, r1, 48 +; PPC-PWR8-NEXT: mtlr r0 +; PPC-PWR8-NEXT: blr entry: %0 = cmpxchg i128* %a, i128 %cmp, i128 %new seq_cst seq_cst %1 = extractvalue { i128, i1 } %0, 0 @@ -445,6 +1347,78 @@ ; PWR7-NEXT: ld r0, 16(r1) ; PWR7-NEXT: mtlr r0 ; PWR7-NEXT: blr +; +; LE-PWR8-LABEL: cas_acqrel_acquire: +; LE-PWR8: # %bb.0: # %entry +; LE-PWR8-NEXT: lwsync +; LE-PWR8-NEXT: .LBB10_1: # %entry +; LE-PWR8-NEXT: # +; LE-PWR8-NEXT: lqarx r8, 0, r3 +; LE-PWR8-NEXT: xor r11, r9, r4 +; LE-PWR8-NEXT: xor r10, r8, r5 +; LE-PWR8-NEXT: or. r11, r11, r10 +; LE-PWR8-NEXT: bne cr0, .LBB10_3 +; LE-PWR8-NEXT: # %bb.2: # %entry +; LE-PWR8-NEXT: # +; LE-PWR8-NEXT: mr r11, r6 +; LE-PWR8-NEXT: mr r10, r7 +; LE-PWR8-NEXT: stqcx. r10, 0, r3 +; LE-PWR8-NEXT: bne cr0, .LBB10_1 +; LE-PWR8-NEXT: b .LBB10_4 +; LE-PWR8-NEXT: .LBB10_3: # %entry +; LE-PWR8-NEXT: stqcx. 
r8, 0, r3 +; LE-PWR8-NEXT: .LBB10_4: # %entry +; LE-PWR8-NEXT: lwsync +; LE-PWR8-NEXT: mr r3, r9 +; LE-PWR8-NEXT: mr r4, r8 +; LE-PWR8-NEXT: blr +; +; AIX64-PWR8-LABEL: cas_acqrel_acquire: +; AIX64-PWR8: # %bb.0: # %entry +; AIX64-PWR8-NEXT: mflr r0 +; AIX64-PWR8-NEXT: std r0, 16(r1) +; AIX64-PWR8-NEXT: stdu r1, -112(r1) +; AIX64-PWR8-NEXT: lwsync +; AIX64-PWR8-NEXT: bl .__sync_val_compare_and_swap_16[PR] +; AIX64-PWR8-NEXT: nop +; AIX64-PWR8-NEXT: lwsync +; AIX64-PWR8-NEXT: addi r1, r1, 112 +; AIX64-PWR8-NEXT: ld r0, 16(r1) +; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: blr +; +; PPC-PWR8-LABEL: cas_acqrel_acquire: +; PPC-PWR8: # %bb.0: # %entry +; PPC-PWR8-NEXT: mflr r0 +; PPC-PWR8-NEXT: stw r0, 4(r1) +; PPC-PWR8-NEXT: stwu r1, -48(r1) +; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48 +; PPC-PWR8-NEXT: .cfi_offset lr, 4 +; PPC-PWR8-NEXT: mr r4, r3 +; PPC-PWR8-NEXT: lwz r3, 56(r1) +; PPC-PWR8-NEXT: lwz r11, 60(r1) +; PPC-PWR8-NEXT: stw r8, 44(r1) +; PPC-PWR8-NEXT: stw r7, 40(r1) +; PPC-PWR8-NEXT: li r7, 4 +; PPC-PWR8-NEXT: li r8, 2 +; PPC-PWR8-NEXT: stw r6, 36(r1) +; PPC-PWR8-NEXT: stw r5, 32(r1) +; PPC-PWR8-NEXT: addi r5, r1, 32 +; PPC-PWR8-NEXT: addi r6, r1, 16 +; PPC-PWR8-NEXT: stw r3, 24(r1) +; PPC-PWR8-NEXT: li r3, 16 +; PPC-PWR8-NEXT: stw r11, 28(r1) +; PPC-PWR8-NEXT: stw r10, 20(r1) +; PPC-PWR8-NEXT: stw r9, 16(r1) +; PPC-PWR8-NEXT: bl __atomic_compare_exchange +; PPC-PWR8-NEXT: lwz r6, 44(r1) +; PPC-PWR8-NEXT: lwz r5, 40(r1) +; PPC-PWR8-NEXT: lwz r4, 36(r1) +; PPC-PWR8-NEXT: lwz r3, 32(r1) +; PPC-PWR8-NEXT: lwz r0, 52(r1) +; PPC-PWR8-NEXT: addi r1, r1, 48 +; PPC-PWR8-NEXT: mtlr r0 +; PPC-PWR8-NEXT: blr entry: %0 = cmpxchg i128* %a, i128 %cmp, i128 %new acq_rel acquire %1 = extractvalue { i128, i1 } %0, 0 @@ -508,6 +1482,88 @@ ; PWR7-NEXT: ld r0, 16(r1) ; PWR7-NEXT: mtlr r0 ; PWR7-NEXT: blr +; +; LE-PWR8-LABEL: cas_acqrel_acquire_check_succ: +; LE-PWR8: # %bb.0: # %entry +; LE-PWR8-NEXT: lwsync +; LE-PWR8-NEXT: .LBB11_1: # %entry +; LE-PWR8-NEXT: # +; LE-PWR8-NEXT: lqarx r8, 0, r3 +; LE-PWR8-NEXT: xor r11, r9, r4 +; LE-PWR8-NEXT: xor r10, r8, r5 +; LE-PWR8-NEXT: or. r11, r11, r10 +; LE-PWR8-NEXT: bne cr0, .LBB11_3 +; LE-PWR8-NEXT: # %bb.2: # %entry +; LE-PWR8-NEXT: # +; LE-PWR8-NEXT: mr r11, r6 +; LE-PWR8-NEXT: mr r10, r7 +; LE-PWR8-NEXT: stqcx. r10, 0, r3 +; LE-PWR8-NEXT: bne cr0, .LBB11_1 +; LE-PWR8-NEXT: b .LBB11_4 +; LE-PWR8-NEXT: .LBB11_3: # %entry +; LE-PWR8-NEXT: stqcx. 
r8, 0, r3 +; LE-PWR8-NEXT: .LBB11_4: # %entry +; LE-PWR8-NEXT: lwsync +; LE-PWR8-NEXT: xor r3, r5, r8 +; LE-PWR8-NEXT: xor r4, r4, r9 +; LE-PWR8-NEXT: or r3, r4, r3 +; LE-PWR8-NEXT: cntlzd r3, r3 +; LE-PWR8-NEXT: rldicl r3, r3, 58, 63 +; LE-PWR8-NEXT: blr +; +; AIX64-PWR8-LABEL: cas_acqrel_acquire_check_succ: +; AIX64-PWR8: # %bb.0: # %entry +; AIX64-PWR8-NEXT: mflr r0 +; AIX64-PWR8-NEXT: std r0, 16(r1) +; AIX64-PWR8-NEXT: stdu r1, -128(r1) +; AIX64-PWR8-NEXT: std r30, 112(r1) # 8-byte Folded Spill +; AIX64-PWR8-NEXT: std r31, 120(r1) # 8-byte Folded Spill +; AIX64-PWR8-NEXT: mr r31, r5 +; AIX64-PWR8-NEXT: mr r30, r4 +; AIX64-PWR8-NEXT: lwsync +; AIX64-PWR8-NEXT: bl .__sync_val_compare_and_swap_16[PR] +; AIX64-PWR8-NEXT: nop +; AIX64-PWR8-NEXT: xor r3, r3, r30 +; AIX64-PWR8-NEXT: xor r4, r4, r31 +; AIX64-PWR8-NEXT: lwsync +; AIX64-PWR8-NEXT: or r3, r4, r3 +; AIX64-PWR8-NEXT: ld r31, 120(r1) # 8-byte Folded Reload +; AIX64-PWR8-NEXT: ld r30, 112(r1) # 8-byte Folded Reload +; AIX64-PWR8-NEXT: cntlzd r3, r3 +; AIX64-PWR8-NEXT: rldicl r3, r3, 58, 63 +; AIX64-PWR8-NEXT: addi r1, r1, 128 +; AIX64-PWR8-NEXT: ld r0, 16(r1) +; AIX64-PWR8-NEXT: mtlr r0 +; AIX64-PWR8-NEXT: blr +; +; PPC-PWR8-LABEL: cas_acqrel_acquire_check_succ: +; PPC-PWR8: # %bb.0: # %entry +; PPC-PWR8-NEXT: mflr r0 +; PPC-PWR8-NEXT: stw r0, 4(r1) +; PPC-PWR8-NEXT: stwu r1, -48(r1) +; PPC-PWR8-NEXT: .cfi_def_cfa_offset 48 +; PPC-PWR8-NEXT: .cfi_offset lr, 4 +; PPC-PWR8-NEXT: mr r4, r3 +; PPC-PWR8-NEXT: lwz r3, 56(r1) +; PPC-PWR8-NEXT: lwz r11, 60(r1) +; PPC-PWR8-NEXT: stw r8, 44(r1) +; PPC-PWR8-NEXT: stw r7, 40(r1) +; PPC-PWR8-NEXT: li r7, 4 +; PPC-PWR8-NEXT: li r8, 2 +; PPC-PWR8-NEXT: stw r6, 36(r1) +; PPC-PWR8-NEXT: stw r5, 32(r1) +; PPC-PWR8-NEXT: addi r5, r1, 32 +; PPC-PWR8-NEXT: addi r6, r1, 16 +; PPC-PWR8-NEXT: stw r3, 24(r1) +; PPC-PWR8-NEXT: li r3, 16 +; PPC-PWR8-NEXT: stw r11, 28(r1) +; PPC-PWR8-NEXT: stw r10, 20(r1) +; PPC-PWR8-NEXT: stw r9, 16(r1) +; PPC-PWR8-NEXT: bl __atomic_compare_exchange +; PPC-PWR8-NEXT: lwz r0, 52(r1) +; PPC-PWR8-NEXT: addi r1, r1, 48 +; PPC-PWR8-NEXT: mtlr r0 +; PPC-PWR8-NEXT: blr entry: %0 = cmpxchg i128* %a, i128 %cmp, i128 %new acq_rel acquire %1 = extractvalue { i128, i1 } %0, 1