Index: include/clang/Basic/TargetInfo.h
===================================================================
--- include/clang/Basic/TargetInfo.h
+++ include/clang/Basic/TargetInfo.h
@@ -370,6 +370,15 @@
   /// \brief Return the maximum width lock-free atomic operation which can be
   /// inlined given the supported features of the given target.
   unsigned getMaxAtomicInlineWidth() const { return MaxAtomicInlineWidth; }
+  /// \brief Returns true if the given target supports lock-free atomic
+  /// operations at the specified width and alignment.
+  virtual bool hasBuiltinAtomic(uint64_t AtomicSizeInBits,
+                                uint64_t AlignmentInBits) const {
+    return AtomicSizeInBits <= AlignmentInBits &&
+           AtomicSizeInBits <= getMaxAtomicInlineWidth() &&
+           (AtomicSizeInBits <= getCharWidth() ||
+            llvm::isPowerOf2_64(AtomicSizeInBits / getCharWidth()));
+  }
 
   /// \brief Return the maximum vector alignment supported for the given target.
   unsigned getMaxVectorAlign() const { return MaxVectorAlign; }
Index: lib/CodeGen/CGAtomic.cpp
===================================================================
--- lib/CodeGen/CGAtomic.cpp
+++ lib/CodeGen/CGAtomic.cpp
@@ -64,9 +64,8 @@
       if (lvalue.getAlignment().isZero())
         lvalue.setAlignment(AtomicAlign);
 
-      UseLibcall =
-          (AtomicSizeInBits > uint64_t(C.toBits(lvalue.getAlignment())) ||
-           AtomicSizeInBits > C.getTargetInfo().getMaxAtomicInlineWidth());
+      UseLibcall = !C.getTargetInfo().hasBuiltinAtomic(
+          AtomicSizeInBits, C.toBits(lvalue.getAlignment()));
     }
 
     QualType getAtomicType() const { return AtomicTy; }
@@ -74,9 +73,17 @@
     CharUnits getAtomicAlignment() const { return AtomicAlign; }
     CharUnits getValueAlignment() const { return ValueAlign; }
     uint64_t getAtomicSizeInBits() const { return AtomicSizeInBits; }
-    uint64_t getValueSizeInBits() const { return AtomicSizeInBits; }
+    uint64_t getValueSizeInBits() const { return ValueSizeInBits; }
     TypeEvaluationKind getEvaluationKind() const { return EvaluationKind; }
     bool shouldUseLibcall() const { return UseLibcall; }
+    bool shouldUseOptimizedLibcall() const {
+      auto Size =
+          CGF.getContext().toCharUnitsFromBits(AtomicSizeInBits).getQuantity();
+      // Can we use optimized libcalls (see
+      // https://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary)?
+      return UseLibcall && ValueTy->isIntegerType() &&
+             (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16);
+    }
 
     /// Is the atomic size larger than the underlying value type?
     ///
@@ -914,29 +921,47 @@
   // Check whether we should use a library call.
   if (atomics.shouldUseLibcall()) {
-    llvm::Value *tempAddr;
-    if (!resultSlot.isIgnored()) {
-      assert(atomics.getEvaluationKind() == TEK_Aggregate);
-      tempAddr = resultSlot.getAddr();
+    if (atomics.shouldUseOptimizedLibcall()) {
+      // IN __atomic_load_N(IN *mem, int order);
+      CallArgList args;
+      args.add(RValue::get(src.getAddress()),
+               getContext().getPointerType(src.getType()));
+      args.add(RValue::get(llvm::ConstantInt::get(
+                   IntTy, AtomicExpr::AO_ABI_memory_order_seq_cst)),
+               getContext().IntTy);
+      SmallString<128> Buffer;
+      llvm::raw_svector_ostream LibFnName(Buffer);
+      LibFnName << "__atomic_load_"
+                << getContext()
+                       .toCharUnitsFromBits(atomics.getAtomicSizeInBits())
+                       .getQuantity();
+      return emitAtomicLibcall(*this, LibFnName.str(), atomics.getAtomicType(),
+                               args);
     } else {
-      tempAddr = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp");
-    }
+      llvm::Value *tempAddr;
+      if (!resultSlot.isIgnored()) {
+        assert(atomics.getEvaluationKind() == TEK_Aggregate);
+        tempAddr = resultSlot.getAddr();
+      } else {
+        tempAddr = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp");
+      }
 
-    // void __atomic_load(size_t size, void *mem, void *return, int order);
-    CallArgList args;
-    args.add(RValue::get(atomics.getAtomicSizeValue()),
-             getContext().getSizeType());
-    args.add(RValue::get(EmitCastToVoidPtr(src.getAddress())),
-             getContext().VoidPtrTy);
-    args.add(RValue::get(EmitCastToVoidPtr(tempAddr)),
-             getContext().VoidPtrTy);
-    args.add(RValue::get(llvm::ConstantInt::get(
-                 IntTy, AtomicExpr::AO_ABI_memory_order_seq_cst)),
-             getContext().IntTy);
-    emitAtomicLibcall(*this, "__atomic_load", getContext().VoidTy, args);
+      // void __atomic_load(size_t size, void *mem, void *return, int order);
+      CallArgList args;
+      args.add(RValue::get(atomics.getAtomicSizeValue()),
+               getContext().getSizeType());
+      args.add(RValue::get(EmitCastToVoidPtr(src.getAddress())),
+               getContext().VoidPtrTy);
+      args.add(RValue::get(EmitCastToVoidPtr(tempAddr)),
+               getContext().VoidPtrTy);
+      args.add(RValue::get(llvm::ConstantInt::get(
+                   IntTy, AtomicExpr::AO_ABI_memory_order_seq_cst)),
+               getContext().IntTy);
+      emitAtomicLibcall(*this, "__atomic_load", getContext().VoidTy, args);
 
-    // Produce the r-value.
-    return atomics.convertTempToRValue(tempAddr, resultSlot, loc);
+      // Produce the r-value.
+      return atomics.convertTempToRValue(tempAddr, resultSlot, loc);
+    }
   }
 
   // Okay, we're doing this natively.
@@ -965,13 +990,11 @@
   llvm::Type *resultTy = CGM.getTypes().ConvertTypeForMem(valueType);
   if (isa<llvm::IntegerType>(resultTy)) {
     assert(result->getType() == resultTy);
-    result = EmitFromMemory(result, valueType);
-  } else if (isa<llvm::PointerType>(resultTy)) {
-    result = Builder.CreateIntToPtr(result, resultTy);
-  } else {
-    result = Builder.CreateBitCast(result, resultTy);
-  }
-  return RValue::get(result);
+    return RValue::get(EmitFromMemory(result, valueType));
+  } else if (isa<llvm::PointerType>(resultTy))
+    return RValue::get(Builder.CreateIntToPtr(result, resultTy));
+  else if (llvm::CastInst::isBitCastable(result->getType(), resultTy))
+    return RValue::get(Builder.CreateBitCast(result, resultTy));
 }
 
 // Create a temporary. This needs to be big enough to hold the
@@ -1047,6 +1070,33 @@
   return temp;
 }
 
+static llvm::Value *convertRValueToInt(CodeGenFunction &CGF,
+                                       const AtomicInfo &AI, RValue RVal) {
+  // If we've got a scalar value of the right size, try to avoid going
+  // through memory.
+  if (RVal.isScalar() && !AI.hasPadding()) {
+    llvm::Value *Value = RVal.getScalarVal();
+    if (isa<llvm::IntegerType>(Value->getType()))
+      return Value;
+    else {
+      llvm::IntegerType *InputIntTy =
+          llvm::IntegerType::get(CGF.getLLVMContext(), AI.getValueSizeInBits());
+      if (isa<llvm::PointerType>(Value->getType()))
+        return CGF.Builder.CreatePtrToInt(Value, InputIntTy);
+      else if (llvm::BitCastInst::isBitCastable(Value->getType(), InputIntTy))
+        return CGF.Builder.CreateBitCast(Value, InputIntTy);
+    }
+  }
+  // Otherwise, we need to go through memory.
+  // Put the r-value in memory.
+  llvm::Value *Addr = AI.materializeRValue(RVal);
+
+  // Cast the temporary to the atomic int type and pull a value out.
+  Addr = AI.emitCastToAtomicIntPointer(Addr);
+  return CGF.Builder.CreateAlignedLoad(Addr,
+                                       AI.getAtomicAlignment().getQuantity());
+}
+
 /// Emit a store to an l-value of atomic type.
 ///
 /// Note that the r-value is expected to be an r-value *of the atomic
@@ -1069,53 +1119,43 @@
   // Check whether we should use a library call.
   if (atomics.shouldUseLibcall()) {
-    // Produce a source address.
-    llvm::Value *srcAddr = atomics.materializeRValue(rvalue);
+    if (atomics.shouldUseOptimizedLibcall()) {
+      // void __atomic_store_N(IN *mem, IN val, int order);
+      CallArgList args;
+      args.add(RValue::get(dest.getAddress()),
+               getContext().getPointerType(dest.getType()));
+      args.add(rvalue, dest.getType());
+      args.add(RValue::get(llvm::ConstantInt::get(
+                   IntTy, AtomicExpr::AO_ABI_memory_order_seq_cst)),
+               getContext().IntTy);
+      SmallString<128> Buffer;
+      llvm::raw_svector_ostream LibFnName(Buffer);
+      LibFnName << "__atomic_store_"
+                << getContext()
+                       .toCharUnitsFromBits(atomics.getAtomicSizeInBits())
+                       .getQuantity();
+      emitAtomicLibcall(*this, LibFnName.str(), getContext().VoidTy, args);
+    } else {
+      // Produce a source address.
+      llvm::Value *srcAddr = atomics.materializeRValue(rvalue);
 
-    // void __atomic_store(size_t size, void *mem, void *val, int order)
-    CallArgList args;
-    args.add(RValue::get(atomics.getAtomicSizeValue()),
-             getContext().getSizeType());
-    args.add(RValue::get(EmitCastToVoidPtr(dest.getAddress())),
-             getContext().VoidPtrTy);
-    args.add(RValue::get(EmitCastToVoidPtr(srcAddr)),
-             getContext().VoidPtrTy);
-    args.add(RValue::get(llvm::ConstantInt::get(
-                 IntTy, AtomicExpr::AO_ABI_memory_order_seq_cst)),
-             getContext().IntTy);
-    emitAtomicLibcall(*this, "__atomic_store", getContext().VoidTy, args);
+      // void __atomic_store(size_t size, void *mem, void *val, int order)
+      CallArgList args;
+      args.add(RValue::get(atomics.getAtomicSizeValue()),
+               getContext().getSizeType());
+      args.add(RValue::get(EmitCastToVoidPtr(dest.getAddress())),
+               getContext().VoidPtrTy);
+      args.add(RValue::get(EmitCastToVoidPtr(srcAddr)), getContext().VoidPtrTy);
+      args.add(RValue::get(llvm::ConstantInt::get(
+                   IntTy, AtomicExpr::AO_ABI_memory_order_seq_cst)),
+               getContext().IntTy);
+      emitAtomicLibcall(*this, "__atomic_store", getContext().VoidTy, args);
+    }
     return;
   }
 
   // Okay, we're doing this natively.
-  llvm::Value *intValue;
-
-  // If we've got a scalar value of the right size, try to avoid going
-  // through memory.
-  if (rvalue.isScalar() && !atomics.hasPadding()) {
-    llvm::Value *value = rvalue.getScalarVal();
-    if (isa<llvm::IntegerType>(value->getType())) {
-      intValue = value;
-    } else {
-      llvm::IntegerType *inputIntTy =
-        llvm::IntegerType::get(getLLVMContext(), atomics.getValueSizeInBits());
-      if (isa<llvm::PointerType>(value->getType())) {
-        intValue = Builder.CreatePtrToInt(value, inputIntTy);
-      } else {
-        intValue = Builder.CreateBitCast(value, inputIntTy);
-      }
-    }
-
-  // Otherwise, we need to go through memory.
-  } else {
-    // Put the r-value in memory.
-    llvm::Value *addr = atomics.materializeRValue(rvalue);
-
-    // Cast the temporary to the atomic int type and pull a value out.
-    addr = atomics.emitCastToAtomicIntPointer(addr);
-    intValue = Builder.CreateAlignedLoad(addr,
-                                 atomics.getAtomicAlignment().getQuantity());
-  }
+  llvm::Value *intValue = convertRValueToInt(*this, atomics, rvalue);
 
   // Do the atomic store.
   llvm::Value *addr = atomics.emitCastToAtomicIntPointer(dest.getAddress());
@@ -1132,6 +1172,144 @@
   CGM.DecorateInstruction(store, dest.getTBAAInfo());
 }
 
+/// Emit a compare-and-exchange op for atomic type.
+///
+llvm::Value *CodeGenFunction::EmitAtomicCompareExchange(
+    LValue Obj, RValue Expected, RValue Desired, SourceLocation Loc,
+    llvm::AtomicOrdering Success, llvm::AtomicOrdering Failure, bool IsWeak) {
+  // If this is an aggregate r-value, it should agree in type except
+  // maybe for address-space qualification.
+  assert(!Expected.isAggregate() ||
+         Expected.getAggregateAddr()->getType()->getPointerElementType() ==
+             Obj.getAddress()->getType()->getPointerElementType());
+  assert(!Desired.isAggregate() ||
+         Desired.getAggregateAddr()->getType()->getPointerElementType() ==
+             Obj.getAddress()->getType()->getPointerElementType());
+  AtomicInfo Atomics(*this, Obj);
+
+  if (Failure >= Success)
+    // Don't assert on undefined behavior.
+    Failure = llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(Success);
+
+  QualType ValueType = Atomics.getValueType();
+  if (Obj.isVolatileQualified())
+    ValueType = getContext().getVolatileType(ValueType);
+  auto Alignment = Atomics.getValueAlignment();
+  auto *ResType = llvm::StructType::get(ConvertTypeForMem(ValueType),
+                                        Builder.getInt1Ty(), nullptr);
+  // Check whether we should use a library call.
+  if (Atomics.shouldUseLibcall()) {
+    auto *ExpectedAddr = Atomics.materializeRValue(Expected);
+    RValue ResRVal;
+    if (Atomics.shouldUseOptimizedLibcall()) {
+      // bool __atomic_compare_exchange_N(IN *mem, IN *expected, IN desired, int
+      // success, int failure);
+      CallArgList Args;
+      auto PtrTy = getContext().getPointerType(Obj.getType());
+      Args.add(RValue::get(Obj.getAddress()), PtrTy);
+      Args.add(RValue::get(ExpectedAddr), PtrTy);
+      Args.add(Desired, Obj.getType());
+      Args.add(RValue::get(llvm::ConstantInt::get(IntTy, Success)),
+               getContext().IntTy);
+      Args.add(RValue::get(llvm::ConstantInt::get(IntTy, Failure)),
+               getContext().IntTy);
+      SmallString<128> Buffer;
+      llvm::raw_svector_ostream LibFnName(Buffer);
+      LibFnName << "__atomic_compare_exchange_"
+                << getContext()
+                       .toCharUnitsFromBits(Atomics.getAtomicSizeInBits())
+                       .getQuantity();
+      ResRVal =
+          emitAtomicLibcall(*this, LibFnName.str(), getContext().BoolTy, Args);
+    } else {
+      // Produce a source address.
+      auto *DesiredAddr = Atomics.materializeRValue(Desired);
+      // bool __atomic_compare_exchange (size_t size, void *obj, void *expected,
+      // void *desired, int success, int failure);
+      CallArgList Args;
+      Args.add(RValue::get(Atomics.getAtomicSizeValue()),
+               getContext().getSizeType());
+      Args.add(RValue::get(EmitCastToVoidPtr(Obj.getAddress())),
+               getContext().VoidPtrTy);
+      Args.add(RValue::get(EmitCastToVoidPtr(ExpectedAddr)),
+               getContext().VoidPtrTy);
+      Args.add(RValue::get(EmitCastToVoidPtr(DesiredAddr)),
+               getContext().VoidPtrTy);
+      Args.add(RValue::get(llvm::ConstantInt::get(IntTy, Success)),
+               getContext().IntTy);
+      Args.add(RValue::get(llvm::ConstantInt::get(IntTy, Failure)),
+               getContext().IntTy);
+      ResRVal = emitAtomicLibcall(*this, "__atomic_compare_exchange",
+                                  getContext().BoolTy, Args);
+    }
+    auto *Alloc = CreateTempAlloca(ResType, "atomic-cmpxchg-temp");
+    Alloc->setAlignment(Alignment.getQuantity());
+    EmitStoreThroughLValue(
+        EmitLoadOfLValue(LValue::MakeAddr(ExpectedAddr, Atomics.getAtomicType(),
+                                          Atomics.getAtomicAlignment(),
+                                          getContext()),
+                         Loc),
+        LValue::MakeAddr(Builder.CreateStructGEP(Alloc, /*Idx=*/0), ValueType,
+                         Alignment, getContext()));
+    Builder.CreateAlignedStore(ResRVal.getScalarVal(),
+                               Builder.CreateStructGEP(Alloc, /*Idx=*/1),
+                               getContext().getTypeAlign(getContext().BoolTy))
+        ->setVolatile(Obj.isVolatileQualified());
+    auto *Res = Builder.CreateAlignedLoad(Alloc, Alignment.getQuantity());
+    if (Obj.isVolatileQualified())
+      Res->setVolatile(/*V=*/true);
+    return Res;
+  }
+
+  // If we've got a scalar value of the right size, try to avoid going
+  // through memory.
+  auto *ExpectedIntVal = convertRValueToInt(*this, Atomics, Expected);
+  auto *DesiredIntVal = convertRValueToInt(*this, Atomics, Desired);
+
+  // Do the atomic store.
+  auto *Addr = Atomics.emitCastToAtomicIntPointer(Obj.getAddress());
+  auto *Inst = Builder.CreateAtomicCmpXchg(Addr, ExpectedIntVal, DesiredIntVal,
+                                           Success, Failure);
+  // Other decoration.
+  if (Obj.isVolatileQualified())
+    Inst->setVolatile(/*V=*/true);
+  if (IsWeak)
+    Inst->setWeak(/*IsWeak=*/true);
+
+  // Okay, turn that back into the original value type.
+  auto *Result = Builder.CreateExtractValue(Inst, /*Idxs=*/0);
+
+  // The easiest way to do this is to go through memory, but we
+  // try not to in some easy cases.
+  if (Atomics.getEvaluationKind() == TEK_Scalar && !Atomics.hasPadding()) {
+    auto *ResultTy = CGM.getTypes().ConvertTypeForMem(ValueType);
+    if (isa<llvm::IntegerType>(ResultTy)) {
+      assert(Result->getType() == ResultTy);
+      return Inst;
+    }
+  }
+
+  // Create a temporary. This needs to be big enough to hold the
+  // atomic integer.
+  auto *Alloc = CreateTempAlloca(ResType, "atomic-cmpxchg-temp");
+  Alloc->setAlignment(Alignment.getQuantity());
+
+  // Slam the integer into the temporary.
+  auto *CastTemp = Atomics.emitCastToAtomicIntPointer(
+      Builder.CreateStructGEP(Alloc, /*Idx=*/0));
+  Builder.CreateAlignedStore(Result, CastTemp, Alignment.getQuantity())
+      ->setVolatile(Obj.isVolatileQualified());
+  Builder.CreateAlignedStore(Builder.CreateExtractValue(Inst, /*Idxs=*/1),
+                             Builder.CreateStructGEP(Alloc, /*Idx=*/1),
+                             getContext().getTypeAlign(getContext().BoolTy))
+      ->setVolatile(Obj.isVolatileQualified());
+
+  auto *Res = Builder.CreateAlignedLoad(Alloc, Alignment.getQuantity());
+  if (Obj.isVolatileQualified())
+    Res->setVolatile(/*V=*/true);
+  return Res;
+}
+
 void CodeGenFunction::EmitAtomicInit(Expr *init, LValue dest) {
   AtomicInfo atomics(*this, dest);
Index: lib/CodeGen/CGExprScalar.cpp
===================================================================
--- lib/CodeGen/CGExprScalar.cpp
+++ lib/CodeGen/CGExprScalar.cpp
@@ -1789,9 +1789,10 @@
     if (atomicPHI) {
       llvm::BasicBlock *opBB = Builder.GetInsertBlock();
       llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn);
-      llvm::Value *pair = Builder.CreateAtomicCmpXchg(
-          LV.getAddress(), atomicPHI, CGF.EmitToMemory(value, type),
-          llvm::SequentiallyConsistent, llvm::SequentiallyConsistent);
+      llvm::Value *pair = CGF.EmitAtomicCompareExchange(
+          LV, RValue::get(atomicPHI),
+          RValue::get(CGF.EmitToMemory(value, type)),
+          E->getExprLoc());
       llvm::Value *old = Builder.CreateExtractValue(pair, 0);
       llvm::Value *success = Builder.CreateExtractValue(pair, 1);
       atomicPHI->addIncoming(old, opBB);
@@ -2133,9 +2134,10 @@
     if (atomicPHI) {
       llvm::BasicBlock *opBB = Builder.GetInsertBlock();
       llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn);
-      llvm::Value *pair = Builder.CreateAtomicCmpXchg(
-          LHSLV.getAddress(), atomicPHI, CGF.EmitToMemory(Result, LHSTy),
-          llvm::SequentiallyConsistent, llvm::SequentiallyConsistent);
+      llvm::Value *pair = CGF.EmitAtomicCompareExchange(
+          LHSLV, RValue::get(atomicPHI),
+          RValue::get(CGF.EmitToMemory(Result, LHSTy)),
+          E->getExprLoc());
       llvm::Value *old = Builder.CreateExtractValue(pair, 0);
       llvm::Value *success = Builder.CreateExtractValue(pair, 1);
       atomicPHI->addIncoming(old, opBB);
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -2099,6 +2099,12 @@
   void EmitAtomicStore(RValue rvalue, LValue lvalue, bool isInit);
 
+  llvm::Value *EmitAtomicCompareExchange(
+      LValue Obj, RValue Expected, RValue Desired, SourceLocation Loc,
+      llvm::AtomicOrdering Success = llvm::SequentiallyConsistent,
+      llvm::AtomicOrdering Failure = llvm::SequentiallyConsistent,
+      bool IsWeak = false);
+
   /// EmitToMemory - Change a scalar value from its value
   /// representation to its in-memory representation.
llvm::Value *EmitToMemory(llvm::Value *Value, QualType Ty); Index: test/CodeGen/c11atomics.c =================================================================== --- test/CodeGen/c11atomics.c +++ test/CodeGen/c11atomics.c @@ -57,7 +57,7 @@ // CHECK: testdec void testdec(void) { - // CHECK: cmpxchg i8* @b + // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange_1(i8* @b b--; // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst i--; @@ -65,7 +65,7 @@ l--; // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst s--; - // CHECK: cmpxchg i8* @b + // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange_1(i8* @b --b; // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst // CHECK: sub i32 @@ -80,7 +80,7 @@ // CHECK: testaddeq void testaddeq(void) { - // CHECK: cmpxchg i8* @b + // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange_1(i8* @b // CHECK: atomicrmw add i32* @i, i32 42 seq_cst // CHECK: atomicrmw add i64* @l, i64 42 seq_cst // CHECK: atomicrmw add i16* @s, i16 42 seq_cst @@ -92,7 +92,7 @@ // CHECK: testsubeq void testsubeq(void) { - // CHECK: cmpxchg i8* @b + // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange_1(i8* @b // CHECK: atomicrmw sub i32* @i, i32 42 seq_cst // CHECK: atomicrmw sub i64* @l, i64 42 seq_cst // CHECK: atomicrmw sub i16* @s, i16 42 seq_cst @@ -104,7 +104,7 @@ // CHECK: testxoreq void testxoreq(void) { - // CHECK: cmpxchg i8* @b + // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange_1(i8* @b // CHECK: atomicrmw xor i32* @i, i32 42 seq_cst // CHECK: atomicrmw xor i64* @l, i64 42 seq_cst // CHECK: atomicrmw xor i16* @s, i16 42 seq_cst @@ -116,7 +116,7 @@ // CHECK: testoreq void testoreq(void) { - // CHECK: cmpxchg i8* @b + // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange_1(i8* @b // CHECK: atomicrmw or i32* @i, i32 42 seq_cst // CHECK: atomicrmw or i64* @l, i64 42 seq_cst // CHECK: atomicrmw or i16* @s, i16 42 seq_cst @@ -128,7 +128,7 @@ // CHECK: testandeq void testandeq(void) { - // CHECK: cmpxchg i8* @b + // CHECK: call arm_aapcscc zeroext i1 @__atomic_compare_exchange_1(i8* @b // CHECK: atomicrmw and i32* @i, i32 42 seq_cst // CHECK: atomicrmw and i64* @l, i64 42 seq_cst // CHECK: atomicrmw and i16* @s, i16 42 seq_cst Index: test/CodeGen/x86-atomic-long_double.c =================================================================== --- test/CodeGen/x86-atomic-long_double.c +++ test/CodeGen/x86-atomic-long_double.c @@ -0,0 +1,559 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu -target-cpu core2 %s -S -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu core2 %s -S -emit-llvm -o - | FileCheck -check-prefix=CHECK32 %s + +long double testinc(_Atomic long double *addr) { + // CHECK-LABEL: @testinc + // CHECK: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 8 + // CHECK: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 8 + // CHECK: [[INT_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i128* + // CHECK: [[INT_VALUE:%.+]] = load atomic i128* [[INT_ADDR]] seq_cst, align 16 + // CHECK: [[INT_LOAD_ADDR:%.+]] = bitcast x86_fp80* [[LD_ADDR:%.+]] to i128* + // CHECK: store i128 [[INT_VALUE]], i128* [[INT_LOAD_ADDR]], align 16 + // CHECK: [[LD_VALUE:%.+]] = load x86_fp80* [[LD_ADDR]], align 16 + // CHECK: br label %[[ATOMIC_OP:.+]] + // CHECK: [[ATOMIC_OP]] + // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ] + // CHECK: [[INC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]], + // CHECK: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* 
[[OLD_VALUE_ADDR:%.+]] to i8* + // CHECK: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false) + // CHECK: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 16 + // CHECK: [[OLD_INT_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i128* + // CHECK: [[OLD_INT:%.+]] = load i128* [[OLD_INT_ADDR]], align 16 + // CHECK: [[NEW_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR:%.+]] to i8* + // CHECK: call void @llvm.memset.p0i8.i64(i8* [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false) + // CHECK: store x86_fp80 [[INC_VALUE]], x86_fp80* [[NEW_VALUE_ADDR]], align 16 + // CHECK: [[NEW_INT_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR]] to i128* + // CHECK: [[NEW_INT:%.+]] = load i128* [[NEW_INT_ADDR]], align 16 + // CHECK: [[OBJ_INT_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i128* + // CHECK: [[RES:%.+]] = cmpxchg i128* [[OBJ_INT_ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst + // CHECK: [[OLD_VALUE:%.+]] = extractvalue { i128, i1 } [[RES]], 0 + // CHECK: [[OLD_VALUE_RES_PTR:%.+]] = getelementptr inbounds { x86_fp80, i1 }* [[RES_PTR:%.+]], i32 0, i32 0 + // CHECK: [[OLD_VALUE_RES_INT_PTR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_RES_PTR]] to i128* + // CHECK: store i128 [[OLD_VALUE]], i128* [[OLD_VALUE_RES_INT_PTR]], align 16 + // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1 + // CHECK: [[FAIL_SUCCESS_RES_PTR:%.+]] = getelementptr inbounds { x86_fp80, i1 }* [[RES_PTR]], i32 0, i32 1 + // CHECK: store i1 [[FAIL_SUCCESS]], i1* [[FAIL_SUCCESS_RES_PTR]], align 8 + // CHECK: [[RES:%.+]] = load { x86_fp80, i1 }* [[RES_PTR]], align 16 + // CHECK: [[LD_VALUE]] = extractvalue { x86_fp80, i1 } [[RES]], 0 + // CHECK: [[FAIL_SUCCESS_RES:%.+]] = extractvalue { x86_fp80, i1 } [[RES]], 1 + // CHECK: br i1 [[FAIL_SUCCESS_RES]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]] + // CHECK: [[ATOMIC_CONT]] + // CHECK: ret x86_fp80 [[INC_VALUE]] + // CHECK32-LABEL: @testinc + // CHECK32: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 4 + // CHECK32: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 4 + // CHECK32: [[VOID_PTR:%.+]] = bitcast x86_fp80* [[ADDR]] to i8* + // CHECK32: [[TEMP_LD_PTR:%.+]] = bitcast x86_fp80* [[TEMP_LD_ADDR:%.+]] to i8* + // CHECK32: call void @__atomic_load(i32 12, i8* [[VOID_PTR]], i8* [[TEMP_LD_PTR]], i32 5) + // CHECK32: [[LD_VALUE:%.+]] = load x86_fp80* [[TEMP_LD_ADDR]], align 4 + // CHECK32: br label %[[ATOMIC_OP:.+]] + // CHECK32: [[ATOMIC_OP]] + // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ] + // CHECK32: [[INC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]], + // CHECK32: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8* + // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false) + // CHECK32: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 4 + // CHECK32: [[DESIRED_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR:%.+]] to i8* + // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[DESIRED_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false) + // CHECK32: store x86_fp80 [[INC_VALUE]], x86_fp80* [[DESIRED_VALUE_ADDR]], align 4 + // CHECK32: [[OBJ:%.+]] = bitcast x86_fp80* [[ADDR]] to i8* + // CHECK32: [[EXPECTED:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8* + // CHECK32: [[DESIRED:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR]] to i8* + // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 
@__atomic_compare_exchange(i32 12, i8* [[OBJ]], i8* [[EXPECTED]], i8* [[DESIRED]], i32 7, i32 7) + // CHECK32: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8* + // CHECK32: [[OLD_VALUE_VOID_GET_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_GET_ADDR:%.+]] to i8* + // CHECK32: call void @__atomic_load(i32 12, i8* [[OLD_VALUE_VOID_ADDR]], i8* [[OLD_VALUE_VOID_GET_ADDR]], i32 5) + // CHECK32: [[OLD_VALUE:%.+]] = load x86_fp80* [[OLD_VALUE_GET_ADDR]], align 4 + // CHECK32: [[LD_VALUE_RES_PTR:%.+]] = getelementptr inbounds { x86_fp80, i1 }* [[RES_PTR:%.+]], i32 0, i32 0 + // CHECK32: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[LD_VALUE_RES_PTR]], align 4 + // CHECK32: [[FAIL_SUCCESS_RES_PTR:%.+]] = getelementptr inbounds { x86_fp80, i1 }* [[RES_PTR]], i32 0, i32 1 + // CHECK32: store i1 [[FAIL_SUCCESS]], i1* [[FAIL_SUCCESS_RES_PTR]], align 8 + // CHECK32: [[RES:%.+]] = load { x86_fp80, i1 }* [[RES_PTR]], align 4 + // CHECK32: [[LD_VALUE]] = extractvalue { x86_fp80, i1 } [[RES]], 0 + // CHECK32: [[FAIL_SUCCESS_RES:%.+]] = extractvalue { x86_fp80, i1 } [[RES]], 1 + // CHECK32: br i1 [[FAIL_SUCCESS_RES]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]] + // CHECK32: [[ATOMIC_CONT]] + // CHECK32: ret x86_fp80 [[INC_VALUE]] + + return ++*addr; +} + +long double testdec(_Atomic long double *addr) { + // CHECK-LABEL: @testdec + // CHECK: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 8 + // CHECK: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 8 + // CHECK: [[INT_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i128* + // CHECK: [[INT_VALUE:%.+]] = load atomic i128* [[INT_ADDR]] seq_cst, align 16 + // CHECK: [[INT_LOAD_ADDR:%.+]] = bitcast x86_fp80* [[LD_ADDR:%.+]] to i128* + // CHECK: store i128 [[INT_VALUE]], i128* [[INT_LOAD_ADDR]], align 16 + // CHECK: [[ORIG_LD_VALUE:%.+]] = load x86_fp80* [[LD_ADDR]], align 16 + // CHECK: br label %[[ATOMIC_OP:.+]] + // CHECK: [[ATOMIC_OP]] + // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[ORIG_LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ] + // CHECK: [[DEC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]], + // CHECK: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8* + // CHECK: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false) + // CHECK: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 16 + // CHECK: [[OLD_INT_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i128* + // CHECK: [[OLD_INT:%.+]] = load i128* [[OLD_INT_ADDR]], align 16 + // CHECK: [[NEW_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR:%.+]] to i8* + // CHECK: call void @llvm.memset.p0i8.i64(i8* [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false) + // CHECK: store x86_fp80 [[DEC_VALUE]], x86_fp80* [[NEW_VALUE_ADDR]], align 16 + // CHECK: [[NEW_INT_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR]] to i128* + // CHECK: [[NEW_INT:%.+]] = load i128* [[NEW_INT_ADDR]], align 16 + // CHECK: [[OBJ_INT_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i128* + // CHECK: [[RES:%.+]] = cmpxchg i128* [[OBJ_INT_ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst + // CHECK: [[OLD_VALUE:%.+]] = extractvalue { i128, i1 } [[RES]], 0 + // CHECK: [[OLD_VALUE_RES_PTR:%.+]] = getelementptr inbounds { x86_fp80, i1 }* [[RES_PTR:%.+]], i32 0, i32 0 + // CHECK: [[OLD_VALUE_RES_INT_PTR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_RES_PTR]] to i128* + // CHECK: store i128 [[OLD_VALUE]], i128* [[OLD_VALUE_RES_INT_PTR]], align 16 + // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } 
[[RES]], 1 + // CHECK: [[FAIL_SUCCESS_RES_PTR:%.+]] = getelementptr inbounds { x86_fp80, i1 }* [[RES_PTR]], i32 0, i32 1 + // CHECK: store i1 [[FAIL_SUCCESS]], i1* [[FAIL_SUCCESS_RES_PTR]], align 8 + // CHECK: [[RES:%.+]] = load { x86_fp80, i1 }* [[RES_PTR]], align 16 + // CHECK: [[LD_VALUE]] = extractvalue { x86_fp80, i1 } [[RES]], 0 + // CHECK: [[FAIL_SUCCESS_RES:%.+]] = extractvalue { x86_fp80, i1 } [[RES]], 1 + // CHECK: br i1 [[FAIL_SUCCESS_RES]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]] + // CHECK: [[ATOMIC_CONT]] + // CHECK: ret x86_fp80 [[ORIG_LD_VALUE]] + // CHECK32-LABEL: @testdec + // CHECK32: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 4 + // CHECK32: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 4 + // CHECK32: [[VOID_PTR:%.+]] = bitcast x86_fp80* [[ADDR]] to i8* + // CHECK32: [[TEMP_LD_PTR:%.+]] = bitcast x86_fp80* [[TEMP_LD_ADDR:%.+]] to i8* + // CHECK32: call void @__atomic_load(i32 12, i8* [[VOID_PTR]], i8* [[TEMP_LD_PTR]], i32 5) + // CHECK32: [[ORIG_LD_VALUE:%.+]] = load x86_fp80* [[TEMP_LD_ADDR]], align 4 + // CHECK32: br label %[[ATOMIC_OP:.+]] + // CHECK32: [[ATOMIC_OP]] + // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[ORIG_LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ] + // CHECK32: [[DEC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]], + // CHECK32: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8* + // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false) + // CHECK32: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 4 + // CHECK32: [[DESIRED_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR:%.+]] to i8* + // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[DESIRED_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false) + // CHECK32: store x86_fp80 [[DEC_VALUE]], x86_fp80* [[DESIRED_VALUE_ADDR]], align 4 + // CHECK32: [[OBJ:%.+]] = bitcast x86_fp80* [[ADDR]] to i8* + // CHECK32: [[EXPECTED:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8* + // CHECK32: [[DESIRED:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR]] to i8* + // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 12, i8* [[OBJ]], i8* [[EXPECTED]], i8* [[DESIRED]], i32 7, i32 7) + // CHECK32: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8* + // CHECK32: [[OLD_VALUE_VOID_GET_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_GET_ADDR:%.+]] to i8* + // CHECK32: call void @__atomic_load(i32 12, i8* [[OLD_VALUE_VOID_ADDR]], i8* [[OLD_VALUE_VOID_GET_ADDR]], i32 5) + // CHECK32: [[OLD_VALUE:%.+]] = load x86_fp80* [[OLD_VALUE_GET_ADDR]], align 4 + // CHECK32: [[LD_VALUE_RES_PTR:%.+]] = getelementptr inbounds { x86_fp80, i1 }* [[RES_PTR:%.+]], i32 0, i32 0 + // CHECK32: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[LD_VALUE_RES_PTR]], align 4 + // CHECK32: [[FAIL_SUCCESS_RES_PTR:%.+]] = getelementptr inbounds { x86_fp80, i1 }* [[RES_PTR]], i32 0, i32 1 + // CHECK32: store i1 [[FAIL_SUCCESS]], i1* [[FAIL_SUCCESS_RES_PTR]], align 8 + // CHECK32: [[RES:%.+]] = load { x86_fp80, i1 }* [[RES_PTR]], align 4 + // CHECK32: [[LD_VALUE]] = extractvalue { x86_fp80, i1 } [[RES]], 0 + // CHECK32: [[FAIL_SUCCESS_RES:%.+]] = extractvalue { x86_fp80, i1 } [[RES]], 1 + // CHECK32: br i1 [[FAIL_SUCCESS_RES]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]] + // CHECK32: [[ATOMIC_CONT]] + // CHECK32: ret x86_fp80 [[ORIG_LD_VALUE]] + + return (*addr)--; +} + +long double testcompassign(_Atomic long double *addr) { + *addr -= 25; + // CHECK-LABEL: 
@testcompassign + // CHECK: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 8 + // CHECK: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 8 + // CHECK: [[INT_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i128* + // CHECK: [[INT_VALUE:%.+]] = load atomic i128* [[INT_ADDR]] seq_cst, align 16 + // CHECK: [[INT_LOAD_ADDR:%.+]] = bitcast x86_fp80* [[LD_ADDR:%.+]] to i128* + // CHECK: store i128 [[INT_VALUE]], i128* [[INT_LOAD_ADDR]], align 16 + // CHECK: [[LD_VALUE:%.+]] = load x86_fp80* [[LD_ADDR]], align 16 + // CHECK: br label %[[ATOMIC_OP:.+]] + // CHECK: [[ATOMIC_OP]] + // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ] + // CHECK: [[SUB_VALUE:%.+]] = fsub x86_fp80 [[OLD_VALUE]], + // CHECK: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8* + // CHECK: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false) + // CHECK: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 16 + // CHECK: [[OLD_INT_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i128* + // CHECK: [[OLD_INT:%.+]] = load i128* [[OLD_INT_ADDR]], align 16 + // CHECK: [[NEW_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR:%.+]] to i8* + // CHECK: call void @llvm.memset.p0i8.i64(i8* [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false) + // CHECK: store x86_fp80 [[SUB_VALUE]], x86_fp80* [[NEW_VALUE_ADDR]], align 16 + // CHECK: [[NEW_INT_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR]] to i128* + // CHECK: [[NEW_INT:%.+]] = load i128* [[NEW_INT_ADDR]], align 16 + // CHECK: [[OBJ_INT_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i128* + // CHECK: [[RES:%.+]] = cmpxchg i128* [[OBJ_INT_ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst + // CHECK: [[OLD_VALUE:%.+]] = extractvalue { i128, i1 } [[RES]], 0 + // CHECK: [[OLD_VALUE_RES_PTR:%.+]] = getelementptr inbounds { x86_fp80, i1 }* [[RES_PTR:%.+]], i32 0, i32 0 + // CHECK: [[OLD_VALUE_RES_INT_PTR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_RES_PTR]] to i128* + // CHECK: store i128 [[OLD_VALUE]], i128* [[OLD_VALUE_RES_INT_PTR]], align 16 + // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1 + // CHECK: [[FAIL_SUCCESS_RES_PTR:%.+]] = getelementptr inbounds { x86_fp80, i1 }* [[RES_PTR]], i32 0, i32 1 + // CHECK: store i1 [[FAIL_SUCCESS]], i1* [[FAIL_SUCCESS_RES_PTR]], align 8 + // CHECK: [[RES:%.+]] = load { x86_fp80, i1 }* [[RES_PTR]], align 16 + // CHECK: [[LD_VALUE]] = extractvalue { x86_fp80, i1 } [[RES]], 0 + // CHECK: [[FAIL_SUCCESS_RES:%.+]] = extractvalue { x86_fp80, i1 } [[RES]], 1 + // CHECK: br i1 [[FAIL_SUCCESS_RES]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]] + // CHECK: [[ATOMIC_CONT]] + // CHECK: [[ADDR:%.+]] = load x86_fp80** %{{.+}}, align 8 + // CHECK: [[ADDR_INT:%.+]] = bitcast x86_fp80* [[ADDR]] to i128* + // CHECK: [[INT_VAL:%.+]] = load atomic i128* [[ADDR_INT]] seq_cst, align 16 + // CHECK: [[INT_LD_TEMP:%.+]] = bitcast x86_fp80* [[LD_TEMP:%.+]] to i128* + // CHECK: store i128 [[INT_VAL]], i128* [[INT_LD_TEMP:%.+]], align 16 + // CHECK: [[RET_VAL:%.+]] = load x86_fp80* [[LD_TEMP]], align 16 + // CHECK: ret x86_fp80 [[RET_VAL]] + // CHECK32-LABEL: @testcompassign + // CHECK32: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 4 + // CHECK32: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 4 + // CHECK32: [[VOID_PTR:%.+]] = bitcast x86_fp80* [[ADDR]] to i8* + // CHECK32: [[TEMP_LD_PTR:%.+]] = bitcast x86_fp80* [[TEMP_LD_ADDR:%.+]] to i8* + // CHECK32: call void 
@__atomic_load(i32 12, i8* [[VOID_PTR]], i8* [[TEMP_LD_PTR]], i32 5) + // CHECK32: [[LD_VALUE:%.+]] = load x86_fp80* [[TEMP_LD_ADDR]], align 4 + // CHECK32: br label %[[ATOMIC_OP:.+]] + // CHECK32: [[ATOMIC_OP]] + // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ] + // CHECK32: [[INC_VALUE:%.+]] = fsub x86_fp80 [[OLD_VALUE]], + // CHECK32: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8* + // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false) + // CHECK32: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 4 + // CHECK32: [[DESIRED_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR:%.+]] to i8* + // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[DESIRED_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false) + // CHECK32: store x86_fp80 [[INC_VALUE]], x86_fp80* [[DESIRED_VALUE_ADDR]], align 4 + // CHECK32: [[OBJ:%.+]] = bitcast x86_fp80* [[ADDR]] to i8* + // CHECK32: [[EXPECTED:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8* + // CHECK32: [[DESIRED:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR]] to i8* + // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 12, i8* [[OBJ]], i8* [[EXPECTED]], i8* [[DESIRED]], i32 7, i32 7) + // CHECK32: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8* + // CHECK32: [[OLD_VALUE_VOID_GET_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_GET_ADDR:%.+]] to i8* + // CHECK32: call void @__atomic_load(i32 12, i8* [[OLD_VALUE_VOID_ADDR]], i8* [[OLD_VALUE_VOID_GET_ADDR]], i32 5) + // CHECK32: [[OLD_VALUE:%.+]] = load x86_fp80* [[OLD_VALUE_GET_ADDR]], align 4 + // CHECK32: [[LD_VALUE_RES_PTR:%.+]] = getelementptr inbounds { x86_fp80, i1 }* [[RES_PTR:%.+]], i32 0, i32 0 + // CHECK32: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[LD_VALUE_RES_PTR]], align 4 + // CHECK32: [[FAIL_SUCCESS_RES_PTR:%.+]] = getelementptr inbounds { x86_fp80, i1 }* [[RES_PTR]], i32 0, i32 1 + // CHECK32: store i1 [[FAIL_SUCCESS]], i1* [[FAIL_SUCCESS_RES_PTR]], align 8 + // CHECK32: [[RES:%.+]] = load { x86_fp80, i1 }* [[RES_PTR]], align 4 + // CHECK32: [[LD_VALUE]] = extractvalue { x86_fp80, i1 } [[RES]], 0 + // CHECK32: [[FAIL_SUCCESS_RES:%.+]] = extractvalue { x86_fp80, i1 } [[RES]], 1 + // CHECK32: br i1 [[FAIL_SUCCESS_RES]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]] + // CHECK32: [[ATOMIC_CONT]] + // CHECK32: [[ADDR:%.+]] = load x86_fp80** %{{.+}}, align 4 + // CHECK32: [[VOID_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i8* + // CHECK32: [[VOID_GET_ADDR:%.+]] = bitcast x86_fp80* [[GET_ADDR:%.+]] to i8* + // CHECK32: call void @__atomic_load(i32 12, i8* [[VOID_ADDR]], i8* [[VOID_GET_ADDR]], i32 5) + // CHECK32: [[RET_VAL:%.+]] = load x86_fp80* [[GET_ADDR]], align 4 + // CHECK32: ret x86_fp80 [[RET_VAL]] + return *addr; +} + +long double testassign(_Atomic long double *addr) { + // CHECK-LABEL: @testassign + // CHECK: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 8 + // CHECK: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 8 + // CHECK: [[STORE_TEMP_VOID_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR:%.+]] to i8* + // CHECK: call void @llvm.memset.p0i8.i64(i8* [[STORE_TEMP_VOID_PTR]], i8 0, i64 16, i32 16, i1 false) + // CHECK: store x86_fp80 {{.+}}, x86_fp80* [[STORE_TEMP_PTR]], align 16 + // CHECK: [[STORE_TEMP_INT_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR]] to i128* + // CHECK: [[STORE_TEMP_INT:%.+]] = load i128* [[STORE_TEMP_INT_PTR]], align 16 + // CHECK: 
[[ADDR_INT:%.+]] = bitcast x86_fp80* [[ADDR]] to i128* + // CHECK: store atomic i128 [[STORE_TEMP_INT]], i128* [[ADDR_INT]] seq_cst, align 16 + // CHECK32-LABEL: @testassign + // CHECK32: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 4 + // CHECK32: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 4 + // CHECK32: [[STORE_TEMP_VOID_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR:%.+]] to i8* + // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[STORE_TEMP_VOID_PTR]], i8 0, i64 12, i32 4, i1 false) + // CHECK32: store x86_fp80 {{.+}}, x86_fp80* [[STORE_TEMP_PTR]], align 4 + // CHECK32: [[ADDR_VOID:%.+]] = bitcast x86_fp80* [[ADDR]] to i8* + // CHECK32: [[STORE_TEMP_VOID_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR]] to i8* + // CHECK32: call void @__atomic_store(i32 12, i8* [[ADDR_VOID]], i8* [[STORE_TEMP_VOID_PTR]], i32 5) + *addr = 115; + // CHECK: [[ADDR:%.+]] = load x86_fp80** %{{.+}}, align 8 + // CHECK: [[ADDR_INT:%.+]] = bitcast x86_fp80* [[ADDR]] to i128* + // CHECK: [[INT_VAL:%.+]] = load atomic i128* [[ADDR_INT]] seq_cst, align 16 + // CHECK: [[INT_LD_TEMP:%.+]] = bitcast x86_fp80* [[LD_TEMP:%.+]] to i128* + // CHECK: store i128 [[INT_VAL]], i128* [[INT_LD_TEMP:%.+]], align 16 + // CHECK: [[RET_VAL:%.+]] = load x86_fp80* [[LD_TEMP]], align 16 + // CHECK: ret x86_fp80 [[RET_VAL]] + // CHECK32: [[ADDR:%.+]] = load x86_fp80** %{{.+}}, align 4 + // CHECK32: [[VOID_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i8* + // CHECK32: [[VOID_LD_TEMP:%.+]] = bitcast x86_fp80* [[LD_TEMP:%.+]] to i8* + // CHECK32: call void @__atomic_load(i32 12, i8* [[VOID_ADDR]], i8* [[VOID_LD_TEMP]], i32 5) + // CHECK32: [[RET_VAL:%.+]] = load x86_fp80* [[LD_TEMP]], align 4 + // CHECK32: ret x86_fp80 [[RET_VAL]] + + return *addr; +} + +long double test_volatile_inc(volatile _Atomic long double *addr) { + // CHECK-LABEL: @test_volatile_inc + // CHECK: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 8 + // CHECK: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 8 + // CHECK: [[INT_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i128* + // CHECK: [[INT_VALUE:%.+]] = load atomic volatile i128* [[INT_ADDR]] seq_cst, align 16 + // CHECK: [[INT_LOAD_ADDR:%.+]] = bitcast x86_fp80* [[LD_ADDR:%.+]] to i128* + // CHECK: store i128 [[INT_VALUE]], i128* [[INT_LOAD_ADDR]], align 16 + // CHECK: [[LD_VALUE:%.+]] = load x86_fp80* [[LD_ADDR]], align 16 + // CHECK: br label %[[ATOMIC_OP:.+]] + // CHECK: [[ATOMIC_OP]] + // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ] + // CHECK: [[INC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]], + // CHECK: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8* + // CHECK: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false) + // CHECK: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 16 + // CHECK: [[OLD_INT_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i128* + // CHECK: [[OLD_INT:%.+]] = load i128* [[OLD_INT_ADDR]], align 16 + // CHECK: [[NEW_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR:%.+]] to i8* + // CHECK: call void @llvm.memset.p0i8.i64(i8* [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false) + // CHECK: store x86_fp80 [[INC_VALUE]], x86_fp80* [[NEW_VALUE_ADDR]], align 16 + // CHECK: [[NEW_INT_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR]] to i128* + // CHECK: [[NEW_INT:%.+]] = load i128* [[NEW_INT_ADDR]], align 16 + // CHECK: [[OBJ_INT_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i128* + // CHECK: 
[[RES:%.+]] = cmpxchg volatile i128* [[OBJ_INT_ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst + // CHECK: [[OLD_VALUE:%.+]] = extractvalue { i128, i1 } [[RES]], 0 + // CHECK: [[OLD_VALUE_RES_PTR:%.+]] = getelementptr inbounds { x86_fp80, i1 }* [[RES_PTR:%.+]], i32 0, i32 0 + // CHECK: [[OLD_VALUE_RES_INT_PTR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_RES_PTR]] to i128* + // CHECK: store volatile i128 [[OLD_VALUE]], i128* [[OLD_VALUE_RES_INT_PTR]], align 16 + // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1 + // CHECK: [[FAIL_SUCCESS_RES_PTR:%.+]] = getelementptr inbounds { x86_fp80, i1 }* [[RES_PTR]], i32 0, i32 1 + // CHECK: store volatile i1 [[FAIL_SUCCESS]], i1* [[FAIL_SUCCESS_RES_PTR]], align 8 + // CHECK: [[RES:%.+]] = load volatile { x86_fp80, i1 }* [[RES_PTR]], align 16 + // CHECK: [[LD_VALUE]] = extractvalue { x86_fp80, i1 } [[RES]], 0 + // CHECK: [[FAIL_SUCCESS_RES:%.+]] = extractvalue { x86_fp80, i1 } [[RES]], 1 + // CHECK: br i1 [[FAIL_SUCCESS_RES]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]] + // CHECK: [[ATOMIC_CONT]] + // CHECK: ret x86_fp80 [[INC_VALUE]] + // CHECK32-LABEL: @test_volatile_inc + // CHECK32: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 4 + // CHECK32: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 4 + // CHECK32: [[VOID_PTR:%.+]] = bitcast x86_fp80* [[ADDR]] to i8* + // CHECK32: [[TEMP_LD_PTR:%.+]] = bitcast x86_fp80* [[TEMP_LD_ADDR:%.+]] to i8* + // CHECK32: call void @__atomic_load(i32 12, i8* [[VOID_PTR]], i8* [[TEMP_LD_PTR]], i32 5) + // CHECK32: [[LD_VALUE:%.+]] = load x86_fp80* [[TEMP_LD_ADDR]], align 4 + // CHECK32: br label %[[ATOMIC_OP:.+]] + // CHECK32: [[ATOMIC_OP]] + // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ] + // CHECK32: [[INC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]], + // CHECK32: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8* + // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false) + // CHECK32: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 4 + // CHECK32: [[DESIRED_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR:%.+]] to i8* + // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[DESIRED_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false) + // CHECK32: store x86_fp80 [[INC_VALUE]], x86_fp80* [[DESIRED_VALUE_ADDR]], align 4 + // CHECK32: [[OBJ:%.+]] = bitcast x86_fp80* [[ADDR]] to i8* + // CHECK32: [[EXPECTED:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8* + // CHECK32: [[DESIRED:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR]] to i8* + // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 12, i8* [[OBJ]], i8* [[EXPECTED]], i8* [[DESIRED]], i32 7, i32 7) + // CHECK32: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8* + // CHECK32: [[OLD_VALUE_VOID_GET_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_GET_ADDR:%.+]] to i8* + // CHECK32: call void @__atomic_load(i32 12, i8* [[OLD_VALUE_VOID_ADDR]], i8* [[OLD_VALUE_VOID_GET_ADDR]], i32 5) + // CHECK32: [[OLD_VALUE:%.+]] = load x86_fp80* [[OLD_VALUE_GET_ADDR]], align 4 + // CHECK32: [[LD_VALUE_RES_PTR:%.+]] = getelementptr inbounds { x86_fp80, i1 }* [[RES_PTR:%.+]], i32 0, i32 0 + // CHECK32: store volatile x86_fp80 [[OLD_VALUE]], x86_fp80* [[LD_VALUE_RES_PTR]], align 4 + // CHECK32: [[FAIL_SUCCESS_RES_PTR:%.+]] = getelementptr inbounds { x86_fp80, i1 }* [[RES_PTR]], i32 0, i32 1 + // CHECK32: store volatile i1 
[[FAIL_SUCCESS]], i1* [[FAIL_SUCCESS_RES_PTR]], align 8 + // CHECK32: [[RES:%.+]] = load volatile { x86_fp80, i1 }* [[RES_PTR]], align 4 + // CHECK32: [[LD_VALUE]] = extractvalue { x86_fp80, i1 } [[RES]], 0 + // CHECK32: [[FAIL_SUCCESS_RES:%.+]] = extractvalue { x86_fp80, i1 } [[RES]], 1 + // CHECK32: br i1 [[FAIL_SUCCESS_RES]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]] + // CHECK32: [[ATOMIC_CONT]] + // CHECK32: ret x86_fp80 [[INC_VALUE]] + return ++*addr; +} + +long double test_volatile_dec(volatile _Atomic long double *addr) { + // CHECK-LABEL: @test_volatile_dec + // CHECK: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 8 + // CHECK: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 8 + // CHECK: [[INT_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i128* + // CHECK: [[INT_VALUE:%.+]] = load atomic volatile i128* [[INT_ADDR]] seq_cst, align 16 + // CHECK: [[INT_LOAD_ADDR:%.+]] = bitcast x86_fp80* [[LD_ADDR:%.+]] to i128* + // CHECK: store i128 [[INT_VALUE]], i128* [[INT_LOAD_ADDR]], align 16 + // CHECK: [[ORIG_LD_VALUE:%.+]] = load x86_fp80* [[LD_ADDR]], align 16 + // CHECK: br label %[[ATOMIC_OP:.+]] + // CHECK: [[ATOMIC_OP]] + // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[ORIG_LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ] + // CHECK: [[DEC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]], + // CHECK: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8* + // CHECK: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false) + // CHECK: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 16 + // CHECK: [[OLD_INT_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i128* + // CHECK: [[OLD_INT:%.+]] = load i128* [[OLD_INT_ADDR]], align 16 + // CHECK: [[NEW_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR:%.+]] to i8* + // CHECK: call void @llvm.memset.p0i8.i64(i8* [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false) + // CHECK: store x86_fp80 [[DEC_VALUE]], x86_fp80* [[NEW_VALUE_ADDR]], align 16 + // CHECK: [[NEW_INT_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR]] to i128* + // CHECK: [[NEW_INT:%.+]] = load i128* [[NEW_INT_ADDR]], align 16 + // CHECK: [[OBJ_INT_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i128* + // CHECK: [[RES:%.+]] = cmpxchg volatile i128* [[OBJ_INT_ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst + // CHECK: [[OLD_VALUE:%.+]] = extractvalue { i128, i1 } [[RES]], 0 + // CHECK: [[OLD_VALUE_RES_PTR:%.+]] = getelementptr inbounds { x86_fp80, i1 }* [[RES_PTR:%.+]], i32 0, i32 0 + // CHECK: [[OLD_VALUE_RES_INT_PTR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_RES_PTR]] to i128* + // CHECK: store volatile i128 [[OLD_VALUE]], i128* [[OLD_VALUE_RES_INT_PTR]], align 16 + // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1 + // CHECK: [[FAIL_SUCCESS_RES_PTR:%.+]] = getelementptr inbounds { x86_fp80, i1 }* [[RES_PTR]], i32 0, i32 1 + // CHECK: store volatile i1 [[FAIL_SUCCESS]], i1* [[FAIL_SUCCESS_RES_PTR]], align 8 + // CHECK: [[RES:%.+]] = load volatile { x86_fp80, i1 }* [[RES_PTR]], align 16 + // CHECK: [[LD_VALUE]] = extractvalue { x86_fp80, i1 } [[RES]], 0 + // CHECK: [[FAIL_SUCCESS_RES:%.+]] = extractvalue { x86_fp80, i1 } [[RES]], 1 + // CHECK: br i1 [[FAIL_SUCCESS_RES]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]] + // CHECK: [[ATOMIC_CONT]] + // CHECK: ret x86_fp80 [[ORIG_LD_VALUE]] + // CHECK32-LABEL: @test_volatile_dec + // CHECK32: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 4 + // CHECK32: [[ADDR:%.+]] = load 
x86_fp80** [[ADDR_ADDR]], align 4 + // CHECK32: [[VOID_PTR:%.+]] = bitcast x86_fp80* [[ADDR]] to i8* + // CHECK32: [[TEMP_LD_PTR:%.+]] = bitcast x86_fp80* [[TEMP_LD_ADDR:%.+]] to i8* + // CHECK32: call void @__atomic_load(i32 12, i8* [[VOID_PTR]], i8* [[TEMP_LD_PTR]], i32 5) + // CHECK32: [[ORIG_LD_VALUE:%.+]] = load x86_fp80* [[TEMP_LD_ADDR]], align 4 + // CHECK32: br label %[[ATOMIC_OP:.+]] + // CHECK32: [[ATOMIC_OP]] + // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[ORIG_LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ] + // CHECK32: [[DEC_VALUE:%.+]] = fadd x86_fp80 [[OLD_VALUE]], + // CHECK32: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8* + // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false) + // CHECK32: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 4 + // CHECK32: [[DESIRED_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR:%.+]] to i8* + // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[DESIRED_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false) + // CHECK32: store x86_fp80 [[DEC_VALUE]], x86_fp80* [[DESIRED_VALUE_ADDR]], align 4 + // CHECK32: [[OBJ:%.+]] = bitcast x86_fp80* [[ADDR]] to i8* + // CHECK32: [[EXPECTED:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8* + // CHECK32: [[DESIRED:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR]] to i8* + // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 12, i8* [[OBJ]], i8* [[EXPECTED]], i8* [[DESIRED]], i32 7, i32 7) + // CHECK32: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8* + // CHECK32: [[OLD_VALUE_VOID_GET_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_GET_ADDR:%.+]] to i8* + // CHECK32: call void @__atomic_load(i32 12, i8* [[OLD_VALUE_VOID_ADDR]], i8* [[OLD_VALUE_VOID_GET_ADDR]], i32 5) + // CHECK32: [[OLD_VALUE:%.+]] = load x86_fp80* [[OLD_VALUE_GET_ADDR]], align 4 + // CHECK32: [[LD_VALUE_RES_PTR:%.+]] = getelementptr inbounds { x86_fp80, i1 }* [[RES_PTR:%.+]], i32 0, i32 0 + // CHECK32: store volatile x86_fp80 [[OLD_VALUE]], x86_fp80* [[LD_VALUE_RES_PTR]], align 4 + // CHECK32: [[FAIL_SUCCESS_RES_PTR:%.+]] = getelementptr inbounds { x86_fp80, i1 }* [[RES_PTR]], i32 0, i32 1 + // CHECK32: store volatile i1 [[FAIL_SUCCESS]], i1* [[FAIL_SUCCESS_RES_PTR]], align 8 + // CHECK32: [[RES:%.+]] = load volatile { x86_fp80, i1 }* [[RES_PTR]], align 4 + // CHECK32: [[LD_VALUE]] = extractvalue { x86_fp80, i1 } [[RES]], 0 + // CHECK32: [[FAIL_SUCCESS_RES:%.+]] = extractvalue { x86_fp80, i1 } [[RES]], 1 + // CHECK32: br i1 [[FAIL_SUCCESS_RES]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]] + // CHECK32: [[ATOMIC_CONT]] + // CHECK32: ret x86_fp80 [[ORIG_LD_VALUE]] + return (*addr)--; +} + +long double test_volatile_compassign(volatile _Atomic long double *addr) { + *addr -= 25; + // CHECK-LABEL: @test_volatile_compassign + // CHECK: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 8 + // CHECK: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 8 + // CHECK: [[INT_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i128* + // CHECK: [[INT_VALUE:%.+]] = load atomic volatile i128* [[INT_ADDR]] seq_cst, align 16 + // CHECK: [[INT_LOAD_ADDR:%.+]] = bitcast x86_fp80* [[LD_ADDR:%.+]] to i128* + // CHECK: store i128 [[INT_VALUE]], i128* [[INT_LOAD_ADDR]], align 16 + // CHECK: [[LD_VALUE:%.+]] = load x86_fp80* [[LD_ADDR]], align 16 + // CHECK: br label %[[ATOMIC_OP:.+]] + // CHECK: [[ATOMIC_OP]] + // CHECK: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ 
[[LD_VALUE:%.+]], %[[ATOMIC_OP]] ] + // CHECK: [[SUB_VALUE:%.+]] = fsub x86_fp80 [[OLD_VALUE]], + // CHECK: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8* + // CHECK: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false) + // CHECK: store x86_fp80 [[OLD_VALUE]], x86_fp80* [[OLD_VALUE_ADDR]], align 16 + // CHECK: [[OLD_INT_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i128* + // CHECK: [[OLD_INT:%.+]] = load i128* [[OLD_INT_ADDR]], align 16 + // CHECK: [[NEW_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR:%.+]] to i8* + // CHECK: call void @llvm.memset.p0i8.i64(i8* [[NEW_VALUE_VOID_ADDR]], i8 0, i64 16, i32 16, i1 false) + // CHECK: store x86_fp80 [[SUB_VALUE]], x86_fp80* [[NEW_VALUE_ADDR]], align 16 + // CHECK: [[NEW_INT_ADDR:%.+]] = bitcast x86_fp80* [[NEW_VALUE_ADDR]] to i128* + // CHECK: [[NEW_INT:%.+]] = load i128* [[NEW_INT_ADDR]], align 16 + // CHECK: [[OBJ_INT_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i128* + // CHECK: [[RES:%.+]] = cmpxchg volatile i128* [[OBJ_INT_ADDR]], i128 [[OLD_INT]], i128 [[NEW_INT]] seq_cst seq_cst + // CHECK: [[OLD_VALUE:%.+]] = extractvalue { i128, i1 } [[RES]], 0 + // CHECK: [[OLD_VALUE_RES_PTR:%.+]] = getelementptr inbounds { x86_fp80, i1 }* [[RES_PTR:%.+]], i32 0, i32 0 + // CHECK: [[OLD_VALUE_RES_INT_PTR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_RES_PTR]] to i128* + // CHECK: store volatile i128 [[OLD_VALUE]], i128* [[OLD_VALUE_RES_INT_PTR]], align 16 + // CHECK: [[FAIL_SUCCESS:%.+]] = extractvalue { i128, i1 } [[RES]], 1 + // CHECK: [[FAIL_SUCCESS_RES_PTR:%.+]] = getelementptr inbounds { x86_fp80, i1 }* [[RES_PTR]], i32 0, i32 1 + // CHECK: store volatile i1 [[FAIL_SUCCESS]], i1* [[FAIL_SUCCESS_RES_PTR]], align 8 + // CHECK: [[RES:%.+]] = load volatile { x86_fp80, i1 }* [[RES_PTR]], align 16 + // CHECK: [[LD_VALUE]] = extractvalue { x86_fp80, i1 } [[RES]], 0 + // CHECK: [[FAIL_SUCCESS_RES:%.+]] = extractvalue { x86_fp80, i1 } [[RES]], 1 + // CHECK: br i1 [[FAIL_SUCCESS_RES]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]] + // CHECK: [[ATOMIC_CONT]] + // CHECK: [[ADDR:%.+]] = load x86_fp80** %{{.+}}, align 8 + // CHECK: [[ADDR_INT:%.+]] = bitcast x86_fp80* [[ADDR]] to i128* + // CHECK: [[INT_VAL:%.+]] = load atomic volatile i128* [[ADDR_INT]] seq_cst, align 16 + // CHECK: [[INT_LD_TEMP:%.+]] = bitcast x86_fp80* [[LD_TEMP:%.+]] to i128* + // CHECK: store i128 [[INT_VAL]], i128* [[INT_LD_TEMP:%.+]], align 16 + // CHECK: [[RET_VAL:%.+]] = load x86_fp80* [[LD_TEMP]], align 16 + // CHECK32-LABEL: @test_volatile_compassign + // CHECK32: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 4 + // CHECK32: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 4 + // CHECK32: [[VOID_PTR:%.+]] = bitcast x86_fp80* [[ADDR]] to i8* + // CHECK32: [[TEMP_LD_PTR:%.+]] = bitcast x86_fp80* [[TEMP_LD_ADDR:%.+]] to i8* + // CHECK32: call void @__atomic_load(i32 12, i8* [[VOID_PTR]], i8* [[TEMP_LD_PTR]], i32 5) + // CHECK32: [[LD_VALUE:%.+]] = load x86_fp80* [[TEMP_LD_ADDR]], align 4 + // CHECK32: br label %[[ATOMIC_OP:.+]] + // CHECK32: [[ATOMIC_OP]] + // CHECK32: [[OLD_VALUE:%.+]] = phi x86_fp80 [ [[LD_VALUE]], %{{.+}} ], [ [[LD_VALUE:%.+]], %[[ATOMIC_OP]] ] + // CHECK32: [[INC_VALUE:%.+]] = fsub x86_fp80 [[OLD_VALUE]], + // CHECK32: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR:%.+]] to i8* + // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[OLD_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false) + // CHECK32: store x86_fp80 [[OLD_VALUE]], x86_fp80* 
[[OLD_VALUE_ADDR]], align 4 + // CHECK32: [[DESIRED_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR:%.+]] to i8* + // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[DESIRED_VALUE_VOID_ADDR]], i8 0, i64 12, i32 4, i1 false) + // CHECK32: store x86_fp80 [[INC_VALUE]], x86_fp80* [[DESIRED_VALUE_ADDR]], align 4 + // CHECK32: [[OBJ:%.+]] = bitcast x86_fp80* [[ADDR]] to i8* + // CHECK32: [[EXPECTED:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8* + // CHECK32: [[DESIRED:%.+]] = bitcast x86_fp80* [[DESIRED_VALUE_ADDR]] to i8* + // CHECK32: [[FAIL_SUCCESS:%.+]] = call zeroext i1 @__atomic_compare_exchange(i32 12, i8* [[OBJ]], i8* [[EXPECTED]], i8* [[DESIRED]], i32 7, i32 7) + // CHECK32: [[OLD_VALUE_VOID_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_ADDR]] to i8* + // CHECK32: [[OLD_VALUE_VOID_GET_ADDR:%.+]] = bitcast x86_fp80* [[OLD_VALUE_GET_ADDR:%.+]] to i8* + // CHECK32: call void @__atomic_load(i32 12, i8* [[OLD_VALUE_VOID_ADDR]], i8* [[OLD_VALUE_VOID_GET_ADDR]], i32 5) + // CHECK32: [[OLD_VALUE:%.+]] = load x86_fp80* [[OLD_VALUE_GET_ADDR]], align 4 + // CHECK32: [[LD_VALUE_RES_PTR:%.+]] = getelementptr inbounds { x86_fp80, i1 }* [[RES_PTR:%.+]], i32 0, i32 0 + // CHECK32: store volatile x86_fp80 [[OLD_VALUE]], x86_fp80* [[LD_VALUE_RES_PTR]], align 4 + // CHECK32: [[FAIL_SUCCESS_RES_PTR:%.+]] = getelementptr inbounds { x86_fp80, i1 }* [[RES_PTR]], i32 0, i32 1 + // CHECK32: store volatile i1 [[FAIL_SUCCESS]], i1* [[FAIL_SUCCESS_RES_PTR]], align 8 + // CHECK32: [[RES:%.+]] = load volatile { x86_fp80, i1 }* [[RES_PTR]], align 4 + // CHECK32: [[LD_VALUE]] = extractvalue { x86_fp80, i1 } [[RES]], 0 + // CHECK32: [[FAIL_SUCCESS_RES:%.+]] = extractvalue { x86_fp80, i1 } [[RES]], 1 + // CHECK32: br i1 [[FAIL_SUCCESS_RES]], label %[[ATOMIC_CONT:.+]], label %[[ATOMIC_OP]] + // CHECK32: [[ATOMIC_CONT]] + // CHECK32: [[ADDR:%.+]] = load x86_fp80** %{{.+}}, align 4 + // CHECK32: [[VOID_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i8* + // CHECK32: [[VOID_GET_ADDR:%.+]] = bitcast x86_fp80* [[GET_ADDR:%.+]] to i8* + // CHECK32: call void @__atomic_load(i32 12, i8* [[VOID_ADDR]], i8* [[VOID_GET_ADDR]], i32 5) + // CHECK32: [[RET_VAL:%.+]] = load x86_fp80* [[GET_ADDR]], align 4 + // CHECK32: ret x86_fp80 [[RET_VAL]] + return *addr; +} + +long double test_volatile_assign(volatile _Atomic long double *addr) { + // CHECK-LABEL: @test_volatile_assign + // CHECK: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 8 + // CHECK: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 8 + // CHECK: [[STORE_TEMP_VOID_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR:%.+]] to i8* + // CHECK: call void @llvm.memset.p0i8.i64(i8* [[STORE_TEMP_VOID_PTR]], i8 0, i64 16, i32 16, i1 false) + // CHECK: store x86_fp80 {{.+}}, x86_fp80* [[STORE_TEMP_PTR]], align 16 + // CHECK: [[STORE_TEMP_INT_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR]] to i128* + // CHECK: [[STORE_TEMP_INT:%.+]] = load i128* [[STORE_TEMP_INT_PTR]], align 16 + // CHECK: [[ADDR_INT:%.+]] = bitcast x86_fp80* [[ADDR]] to i128* + // CHECK: store atomic volatile i128 [[STORE_TEMP_INT]], i128* [[ADDR_INT]] seq_cst, align 16 + // CHECK32-LABEL: @test_volatile_assign + // CHECK32: store x86_fp80* %{{.+}}, x86_fp80** [[ADDR_ADDR:%.+]], align 4 + // CHECK32: [[ADDR:%.+]] = load x86_fp80** [[ADDR_ADDR]], align 4 + // CHECK32: [[STORE_TEMP_VOID_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR:%.+]] to i8* + // CHECK32: call void @llvm.memset.p0i8.i64(i8* [[STORE_TEMP_VOID_PTR]], i8 0, i64 12, i32 4, i1 false) + // CHECK32: store x86_fp80 {{.+}}, 
x86_fp80* [[STORE_TEMP_PTR]], align 4 + // CHECK32: [[ADDR_VOID:%.+]] = bitcast x86_fp80* [[ADDR]] to i8* + // CHECK32: [[STORE_TEMP_VOID_PTR:%.+]] = bitcast x86_fp80* [[STORE_TEMP_PTR]] to i8* + // CHECK32: call void @__atomic_store(i32 12, i8* [[ADDR_VOID]], i8* [[STORE_TEMP_VOID_PTR]], i32 5) + *addr = 115; + // CHECK: [[ADDR:%.+]] = load x86_fp80** %{{.+}}, align 8 + // CHECK: [[ADDR_INT:%.+]] = bitcast x86_fp80* [[ADDR]] to i128* + // CHECK: [[INT_VAL:%.+]] = load atomic volatile i128* [[ADDR_INT]] seq_cst, align 16 + // CHECK: [[INT_LD_TEMP:%.+]] = bitcast x86_fp80* [[LD_TEMP:%.+]] to i128* + // CHECK: store i128 [[INT_VAL]], i128* [[INT_LD_TEMP:%.+]], align 16 + // CHECK: [[RET_VAL:%.+]] = load x86_fp80* [[LD_TEMP]], align 16 + // CHECK: ret x86_fp80 [[RET_VAL]] + // CHECK32: [[ADDR:%.+]] = load x86_fp80** %{{.+}}, align 4 + // CHECK32: [[VOID_ADDR:%.+]] = bitcast x86_fp80* [[ADDR]] to i8* + // CHECK32: [[VOID_LD_TEMP:%.+]] = bitcast x86_fp80* [[LD_TEMP:%.+]] to i8* + // CHECK32: call void @__atomic_load(i32 12, i8* [[VOID_ADDR]], i8* [[VOID_LD_TEMP]], i32 5) + // CHECK32: [[RET_VAL:%.+]] = load x86_fp80* [[LD_TEMP]], align 4 + // CHECK32: ret x86_fp80 [[RET_VAL]] + + return *addr; +}
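
For reference, a minimal source-level sketch (not part of the patch) of the construct the new x86-atomic-long_double.c test exercises. Because shouldUseOptimizedLibcall() only accepts integer-typed atomics of 1/2/4/8/16 bytes, the size-suffixed __atomic_*_N calls never apply to long double; instead, a compound assignment on an _Atomic long double is lowered through EmitAtomicCompareExchange, which on x86-64 (16-byte padded x86_fp80, lock-free with cmpxchg16b) becomes an inline cmpxchg loop and on i686 (12-byte object, not lock-free) falls back to the generic __atomic_compare_exchange libcall, matching the CHECK/CHECK32 lines above. The names in the snippet (counter, bump, delta) are illustrative only.

  /* Hypothetical standalone example, not part of the patch; compile with
     clang -std=c11 to observe the lowering described above. */
  #include <stdatomic.h>

  _Atomic long double counter;

  long double bump(long double delta) {
    /* Atomic compound assignment: seq_cst load, fadd, then a
       compare-exchange loop until the exchange succeeds
       (inline cmpxchg on x86-64, __atomic_compare_exchange on i686). */
    counter += delta;
    /* Plain atomic load of the updated value
       (inline atomic load on x86-64, __atomic_load libcall on i686). */
    return counter;
  }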