Index: lib/CodeGen/CGAtomic.cpp
===================================================================
--- lib/CodeGen/CGAtomic.cpp
+++ lib/CodeGen/CGAtomic.cpp
@@ -173,9 +173,14 @@
     return CGF.CGM.getSize(size);
   }
 
-  /// Cast the given pointer to an integer pointer suitable for
-  /// atomic operations.
-  Address emitCastToAtomicIntPointer(Address addr) const;
+  /// Cast the given pointer to an integer pointer suitable for atomic
+  /// operations.
+  Address emitCastToAtomicIntPointer(Address Addr) const;
+
+  /// If Addr is compatible with the iN that will be used for an atomic
+  /// operation, bitcast it. Otherwise, create a temporary that is suitable
+  /// and copy the value across.
+  Address convertToAtomicIntPointer(Address Addr) const;
 
   /// Turn an atomic-layout object into an r-value.
   RValue convertAtomicTempToRValue(Address addr, AggValueSlot resultSlot,
@@ -241,11 +246,11 @@
   static AtomicExpr::AtomicOrderingKind
   translateAtomicOrdering(const llvm::AtomicOrdering AO);
+
+  /// \brief Creates temp alloca for intermediate operations on atomic value.
+  Address CreateTempAlloca() const;
 private:
   bool requiresMemSetZero(llvm::Type *type) const;
 
-  /// \brief Creates temp alloca for intermediate operations on atomic value.
-  Address CreateTempAlloca() const;
-
   /// \brief Emits atomic load as a libcall.
   void EmitAtomicLoadLibcall(llvm::Value *AddForLoaded,
@@ -554,7 +559,6 @@
   case AtomicExpr::AO__c11_atomic_store:
   case AtomicExpr::AO__atomic_store:
   case AtomicExpr::AO__atomic_store_n: {
-    assert(!Dest.isValid() && "Store does not return a value");
     llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
     llvm::StoreInst *Store = CGF.Builder.CreateStore(LoadVal1, Ptr);
     Store->setAtomic(Order);
@@ -666,7 +670,7 @@
   }
 }
 
-RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E, Address Dest) {
+RValue CodeGenFunction::EmitAtomicExpr(AtomicExpr *E) {
   QualType AtomicTy = E->getPtr()->getType()->getPointeeType();
   QualType MemTy = AtomicTy;
   if (const AtomicType *AT = AtomicTy->getAs<AtomicType>())
@@ -682,10 +686,10 @@
   Address Val1 = Address::invalid();
   Address Val2 = Address::invalid();
+  Address Dest = Address::invalid();
   Address Ptr(EmitScalarExpr(E->getPtr()), alignChars);
 
   if (E->getOp() == AtomicExpr::AO__c11_atomic_init) {
-    assert(!Dest.isValid() && "Init does not return a value");
     LValue lvalue = MakeAddrLValue(Ptr, AtomicTy);
     EmitAtomicInit(E->getVal1(), lvalue);
     return RValue::get(nullptr);
@@ -771,12 +775,21 @@
   QualType RValTy = E->getType().getUnqualifiedType();
 
-  auto GetDest = [&] {
-    if (!RValTy->isVoidType() && !Dest.isValid()) {
-      Dest = CreateMemTemp(RValTy, ".atomicdst");
-    }
-    return Dest;
-  };
+  // The inlined atomics only function on iN types, where N is a power of 2. We
+  // need to make sure (via temporaries if necessary) that all incoming values
+  // are compatible.
+  LValue AtomicVal = MakeAddrLValue(Ptr, AtomicTy);
+  AtomicInfo Atomics(*this, AtomicVal);
+
+  Ptr = Atomics.emitCastToAtomicIntPointer(Ptr);
+  if (Val1.isValid()) Val1 = Atomics.convertToAtomicIntPointer(Val1);
+  if (Val2.isValid()) Val2 = Atomics.convertToAtomicIntPointer(Val2);
+  if (Dest.isValid())
+    Dest = Atomics.emitCastToAtomicIntPointer(Dest);
+  else if (E->isCmpXChg())
+    Dest = CreateMemTemp(RValTy, "cmpxchg.bool");
+  else if (!RValTy->isVoidType())
+    Dest = Atomics.emitCastToAtomicIntPointer(Atomics.CreateTempAlloca());
 
   // Use a library call. See: http://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary .
   if (UseLibcall) {
@@ -996,19 +1009,24 @@
     RValue Res = emitAtomicLibcall(*this, LibCallName, RetTy, Args);
     // The value is returned directly from the libcall.
-    if (HaveRetTy && !RetTy->isVoidType())
+    if (E->isCmpXChg())
       return Res;
-    // The value is returned via an explicit out param.
-    if (RetTy->isVoidType())
-      return RValue::get(nullptr);
-    // The value is returned directly for optimized libcalls but the caller is
-    // expected an out-param.
-    if (UseOptimizedLibcall) {
+
+    // The value is returned directly for optimized libcalls but the expr
+    // provided an out-param.
+    if (UseOptimizedLibcall && Res.getScalarVal()) {
       llvm::Value *ResVal = Res.getScalarVal();
-      Builder.CreateStore(ResVal,
-          Builder.CreateBitCast(GetDest(), ResVal->getType()->getPointerTo()));
+      Builder.CreateStore(
+          ResVal,
+          Builder.CreateBitCast(Dest, ResVal->getType()->getPointerTo()));
     }
-    return convertTempToRValue(Dest, RValTy, E->getExprLoc());
+
+    if (RValTy->isVoidType())
+      return RValue::get(nullptr);
+
+    return convertTempToRValue(
+        Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo()),
+        RValTy, E->getExprLoc());
   }
 
   bool IsStore = E->getOp() == AtomicExpr::AO__c11_atomic_store ||
@@ -1018,16 +1036,6 @@
                  E->getOp() == AtomicExpr::AO__atomic_load ||
                  E->getOp() == AtomicExpr::AO__atomic_load_n;
 
-  llvm::Type *ITy =
-      llvm::IntegerType::get(getLLVMContext(), Size * 8);
-  Address OrigDest = GetDest();
-  Ptr = Builder.CreateBitCast(
-      Ptr, ITy->getPointerTo(Ptr.getType()->getPointerAddressSpace()));
-  if (Val1.isValid()) Val1 = Builder.CreateBitCast(Val1, ITy->getPointerTo());
-  if (Val2.isValid()) Val2 = Builder.CreateBitCast(Val2, ITy->getPointerTo());
-  if (Dest.isValid() && !E->isCmpXChg())
-    Dest = Builder.CreateBitCast(Dest, ITy->getPointerTo());
-
   if (isa<llvm::ConstantInt>(Order)) {
     int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
     switch (ord) {
@@ -1065,7 +1073,10 @@
   }
 
   if (RValTy->isVoidType())
     return RValue::get(nullptr);
-  return convertTempToRValue(OrigDest, RValTy, E->getExprLoc());
+
+  return convertTempToRValue(
+      Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo()),
+      RValTy, E->getExprLoc());
 }
 
  // Long case, when Order isn't obviously constant.
@@ -1133,7 +1144,11 @@
   Builder.SetInsertPoint(ContBB);
   if (RValTy->isVoidType())
     return RValue::get(nullptr);
-  return convertTempToRValue(OrigDest, RValTy, E->getExprLoc());
+
+  assert(Atomics.getValueSizeInBits() <= Atomics.getAtomicSizeInBits());
+  return convertTempToRValue(
+      Builder.CreateBitCast(Dest, ConvertTypeForMem(RValTy)->getPointerTo()),
+      RValTy, E->getExprLoc());
 }
 
 Address AtomicInfo::emitCastToAtomicIntPointer(Address addr) const {
@@ -1144,6 +1159,19 @@
   return CGF.Builder.CreateBitCast(addr, ty->getPointerTo(addrspace));
 }
 
+Address AtomicInfo::convertToAtomicIntPointer(Address Addr) const {
+  llvm::Type *Ty = Addr.getElementType();
+  uint64_t SourceSizeInBits = CGF.CGM.getDataLayout().getTypeSizeInBits(Ty);
+  if (SourceSizeInBits != AtomicSizeInBits) {
+    Address Tmp = CreateTempAlloca();
+    CGF.Builder.CreateMemCpy(Tmp, Addr,
+                             std::min(AtomicSizeInBits, SourceSizeInBits) / 8);
+    Addr = Tmp;
+  }
+
+  return emitCastToAtomicIntPointer(Addr);
+}
+
 RValue AtomicInfo::convertAtomicTempToRValue(Address addr,
                                              AggValueSlot resultSlot,
                                              SourceLocation loc,
Index: lib/CodeGen/CGExprAgg.cpp
===================================================================
--- lib/CodeGen/CGExprAgg.cpp
+++ lib/CodeGen/CGExprAgg.cpp
@@ -199,7 +199,8 @@
   //  case Expr::ChooseExprClass:
   void VisitCXXThrowExpr(const CXXThrowExpr *E) { CGF.EmitCXXThrowExpr(E); }
   void VisitAtomicExpr(AtomicExpr *E) {
-    CGF.EmitAtomicExpr(E, EnsureSlot(E->getType()).getAddress());
+    RValue Res = CGF.EmitAtomicExpr(E);
+    EmitFinalDestCopy(E->getType(), Res);
   }
 };
 }  // end anonymous namespace.
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -2930,7 +2930,7 @@
   void EmitLambdaExpr(const LambdaExpr *E, AggValueSlot Dest);
 
-  RValue EmitAtomicExpr(AtomicExpr *E, Address Dest = Address::invalid());
+  RValue EmitAtomicExpr(AtomicExpr *E);
 
   //===--------------------------------------------------------------------===//
   //                         Annotations Emission
Index: test/CodeGen/atomic-arm64.c
===================================================================
--- test/CodeGen/atomic-arm64.c
+++ test/CodeGen/atomic-arm64.c
@@ -66,8 +66,9 @@
 // CHECK-NEXT: [[T0:%.*]] = bitcast [[QUAD_T]]* [[TEMP]] to i8*
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[QUAD_T]]* {{%.*}} to i8*
 // CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[T0]], i8* [[T1]], i64 32, i32 8, i1 false)
-// CHECK-NEXT: [[T0:%.*]] = bitcast [[QUAD_T]]* [[TEMP]] to i8*
-// CHECK-NEXT: call void @__atomic_store(i64 32, i8* bitcast ([[QUAD_T]]* @a_pointer_quad to i8*), i8* [[T0]], i32 5)
+// CHECK-NEXT: [[T0:%.*]] = bitcast [[QUAD_T]]* [[TEMP]] to i256*
+// CHECK-NEXT: [[T1:%.*]] = bitcast i256* [[T0]] to i8*
+// CHECK-NEXT: call void @__atomic_store(i64 32, i8* bitcast ([[QUAD_T]]* @a_pointer_quad to i8*), i8* [[T1]], i32 5)
 void test4(pointer_quad_t quad) {
   __c11_atomic_store(&a_pointer_quad, quad, memory_order_seq_cst);
 }
Index: test/CodeGen/atomic-ops.c
===================================================================
--- test/CodeGen/atomic-ops.c
+++ test/CodeGen/atomic-ops.c
@@ -179,8 +179,8 @@
   // CHECK-LABEL: @fd1
   // CHECK: [[RETVAL:%.*]] = alloca %struct.S, align 4
   // CHECK: [[RET:%.*]] = alloca %struct.S, align 4
-  // CHECK: [[CALL:%.*]] = call i64 @__atomic_load_8(
   // CHECK: [[CAST:%.*]] = bitcast %struct.S* [[RET]] to i64*
+  // CHECK: [[CALL:%.*]] = call i64 @__atomic_load_8(
   // CHECK: store i64 [[CALL]], i64* [[CAST]], align 4
   struct S ret;
   __atomic_load(a, &ret, memory_order_seq_cst);
@@ -195,8 +195,9 @@
   // CHECK-NEXT: store %struct.S* %b, %struct.S** [[B_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load %struct.S*, %struct.S** [[A_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load %struct.S*, %struct.S** [[B_ADDR]], align 4
-  // CHECK-NEXT: [[COERCED_A:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i8*
+  // CHECK-NEXT: [[COERCED_A_TMP:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i64*
   // CHECK-NEXT: [[COERCED_B:%.*]] = bitcast %struct.S* [[LOAD_B_PTR]] to i64*
+  // CHECK-NEXT: [[COERCED_A:%.*]] = bitcast i64* [[COERCED_A_TMP]] to i8*
   // CHECK-NEXT: [[LOAD_B:%.*]] = load i64, i64* [[COERCED_B]], align 4
   // CHECK-NEXT: call void @__atomic_store_8(i8* [[COERCED_A]], i64 [[LOAD_B]],
   // CHECK-NEXT: ret void
@@ -214,11 +215,12 @@
   // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load %struct.S*, %struct.S** [[A_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load %struct.S*, %struct.S** [[B_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_C_PTR:%.*]] = load %struct.S*, %struct.S** [[C_ADDR]], align 4
-  // CHECK-NEXT: [[COERCED_A:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i8*
+  // CHECK-NEXT: [[COERCED_A_TMP:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i64*
   // CHECK-NEXT: [[COERCED_B:%.*]] = bitcast %struct.S* [[LOAD_B_PTR]] to i64*
+  // CHECK-NEXT: [[COERCED_C:%.*]] = bitcast %struct.S* [[LOAD_C_PTR]] to i64*
+  // CHECK-NEXT: [[COERCED_A:%.*]] = bitcast i64* [[COERCED_A_TMP]] to i8*
   // CHECK-NEXT: [[LOAD_B:%.*]] = load i64, i64* [[COERCED_B]], align 4
   // CHECK-NEXT: [[CALL:%.*]] = call i64 @__atomic_exchange_8(i8* [[COERCED_A]], i64 [[LOAD_B]],
-  // CHECK-NEXT: [[COERCED_C:%.*]] = bitcast %struct.S* [[LOAD_C_PTR]] to i64*
   // CHECK-NEXT: store i64 [[CALL]], i64* [[COERCED_C]], align 4
 
   __atomic_exchange(a, b, c, memory_order_seq_cst);
@@ -235,9 +237,11 @@
   // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load %struct.S*, %struct.S** [[A_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load %struct.S*, %struct.S** [[B_ADDR]], align 4
   // CHECK-NEXT: [[LOAD_C_PTR:%.*]] = load %struct.S*, %struct.S** [[C_ADDR]], align 4
-  // CHECK-NEXT: [[COERCED_A:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i8*
-  // CHECK-NEXT: [[COERCED_B:%.*]] = bitcast %struct.S* [[LOAD_B_PTR]] to i8*
+  // CHECK-NEXT: [[COERCED_A_TMP:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i64*
+  // CHECK-NEXT: [[COERCED_B_TMP:%.*]] = bitcast %struct.S* [[LOAD_B_PTR]] to i64*
   // CHECK-NEXT: [[COERCED_C:%.*]] = bitcast %struct.S* [[LOAD_C_PTR]] to i64*
+  // CHECK-NEXT: [[COERCED_A:%.*]] = bitcast i64* [[COERCED_A_TMP]] to i8*
+  // CHECK-NEXT: [[COERCED_B:%.*]] = bitcast i64* [[COERCED_B_TMP]] to i8*
   // CHECK-NEXT: [[LOAD_C:%.*]] = load i64, i64* [[COERCED_C]], align 4
   // CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange_8(i8* [[COERCED_A]], i8* [[COERCED_B]], i64 [[LOAD_C]]
   // CHECK-NEXT: ret i1 [[CALL]]
@@ -312,6 +316,8 @@
   char c[17];
 } seventeen;
 
+struct Incomplete;
+
 int lock_free(struct Incomplete *incomplete) {
   // CHECK-LABEL: @lock_free
Index: test/CodeGen/c11atomics-ios.c
===================================================================
--- test/CodeGen/c11atomics-ios.c
+++ test/CodeGen/c11atomics-ios.c
@@ -202,11 +202,120 @@
   // CHECK-NEXT: ret void
 }
 
-void testPromotedStructOps(_Atomic(PS) *p) {
-  PS a = __c11_atomic_load(p, 5);
-  __c11_atomic_store(p, a, 5);
-  PS b = __c11_atomic_exchange(p, a, 5);
+PS test_promoted_load(_Atomic(PS) *addr) {
+  // CHECK-LABEL: @test_promoted_load(%struct.PS* noalias sret %agg.result, { %struct.PS, [2 x i8] }* %addr)
+  // CHECK: [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK: [[ATOMIC_RES:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK: [[ATOMIC_RES64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_RES]] to i64*
+  // CHECK: [[VAL:%.*]] = load atomic i64, i64* [[ADDR64]] seq_cst, align 8
+  // CHECK: store i64 [[VAL]], i64* [[ATOMIC_RES64]], align 8
+  // CHECK: [[ATOMIC_RES_STRUCT:%.*]] = bitcast i64* [[ATOMIC_RES64]] to %struct.PS*
+  // CHECK: [[AGG_RESULT8:%.*]] = bitcast %struct.PS* %agg.result to i8*
+  // CHECK: [[ATOMIC_RES8:%.*]] = bitcast %struct.PS* [[ATOMIC_RES_STRUCT]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT8]], i8* [[ATOMIC_RES8]], i32 6, i32 2, i1 false)
-  _Bool v = __c11_atomic_compare_exchange_strong(p, &b, a, 5, 5);
-  v = __c11_atomic_compare_exchange_weak(p, &b, a, 5, 5);
+  return __c11_atomic_load(addr, 5);
+}
+
+void test_promoted_store(_Atomic(PS) *addr, PS *val) {
+  // CHECK-LABEL: @test_promoted_store({ %struct.PS, [2 x i8] }* %addr, %struct.PS* %val)
+  // CHECK: [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK: [[VAL_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK: [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
+  // CHECK: [[ATOMIC_VAL:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: store %struct.PS* %val, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK: [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: [[VAL:%.*]] = load %struct.PS*, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: [[VAL8:%.*]] = bitcast %struct.PS* [[VAL]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[VAL8]], i32 6, i32 2, i1 false)
+  // CHECK: [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK: [[ATOMIC_VAL8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i8*
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_VAL8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
+  // CHECK: [[ATOMIC_VAL64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i64*
+  // CHECK: [[VAL64:%.*]] = load i64, i64* [[ATOMIC_VAL64]], align 8
+  // CHECK: store atomic i64 [[VAL64]], i64* [[ADDR64]] seq_cst, align 8
+
+  __c11_atomic_store(addr, *val, 5);
+}
+
+PS test_promoted_exchange(_Atomic(PS) *addr, PS *val) {
+  // CHECK-LABEL: @test_promoted_exchange(%struct.PS* noalias sret %agg.result, { %struct.PS, [2 x i8] }* %addr, %struct.PS* %val)
+  // CHECK: [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK: [[VAL_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK: [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
+  // CHECK: [[ATOMIC_VAL:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: [[ATOMIC_RES:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: store %struct.PS* %val, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK: [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: [[VAL:%.*]] = load %struct.PS*, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: [[VAL8:%.*]] = bitcast %struct.PS* [[VAL]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[VAL8]], i32 6, i32 2, i1 false)
+  // CHECK: [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK: [[ATOMIC_VAL8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i8*
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_VAL8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
+  // CHECK: [[ATOMIC_VAL64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i64*
+  // CHECK: [[ATOMIC_RES64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_RES]] to i64*
+  // CHECK: [[VAL64:%.*]] = load i64, i64* [[ATOMIC_VAL64]], align 8
+  // CHECK: [[RES:%.*]] = atomicrmw xchg i64* [[ADDR64]], i64 [[VAL64]] seq_cst
+  // CHECK: store i64 [[RES]], i64* [[ATOMIC_RES64]], align 8
+  // CHECK: [[ATOMIC_RES_STRUCT:%.*]] = bitcast i64* [[ATOMIC_RES64]] to %struct.PS*
+  // CHECK: [[AGG_RESULT8:%.*]] = bitcast %struct.PS* %agg.result to i8*
+  // CHECK: [[ATOMIC_RES8:%.*]] = bitcast %struct.PS* [[ATOMIC_RES_STRUCT]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT8]], i8* [[ATOMIC_RES8]], i32 6, i32 2, i1 false)
+  return __c11_atomic_exchange(addr, *val, 5);
+}
+
+_Bool test_promoted_cmpxchg(_Atomic(PS) *addr, PS *desired, PS *new) {
+  // CHECK: define zeroext i1 @test_promoted_cmpxchg({ %struct.PS, [2 x i8] }* %addr, %struct.PS* %desired, %struct.PS* %new) #0 {
+  // CHECK: [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK: [[DESIRED_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK: [[NEW_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK: [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
+  // CHECK: [[ATOMIC_DESIRED:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: [[ATOMIC_NEW:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: [[RES_ADDR:%.*]] = alloca i8, align 1
+  // CHECK: store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: store %struct.PS* %desired, %struct.PS** [[DESIRED_ARG]], align 4
+  // CHECK: store %struct.PS* %new, %struct.PS** [[NEW_ARG]], align 4
+  // CHECK: [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: [[DESIRED:%.*]] = load %struct.PS*, %struct.PS** [[DESIRED_ARG]], align 4
+  // CHECK: [[NEW:%.*]] = load %struct.PS*, %struct.PS** [[NEW_ARG]], align 4
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: [[NEW8:%.*]] = bitcast %struct.PS* [[NEW]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[NEW8]], i32 6, i32 2, i1 false)
+  // CHECK: [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK: [[ATOMIC_DESIRED8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_DESIRED:%.*]] to i8*
+  // CHECK: [[DESIRED8:%.*]] = bitcast %struct.PS* [[DESIRED]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_DESIRED8]], i8* [[DESIRED8]], i64 6, i32 2, i1 false)
+  // CHECK: [[ATOMIC_DESIRED64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_DESIRED:%.*]] to i64*
+  // CHECK: [[ATOMIC_NEW8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_NEW]] to i8*
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_NEW8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
+  // CHECK: [[ATOMIC_NEW64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_NEW]] to i64*
+  // CHECK: [[ATOMIC_DESIRED_VAL64:%.*]] = load i64, i64* [[ATOMIC_DESIRED64]], align 8
+  // CHECK: [[ATOMIC_NEW_VAL64:%.*]] = load i64, i64* [[ATOMIC_NEW64]], align 8
+  // CHECK: [[RES:%.*]] = cmpxchg i64* [[ADDR64]], i64 [[ATOMIC_DESIRED_VAL64]], i64 [[ATOMIC_NEW_VAL64]] seq_cst seq_cst
+  // CHECK: [[RES_VAL64:%.*]] = extractvalue { i64, i1 } [[RES]], 0
+  // CHECK: [[RES_BOOL:%.*]] = extractvalue { i64, i1 } [[RES]], 1
+  // CHECK: br i1 [[RES_BOOL]], label {{%.*}}, label {{%.*}}
+
+  // CHECK: store i64 [[RES_VAL64]], i64* [[ATOMIC_DESIRED64]], align 8
+  // CHECK: br label {{%.*}}
+
+  // CHECK: [[RES_BOOL8:%.*]] = zext i1 [[RES_BOOL]] to i8
+  // CHECK: store i8 [[RES_BOOL8]], i8* [[RES_ADDR]], align 1
+  // CHECK: [[RES_BOOL8:%.*]] = load i8, i8* [[RES_ADDR]], align 1
+  // CHECK: [[RETVAL:%.*]] = trunc i8 [[RES_BOOL8]] to i1
+  // CHECK: ret i1 [[RETVAL]]
+
+  return __c11_atomic_compare_exchange_strong(addr, desired, *new, 5, 5);
 }
Index: test/CodeGen/c11atomics.c
===================================================================
--- test/CodeGen/c11atomics.c
+++ test/CodeGen/c11atomics.c
@@ -367,14 +367,111 @@
   // CHECK-NEXT: ret void
 }
 
-// CHECK: define arm_aapcscc void @testPromotedStructOps([[APS:.*]]*
-
-// FIXME: none of these look right, but we can leave the "test" here
-// to make sure they at least don't crash.
-void testPromotedStructOps(_Atomic(PS) *p) {
-  PS a = __c11_atomic_load(p, 5);
-  __c11_atomic_store(p, a, 5);
-  PS b = __c11_atomic_exchange(p, a, 5);
-  _Bool v = __c11_atomic_compare_exchange_strong(p, &b, a, 5, 5);
-  v = __c11_atomic_compare_exchange_weak(p, &b, a, 5, 5);
+PS test_promoted_load(_Atomic(PS) *addr) {
+  // CHECK-LABEL: @test_promoted_load(%struct.PS* noalias sret %agg.result, { %struct.PS, [2 x i8] }* %addr)
+  // CHECK: [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK: [[ATOMIC_RES:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK: [[ATOMIC_RES64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_RES]] to i64*
+  // CHECK: [[ADDR8:%.*]] = bitcast i64* [[ADDR64]] to i8*
+  // CHECK: [[RES:%.*]] = call arm_aapcscc i64 @__atomic_load_8(i8* [[ADDR8]], i32 5)
+  // CHECK: store i64 [[RES]], i64* [[ATOMIC_RES64]], align 8
+  // CHECK: [[ATOMIC_RES_STRUCT:%.*]] = bitcast i64* [[ATOMIC_RES64]] to %struct.PS*
+  // CHECK: [[AGG_RESULT8:%.*]] = bitcast %struct.PS* %agg.result to i8*
+  // CHECK: [[ATOMIC_RES8:%.*]] = bitcast %struct.PS* [[ATOMIC_RES_STRUCT]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT8]], i8* [[ATOMIC_RES8]], i32 6, i32 2, i1 false)
+
+  return __c11_atomic_load(addr, 5);
+}
+
+void test_promoted_store(_Atomic(PS) *addr, PS *val) {
+  // CHECK-LABEL: @test_promoted_store({ %struct.PS, [2 x i8] }* %addr, %struct.PS* %val)
+  // CHECK: [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK: [[VAL_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK: [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
+  // CHECK: [[ATOMIC_VAL:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: store %struct.PS* %val, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK: [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: [[VAL:%.*]] = load %struct.PS*, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: [[VAL8:%.*]] = bitcast %struct.PS* [[VAL]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[VAL8]], i32 6, i32 2, i1 false)
+  // CHECK: [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK: [[ATOMIC_VAL8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i8*
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_VAL8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
+  // CHECK: [[ATOMIC_VAL64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i64*
+  // CHECK: [[ADDR8:%.*]] = bitcast i64* [[ADDR64]] to i8*
+  // CHECK: [[VAL64:%.*]] = load i64, i64* [[ATOMIC_VAL64]], align 2
+  // CHECK: call arm_aapcscc void @__atomic_store_8(i8* [[ADDR8]], i64 [[VAL64]], i32 5)
+  __c11_atomic_store(addr, *val, 5);
+}
+
+PS test_promoted_exchange(_Atomic(PS) *addr, PS *val) {
+  // CHECK-LABEL: @test_promoted_exchange(%struct.PS* noalias sret %agg.result, { %struct.PS, [2 x i8] }* %addr, %struct.PS* %val)
+  // CHECK: [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK: [[VAL_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK: [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
+  // CHECK: [[ATOMIC_VAL:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: [[ATOMIC_RES:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: store %struct.PS* %val, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK: [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: [[VAL:%.*]] = load %struct.PS*, %struct.PS** [[VAL_ARG]], align 4
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: [[VAL8:%.*]] = bitcast %struct.PS* [[VAL]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[VAL8]], i32 6, i32 2, i1 false)
+  // CHECK: [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK: [[ATOMIC_VAL8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i8*
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_VAL8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
+  // CHECK: [[ATOMIC_VAL64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_VAL]] to i64*
+  // CHECK: [[ATOMIC_RES64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_RES]] to i64*
+  // CHECK: [[ADDR8:%.*]] = bitcast i64* [[ADDR64]] to i8*
+  // CHECK: [[VAL64:%.*]] = load i64, i64* [[ATOMIC_VAL64]], align 2
+  // CHECK: [[RES:%.*]] = call arm_aapcscc i64 @__atomic_exchange_8(i8* [[ADDR8]], i64 [[VAL64]], i32 5)
+  // CHECK: store i64 [[RES]], i64* [[ATOMIC_RES64]], align 8
+  // CHECK: [[ATOMIC_RES_STRUCT:%.*]] = bitcast i64* [[ATOMIC_RES64]] to %struct.PS*
+  // CHECK: [[AGG_RESULT8:%.*]] = bitcast %struct.PS* %agg.result to i8*
+  // CHECK: [[ATOMIC_RES8:%.*]] = bitcast %struct.PS* [[ATOMIC_RES_STRUCT]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[AGG_RESULT8]], i8* [[ATOMIC_RES8]], i32 6, i32 2, i1 false)
+  return __c11_atomic_exchange(addr, *val, 5);
+}
+
+_Bool test_promoted_cmpxchg(_Atomic(PS) *addr, PS *desired, PS *new) {
+  // CHECK-LABEL: i1 @test_promoted_cmpxchg({ %struct.PS, [2 x i8] }* %addr, %struct.PS* %desired, %struct.PS* %new) #0 {
+  // CHECK: [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
+  // CHECK: [[DESIRED_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK: [[NEW_ARG:%.*]] = alloca %struct.PS*, align 4
+  // CHECK: [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2
+  // CHECK: [[ATOMIC_DESIRED:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // CHECK: [[ATOMIC_NEW:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
+  // %cmpxchg.bool = alloca i8, align 1
+  // CHECK: store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: store %struct.PS* %desired, %struct.PS** [[DESIRED_ARG]], align 4
+  // CHECK: store %struct.PS* %new, %struct.PS** [[NEW_ARG]], align 4
+  // CHECK: [[ADDR:%.*]] = load { %struct.PS, [2 x i8] }*, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
+  // CHECK: [[DESIRED:%.*]] = load %struct.PS*, %struct.PS** [[DESIRED_ARG]], align 4
+  // CHECK: [[NEW:%.*]] = load %struct.PS*, %struct.PS** [[NEW_ARG]], align 4
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: [[NEW8:%.*]] = bitcast %struct.PS* [[NEW]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* [[NONATOMIC_TMP8]], i8* [[NEW8]], i32 6, i32 2, i1 false)
+  // CHECK: [[ADDR64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ADDR]] to i64*
+  // CHECK: [[ATOMIC_DESIRED8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_DESIRED]] to i8*
+  // CHECK: [[DESIRED8:%.*]] = bitcast %struct.PS* [[DESIRED]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_DESIRED8]], i8* [[DESIRED8]], i64 6, i32 2, i1 false)
+  // CHECK: [[ATOMIC_DESIRED64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_DESIRED]] to i64*
+  // CHECK: [[ATOMIC_NEW8:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_NEW]] to i8*
+  // CHECK: [[NONATOMIC_TMP8:%.*]] = bitcast %struct.PS* [[NONATOMIC_TMP]] to i8*
+  // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[ATOMIC_NEW8]], i8* [[NONATOMIC_TMP8]], i64 6, i32 2, i1 false)
+  // CHECK: [[ATOMIC_NEW64:%.*]] = bitcast { %struct.PS, [2 x i8] }* [[ATOMIC_NEW]] to i64*
+  // CHECK: [[ADDR8:%.*]] = bitcast i64* [[ADDR64]] to i8*
+  // CHECK: [[ATOMIC_DESIRED8:%.*]] = bitcast i64* [[ATOMIC_DESIRED64]] to i8*
+  // CHECK: [[NEW64:%.*]] = load i64, i64* [[ATOMIC_NEW64]], align 2
+  // CHECK: [[RES:%.*]] = call arm_aapcscc zeroext i1 @__atomic_compare_exchange_8(i8* [[ADDR8]], i8* [[ATOMIC_DESIRED8]], i64 [[NEW64]], i32 5, i32 5)
+  // CHECK: ret i1 [[RES]]
+  return __c11_atomic_compare_exchange_strong(addr, desired, *new, 5, 5);
 }
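
Note (not part of the patch): a minimal C sketch of the source-level pattern the new tests exercise. The struct definition below is an assumption; the tests only show that PS is a 6-byte type (the i32 6 memcpy size in the checks), padded to an 8-byte atomic object ({ %struct.PS, [2 x i8] }). With this patch the operations are lowered to i64 atomics inline, or to the __atomic_*_8 libcalls where inline atomics are unavailable.

/* Hypothetical 6-byte struct; _Atomic(PS) is promoted to 8 bytes. */
typedef struct { short x, y, z; } PS;

PS load_ps(_Atomic(PS) *addr) {
  /* 8-byte atomic load, then the 6 significant bytes are copied to the result. */
  return __c11_atomic_load(addr, __ATOMIC_SEQ_CST);
}

_Bool cas_ps(_Atomic(PS) *addr, PS *expected, PS desired) {
  /* i64 cmpxchg inline, or a call to __atomic_compare_exchange_8 as a libcall. */
  return __c11_atomic_compare_exchange_strong(addr, expected, desired,
                                              __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}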