Index: cfe/trunk/lib/CodeGen/CGAtomic.cpp
===================================================================
--- cfe/trunk/lib/CodeGen/CGAtomic.cpp
+++ cfe/trunk/lib/CodeGen/CGAtomic.cpp
@@ -465,11 +465,19 @@
 static void
 AddDirectArgument(CodeGenFunction &CGF, CallArgList &Args,
                   bool UseOptimizedLibcall, llvm::Value *Val, QualType ValTy,
-                  SourceLocation Loc) {
+                  SourceLocation Loc, CharUnits SizeInChars) {
   if (UseOptimizedLibcall) {
     // Load value and pass it to the function directly.
     unsigned Align = CGF.getContext().getTypeAlignInChars(ValTy).getQuantity();
-    Val = CGF.EmitLoadOfScalar(Val, false, Align, ValTy, Loc);
+    int64_t SizeInBits = CGF.getContext().toBits(SizeInChars);
+    ValTy =
+        CGF.getContext().getIntTypeForBitwidth(SizeInBits, /*Signed=*/false);
+    llvm::Type *IPtrTy = llvm::IntegerType::get(CGF.getLLVMContext(),
+                                                SizeInBits)->getPointerTo();
+    Val = CGF.EmitLoadOfScalar(CGF.Builder.CreateBitCast(Val, IPtrTy), false,
+                               Align, CGF.getContext().getPointerType(ValTy),
+                               Loc);
+    // Coerce the value into an appropriately sized integer type.
     Args.add(RValue::get(Val), ValTy);
   } else {
     // Non-optimized functions always take a reference.
@@ -638,7 +646,7 @@
     HaveRetTy = true;
     Args.add(RValue::get(EmitCastToVoidPtr(Val1)), getContext().VoidPtrTy);
     AddDirectArgument(*this, Args, UseOptimizedLibcall, Val2, MemTy,
-                      E->getExprLoc());
+                      E->getExprLoc(), sizeChars);
     Args.add(RValue::get(Order), getContext().IntTy);
     Order = OrderFail;
     break;
@@ -650,7 +658,7 @@
   case AtomicExpr::AO__atomic_exchange:
     LibCallName = "__atomic_exchange";
     AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy,
-                      E->getExprLoc());
+                      E->getExprLoc(), sizeChars);
     break;
   // void __atomic_store(size_t size, void *mem, void *val, int order)
   // void __atomic_store_N(T *mem, T val, int order)
@@ -661,7 +669,7 @@
     RetTy = getContext().VoidTy;
     HaveRetTy = true;
     AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy,
-                      E->getExprLoc());
+                      E->getExprLoc(), sizeChars);
     break;
   // void __atomic_load(size_t size, void *mem, void *return, int order)
   // T __atomic_load_N(T *mem, int order)
@@ -675,35 +683,35 @@
   case AtomicExpr::AO__atomic_fetch_add:
     LibCallName = "__atomic_fetch_add";
     AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, LoweredMemTy,
-                      E->getExprLoc());
+                      E->getExprLoc(), sizeChars);
     break;
   // T __atomic_fetch_and_N(T *mem, T val, int order)
   case AtomicExpr::AO__c11_atomic_fetch_and:
   case AtomicExpr::AO__atomic_fetch_and:
     LibCallName = "__atomic_fetch_and";
     AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy,
-                      E->getExprLoc());
+                      E->getExprLoc(), sizeChars);
     break;
   // T __atomic_fetch_or_N(T *mem, T val, int order)
   case AtomicExpr::AO__c11_atomic_fetch_or:
   case AtomicExpr::AO__atomic_fetch_or:
     LibCallName = "__atomic_fetch_or";
     AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy,
-                      E->getExprLoc());
+                      E->getExprLoc(), sizeChars);
     break;
   // T __atomic_fetch_sub_N(T *mem, T val, int order)
   case AtomicExpr::AO__c11_atomic_fetch_sub:
   case AtomicExpr::AO__atomic_fetch_sub:
     LibCallName = "__atomic_fetch_sub";
     AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, LoweredMemTy,
-                      E->getExprLoc());
+                      E->getExprLoc(), sizeChars);
     break;
   // T __atomic_fetch_xor_N(T *mem, T val, int order)
   case AtomicExpr::AO__c11_atomic_fetch_xor:
   case AtomicExpr::AO__atomic_fetch_xor:
     LibCallName = "__atomic_fetch_xor";
     AddDirectArgument(*this, Args, UseOptimizedLibcall, Val1, MemTy,
-                      E->getExprLoc());
+                      E->getExprLoc(), sizeChars);
     break;
   default: return EmitUnsupportedRValue(E, "atomic library call");
   }
@@ -715,7 +723,9 @@
   if (!HaveRetTy) {
     if (UseOptimizedLibcall) {
       // Value is returned directly.
-      RetTy = MemTy;
+      // The function returns an appropriately sized integer type.
+      RetTy = getContext().getIntTypeForBitwidth(
+          getContext().toBits(sizeChars), /*Signed=*/false);
     } else {
       // Value is returned through parameter before the order.
       RetTy = getContext().VoidTy;
@@ -733,8 +743,16 @@
   llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
   llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
   RValue Res = EmitCall(FuncInfo, Func, ReturnValueSlot(), Args);
-  if (!RetTy->isVoidType())
-    return Res;
+  if (!RetTy->isVoidType()) {
+    if (UseOptimizedLibcall) {
+      if (HaveRetTy)
+        return Res;
+      llvm::StoreInst *StoreDest = Builder.CreateStore(
+          Res.getScalarVal(),
+          Builder.CreateBitCast(Dest, FTy->getReturnType()->getPointerTo()));
+      StoreDest->setAlignment(Align);
+    }
+  }
   if (E->getType()->isVoidType())
     return RValue::get(nullptr);
   return convertTempToRValue(Dest, E->getType(), E->getExprLoc());
Index: cfe/trunk/test/CodeGen/atomic-ops-libcall.c
===================================================================
--- cfe/trunk/test/CodeGen/atomic-ops-libcall.c
+++ cfe/trunk/test/CodeGen/atomic-ops-libcall.c
@@ -7,31 +7,31 @@
 
 int *test_c11_atomic_fetch_add_int_ptr(_Atomic(int *) *p) {
   // CHECK: test_c11_atomic_fetch_add_int_ptr
-  // CHECK: {{%[^ ]*}} = tail call i32* @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 12, i32 5)
+  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 12, i32 5)
   return __c11_atomic_fetch_add(p, 3, memory_order_seq_cst);
 }
 
 int *test_c11_atomic_fetch_sub_int_ptr(_Atomic(int *) *p) {
   // CHECK: test_c11_atomic_fetch_sub_int_ptr
-  // CHECK: {{%[^ ]*}} = tail call i32* @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 20, i32 5)
+  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 20, i32 5)
   return __c11_atomic_fetch_sub(p, 5, memory_order_seq_cst);
 }
 
 int test_c11_atomic_fetch_add_int(_Atomic(int) *p) {
   // CHECK: test_c11_atomic_fetch_add_int
-  // CHECK: {{%[^ ]*}} = tail call i32 bitcast (i32* (i8*, i32, i32)* @__atomic_fetch_add_4 to i32 (i8*, i32, i32)*)(i8* {{%[0-9]+}}, i32 3, i32 5)
+  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_add_4(i8* {{%[0-9]+}}, i32 3, i32 5)
   return __c11_atomic_fetch_add(p, 3, memory_order_seq_cst);
 }
 
 int test_c11_atomic_fetch_sub_int(_Atomic(int) *p) {
   // CHECK: test_c11_atomic_fetch_sub_int
-  // CHECK: {{%[^ ]*}} = tail call i32 bitcast (i32* (i8*, i32, i32)* @__atomic_fetch_sub_4 to i32 (i8*, i32, i32)*)(i8* {{%[0-9]+}}, i32 5, i32 5)
+  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 5, i32 5)
   return __c11_atomic_fetch_sub(p, 5, memory_order_seq_cst);
 }
 
 int *fp2a(int **p) {
   // CHECK: @fp2a
-  // CHECK: {{%[^ ]*}} = tail call i32* @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 4, i32 0)
+  // CHECK: {{%[^ ]*}} = tail call i32 @__atomic_fetch_sub_4(i8* {{%[0-9]+}}, i32 4, i32 0)
   // Note, the GNU builtins do not multiply by sizeof(T)!
   return __atomic_fetch_sub(p, 4, memory_order_relaxed);
 }
Index: cfe/trunk/test/CodeGen/atomic-ops.c
===================================================================
--- cfe/trunk/test/CodeGen/atomic-ops.c
+++ cfe/trunk/test/CodeGen/atomic-ops.c
@@ -139,6 +139,79 @@
   return __c11_atomic_exchange(d, 2, memory_order_seq_cst);
 }
 
+struct S {
+  double x;
+};
+
+struct S fd1(struct S *a) {
+  // CHECK-LABEL: @fd1
+  // CHECK: [[RETVAL:%.*]] = alloca %struct.S, align 4
+  // CHECK: [[RET:%.*]] = alloca %struct.S, align 4
+  // CHECK: [[CALL:%.*]] = call i64 @__atomic_load_8(
+  // CHECK: [[CAST:%.*]] = bitcast %struct.S* [[RET]] to i64*
+  // CHECK: store i64 [[CALL]], i64* [[CAST]], align 4
+  struct S ret;
+  __atomic_load(a, &ret, memory_order_seq_cst);
+  return ret;
+}
+
+void fd2(struct S *a, struct S *b) {
+  // CHECK-LABEL: @fd2
+  // CHECK: [[A_ADDR:%.*]] = alloca %struct.S*, align 4
+  // CHECK-NEXT: [[B_ADDR:%.*]] = alloca %struct.S*, align 4
+  // CHECK-NEXT: store %struct.S* %a, %struct.S** [[A_ADDR]], align 4
+  // CHECK-NEXT: store %struct.S* %b, %struct.S** [[B_ADDR]], align 4
+  // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load %struct.S** [[A_ADDR]], align 4
+  // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load %struct.S** [[B_ADDR]], align 4
+  // CHECK-NEXT: [[COERCED_A:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i8*
+  // CHECK-NEXT: [[COERCED_B:%.*]] = bitcast %struct.S* [[LOAD_B_PTR]] to i64*
+  // CHECK-NEXT: [[LOAD_B:%.*]] = load i64* [[COERCED_B]], align 4
+  // CHECK-NEXT: call void @__atomic_store_8(i8* [[COERCED_A]], i64 [[LOAD_B]],
+  // CHECK-NEXT: ret void
+  __atomic_store(a, b, memory_order_seq_cst);
+}
+
+void fd3(struct S *a, struct S *b, struct S *c) {
+  // CHECK-LABEL: @fd3
+  // CHECK: [[A_ADDR:%.*]] = alloca %struct.S*, align 4
+  // CHECK-NEXT: [[B_ADDR:%.*]] = alloca %struct.S*, align 4
+  // CHECK-NEXT: [[C_ADDR:%.*]] = alloca %struct.S*, align 4
+  // CHECK-NEXT: store %struct.S* %a, %struct.S** [[A_ADDR]], align 4
+  // CHECK-NEXT: store %struct.S* %b, %struct.S** [[B_ADDR]], align 4
+  // CHECK-NEXT: store %struct.S* %c, %struct.S** [[C_ADDR]], align 4
+  // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load %struct.S** [[A_ADDR]], align 4
+  // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load %struct.S** [[B_ADDR]], align 4
+  // CHECK-NEXT: [[LOAD_C_PTR:%.*]] = load %struct.S** [[C_ADDR]], align 4
+  // CHECK-NEXT: [[COERCED_A:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i8*
+  // CHECK-NEXT: [[COERCED_B:%.*]] = bitcast %struct.S* [[LOAD_B_PTR]] to i64*
+  // CHECK-NEXT: [[LOAD_B:%.*]] = load i64* [[COERCED_B]], align 4
+  // CHECK-NEXT: [[CALL:%.*]] = call i64 @__atomic_exchange_8(i8* [[COERCED_A]], i64 [[LOAD_B]],
+  // CHECK-NEXT: [[COERCED_C:%.*]] = bitcast %struct.S* [[LOAD_C_PTR]] to i64*
+  // CHECK-NEXT: store i64 [[CALL]], i64* [[COERCED_C]], align 4
+
+  __atomic_exchange(a, b, c, memory_order_seq_cst);
+}
+
+_Bool fd4(struct S *a, struct S *b, struct S *c) {
+  // CHECK-LABEL: @fd4
+  // CHECK: [[A_ADDR:%.*]] = alloca %struct.S*, align 4
+  // CHECK-NEXT: [[B_ADDR:%.*]] = alloca %struct.S*, align 4
+  // CHECK-NEXT: [[C_ADDR:%.*]] = alloca %struct.S*, align 4
+  // CHECK: store %struct.S* %a, %struct.S** [[A_ADDR]], align 4
+  // CHECK-NEXT: store %struct.S* %b, %struct.S** [[B_ADDR]], align 4
+  // CHECK-NEXT: store %struct.S* %c, %struct.S** [[C_ADDR]], align 4
+  // CHECK-NEXT: [[LOAD_A_PTR:%.*]] = load %struct.S** [[A_ADDR]], align 4
+  // CHECK-NEXT: [[LOAD_B_PTR:%.*]] = load %struct.S** [[B_ADDR]], align 4
+  // CHECK-NEXT: [[LOAD_C_PTR:%.*]] = load %struct.S** [[C_ADDR]], align 4
+  // CHECK-NEXT: [[COERCED_A:%.*]] = bitcast %struct.S* [[LOAD_A_PTR]] to i8*
+  // CHECK-NEXT: [[COERCED_B:%.*]] = bitcast %struct.S* [[LOAD_B_PTR]] to i8*
+  // CHECK-NEXT: [[COERCED_C:%.*]] = bitcast %struct.S* [[LOAD_C_PTR]] to i64*
+  // CHECK-NEXT: [[LOAD_C:%.*]] = load i64* [[COERCED_C]], align 4
+  // CHECK-NEXT: [[CALL:%.*]] = call zeroext i1 @__atomic_compare_exchange_8(i8* [[COERCED_A]], i8* [[COERCED_B]], i64 [[LOAD_C]]
+  // CHECK-NEXT: ret i1 [[CALL]]
+  return __atomic_compare_exchange(a, b, c, 1, 5, 5);
+}
+
 int* fp1(_Atomic(int*) *p) {
   // CHECK-LABEL: @fp1
   // CHECK: load atomic i32* {{.*}} seq_cst
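
For reference, a minimal caller-side sketch of the pattern the new fd1/fd2 tests exercise. The helper name copy_atomically is illustrative only (not part of the patch), and the IR noted in the comments assumes a target where 8-byte atomics are lowered to the optimized __atomic_*_8 libcalls, as in the tests above:

struct S { double x; };

void copy_atomically(struct S *src, struct S *dst) {
  struct S tmp;
  /* With this change the optimized libcall returns an i64, which is then
     stored back through a bitcast of the temporary:
       %call = call i64 @__atomic_load_8(...)  */
  __atomic_load(src, &tmp, __ATOMIC_SEQ_CST);
  /* The value argument is likewise coerced to an i64:
       call void @__atomic_store_8(i8* ..., i64 ...)  */
  __atomic_store(dst, &tmp, __ATOMIC_SEQ_CST);
}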