Index: cfe/trunk/lib/CodeGen/CGStmt.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CGStmt.cpp +++ cfe/trunk/lib/CodeGen/CGStmt.cpp @@ -1984,6 +1984,7 @@ std::vector ResultTruncRegTypes; std::vector ArgTypes; std::vector Args; + llvm::BitVector ResultTypeRequiresCast; // Keep track of inout constraints. std::string InOutConstraints; @@ -2022,13 +2023,23 @@ // If this is a register output, then make the inline asm return it // by-value. If this is a memory result, return the value by-reference. - if (!Info.allowsMemory() && hasScalarEvaluationKind(OutExpr->getType())) { + bool isScalarizableAggregate = + hasAggregateEvaluationKind(OutExpr->getType()); + if (!Info.allowsMemory() && (hasScalarEvaluationKind(OutExpr->getType()) || + isScalarizableAggregate)) { Constraints += "=" + OutputConstraint; ResultRegQualTys.push_back(OutExpr->getType()); ResultRegDests.push_back(Dest); - ResultRegTypes.push_back(ConvertTypeForMem(OutExpr->getType())); - ResultTruncRegTypes.push_back(ResultRegTypes.back()); - + ResultTruncRegTypes.push_back(ConvertTypeForMem(OutExpr->getType())); + if (Info.allowsRegister() && isScalarizableAggregate) { + ResultTypeRequiresCast.push_back(true); + unsigned Size = getContext().getTypeSize(OutExpr->getType()); + llvm::Type *ConvTy = llvm::IntegerType::get(getLLVMContext(), Size); + ResultRegTypes.push_back(ConvTy); + } else { + ResultTypeRequiresCast.push_back(false); + ResultRegTypes.push_back(ResultTruncRegTypes.back()); + } // If this output is tied to an input, and if the input is larger, then // we need to set the actual result type of the inline asm node to be the // same as the input type. @@ -2271,6 +2282,9 @@ assert(RegResults.size() == ResultRegTypes.size()); assert(RegResults.size() == ResultTruncRegTypes.size()); assert(RegResults.size() == ResultRegDests.size()); + // ResultRegDests can be also populated by addReturnRegisterOutputs() above, + // in which case its size may grow. 
+ assert(ResultTypeRequiresCast.size() <= ResultRegDests.size()); for (unsigned i = 0, e = RegResults.size(); i != e; ++i) { llvm::Value *Tmp = RegResults[i]; @@ -2300,7 +2314,24 @@ } } - EmitStoreThroughLValue(RValue::get(Tmp), ResultRegDests[i]); + LValue Dest = ResultRegDests[i]; + // ResultTypeRequiresCast elements correspond to the first + // ResultTypeRequiresCast.size() elements of RegResults. + if ((i < ResultTypeRequiresCast.size()) && ResultTypeRequiresCast[i]) { + unsigned Size = getContext().getTypeSize(ResultRegQualTys[i]); + Address A = Builder.CreateBitCast(Dest.getAddress(), + ResultRegTypes[i]->getPointerTo()); + QualType Ty = getContext().getIntTypeForBitwidth(Size, /*Signed*/ false); + if (Ty.isNull()) { + const Expr *OutExpr = S.getOutputExpr(i); + CGM.Error( + OutExpr->getExprLoc(), + "impossible constraint in asm: can't store struct into a register"); + return; + } + Dest = MakeAddrLValue(A, Ty); + } + EmitStoreThroughLValue(RValue::get(Tmp), Dest); } } Index: cfe/trunk/test/CodeGen/asm-attrs.c =================================================================== --- cfe/trunk/test/CodeGen/asm-attrs.c +++ cfe/trunk/test/CodeGen/asm-attrs.c @@ -8,7 +8,7 @@ // CHECK: call i32 asm "foo5", {{.*}} [[READONLY]] // CHECK: call i32 asm "foo6", {{.*}} [[NOATTRS]] // CHECK: call void asm sideeffect "foo7", {{.*}} [[NOATTRS]] -// CHECK: call void asm "foo8", {{.*}} [[NOATTRS]] +// CHECK: call i32 asm "foo8", {{.*}} [[READNONE]] // CHECK: attributes [[READNONE]] = { nounwind readnone } // CHECK: attributes [[NOATTRS]] = { nounwind } Index: cfe/trunk/test/CodeGen/x86_64-PR42672.c =================================================================== --- cfe/trunk/test/CodeGen/x86_64-PR42672.c +++ cfe/trunk/test/CodeGen/x86_64-PR42672.c @@ -0,0 +1,102 @@ +// RUN: %clang_cc1 -triple=x86_64-unknown-linux-gnu -DSTRUCT -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-STRUCT +// RUN: %clang_cc1 -triple=x86_64-unknown-linux-gnu -USTRUCT -emit-llvm %s -o - 
2>&1 | FileCheck %s --check-prefix=CHECK-NOSTRUCT +// RUN: not %clang_cc1 -triple=x86_64-unknown-linux-gnu -DIMPOSSIBLE_ODD -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-IMPOSSIBLE_ODD +// RUN: not %clang_cc1 -triple=x86_64-unknown-linux-gnu -DIMPOSSIBLE_BIG -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-IMPOSSIBLE_BIG +// RUN: %clang_cc1 -triple=x86_64-unknown-linux-gnu -DPOSSIBLE_X -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-X +// RUN: not %clang_cc1 -triple=x86_64-unknown-linux-gnu -DIMPOSSIBLE_X -emit-llvm %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-IMPOSSIBLE_X + +// Make sure Clang doesn't treat |lockval| as asm input. +void _raw_spin_lock(void) { +#ifdef STRUCT + struct { + unsigned short owner, next; + } lockval; + lockval.owner = 1; + lockval.next = 2; +#else + int lockval; + lockval = 3; +#endif + asm("nop" + : "=r"(lockval)); +} +// CHECK-LABEL: _raw_spin_lock +// CHECK-LABEL: entry: + +// CHECK-STRUCT: %lockval = alloca %struct.anon, align 2 +// CHECK-STRUCT: store i16 1 +// CHECK-STRUCT: store i16 2 +// CHECK-STRUCT: [[RES:%[0-9]+]] = call i32 asm "nop", "=r,~{dirflag},~{fpsr},~{flags}"() +// CHECK-STRUCT: [[CAST:%[0-9]+]] = bitcast %struct.anon* %lockval to i32* +// CHECK-STRUCT: store i32 [[RES]], i32* [[CAST]], align 2 + +// CHECK-NOSTRUCT: %lockval = alloca i32, align 4 +// CHECK-NOSTRUCT: store i32 3 +// CHECK-NOSTRUCT: [[RES:%[0-9]+]] = call i32 asm "nop", "=r,~{dirflag},~{fpsr},~{flags}"() +// CHECK-NOSTRUCT: store i32 [[RES]], i32* %lockval, align 4 + +// Check Clang correctly handles a structure with padding. +void unusual_struct(void) { + struct { + unsigned short first; + unsigned char second; + } str; + asm("nop" + : "=r"(str)); +} + +// Check Clang reports an error if attempting to return a structure for which +// no direct conversion to a register is possible. 
+void odd_struct(void) { +#ifdef IMPOSSIBLE_ODD + struct __attribute__((__packed__)) { + unsigned short first; + unsigned char second; + } str; + asm("nop" + : "=r"(str)); +#endif +} +// CHECK-IMPOSSIBLE_ODD: impossible constraint in asm + +// Check Clang reports an error if attempting to return a big structure via a register. +void big_struct(void) { +#ifdef IMPOSSIBLE_BIG + struct { + long long int v1, v2, v3, v4; + } str; + asm("nop" + : "=r"(str)); +#endif +} +// CHECK-IMPOSSIBLE_BIG: impossible constraint in asm + +// Clang is able to emit LLVM IR for a 16-byte structure. +void x_constraint_fit() { +#ifdef POSSIBLE_X + struct S { + unsigned x[4]; + } z; + asm volatile("nop" + : "=x"(z)); +#endif +} +// CHECK-LABEL: x_constraint_fit +// CHECK-X: %z = alloca %struct.S, align 4 +// CHECK-X: [[RES:%[0-9]+]] = call i128 asm sideeffect "nop", "=x,~{dirflag},~{fpsr},~{flags}"() +// CHECK-X: [[CAST:%[0-9]+]] = bitcast %struct.S* %z to i128* +// CHECK-X: store i128 [[RES]], i128* [[CAST]], align 4 +// CHECK-X: ret + +// Clang is unable to emit LLVM IR for a 32-byte structure. +void x_constraint_nofit() { +#ifdef IMPOSSIBLE_X + struct S { + unsigned x[8]; + } z; + asm volatile("nop" + : "=x"(z)); +#endif +} + +// CHECK-IMPOSSIBLE_X: invalid output size for constraint