diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -1986,6 +1986,7 @@
   std::vector<llvm::Type *> ResultTruncRegTypes;
   std::vector<llvm::Type *> ArgTypes;
   std::vector<llvm::Value *> Args;
+  std::vector<bool> ResultTypeRequiresCast;
 
   // Keep track of inout constraints.
   std::string InOutConstraints;
@@ -2024,13 +2025,23 @@
     // If this is a register output, then make the inline asm return it
     // by-value. If this is a memory result, return the value by-reference.
-    if (!Info.allowsMemory() && hasScalarEvaluationKind(OutExpr->getType())) {
+    bool isScalarizableAggregate =
+        hasAggregateEvaluationKind(OutExpr->getType());
+    if (!Info.allowsMemory() && (hasScalarEvaluationKind(OutExpr->getType()) ||
+                                 isScalarizableAggregate)) {
       Constraints += "=" + OutputConstraint;
       ResultRegQualTys.push_back(OutExpr->getType());
       ResultRegDests.push_back(Dest);
-      ResultRegTypes.push_back(ConvertTypeForMem(OutExpr->getType()));
-      ResultTruncRegTypes.push_back(ResultRegTypes.back());
-
+      ResultTruncRegTypes.push_back(ConvertTypeForMem(OutExpr->getType()));
+      if (Info.allowsRegister() && isScalarizableAggregate) {
+        ResultTypeRequiresCast.push_back(true);
+        unsigned Size = getContext().getTypeSize(OutExpr->getType());
+        llvm::Type *ConvTy = llvm::IntegerType::get(getLLVMContext(), Size);
+        ResultRegTypes.push_back(ConvTy);
+      } else {
+        ResultTypeRequiresCast.push_back(false);
+        ResultRegTypes.push_back(ResultTruncRegTypes.back());
+      }
       // If this output is tied to an input, and if the input is larger, then
       // we need to set the actual result type of the inline asm node to be the
       // same as the input type.
@@ -2273,6 +2284,7 @@
   assert(RegResults.size() == ResultRegTypes.size());
   assert(RegResults.size() == ResultTruncRegTypes.size());
   assert(RegResults.size() == ResultRegDests.size());
+  assert(ResultTypeRequiresCast.size() <= ResultRegDests.size());
 
   for (unsigned i = 0, e = RegResults.size(); i != e; ++i) {
     llvm::Value *Tmp = RegResults[i];
@@ -2302,7 +2314,17 @@
       }
     }
 
-    EmitStoreThroughLValue(RValue::get(Tmp), ResultRegDests[i]);
+    LValue Dest = ResultRegDests[i];
+    // ResultTypeRequiresCast elements correspond to the first
+    // ResultTypeRequiresCast.size() elements of RegResults.
+    if ((i < ResultTypeRequiresCast.size()) && ResultTypeRequiresCast[i]) {
+      unsigned Size = getContext().getTypeSize(ResultRegQualTys[i]);
+      Address A = Builder.CreateBitCast(Dest.getAddress(),
+                                        ResultRegTypes[i]->getPointerTo());
+      Dest = MakeAddrLValue(
+          A, getContext().getIntTypeForBitwidth(Size, /*Signed*/ false));
+    }
+    EmitStoreThroughLValue(RValue::get(Tmp), Dest);
   }
 }
diff --git a/clang/test/CodeGen/PR42672.c b/clang/test/CodeGen/PR42672.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/PR42672.c
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 -DSTRUCT -O2 -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -USTRUCT -O2 -emit-llvm %s -o - | FileCheck %s
+//
+// Make sure Clang doesn't treat |lockval| as an asm input, and therefore
+// optimizes away all stores to |lockval| together with the asm directive.
+void _raw_spin_lock(void) {
+#if STRUCT
+  struct {
+    unsigned short owner, next;
+  } lockval;
+  lockval.owner = 1;
+  lockval.next = 2;
+#else
+  int lockval;
+  lockval = 3;
+#endif
+  asm("nop"
+      : "=r"(lockval));
+}
+// CHECK-LABEL: _raw_spin_lock
+// CHECK-LABEL: entry:
+// CHECK-NEXT: ret void
diff --git a/clang/test/CodeGen/asm-attrs.c b/clang/test/CodeGen/asm-attrs.c
--- a/clang/test/CodeGen/asm-attrs.c
+++ b/clang/test/CodeGen/asm-attrs.c
@@ -8,7 +8,7 @@
 // CHECK: call i32 asm "foo5", {{.*}} [[READONLY]]
 // CHECK: call i32 asm "foo6", {{.*}} [[NOATTRS]]
 // CHECK: call void asm sideeffect "foo7", {{.*}} [[NOATTRS]]
-// CHECK: call void asm "foo8", {{.*}} [[NOATTRS]]
+// CHECK: call i32 asm "foo8", {{.*}} [[READNONE]]
 
 // CHECK: attributes [[READNONE]] = { nounwind readnone }
 // CHECK: attributes [[NOATTRS]] = { nounwind }
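
Note: below is a minimal sketch of the pattern the CGStmt.cpp change enables, namely a small aggregate bound directly to a register ("=r") output. It is illustrative only and not part of the patch: the type and function names, the constant, and the asm body are assumptions, and it presumes a little-endian x86 target.

```c
// Hypothetical example modeled on the Linux spinlock code from PR42672.
typedef struct {
  unsigned short owner, next;
} ticket_lock_t;

ticket_lock_t get_ticket(void) {
  ticket_lock_t lockval;
  // The 4-byte aggregate is a register output. With this change, Clang makes
  // the asm return an i32 (an integer of the aggregate's bit width) and
  // stores it back through a bit-cast of |lockval|'s address, instead of
  // falling back to the indirect (by-reference) output path.
  asm("movl $0x00020001, %0" : "=r"(lockval));
  return lockval; // owner == 1, next == 2 on a little-endian target
}
```

The carrier type comes from llvm::IntegerType::get(getLLVMContext(), Size), and the store-back path bit-casts the destination address rather than the result value, so EmitStoreThroughLValue itself needs no changes.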