Index: cfe/trunk/lib/CodeGen/TargetInfo.cpp =================================================================== --- cfe/trunk/lib/CodeGen/TargetInfo.cpp +++ cfe/trunk/lib/CodeGen/TargetInfo.cpp @@ -503,72 +503,6 @@ return Found; } -static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) { - // Treat complex types as the element type. - if (const ComplexType *CTy = Ty->getAs()) - Ty = CTy->getElementType(); - - // Check for a type which we know has a simple scalar argument-passing - // convention without any padding. (We're specifically looking for 32 - // and 64-bit integer and integer-equivalents, float, and double.) - if (!Ty->getAs() && !Ty->hasPointerRepresentation() && - !Ty->isEnumeralType() && !Ty->isBlockPointerType()) - return false; - - uint64_t Size = Context.getTypeSize(Ty); - return Size == 32 || Size == 64; -} - -/// canExpandIndirectArgument - Test whether an argument type which is to be -/// passed indirectly (on the stack) would have the equivalent layout if it was -/// expanded into separate arguments. If so, we prefer to do the latter to avoid -/// inhibiting optimizations. -/// -// FIXME: This predicate is missing many cases, currently it just follows -// llvm-gcc (checks that all fields are 32-bit or 64-bit primitive types). We -// should probably make this smarter, or better yet make the LLVM backend -// capable of handling it. -static bool canExpandIndirectArgument(QualType Ty, ASTContext &Context) { - // We can only expand structure types. - const RecordType *RT = Ty->getAs(); - if (!RT) - return false; - - // We can only expand (C) structures. - // - // FIXME: This needs to be generalized to handle classes as well. - const RecordDecl *RD = RT->getDecl(); - if (!RD->isStruct()) - return false; - - // We try to expand CLike CXXRecordDecl. - if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) { - if (!CXXRD->isCLike()) - return false; - } - - uint64_t Size = 0; - - for (const auto *FD : RD->fields()) { - if (!is32Or64BitBasicType(FD->getType(), Context)) - return false; - - // FIXME: Reject bit-fields wholesale; there are two problems, we don't know - // how to expand them yet, and the predicate for telling if a bitfield still - // counts as "basic" is more complicated than what we were doing previously. - if (FD->isBitField()) - return false; - - Size += Context.getTypeSize(FD->getType()); - } - - // Make sure there are not any holes in the struct. - if (Size != Context.getTypeSize(Ty)) - return false; - - return true; -} - namespace { Address EmitVAArgInstr(CodeGenFunction &CGF, Address VAListAddr, QualType Ty, const ABIArgInfo &AI) { @@ -959,6 +893,8 @@ bool &NeedsPadding) const; bool shouldPrimitiveUseInReg(QualType Ty, CCState &State) const; + bool canExpandIndirectArgument(QualType Ty) const; + /// \brief Rewrite the function info so that all memory arguments use /// inalloca. void rewriteWithInAlloca(CGFunctionInfo &FI) const; @@ -1179,6 +1115,72 @@ return true; } +static bool is32Or64BitBasicType(QualType Ty, ASTContext &Context) { + // Treat complex types as the element type. + if (const ComplexType *CTy = Ty->getAs()) + Ty = CTy->getElementType(); + + // Check for a type which we know has a simple scalar argument-passing + // convention without any padding. (We're specifically looking for 32 + // and 64-bit integer and integer-equivalents, float, and double.) + if (!Ty->getAs() && !Ty->hasPointerRepresentation() && + !Ty->isEnumeralType() && !Ty->isBlockPointerType()) + return false; + + uint64_t Size = Context.getTypeSize(Ty); + return Size == 32 || Size == 64; +} + +/// Test whether an argument type which is to be passed indirectly (on the +/// stack) would have the equivalent layout if it was expanded into separate +/// arguments. If so, we prefer to do the latter to avoid inhibiting +/// optimizations. +bool X86_32ABIInfo::canExpandIndirectArgument(QualType Ty) const { + // We can only expand structure types. + const RecordType *RT = Ty->getAs(); + if (!RT) + return false; + const RecordDecl *RD = RT->getDecl(); + if (const CXXRecordDecl *CXXRD = dyn_cast(RD)) { + if (!IsWin32StructABI ) { + // On non-Windows, we have to conservatively match our old bitcode + // prototypes in order to be ABI-compatible at the bitcode level. + if (!CXXRD->isCLike()) + return false; + } else { + // Don't do this for dynamic classes. + if (CXXRD->isDynamicClass()) + return false; + // Don't do this if there are any non-empty bases. + for (const CXXBaseSpecifier &Base : CXXRD->bases()) { + if (!isEmptyRecord(getContext(), Base.getType(), /*AllowArrays=*/true)) + return false; + } + } + } + + uint64_t Size = 0; + + for (const auto *FD : RD->fields()) { + // Scalar arguments on the stack get 4 byte alignment on x86. If the + // argument is smaller than 32-bits, expanding the struct will create + // alignment padding. + if (!is32Or64BitBasicType(FD->getType(), getContext())) + return false; + + // FIXME: Reject bit-fields wholesale; there are two problems, we don't know + // how to expand them yet, and the predicate for telling if a bitfield still + // counts as "basic" is more complicated than what we were doing previously. + if (FD->isBitField()) + return false; + + Size += getContext().getTypeSize(FD->getType()); + } + + // We can do this if there was no alignment padding. + return Size == getContext().getTypeSize(Ty); +} + ABIArgInfo X86_32ABIInfo::getIndirectReturnResult(QualType RetTy, CCState &State) const { // If the return value is indirect, then the hidden argument is consuming one // integer register. @@ -1395,6 +1397,12 @@ bool X86_32ABIInfo::shouldAggregateUseDirect(QualType Ty, CCState &State, bool &InReg, bool &NeedsPadding) const { + // On Windows, aggregates other than HFAs are never passed in registers, and + // they do not consume register slots. Homogenous floating-point aggregates + // (HFAs) have already been dealt with at this point. + if (IsWin32StructABI && isAggregateTypeForABI(Ty)) + return false; + NeedsPadding = false; InReg = !IsMCUABI; @@ -1468,23 +1476,19 @@ } if (isAggregateTypeForABI(Ty)) { - if (RT) { - // Structs are always byval on win32, regardless of what they contain. - if (IsWin32StructABI) - return getIndirectResult(Ty, true, State); - - // Structures with flexible arrays are always indirect. - if (RT->getDecl()->hasFlexibleArrayMember()) - return getIndirectResult(Ty, true, State); - } + // Structures with flexible arrays are always indirect. + // FIXME: This should not be byval! + if (RT && RT->getDecl()->hasFlexibleArrayMember()) + return getIndirectResult(Ty, true, State); - // Ignore empty structs/unions. - if (isEmptyRecord(getContext(), Ty, true)) + // Ignore empty structs/unions on non-Windows. + if (!IsWin32StructABI && isEmptyRecord(getContext(), Ty, true)) return ABIArgInfo::getIgnore(); llvm::LLVMContext &LLVMContext = getVMContext(); llvm::IntegerType *Int32 = llvm::Type::getInt32Ty(LLVMContext); - bool NeedsPadding, InReg; + bool NeedsPadding = false; + bool InReg; if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) { unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32; SmallVector Elements(SizeInRegs, Int32); @@ -1502,9 +1506,8 @@ // optimizations. // Don't do this for the MCU if there are still free integer registers // (see X86_64 ABI for full explanation). - if (getContext().getTypeSize(Ty) <= 4*32 && - canExpandIndirectArgument(Ty, getContext()) && - (!IsMCUABI || State.FreeRegs == 0)) + if (getContext().getTypeSize(Ty) <= 4 * 32 && + (!IsMCUABI || State.FreeRegs == 0) && canExpandIndirectArgument(Ty)) return ABIArgInfo::getExpandWithPadding( State.CC == llvm::CallingConv::X86_FastCall || State.CC == llvm::CallingConv::X86_VectorCall, @@ -1624,11 +1627,14 @@ return false; case ABIArgInfo::Direct: case ABIArgInfo::Extend: - case ABIArgInfo::Expand: - case ABIArgInfo::CoerceAndExpand: if (Info.getInReg()) return false; return true; + case ABIArgInfo::Expand: + case ABIArgInfo::CoerceAndExpand: + // These are aggregate types which are never passed in registers when + // inalloca is involved. + return true; } llvm_unreachable("invalid enum"); } Index: cfe/trunk/test/CodeGen/vectorcall.c =================================================================== --- cfe/trunk/test/CodeGen/vectorcall.c +++ cfe/trunk/test/CodeGen/vectorcall.c @@ -9,9 +9,9 @@ // CHECK: define x86_vectorcallcc void @"\01v2@@8"(i8 inreg signext %a, i8 inreg signext %b) // X64: define x86_vectorcallcc void @"\01v2@@16"(i8 %a, i8 %b) -struct Small { int a; }; +struct Small { int x; }; void __vectorcall v3(int a, struct Small b, int c) {} -// CHECK: define x86_vectorcallcc void @"\01v3@@12"(i32 inreg %a, %struct.Small* byval align 4 %b, i32 inreg %c) +// CHECK: define x86_vectorcallcc void @"\01v3@@12"(i32 inreg %a, i32 %b.0, i32 inreg %c) // X64: define x86_vectorcallcc void @"\01v3@@24"(i32 %a, i32 %b.coerce, i32 %c) struct Large { int a[5]; }; Index: cfe/trunk/test/CodeGen/windows-struct-abi.c =================================================================== --- cfe/trunk/test/CodeGen/windows-struct-abi.c +++ cfe/trunk/test/CodeGen/windows-struct-abi.c @@ -10,7 +10,7 @@ void receive_f1(struct f1 a0) { } -// CHECK: define void @receive_f1(%struct.f1* byval align 4 %a0) +// CHECK: define void @receive_f1(float %a0.0) struct f2 { float f; @@ -23,7 +23,7 @@ void receive_f2(struct f2 a0) { } -// CHECK: define void @receive_f2(%struct.f2* byval align 4 %a0) +// CHECK: define void @receive_f2(float %a0.0, float %a0.1) struct f4 { float f; @@ -38,5 +38,5 @@ void receive_f4(struct f4 a0) { } -// CHECK: define void @receive_f4(%struct.f4* byval align 4 %a0) +// CHECK: define void @receive_f4(float %a0.0, float %a0.1, float %a0.2, float %a0.3) Index: cfe/trunk/test/CodeGen/x86_32-arguments-win32.c =================================================================== --- cfe/trunk/test/CodeGen/x86_32-arguments-win32.c +++ cfe/trunk/test/CodeGen/x86_32-arguments-win32.c @@ -1,7 +1,7 @@ // RUN: %clang_cc1 -w -triple i386-pc-win32 -emit-llvm -o - %s | FileCheck %s // CHECK-LABEL: define i64 @f1_1() -// CHECK-LABEL: define void @f1_2(%struct.s1* byval align 4 %a0) +// CHECK-LABEL: define void @f1_2(i32 %a0.0, i32 %a0.1) struct s1 { int a; int b; @@ -31,7 +31,7 @@ struct s4 f4_1(void) { while (1) {} } // CHECK-LABEL: define i64 @f5_1() -// CHECK-LABEL: define void @f5_2(%struct.s5* byval align 4) +// CHECK-LABEL: define void @f5_2(double %a0.0) struct s5 { double a; }; @@ -39,7 +39,7 @@ void f5_2(struct s5 a0) {} // CHECK-LABEL: define i32 @f6_1() -// CHECK-LABEL: define void @f6_2(%struct.s6* byval align 4 %a0) +// CHECK-LABEL: define void @f6_2(float %a0.0) struct s6 { float a; }; Index: cfe/trunk/test/CodeGenCXX/microsoft-abi-cdecl-method-sret.cpp =================================================================== --- cfe/trunk/test/CodeGenCXX/microsoft-abi-cdecl-method-sret.cpp +++ cfe/trunk/test/CodeGenCXX/microsoft-abi-cdecl-method-sret.cpp @@ -2,10 +2,10 @@ // PR15768 -// A trivial 12 byte struct is returned indirectly. +// A trivial 20 byte struct is returned indirectly and taken as byval. struct S { S(); - int a, b, c; + int a, b, c, d, e; }; struct C { Index: cfe/trunk/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp =================================================================== --- cfe/trunk/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp +++ cfe/trunk/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp @@ -22,6 +22,16 @@ int x; }; +struct Multibyte { + char a, b, c, d; +}; + +struct Packed { + short a; + int b; + short c; +}; + struct SmallWithDtor { SmallWithDtor(); ~SmallWithDtor(); @@ -102,19 +112,30 @@ void small_arg(Small s) {} // LINUX-LABEL: define void @_Z9small_arg5Small(i32 %s.0) -// WIN32: define void @"\01?small_arg@@YAXUSmall@@@Z"(%struct.Small* byval align 4 %s) +// WIN32: define void @"\01?small_arg@@YAXUSmall@@@Z"(i32 %s.0) // WIN64: define void @"\01?small_arg@@YAXUSmall@@@Z"(i32 %s.coerce) void medium_arg(Medium s) {} // LINUX-LABEL: define void @_Z10medium_arg6Medium(i32 %s.0, i32 %s.1) -// WIN32: define void @"\01?medium_arg@@YAXUMedium@@@Z"(%struct.Medium* byval align 4 %s) +// WIN32: define void @"\01?medium_arg@@YAXUMedium@@@Z"(i32 %s.0, i32 %s.1) // WIN64: define void @"\01?medium_arg@@YAXUMedium@@@Z"(i64 %s.coerce) void small_arg_with_ctor(SmallWithCtor s) {} // LINUX-LABEL: define void @_Z19small_arg_with_ctor13SmallWithCtor(%struct.SmallWithCtor* byval align 4 %s) -// WIN32: define void @"\01?small_arg_with_ctor@@YAXUSmallWithCtor@@@Z"(%struct.SmallWithCtor* byval align 4 %s) +// WIN32: define void @"\01?small_arg_with_ctor@@YAXUSmallWithCtor@@@Z"(i32 %s.0) // WIN64: define void @"\01?small_arg_with_ctor@@YAXUSmallWithCtor@@@Z"(i32 %s.coerce) +// FIXME: We could coerce to a series of i32s here if we wanted to. +void multibyte_arg(Multibyte s) {} +// LINUX-LABEL: define void @_Z13multibyte_arg9Multibyte(%struct.Multibyte* byval align 4 %s) +// WIN32: define void @"\01?multibyte_arg@@YAXUMultibyte@@@Z"(%struct.Multibyte* byval align 4 %s) +// WIN64: define void @"\01?multibyte_arg@@YAXUMultibyte@@@Z"(i32 %s.coerce) + +void packed_arg(Packed s) {} +// LINUX-LABEL: define void @_Z10packed_arg6Packed(%struct.Packed* byval align 4 %s) +// WIN32: define void @"\01?packed_arg@@YAXUPacked@@@Z"(%struct.Packed* byval align 4 %s) +// WIN64: define void @"\01?packed_arg@@YAXUPacked@@@Z"(%struct.Packed* %s) + // Test that dtors are invoked in the callee. void small_arg_with_dtor(SmallWithDtor s) {} // WIN32: define void @"\01?small_arg_with_dtor@@YAXUSmallWithDtor@@@Z"(<{ %struct.SmallWithDtor }>* inalloca) {{.*}} { @@ -230,12 +251,12 @@ void thiscall_method_arg(Small s) {} // LINUX: define {{.*}} void @_ZN5Class19thiscall_method_argE5Small(%class.Class* %this, i32 %s.0) - // WIN32: define {{.*}} void @"\01?thiscall_method_arg@Class@@QAEXUSmall@@@Z"(%class.Class* %this, %struct.Small* byval align 4 %s) + // WIN32: define {{.*}} void @"\01?thiscall_method_arg@Class@@QAEXUSmall@@@Z"(%class.Class* %this, i32 %s.0) // WIN64: define linkonce_odr void @"\01?thiscall_method_arg@Class@@QEAAXUSmall@@@Z"(%class.Class* %this, i32 %s.coerce) void thiscall_method_arg(SmallWithCtor s) {} // LINUX: define {{.*}} void @_ZN5Class19thiscall_method_argE13SmallWithCtor(%class.Class* %this, %struct.SmallWithCtor* byval align 4 %s) - // WIN32: define {{.*}} void @"\01?thiscall_method_arg@Class@@QAEXUSmallWithCtor@@@Z"(%class.Class* %this, %struct.SmallWithCtor* byval align 4 %s) + // WIN32: define {{.*}} void @"\01?thiscall_method_arg@Class@@QAEXUSmallWithCtor@@@Z"(%class.Class* %this, i32 %s.0) // WIN64: define linkonce_odr void @"\01?thiscall_method_arg@Class@@QEAAXUSmallWithCtor@@@Z"(%class.Class* %this, i32 %s.coerce) void thiscall_method_arg(Big s) {}