Index: cfe/trunk/lib/CodeGen/TargetInfo.cpp
===================================================================
--- cfe/trunk/lib/CodeGen/TargetInfo.cpp
+++ cfe/trunk/lib/CodeGen/TargetInfo.cpp
@@ -871,6 +871,14 @@
   return NumMembers <= 4;
 }
 
+/// Returns a Homogeneous Vector Aggregate ABIArgInfo, used in X86.
+static ABIArgInfo getDirectX86Hva(llvm::Type *T = nullptr) {
+  auto AI = ABIArgInfo::getDirect(T);
+  AI.setInReg(true);
+  AI.setCanBeFlattened(false);
+  return AI;
+}
+
 //===----------------------------------------------------------------------===//
 // X86-32 ABI Implementation
 //===----------------------------------------------------------------------===//
@@ -884,6 +892,11 @@
   unsigned FreeSSERegs;
 };
 
+enum {
+  // Vectorcall only allows the first 6 parameters to be passed in registers.
+  VectorcallMaxParamNumAsReg = 6
+};
+
 /// X86_32ABIInfo - The X86-32 ABI information.
 class X86_32ABIInfo : public SwiftABIInfo {
   enum Class {
@@ -929,6 +942,8 @@
   Class classify(QualType Ty) const;
   ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const;
   ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const;
+  ABIArgInfo reclassifyHvaArgType(QualType Ty, CCState &State,
+                                  const ABIArgInfo &current) const;
   /// \brief Updates the number of available free registers, returns
   /// true if any registers were allocated.
   bool updateFreeRegs(QualType Ty, CCState &State) const;
@@ -946,6 +961,8 @@
   void addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
                            CharUnits &StackOffset, ABIArgInfo &Info,
                            QualType Type) const;
+  void computeVectorCallArgs(CGFunctionInfo &FI, CCState &State,
+                             bool &UsedInAlloca) const;
 
 public:
 
@@ -1494,6 +1511,27 @@
   return true;
 }
 
+ABIArgInfo
+X86_32ABIInfo::reclassifyHvaArgType(QualType Ty, CCState &State,
+                                    const ABIArgInfo &current) const {
+  // Assumes the vectorcall calling convention.
+  const Type *Base = nullptr;
+  uint64_t NumElts = 0;
+
+  if (!Ty->isBuiltinType() && !Ty->isVectorType() &&
+      isHomogeneousAggregate(Ty, Base, NumElts)) {
+    if (State.FreeSSERegs >= NumElts) {
+      // HVA types get passed directly in registers if there is room.
+      State.FreeSSERegs -= NumElts;
+      return getDirectX86Hva();
+    }
+    // If there is no room, the HVA gets passed as a normal indirect
+    // structure.
+    return getIndirectResult(Ty, /*ByVal=*/false, State);
+  }
+  return current;
+}
+
 ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
                                                CCState &State) const {
   // FIXME: Set alignment on indirect arguments.
@@ -1513,19 +1551,34 @@
   }
 
   // vectorcall adds the concept of a homogenous vector aggregate, similar
-  // to other targets.
+  // to other targets; regcall uses some of the HVA rules.
   const Type *Base = nullptr;
   uint64_t NumElts = 0;
   if ((State.CC == llvm::CallingConv::X86_VectorCall ||
        State.CC == llvm::CallingConv::X86_RegCall) &&
       isHomogeneousAggregate(Ty, Base, NumElts)) {
-    if (State.FreeSSERegs >= NumElts) {
-      State.FreeSSERegs -= NumElts;
-      if (Ty->isBuiltinType() || Ty->isVectorType())
+
+    if (State.CC == llvm::CallingConv::X86_RegCall) {
+      if (State.FreeSSERegs >= NumElts) {
+        State.FreeSSERegs -= NumElts;
+        if (Ty->isBuiltinType() || Ty->isVectorType())
+          return ABIArgInfo::getDirect();
+        return ABIArgInfo::getExpand();
+
+      }
+      return getIndirectResult(Ty, /*ByVal=*/false, State);
+    } else if (State.CC == llvm::CallingConv::X86_VectorCall) {
+      if (State.FreeSSERegs >= NumElts && (Ty->isBuiltinType() || Ty->isVectorType())) {
+        // Actual floating-point and vector types get registers the first
+        // time through, if there are registers available.
+        State.FreeSSERegs -= NumElts;
         return ABIArgInfo::getDirect();
-      return ABIArgInfo::getExpand();
+      } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) {
+        // HVA types only get registers after everything else has been
+        // assigned, so mark them indirect for now.
+        return ABIArgInfo::getIndirect(getContext().getTypeAlignInChars(Ty));
+      }
     }
-    return getIndirectResult(Ty, /*ByVal=*/false, State);
   }
 
   if (isAggregateTypeForABI(Ty)) {
@@ -1604,6 +1657,36 @@
     return ABIArgInfo::getDirect();
 }
 
+void X86_32ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI, CCState &State,
+                                          bool &UsedInAlloca) const {
+  // Vectorcall only allows the first 6 parameters to be passed in registers,
+  // and homogeneous vector aggregates are only put into registers as a second
+  // priority.
+  unsigned Count = 0;
+  CCState ZeroState = State;
+  ZeroState.FreeRegs = ZeroState.FreeSSERegs = 0;
+  // HVAs get registers only as a second priority, so any deferred arguments
+  // are dealt with by going through the argument list a second time.
+  for (auto &I : FI.arguments()) {
+    if (Count < VectorcallMaxParamNumAsReg)
+      I.info = classifyArgumentType(I.type, State);
+    else
+      // Parameters after the 6th cannot be passed in registers,
+      // so pretend there are no registers left for them.
+      I.info = classifyArgumentType(I.type, ZeroState);
+    UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
+    ++Count;
+  }
+  Count = 0;
+  // Go through the arguments a second time, giving HVAs registers if
+  // there are still some available.
+  for (auto &I : FI.arguments()) {
+    if (Count < VectorcallMaxParamNumAsReg)
+      I.info = reclassifyHvaArgType(I.type, State, I.info);
+    ++Count;
+  }
+}
+
 void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
   CCState State(FI.getCallingConvention());
   if (IsMCUABI)
@@ -1638,9 +1721,14 @@
     ++State.FreeRegs;
 
   bool UsedInAlloca = false;
-  for (auto &I : FI.arguments()) {
-    I.info = classifyArgumentType(I.type, State);
-    UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
+  if (State.CC == llvm::CallingConv::X86_VectorCall) {
+    computeVectorCallArgs(FI, State, UsedInAlloca);
+  } else {
+    // If not vectorcall, revert to normal behavior.
+    for (auto &I : FI.arguments()) {
+      I.info = classifyArgumentType(I.type, State);
+      UsedInAlloca |= (I.info.getKind() == ABIArgInfo::InAlloca);
+    }
   }
 
   // If we needed to use inalloca for any argument, do a second pass and rewrite
@@ -2070,10 +2158,14 @@
 }
 
 private:
-  ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs,
-                      bool IsReturnType) const;
+  ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType,
+                      bool IsVectorCall, bool IsRegCall) const;
+  ABIArgInfo reclassifyHvaArgType(QualType Ty, unsigned &FreeSSERegs,
+                                  const ABIArgInfo &current) const;
+  void computeVectorCallArgs(CGFunctionInfo &FI, unsigned FreeSSERegs,
+                             bool IsVectorCall, bool IsRegCall) const;
 
-  bool IsMingw64;
+  bool IsMingw64;
 };
 
 class X86_64TargetCodeGenInfo : public TargetCodeGenInfo {
@@ -3679,8 +3771,24 @@
                                      /*allowHigherAlign*/ false);
 }
 
+ABIArgInfo
+WinX86_64ABIInfo::reclassifyHvaArgType(QualType Ty, unsigned &FreeSSERegs,
+                                       const ABIArgInfo &current) const {
+  // Assumes the vectorcall calling convention.
+  const Type *Base = nullptr;
+  uint64_t NumElts = 0;
+
+  if (!Ty->isBuiltinType() && !Ty->isVectorType() &&
+      isHomogeneousAggregate(Ty, Base, NumElts) && FreeSSERegs >= NumElts) {
+    FreeSSERegs -= NumElts;
+    return getDirectX86Hva();
+  }
+  return current;
+}
+
 ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs,
-                                      bool IsReturnType) const {
+                                      bool IsReturnType, bool IsVectorCall,
+                                      bool IsRegCall) const {
 
   if (Ty->isVoidType())
     return ABIArgInfo::getIgnore();
@@ -3704,21 +3812,34 @@
   }
 
-  // vectorcall adds the concept of a homogenous vector aggregate, similar to
-  // other targets.
   const Type *Base = nullptr;
   uint64_t NumElts = 0;
-  if (FreeSSERegs && isHomogeneousAggregate(Ty, Base, NumElts)) {
-    if (FreeSSERegs >= NumElts) {
-      FreeSSERegs -= NumElts;
-      if (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())
+  // vectorcall adds the concept of a homogeneous vector aggregate, similar to
+  // other targets.
+  if ((IsVectorCall || IsRegCall) &&
+      isHomogeneousAggregate(Ty, Base, NumElts)) {
+    if (IsRegCall) {
+      if (FreeSSERegs >= NumElts) {
+        FreeSSERegs -= NumElts;
+        if (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())
+          return ABIArgInfo::getDirect();
+        return ABIArgInfo::getExpand();
+      }
+      return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
+    } else if (IsVectorCall) {
+      if (FreeSSERegs >= NumElts &&
+          (IsReturnType || Ty->isBuiltinType() || Ty->isVectorType())) {
+        FreeSSERegs -= NumElts;
         return ABIArgInfo::getDirect();
-      return ABIArgInfo::getExpand();
+      } else if (IsReturnType) {
+        return ABIArgInfo::getExpand();
+      } else if (!Ty->isBuiltinType() && !Ty->isVectorType()) {
+        // HVAs are deferred and reclassified in the second pass.
+        return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
+      }
    }
-    return ABIArgInfo::getIndirect(Align, /*ByVal=*/false);
   }
 
-
   if (Ty->isMemberPointerType()) {
     // If the member pointer is represented by an LLVM int or ptr, pass it
     // directly.
 
@@ -3754,6 +3875,32 @@
   return ABIArgInfo::getDirect();
 }
 
+void WinX86_64ABIInfo::computeVectorCallArgs(CGFunctionInfo &FI,
+                                             unsigned FreeSSERegs,
+                                             bool IsVectorCall,
+                                             bool IsRegCall) const {
+  unsigned Count = 0;
+  for (auto &I : FI.arguments()) {
+    if (Count < VectorcallMaxParamNumAsReg)
+      I.info = classify(I.type, FreeSSERegs, false, IsVectorCall, IsRegCall);
+    else {
+      // Since these cannot be passed in registers, pretend no registers
+      // are left.
+      unsigned ZeroSSERegsAvail = 0;
+      I.info = classify(I.type, /*FreeSSERegs=*/ZeroSSERegsAvail, false,
+                        IsVectorCall, IsRegCall);
+    }
+    ++Count;
+  }
+
+  Count = 0;
+  for (auto &I : FI.arguments()) {
+    if (Count < VectorcallMaxParamNumAsReg)
+      I.info = reclassifyHvaArgType(I.type, FreeSSERegs, I.info);
+    ++Count;
+  }
+}
+
 void WinX86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const {
   bool IsVectorCall =
       FI.getCallingConvention() == llvm::CallingConv::X86_VectorCall;
@@ -3769,17 +3916,24 @@
   }
 
   if (!getCXXABI().classifyReturnType(FI))
-    FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true);
+    FI.getReturnInfo() = classify(FI.getReturnType(), FreeSSERegs, true,
+                                  IsVectorCall, IsRegCall);
 
   if (IsVectorCall) {
     // We can use up to 6 SSE register parameters with vectorcall.
     FreeSSERegs = 6;
   } else if (IsRegCall) {
+    // RegCall gives us 16 SSE registers; we can reuse the return registers.
    FreeSSERegs = 16;
   }
 
-  for (auto &I : FI.arguments())
-    I.info = classify(I.type, FreeSSERegs, false);
+  if (IsVectorCall) {
+    computeVectorCallArgs(FI, FreeSSERegs, IsVectorCall, IsRegCall);
+  } else {
+    for (auto &I : FI.arguments())
+      I.info = classify(I.type, FreeSSERegs, false, IsVectorCall, IsRegCall);
+  }
+
 }
 
 Address WinX86_64ABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
Index: cfe/trunk/test/CodeGen/vectorcall.c
===================================================================
--- cfe/trunk/test/CodeGen/vectorcall.c
+++ cfe/trunk/test/CodeGen/vectorcall.c
@@ -1,22 +1,22 @@
-// RUN: %clang_cc1 -emit-llvm %s -o - -triple=i386-pc-win32 | FileCheck %s
-// RUN: %clang_cc1 -emit-llvm %s -o - -triple=x86_64-pc-win32 | FileCheck %s --check-prefix=X64
+// RUN: %clang_cc1 -emit-llvm %s -o - -ffreestanding -triple=i386-pc-win32 | FileCheck %s --check-prefix=X32
+// RUN: %clang_cc1 -emit-llvm %s -o - -ffreestanding -triple=x86_64-pc-win32 | FileCheck %s --check-prefix=X64
 
 void __vectorcall v1(int a, int b) {}
-// CHECK: define x86_vectorcallcc void @"\01v1@@8"(i32 inreg %a, i32 inreg %b)
+// X32: define x86_vectorcallcc void @"\01v1@@8"(i32 inreg %a, i32 inreg %b)
 // X64: define x86_vectorcallcc void @"\01v1@@16"(i32 %a, i32 %b)
 
 void __vectorcall v2(char a, char b) {}
-// CHECK: define x86_vectorcallcc void @"\01v2@@8"(i8 inreg signext %a, i8 inreg signext %b)
+// X32: define x86_vectorcallcc void @"\01v2@@8"(i8 inreg signext %a, i8 inreg signext %b)
 // X64: define x86_vectorcallcc void @"\01v2@@16"(i8 %a, i8 %b)
 
 struct Small { int x; };
 void __vectorcall v3(int a, struct Small b, int c) {}
-// CHECK: define x86_vectorcallcc void @"\01v3@@12"(i32 inreg %a, i32 %b.0, i32 inreg %c)
+// X32: define x86_vectorcallcc void @"\01v3@@12"(i32 inreg %a, i32 %b.0, i32 inreg %c)
 // X64: define x86_vectorcallcc void @"\01v3@@24"(i32 %a, i32 %b.coerce, i32 %c)
 
 struct Large { int a[5]; };
 void __vectorcall v4(int a, struct Large b, int c) {}
-// CHECK: define x86_vectorcallcc void @"\01v4@@28"(i32 inreg %a, %struct.Large* byval align 4 %b, i32 inreg %c)
+// X32: define x86_vectorcallcc void @"\01v4@@28"(i32 inreg %a, %struct.Large* byval align 4 %b, i32 inreg %c)
 // X64: define x86_vectorcallcc void @"\01v4@@40"(i32 %a, %struct.Large* %b, i32 %c)
 
 struct HFA2 { double x, y; };
@@ -24,54 +24,84 @@
 struct HFA5 { double v, w, x, y, z; };
 
 void __vectorcall hfa1(int a, struct HFA4 b, int c) {}
-// CHECK: define x86_vectorcallcc void @"\01hfa1@@40"(i32 inreg %a, double %b.0, double %b.1, double %b.2, double %b.3, i32 inreg %c)
-// X64: define x86_vectorcallcc void @"\01hfa1@@48"(i32 %a, double %b.0, double %b.1, double %b.2, double %b.3, i32 %c)
+// X32: define x86_vectorcallcc void @"\01hfa1@@40"(i32 inreg %a, %struct.HFA4 inreg %b.coerce, i32 inreg %c)
+// X64: define x86_vectorcallcc void @"\01hfa1@@48"(i32 %a, %struct.HFA4 inreg %b.coerce, i32 %c)
 
 // HFAs that would require more than six total SSE registers are passed
 // indirectly. Additional vector arguments can consume the rest of the SSE
 // registers.
 void __vectorcall hfa2(struct HFA4 a, struct HFA4 b, double c) {}
-// CHECK: define x86_vectorcallcc void @"\01hfa2@@72"(double %a.0, double %a.1, double %a.2, double %a.3, %struct.HFA4* inreg %b, double %c)
-// X64: define x86_vectorcallcc void @"\01hfa2@@72"(double %a.0, double %a.1, double %a.2, double %a.3, %struct.HFA4* %b, double %c)
+// X32: define x86_vectorcallcc void @"\01hfa2@@72"(%struct.HFA4 inreg %a.coerce, %struct.HFA4* inreg %b, double %c)
+// X64: define x86_vectorcallcc void @"\01hfa2@@72"(%struct.HFA4 inreg %a.coerce, %struct.HFA4* %b, double %c)
 
 // Ensure that we pass builtin types directly while counting them against the
 // SSE register usage.
 void __vectorcall hfa3(double a, double b, double c, double d, double e, struct HFA2 f) {}
-// CHECK: define x86_vectorcallcc void @"\01hfa3@@56"(double %a, double %b, double %c, double %d, double %e, %struct.HFA2* inreg %f)
+// X32: define x86_vectorcallcc void @"\01hfa3@@56"(double %a, double %b, double %c, double %d, double %e, %struct.HFA2* inreg %f)
 // X64: define x86_vectorcallcc void @"\01hfa3@@56"(double %a, double %b, double %c, double %d, double %e, %struct.HFA2* %f)
 
 // Aggregates with more than four elements are not HFAs and are passed byval.
 // Because they are not classified as homogeneous, they don't get special
 // handling to ensure alignment.
 void __vectorcall hfa4(struct HFA5 a) {}
-// CHECK: define x86_vectorcallcc void @"\01hfa4@@40"(%struct.HFA5* byval align 4)
+// X32: define x86_vectorcallcc void @"\01hfa4@@40"(%struct.HFA5* byval align 4)
 // X64: define x86_vectorcallcc void @"\01hfa4@@40"(%struct.HFA5* %a)
 
 // Return HFAs of 4 or fewer elements in registers.
 static struct HFA2 g_hfa2;
 struct HFA2 __vectorcall hfa5(void) { return g_hfa2; }
-// CHECK: define x86_vectorcallcc %struct.HFA2 @"\01hfa5@@0"()
+// X32: define x86_vectorcallcc %struct.HFA2 @"\01hfa5@@0"()
 // X64: define x86_vectorcallcc %struct.HFA2 @"\01hfa5@@0"()
 
 typedef float __attribute__((vector_size(16))) v4f32;
 struct HVA2 { v4f32 x, y; };
+struct HVA3 { v4f32 w, x, y; };
 struct HVA4 { v4f32 w, x, y, z; };
+struct HVA5 { v4f32 w, x, y, z, p; };
 
-void __vectorcall hva1(int a, struct HVA4 b, int c) {}
-// CHECK: define x86_vectorcallcc void @"\01hva1@@72"(i32 inreg %a, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, i32 inreg %c)
-// X64: define x86_vectorcallcc void @"\01hva1@@80"(i32 %a, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, i32 %c)
-
-void __vectorcall hva2(struct HVA4 a, struct HVA4 b, v4f32 c) {}
-// CHECK: define x86_vectorcallcc void @"\01hva2@@144"(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, %struct.HVA4* inreg %b, <4 x float> %c)
-// X64: define x86_vectorcallcc void @"\01hva2@@144"(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, %struct.HVA4* %b, <4 x float> %c)
-
-void __vectorcall hva3(v4f32 a, v4f32 b, v4f32 c, v4f32 d, v4f32 e, struct HVA2 f) {}
-// CHECK: define x86_vectorcallcc void @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* inreg %f)
-// X64: define x86_vectorcallcc void @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* %f)
+v4f32 __vectorcall hva1(int a, struct HVA4 b, int c) {return b.w;}
+// X32: define x86_vectorcallcc <4 x float> @"\01hva1@@72"(i32 inreg %a, %struct.HVA4 inreg %b.coerce, i32 inreg %c)
+// X64: define x86_vectorcallcc <4 x float> @"\01hva1@@80"(i32 %a, %struct.HVA4 inreg %b.coerce, i32 %c)
+
+v4f32 __vectorcall hva2(struct HVA4 a, struct HVA4 b, v4f32 c) {return c;}
+// X32: define x86_vectorcallcc <4 x float> @"\01hva2@@144"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* inreg %b, <4 x float> %c)
+// X64: define x86_vectorcallcc <4 x float> @"\01hva2@@144"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* %b, <4 x float> %c)
+
+v4f32 __vectorcall hva3(v4f32 a, v4f32 b, v4f32 c, v4f32 d, v4f32 e, struct HVA2 f) {return f.x;}
+// X32: define x86_vectorcallcc <4 x float> @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* inreg %f)
+// X64: define x86_vectorcallcc <4 x float> @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* %f)
+
+// Vector types have higher priority than HVA structures, so vector types are
+// allocated first and HVAs are allocated only if enough registers remain.
+v4f32 __vectorcall hva4(struct HVA4 a, struct HVA2 b, v4f32 c) {return b.y;}
+// X32: define x86_vectorcallcc <4 x float> @"\01hva4@@112"(%struct.HVA4 inreg %a.coerce, %struct.HVA2* inreg %b, <4 x float> %c)
+// X64: define x86_vectorcallcc <4 x float> @"\01hva4@@112"(%struct.HVA4 inreg %a.coerce, %struct.HVA2* %b, <4 x float> %c)
+
+v4f32 __vectorcall hva5(struct HVA3 a, struct HVA3 b, v4f32 c, struct HVA2 d) {return d.y;}
+// X32: define x86_vectorcallcc <4 x float> @"\01hva5@@144"(%struct.HVA3 inreg %a.coerce, %struct.HVA3* inreg %b, <4 x float> %c, %struct.HVA2 inreg %d.coerce)
+// X64: define x86_vectorcallcc <4 x float> @"\01hva5@@144"(%struct.HVA3 inreg %a.coerce, %struct.HVA3* %b, <4 x float> %c, %struct.HVA2 inreg %d.coerce)
+
+struct HVA4 __vectorcall hva6(struct HVA4 a, struct HVA4 b) { return b;}
+// X32: define x86_vectorcallcc %struct.HVA4 @"\01hva6@@128"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* inreg %b)
+// X64: define x86_vectorcallcc %struct.HVA4 @"\01hva6@@128"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* %b)
+
+struct HVA5 __vectorcall hva7() {struct HVA5 a = {}; return a;}
+// X32: define x86_vectorcallcc void @"\01hva7@@0"(%struct.HVA5* inreg noalias sret %agg.result)
+// X64: define x86_vectorcallcc void @"\01hva7@@0"(%struct.HVA5* noalias sret %agg.result)
+
+v4f32 __vectorcall hva8(v4f32 a, v4f32 b, v4f32 c, v4f32 d, int e, v4f32 f) {return f;}
+// X32: define x86_vectorcallcc <4 x float> @"\01hva8@@84"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, i32 inreg %e, <4 x float> %f)
+// X64: define x86_vectorcallcc <4 x float> @"\01hva8@@88"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, i32 %e, <4 x float> %f)
 
 typedef float __attribute__((ext_vector_type(3))) v3f32;
 struct OddSizeHVA { v3f32 x, y; };
 
 void __vectorcall odd_size_hva(struct OddSizeHVA a) {}
-// CHECK: define x86_vectorcallcc void @"\01odd_size_hva@@32"(<3 x float> %a.0, <3 x float> %a.1)
-// X64: define x86_vectorcallcc void @"\01odd_size_hva@@32"(<3 x float> %a.0, <3 x float> %a.1)
+// X32: define x86_vectorcallcc void @"\01odd_size_hva@@32"(%struct.OddSizeHVA inreg %a.coerce)
+// X64: define x86_vectorcallcc void @"\01odd_size_hva@@32"(%struct.OddSizeHVA inreg %a.coerce)
+
+// The vectorcall ABI only allows the first 6 parameters to be passed in registers,
+// so 'p7' should not be considered for a register; 'p5' gets one on the second pass instead.
+struct HFA2 __vectorcall AddParticles(struct HFA2 p1, float p2, struct HFA4 p3, int p4, struct HFA2 p5, float p6, float p7){ return p1;}
+// X32: define x86_vectorcallcc %struct.HFA2 @"\01AddParticles@@80"(%struct.HFA2 inreg %p1.coerce, float %p2, %struct.HFA4* inreg %p3, i32 inreg %p4, %struct.HFA2 inreg %p5.coerce, float %p6, float %p7)
+// X64: define x86_vectorcallcc %struct.HFA2 @"\01AddParticles@@96"(%struct.HFA2 inreg %p1.coerce, float %p2, %struct.HFA4* %p3, i32 %p4, %struct.HFA2 inreg %p5.coerce, float %p6, float %p7)
Index: cfe/trunk/test/CodeGenCXX/homogeneous-aggregates.cpp
===================================================================
--- cfe/trunk/test/CodeGenCXX/homogeneous-aggregates.cpp
+++ cfe/trunk/test/CodeGenCXX/homogeneous-aggregates.cpp
@@ -47,7 +47,7 @@
 // PPC: define [3 x double] @_Z7func_D22D2([3 x double] %x.coerce)
 // ARM32: define arm_aapcs_vfpcc %struct.D2 @_Z7func_D22D2(%struct.D2 %x.coerce)
 // ARM64: define %struct.D2 @_Z7func_D22D2([3 x double] %x.coerce)
-// X64: define x86_vectorcallcc %struct.D2 @"\01_Z7func_D22D2@@24"(double %x.0, double %x.1, double %x.2)
+// X64: define x86_vectorcallcc %struct.D2 @"\01_Z7func_D22D2@@24"(%struct.D2 inreg %x.coerce)
 D2 CC func_D2(D2 x) { return x; }
 
 // PPC: define void @_Z7func_D32D3(%struct.D3* noalias sret %agg.result, [4 x i64] %x.coerce)
@@ -92,7 +92,7 @@
 void CC with_empty_base(HVAWithEmptyBase a) {}
 
 // FIXME: MSVC doesn't consider this an HVA because of the empty base.
-// X64: define x86_vectorcallcc void @"\01_Z15with_empty_base16HVAWithEmptyBase@@16"(float %a.0, float %a.1, float %a.2)
+// X64: define x86_vectorcallcc void @"\01_Z15with_empty_base16HVAWithEmptyBase@@16"(%struct.HVAWithEmptyBase inreg %a.coerce)
 
 struct HVAWithEmptyBitField : Float1, Float2 {
   int : 0; // Takes no space.
@@ -102,5 +102,5 @@
 // PPC: define void @_Z19with_empty_bitfield20HVAWithEmptyBitField([3 x float] %a.coerce)
 // ARM64: define void @_Z19with_empty_bitfield20HVAWithEmptyBitField([3 x float] %a.coerce)
 // ARM32: define arm_aapcs_vfpcc void @_Z19with_empty_bitfield20HVAWithEmptyBitField(%struct.HVAWithEmptyBitField %a.coerce)
-// X64: define x86_vectorcallcc void @"\01_Z19with_empty_bitfield20HVAWithEmptyBitField@@16"(float %a.0, float %a.1, float %a.2)
+// X64: define x86_vectorcallcc void @"\01_Z19with_empty_bitfield20HVAWithEmptyBitField@@16"(%struct.HVAWithEmptyBitField inreg %a.coerce)
 void CC with_empty_bitfield(HVAWithEmptyBitField a) {}
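
The two-pass scheme is spread across several hunks above, so here is a minimal, self-contained sketch of the register accounting it implements. This is an illustration, not Clang code: Arg, Kind, and classifyVectorCall are hypothetical stand-ins for CGFunctionInfo's argument list, ABIArgInfo, and the classifyArgumentType/reclassifyHvaArgType pair; only the vectorcall limits taken from the patch (6 SSE registers, first 6 parameters register-eligible) are real.

// Hypothetical model of the two-pass vectorcall classification in this patch.
#include <cstdio>
#include <vector>

enum class Kind { Integer, DirectSSE, DirectHvaInReg, Indirect };

struct Arg {
  bool IsHVA;              // homogeneous vector/float aggregate
  unsigned NumElts;        // SSE registers the argument would need (0 = none)
  Kind Info = Kind::Integer;
};

constexpr unsigned MaxParamAsReg = 6; // only the first 6 params may use registers

// Pass 1: builtin/vector types take SSE registers immediately; HVAs are
// deferred (left indirect) so scalars keep priority for the register file.
// Pass 2: registers still free are handed to the deferred HVAs, in order.
void classifyVectorCall(std::vector<Arg> &Args, unsigned FreeSSERegs) {
  unsigned Count = 0;
  for (Arg &A : Args) {
    bool Eligible = Count++ < MaxParamAsReg;
    if (A.IsHVA) {
      A.Info = Kind::Indirect; // deferred; may be upgraded in pass 2
    } else if (Eligible && A.NumElts > 0 && FreeSSERegs >= A.NumElts) {
      FreeSSERegs -= A.NumElts;
      A.Info = Kind::DirectSSE;
    }
  }
  Count = 0;
  for (Arg &A : Args) {
    bool Eligible = Count++ < MaxParamAsReg;
    if (Eligible && A.IsHVA && FreeSSERegs >= A.NumElts) {
      FreeSSERegs -= A.NumElts;
      A.Info = Kind::DirectHvaInReg; // whole aggregate, inreg, not flattened
    }
  }
}

int main() {
  // Mirrors hva4 from vectorcall.c: (HVA4 a, HVA2 b, v4f32 c) with 6 regs.
  std::vector<Arg> Args = {{true, 4}, {true, 2}, {false, 1}};
  classifyVectorCall(Args, /*FreeSSERegs=*/6);
  for (const Arg &A : Args)
    std::printf("%d\n", static_cast<int>(A.Info)); // expect 2, 3, 1
}

Running the hva4 case through this model reproduces the expected IR: c (a v4f32) takes one register on the first pass, then a (HVA4, four registers) fits in the remaining five on the second pass, while b (HVA2) no longer fits and stays indirect.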
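The AddParticles test exercises the same arithmetic with the 6-parameter cutoff in play. By my reading of the patch, on i386 it goes: the first pass hands SSE registers to the builtin floats p2 and p6 (p7 is the seventh parameter, so it is classified with a zeroed register state), leaving 4 of the 6 registers free; the second pass then gives 2 of them to p1 (HFA2), skips p3 (HFA4, which needs 4 when only 2 remain), and gives the last 2 to p5 (HFA2). That is exactly the mix of inreg coerces and the indirect %p3 that the X32 CHECK line expects.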