diff --git a/clang/include/clang/CodeGen/CGFunctionInfo.h b/clang/include/clang/CodeGen/CGFunctionInfo.h --- a/clang/include/clang/CodeGen/CGFunctionInfo.h +++ b/clang/include/clang/CodeGen/CGFunctionInfo.h @@ -586,6 +586,9 @@ /// Whether this function has nocf_check attribute. unsigned NoCfCheck : 1; + /// Log 2 of the maximum vector width. + unsigned MaxVectorWidth : 4; + RequiredArgs Required; /// The struct representing all arguments passed in memory. Only used when @@ -731,6 +734,17 @@ ArgStructAlign = Align.getQuantity(); } + /// Return the maximum vector width in the arguments. + unsigned getMaxVectorWidth() const { + return MaxVectorWidth ? 1U << (MaxVectorWidth - 1) : 0; + } + + /// Set the maximum vector width in the arguments. + void setMaxVectorWidth(unsigned Width) { + assert(llvm::isPowerOf2_32(Width) && "Expected power of 2 vector"); + MaxVectorWidth = llvm::countTrailingZeros(Width) + 1; + } + void Profile(llvm::FoldingSetNodeID &ID) { ID.AddInteger(getASTCallingConvention()); ID.AddBoolean(InstanceMethod); diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -833,6 +833,7 @@ FI->NumArgs = argTypes.size(); FI->HasExtParameterInfos = !paramInfos.empty(); FI->getArgsBuffer()[0].type = resultType; + FI->MaxVectorWidth = 0; for (unsigned i = 0, e = argTypes.size(); i != e; ++i) FI->getArgsBuffer()[i + 1].type = argTypes[i]; for (unsigned i = 0, e = paramInfos.size(); i != e; ++i) @@ -4668,6 +4669,19 @@ } // namespace +static unsigned getMaxVectorWidth(const llvm::Type *Ty) { + if (auto *VT = dyn_cast(Ty)) + return VT->getPrimitiveSizeInBits().getKnownMinSize(); + if (auto *AT = dyn_cast(Ty)) + return getMaxVectorWidth(AT->getElementType()); + + unsigned MaxVectorWidth = 0; + if (auto *ST = dyn_cast(Ty)) + for (auto *I : ST->elements()) + MaxVectorWidth = std::max(MaxVectorWidth, getMaxVectorWidth(I)); + return MaxVectorWidth; +} + RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, @@ -5221,12 +5235,9 @@ #endif // Update the largest vector width if any arguments have vector types. - for (unsigned i = 0; i < IRCallArgs.size(); ++i) { - if (auto *VT = dyn_cast(IRCallArgs[i]->getType())) - LargestVectorWidth = - std::max((uint64_t)LargestVectorWidth, - VT->getPrimitiveSizeInBits().getKnownMinSize()); - } + for (unsigned i = 0; i < IRCallArgs.size(); ++i) + LargestVectorWidth = std::max(LargestVectorWidth, + getMaxVectorWidth(IRCallArgs[i]->getType())); // Compute the calling convention and attributes. unsigned CallingConv; @@ -5348,10 +5359,8 @@ CI->setName("call"); // Update largest vector width from the return type. - if (auto *VT = dyn_cast(CI->getType())) - LargestVectorWidth = - std::max((uint64_t)LargestVectorWidth, - VT->getPrimitiveSizeInBits().getKnownMinSize()); + LargestVectorWidth = + std::max(LargestVectorWidth, getMaxVectorWidth(CI->getType())); // Insert instrumentation or attach profile metadata at indirect call sites. // For more details, see the comment before the definition of diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -485,6 +485,9 @@ std::max((uint64_t)LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinSize()); + if (CurFnInfo->getMaxVectorWidth() > LargestVectorWidth) + LargestVectorWidth = CurFnInfo->getMaxVectorWidth(); + // Add the required-vector-width attribute. This contains the max width from: // 1. min-vector-width attribute used in the source program. // 2. Any builtins used that have a vector width specified. diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -2297,6 +2297,8 @@ /// \param isNamedArg - Whether the argument in question is a "named" /// argument, as used in AMD64-ABI 3.5.7. /// + /// \param IsRegCall - Whether the calling conversion is regcall. + /// /// If a word is unused its result will be NoClass; if a type should /// be passed in Memory then at least the classification of \arg Lo /// will be Memory. @@ -2306,7 +2308,7 @@ /// If the \arg Lo class is ComplexX87, then the \arg Hi class will /// also be ComplexX87. void classify(QualType T, uint64_t OffsetBase, Class &Lo, Class &Hi, - bool isNamedArg) const; + bool isNamedArg, bool IsRegCall = false) const; llvm::Type *GetByteVectorType(QualType Ty) const; llvm::Type *GetSSETypeAtOffset(llvm::Type *IRType, @@ -2331,13 +2333,16 @@ ABIArgInfo classifyArgumentType(QualType Ty, unsigned freeIntRegs, unsigned &neededInt, unsigned &neededSSE, - bool isNamedArg) const; + bool isNamedArg, + bool IsRegCall = false) const; ABIArgInfo classifyRegCallStructType(QualType Ty, unsigned &NeededInt, - unsigned &NeededSSE) const; + unsigned &NeededSSE, + unsigned &MaxVectorWidth) const; ABIArgInfo classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt, - unsigned &NeededSSE) const; + unsigned &NeededSSE, + unsigned &MaxVectorWidth) const; bool IsIllegalVectorType(QualType Ty) const; @@ -2832,8 +2837,8 @@ return SSE; } -void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, - Class &Lo, Class &Hi, bool isNamedArg) const { +void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class &Lo, + Class &Hi, bool isNamedArg, bool IsRegCall) const { // FIXME: This code can be simplified by introducing a simple value class for // Class pairs with appropriate constructor methods for the various // situations. @@ -3031,7 +3036,9 @@ // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger // than eight eightbytes, ..., it has class MEMORY. - if (Size > 512) + // regcall ABI doesn't have limitation to an object. The only limitation + // is the free registers, which will be checked in computeInfo. + if (!IsRegCall && Size > 512) return; // AMD64-ABI 3.2.3p2: Rule 1. If ..., or it contains unaligned @@ -3738,15 +3745,14 @@ return ABIArgInfo::getDirect(ResType); } -ABIArgInfo X86_64ABIInfo::classifyArgumentType( - QualType Ty, unsigned freeIntRegs, unsigned &neededInt, unsigned &neededSSE, - bool isNamedArg) - const -{ +ABIArgInfo +X86_64ABIInfo::classifyArgumentType(QualType Ty, unsigned freeIntRegs, + unsigned &neededInt, unsigned &neededSSE, + bool isNamedArg, bool IsRegCall) const { Ty = useFirstFieldIfTransparentUnion(Ty); X86_64ABIInfo::Class Lo, Hi; - classify(Ty, 0, Lo, Hi, isNamedArg); + classify(Ty, 0, Lo, Hi, isNamedArg, IsRegCall); // Check some invariants. // FIXME: Enforce these by construction. @@ -3869,7 +3875,8 @@ ABIArgInfo X86_64ABIInfo::classifyRegCallStructTypeImpl(QualType Ty, unsigned &NeededInt, - unsigned &NeededSSE) const { + unsigned &NeededSSE, + unsigned &MaxVectorWidth) const { auto RT = Ty->getAs(); assert(RT && "classifyRegCallStructType only valid with struct types"); @@ -3884,7 +3891,8 @@ } for (const auto &I : CXXRD->bases()) - if (classifyRegCallStructTypeImpl(I.getType(), NeededInt, NeededSSE) + if (classifyRegCallStructTypeImpl(I.getType(), NeededInt, NeededSSE, + MaxVectorWidth) .isIndirect()) { NeededInt = NeededSSE = 0; return getIndirectReturnResult(Ty); @@ -3893,20 +3901,27 @@ // Sum up members for (const auto *FD : RT->getDecl()->fields()) { - if (FD->getType()->isRecordType() && !FD->getType()->isUnionType()) { - if (classifyRegCallStructTypeImpl(FD->getType(), NeededInt, NeededSSE) + QualType MTy = FD->getType(); + if (MTy->isRecordType() && !MTy->isUnionType()) { + if (classifyRegCallStructTypeImpl(MTy, NeededInt, NeededSSE, + MaxVectorWidth) .isIndirect()) { NeededInt = NeededSSE = 0; return getIndirectReturnResult(Ty); } } else { unsigned LocalNeededInt, LocalNeededSSE; - if (classifyArgumentType(FD->getType(), UINT_MAX, LocalNeededInt, - LocalNeededSSE, true) + if (classifyArgumentType(MTy, UINT_MAX, LocalNeededInt, LocalNeededSSE, + true, true) .isIndirect()) { NeededInt = NeededSSE = 0; return getIndirectReturnResult(Ty); } + if (const auto *AT = getContext().getAsConstantArrayType(MTy)) + MTy = AT->getElementType(); + if (const auto *VT = MTy->getAs()) + if (getContext().getTypeSize(VT) > MaxVectorWidth) + MaxVectorWidth = getContext().getTypeSize(VT); NeededInt += LocalNeededInt; NeededSSE += LocalNeededSSE; } @@ -3915,14 +3930,17 @@ return ABIArgInfo::getDirect(); } -ABIArgInfo X86_64ABIInfo::classifyRegCallStructType(QualType Ty, - unsigned &NeededInt, - unsigned &NeededSSE) const { +ABIArgInfo +X86_64ABIInfo::classifyRegCallStructType(QualType Ty, unsigned &NeededInt, + unsigned &NeededSSE, + unsigned &MaxVectorWidth) const { NeededInt = 0; NeededSSE = 0; + MaxVectorWidth = 0; - return classifyRegCallStructTypeImpl(Ty, NeededInt, NeededSSE); + return classifyRegCallStructTypeImpl(Ty, NeededInt, NeededSSE, + MaxVectorWidth); } void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { @@ -3942,13 +3960,13 @@ // Keep track of the number of assigned registers. unsigned FreeIntRegs = IsRegCall ? 11 : 6; unsigned FreeSSERegs = IsRegCall ? 16 : 8; - unsigned NeededInt, NeededSSE; + unsigned NeededInt, NeededSSE, MaxVectorWidth = 0; if (!::classifyReturnType(getCXXABI(), FI, *this)) { if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() && !FI.getReturnType()->getTypePtr()->isUnionType()) { - FI.getReturnInfo() = - classifyRegCallStructType(FI.getReturnType(), NeededInt, NeededSSE); + FI.getReturnInfo() = classifyRegCallStructType( + FI.getReturnType(), NeededInt, NeededSSE, MaxVectorWidth); if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) { FreeIntRegs -= NeededInt; FreeSSERegs -= NeededSSE; @@ -3971,6 +3989,8 @@ // integer register. if (FI.getReturnInfo().isIndirect()) --FreeIntRegs; + else if (NeededSSE && MaxVectorWidth > 0) + FI.setMaxVectorWidth(MaxVectorWidth); // The chain argument effectively gives us another free register. if (FI.isChainCall()) @@ -3985,7 +4005,8 @@ bool IsNamedArg = ArgNo < NumRequiredArgs; if (IsRegCall && it->type->isStructureOrClassType()) - it->info = classifyRegCallStructType(it->type, NeededInt, NeededSSE); + it->info = classifyRegCallStructType(it->type, NeededInt, NeededSSE, + MaxVectorWidth); else it->info = classifyArgumentType(it->type, FreeIntRegs, NeededInt, NeededSSE, IsNamedArg); @@ -3997,6 +4018,8 @@ if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) { FreeIntRegs -= NeededInt; FreeSSERegs -= NeededSSE; + if (MaxVectorWidth > FI.getMaxVectorWidth()) + FI.setMaxVectorWidth(MaxVectorWidth); } else { it->info = getIndirectResult(it->type, FreeIntRegs); } diff --git a/clang/test/CodeGen/X86/x86_64-arguments.c b/clang/test/CodeGen/X86/x86_64-arguments.c --- a/clang/test/CodeGen/X86/x86_64-arguments.c +++ b/clang/test/CodeGen/X86/x86_64-arguments.c @@ -546,6 +546,12 @@ void f65(struct t65 a0) { } +typedef float t66 __attribute__((__vector_size__(128), __aligned__(128))); + +// AVX512: @f66(<32 x float>* noundef byval(<32 x float>) align 128 %0) +void f66(t66 a0) { +} + /// The synthesized __va_list_tag does not have file/line fields. // CHECK: = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "__va_list_tag", // CHECK-NOT: file: diff --git a/clang/test/CodeGen/aarch64-neon-tbl.c b/clang/test/CodeGen/aarch64-neon-tbl.c --- a/clang/test/CodeGen/aarch64-neon-tbl.c +++ b/clang/test/CodeGen/aarch64-neon-tbl.c @@ -42,7 +42,7 @@ return vtbl2_s8(a, b); } -// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbl2_s8([2 x <16 x i8>] %a.coerce, <8 x i8> noundef %b) #0 { +// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbl2_s8([2 x <16 x i8>] %a.coerce, <8 x i8> noundef %b) #1 { // CHECK: [[__P0_I:%.*]] = alloca %struct.int8x16x2_t, align 16 // CHECK: [[A:%.*]] = alloca %struct.int8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[A]], i32 0, i32 0 @@ -89,7 +89,7 @@ return vtbl3_s8(a, b); } -// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbl3_s8([3 x <16 x i8>] %a.coerce, <8 x i8> noundef %b) #0 { +// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbl3_s8([3 x <16 x i8>] %a.coerce, <8 x i8> noundef %b) #1 { // CHECK: [[__P0_I:%.*]] = alloca %struct.int8x16x3_t, align 16 // CHECK: [[A:%.*]] = alloca %struct.int8x16x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[A]], i32 0, i32 0 @@ -142,7 +142,7 @@ return vtbl4_s8(a, b); } -// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbl4_s8([4 x <16 x i8>] %a.coerce, <8 x i8> noundef %b) #0 { +// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbl4_s8([4 x <16 x i8>] %a.coerce, <8 x i8> noundef %b) #1 { // CHECK: [[__P0_I:%.*]] = alloca %struct.int8x16x4_t, align 16 // CHECK: [[A:%.*]] = alloca %struct.int8x16x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[A]], i32 0, i32 0 @@ -352,7 +352,7 @@ return vqtbx1_s8(a, b, c); } -// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbx2_s8(<8 x i8> noundef %a, [2 x <16 x i8>] %b.coerce, <8 x i8> noundef %c) #0 { +// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbx2_s8(<8 x i8> noundef %a, [2 x <16 x i8>] %b.coerce, <8 x i8> noundef %c) #1 { // CHECK: [[__P1_I:%.*]] = alloca %struct.int8x16x2_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.int8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0 @@ -373,7 +373,7 @@ return vqtbx2_s8(a, b, c); } -// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbx3_s8(<8 x i8> noundef %a, [3 x <16 x i8>] %b.coerce, <8 x i8> noundef %c) #0 { +// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbx3_s8(<8 x i8> noundef %a, [3 x <16 x i8>] %b.coerce, <8 x i8> noundef %c) #1 { // CHECK: [[__P1_I:%.*]] = alloca %struct.int8x16x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.int8x16x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0 @@ -397,7 +397,7 @@ return vqtbx3_s8(a, b, c); } -// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbx4_s8(<8 x i8> noundef %a, [4 x <16 x i8>] %b.coerce, <8 x i8> noundef %c) #0 { +// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbx4_s8(<8 x i8> noundef %a, [4 x <16 x i8>] %b.coerce, <8 x i8> noundef %c) #1 { // CHECK: [[__P1_I:%.*]] = alloca %struct.int8x16x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.int8x16x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0 @@ -540,7 +540,7 @@ return vtbl2_u8(a, b); } -// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbl2_u8([2 x <16 x i8>] %a.coerce, <8 x i8> noundef %b) #0 { +// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbl2_u8([2 x <16 x i8>] %a.coerce, <8 x i8> noundef %b) #1 { // CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[A:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[A]], i32 0, i32 0 @@ -587,7 +587,7 @@ return vtbl3_u8(a, b); } -// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbl3_u8([3 x <16 x i8>] %a.coerce, <8 x i8> noundef %b) #0 { +// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbl3_u8([3 x <16 x i8>] %a.coerce, <8 x i8> noundef %b) #1 { // CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x16x3_t, align 16 // CHECK: [[A:%.*]] = alloca %struct.uint8x16x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[A]], i32 0, i32 0 @@ -640,7 +640,7 @@ return vtbl4_u8(a, b); } -// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbl4_u8([4 x <16 x i8>] %a.coerce, <8 x i8> noundef %b) #0 { +// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbl4_u8([4 x <16 x i8>] %a.coerce, <8 x i8> noundef %b) #1 { // CHECK: [[__P0_I:%.*]] = alloca %struct.uint8x16x4_t, align 16 // CHECK: [[A:%.*]] = alloca %struct.uint8x16x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[A]], i32 0, i32 0 @@ -850,7 +850,7 @@ return vqtbx1_u8(a, b, c); } -// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbx2_u8(<8 x i8> noundef %a, [2 x <16 x i8>] %b.coerce, <8 x i8> noundef %c) #0 { +// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbx2_u8(<8 x i8> noundef %a, [2 x <16 x i8>] %b.coerce, <8 x i8> noundef %c) #1 { // CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0 @@ -871,7 +871,7 @@ return vqtbx2_u8(a, b, c); } -// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbx3_u8(<8 x i8> noundef %a, [3 x <16 x i8>] %b.coerce, <8 x i8> noundef %c) #0 { +// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbx3_u8(<8 x i8> noundef %a, [3 x <16 x i8>] %b.coerce, <8 x i8> noundef %c) #1 { // CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x16x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0 @@ -895,7 +895,7 @@ return vqtbx3_u8(a, b, c); } -// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbx4_u8(<8 x i8> noundef %a, [4 x <16 x i8>] %b.coerce, <8 x i8> noundef %c) #0 { +// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbx4_u8(<8 x i8> noundef %a, [4 x <16 x i8>] %b.coerce, <8 x i8> noundef %c) #1 { // CHECK: [[__P1_I:%.*]] = alloca %struct.uint8x16x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0 @@ -1038,7 +1038,7 @@ return vtbl2_p8(a, b); } -// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbl2_p8([2 x <16 x i8>] %a.coerce, <8 x i8> noundef %b) #0 { +// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbl2_p8([2 x <16 x i8>] %a.coerce, <8 x i8> noundef %b) #1 { // CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[A:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[A]], i32 0, i32 0 @@ -1085,7 +1085,7 @@ return vtbl3_p8(a, b); } -// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbl3_p8([3 x <16 x i8>] %a.coerce, <8 x i8> noundef %b) #0 { +// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbl3_p8([3 x <16 x i8>] %a.coerce, <8 x i8> noundef %b) #1 { // CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x16x3_t, align 16 // CHECK: [[A:%.*]] = alloca %struct.poly8x16x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[A]], i32 0, i32 0 @@ -1138,7 +1138,7 @@ return vtbl4_p8(a, b); } -// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbl4_p8([4 x <16 x i8>] %a.coerce, <8 x i8> noundef %b) #0 { +// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbl4_p8([4 x <16 x i8>] %a.coerce, <8 x i8> noundef %b) #1 { // CHECK: [[__P0_I:%.*]] = alloca %struct.poly8x16x4_t, align 16 // CHECK: [[A:%.*]] = alloca %struct.poly8x16x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[A]], i32 0, i32 0 @@ -1348,7 +1348,7 @@ return vqtbx1_p8(a, b, c); } -// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbx2_p8(<8 x i8> noundef %a, [2 x <16 x i8>] %b.coerce, <8 x i8> noundef %c) #0 { +// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbx2_p8(<8 x i8> noundef %a, [2 x <16 x i8>] %b.coerce, <8 x i8> noundef %c) #1 { // CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0 @@ -1369,7 +1369,7 @@ return vqtbx2_p8(a, b, c); } -// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbx3_p8(<8 x i8> noundef %a, [3 x <16 x i8>] %b.coerce, <8 x i8> noundef %c) #0 { +// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbx3_p8(<8 x i8> noundef %a, [3 x <16 x i8>] %b.coerce, <8 x i8> noundef %c) #1 { // CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x16x3_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0 @@ -1393,7 +1393,7 @@ return vqtbx3_p8(a, b, c); } -// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbx4_p8(<8 x i8> noundef %a, [4 x <16 x i8>] %b.coerce, <8 x i8> noundef %c) #0 { +// CHECK-LABEL: define{{.*}} <8 x i8> @test_vqtbx4_p8(<8 x i8> noundef %a, [4 x <16 x i8>] %b.coerce, <8 x i8> noundef %c) #1 { // CHECK: [[__P1_I:%.*]] = alloca %struct.poly8x16x4_t, align 16 // CHECK: [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0 diff --git a/clang/test/CodeGen/regcall2.c b/clang/test/CodeGen/regcall2.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/regcall2.c @@ -0,0 +1,28 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -emit-llvm %s -o - -ffreestanding -target-feature +avx512vl -triple=x86_64-pc-win32 | FileCheck %s --check-prefix=Win +// RUN: %clang_cc1 -emit-llvm %s -o - -ffreestanding -target-feature +avx512vl -triple=x86_64-pc-linux-gnu | FileCheck %s --check-prefix=Lin + +#include + +typedef struct { + __m512d r1[4]; + __m512 r2[4]; +} __sVector; +__sVector A; + +__sVector __regcall foo(int a) { + return A; +} + +double __regcall bar(__sVector a) { + return a.r1[0][4]; +} + +// FIXME: Do we need to change for Windows? +// Win: define dso_local x86_regcallcc void @__regcall3__foo(%struct.__sVector* noalias sret(%struct.__sVector) align 64 %agg.result, i32 noundef %a) #0 +// Win: define dso_local x86_regcallcc double @__regcall3__bar(%struct.__sVector* noundef %a) #0 +// Win: attributes #0 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="0" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+avx,+avx2,+avx512f,+avx512vl,+crc32,+cx8,+f16c,+fma,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" } + +// Lin: define dso_local x86_regcallcc %struct.__sVector @__regcall3__foo(i32 noundef %a) #0 +// Lin: define dso_local x86_regcallcc double @__regcall3__bar([4 x <8 x double>] %a.coerce0, [4 x <16 x float>] %a.coerce1) #0 +// Lin: attributes #0 = { noinline nounwind optnone "frame-pointer"="none" "min-legal-vector-width"="512" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+avx,+avx2,+avx512f,+avx512vl,+crc32,+cx8,+f16c,+fma,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" }