Index: lib/Basic/Targets.cpp =================================================================== --- lib/Basic/Targets.cpp +++ lib/Basic/Targets.cpp @@ -2244,7 +2244,9 @@ bool handleTargetFeatures(std::vector &Features, DiagnosticsEngine &Diags) override; StringRef getABI() const override { - if (getTriple().getArch() == llvm::Triple::x86_64 && SSELevel >= AVX) + if (getTriple().getArch() == llvm::Triple::x86_64 && SSELevel >= AVX512F) + return "avx512"; + else if (getTriple().getArch() == llvm::Triple::x86_64 && SSELevel >= AVX) return "avx"; else if (getTriple().getArch() == llvm::Triple::x86 && MMX3DNowLevel == NoMMX3DNow) Index: lib/CodeGen/TargetInfo.cpp =================================================================== --- lib/CodeGen/TargetInfo.cpp +++ lib/CodeGen/TargetInfo.cpp @@ -1391,6 +1391,25 @@ namespace { +/// The AVX ABI level for X86 targets. +enum class X86AVXABILevel { + None, + AVX, + AVX512 +}; + +/// \p returns the size in bits of the largest (native) vector for \p AVXLevel. +static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) { + switch (AVXLevel) { + case X86AVXABILevel::AVX512: + return 512; + case X86AVXABILevel::AVX: + return 256; + case X86AVXABILevel::None: + return 128; + } +} + /// X86_64ABIInfo - The X86_64 ABI information. class X86_64ABIInfo : public ABIInfo { enum Class { @@ -1496,14 +1515,14 @@ return !getTarget().getTriple().isOSDarwin(); } - bool HasAVX; + X86AVXABILevel AVXLevel; // Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on // 64-bit hardware. bool Has64BitPointers; public: - X86_64ABIInfo(CodeGen::CodeGenTypes &CGT, bool hasavx) : - ABIInfo(CGT), HasAVX(hasavx), + X86_64ABIInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) : + ABIInfo(CGT), AVXLevel(AVXLevel), Has64BitPointers(CGT.getDataLayout().getPointerSize(0) == 8) { } @@ -1557,10 +1576,11 @@ }; class X86_64TargetCodeGenInfo : public TargetCodeGenInfo { - bool HasAVX; + X86AVXABILevel AVXLevel; public: - X86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool HasAVX) - : TargetCodeGenInfo(new X86_64ABIInfo(CGT, HasAVX)), HasAVX(HasAVX) {} + X86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) + : TargetCodeGenInfo(new X86_64ABIInfo(CGT, AVXLevel)), + AVXLevel(AVXLevel) {} const X86_64ABIInfo &getABIInfo() const { return static_cast(TargetCodeGenInfo::getABIInfo()); @@ -1628,14 +1648,14 @@ } unsigned getOpenMPSimdDefaultAlignment(QualType) const override { - return HasAVX ? 32 : 16; + return getNativeVectorSizeForAVXABI(AVXLevel) / 8; } }; class PS4TargetCodeGenInfo : public X86_64TargetCodeGenInfo { public: - PS4TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool HasAVX) - : X86_64TargetCodeGenInfo(CGT, HasAVX) {} + PS4TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, X86AVXABILevel AVXLevel) + : X86_64TargetCodeGenInfo(CGT, AVXLevel) {} void getDependentLibraryOption(llvm::StringRef Lib, llvm::SmallString<24> &Opt) const override { @@ -1701,10 +1721,11 @@ } class WinX86_64TargetCodeGenInfo : public TargetCodeGenInfo { - bool HasAVX; + X86AVXABILevel AVXLevel; public: - WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool HasAVX) - : TargetCodeGenInfo(new WinX86_64ABIInfo(CGT)), HasAVX(HasAVX) {} + WinX86_64TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, + X86AVXABILevel AVXLevel) + : TargetCodeGenInfo(new WinX86_64ABIInfo(CGT)), AVXLevel(AVXLevel) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override; @@ -1736,7 +1757,7 @@ } unsigned getOpenMPSimdDefaultAlignment(QualType) const override { - return HasAVX ? 32 : 16; + return getNativeVectorSizeForAVXABI(AVXLevel) / 8; } }; @@ -1925,7 +1946,8 @@ // split. if (OffsetBase && OffsetBase != 64) Hi = Lo; - } else if (Size == 128 || (HasAVX && isNamedArg && Size == 256)) { + } else if (Size == 128 || + (isNamedArg && Size <= getNativeVectorSizeForAVXABI(AVXLevel))) { // Arguments of 256-bits are split into four eightbyte chunks. The // least significant one belongs to class SSE and all the others to class // SSEUP. The original Lo and Hi design considers that types can't be @@ -1937,6 +1959,9 @@ // Note that per 3.5.7 of AMD64-ABI, 256-bit args are only passed in // registers if they are "named", i.e. not part of the "..." of a // variadic function. + // + // Similarly, per 3.2.3. of the AVX512 draft, 512-bits ("named") args are + // split into eight eightbyte chunks, one SSE and seven SSEUP. Lo = SSE; Hi = SSEUp; } @@ -2147,7 +2172,7 @@ bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const { if (const VectorType *VecTy = Ty->getAs()) { uint64_t Size = getContext().getTypeSize(VecTy); - unsigned LargestVector = HasAVX ? 256 : 128; + unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel); if (Size <= 64 || Size > LargestVector) return true; } @@ -7191,17 +7216,21 @@ } case llvm::Triple::x86_64: { - bool HasAVX = getTarget().getABI() == "avx"; + StringRef ABI = getTarget().getABI(); + X86AVXABILevel AVXLevel = (ABI == "avx512" ? X86AVXABILevel::AVX512 : + ABI == "avx" ? X86AVXABILevel::AVX : + X86AVXABILevel::None); switch (Triple.getOS()) { case llvm::Triple::Win32: return *(TheTargetCodeGenInfo = - new WinX86_64TargetCodeGenInfo(Types, HasAVX)); + new WinX86_64TargetCodeGenInfo(Types, AVXLevel)); case llvm::Triple::PS4: - return *(TheTargetCodeGenInfo = new PS4TargetCodeGenInfo(Types, HasAVX)); + return *(TheTargetCodeGenInfo = + new PS4TargetCodeGenInfo(Types, AVXLevel)); default: return *(TheTargetCodeGenInfo = - new X86_64TargetCodeGenInfo(Types, HasAVX)); + new X86_64TargetCodeGenInfo(Types, AVXLevel)); } } case llvm::Triple::hexagon: Index: test/CodeGen/x86_64-arguments.c =================================================================== --- test/CodeGen/x86_64-arguments.c +++ test/CodeGen/x86_64-arguments.c @@ -1,7 +1,9 @@ // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | \ -// RUN: FileCheck %s -check-prefix=CHECK -check-prefix=SSE +// RUN: FileCheck %s -check-prefix=CHECK -check-prefix=SSE -check-prefix=NO-AVX512 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx | \ -// RUN: FileCheck %s -check-prefix=CHECK -check-prefix=AVX +// RUN: FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=NO-AVX512 +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx512f | \ +// RUN: FileCheck %s -check-prefix=CHECK -check-prefix=AVX -check-prefix=AVX512 #include // CHECK-LABEL: define signext i8 @f0() @@ -458,3 +460,77 @@ } // AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}}) // AVX: @test54_helper(<8 x float> {{%[a-zA-Z0-9]+}}, <8 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[a-zA-Z0-9]+}}) + +typedef float __m512 __attribute__ ((__vector_size__ (64))); +typedef struct { + __m512 m; +} s512; + +s512 x55; +__m512 x56; + +// Even on AVX512, aggregates of size larger than four eightbytes have class +// MEMORY (AVX512 draft 0.3 3.2.3p2 Rule 1). +// +// CHECK: declare void @f55(%struct.s512* byval align 64) +void f55(s512 x); + +// However, __m512 has type SSE/SSEUP on AVX512. +// +// AVX512: declare void @f56(<16 x float>) +// NO-AVX512: declare void @f56(<16 x float>* byval align 64) +void f56(__m512 x); +void f57() { f55(x55); f56(x56); } + +// Like for __m128 on AVX, check that the struct below is passed +// in the same way regardless of AVX512 being used. +// +// CHECK: declare void @f58(%struct.t256* byval align 32) +typedef struct t256 { + __m256 m; + __m256 n; +} two256; + +extern void f58(two256 s); +void f59(two256 s) { + f58(s); +} + +// CHECK: declare void @f60(%struct.sat256* byval align 32) +typedef struct at256 { + __m256 array[2]; +} Atwo256; +typedef struct sat256 { + Atwo256 x; +} SAtwo256; + +extern void f60(SAtwo256 s); +void f61(SAtwo256 s) { + f60(s); +} + +// AVX512: @f62_helper(i32 0, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}}) +void f62_helper(int, ...); +__m512 x62; +void f62() { + f62_helper(0, x62, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i); +} + +// Like for __m256 on AVX, we always pass __m512 in memory, and don't +// need to use the register save area. +// +// AVX512-LABEL: define void @f63 +// AVX512-NOT: br i1 +// AVX512: ret void +void f63(__m512 *m, __builtin_va_list argList) { + *m = __builtin_va_arg(argList, __m512); +} + +// AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double {{%[a-zA-Z0-9]+}}, double {{%[a-zA-Z0-9]+}}) +// AVX512: @f64_helper(<16 x float> {{%[a-zA-Z0-9]+}}, <16 x float> {{%[a-zA-Z0-9]+}}, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, { double, double }* byval align 8 {{%[a-zA-Z0-9]+}}) +void f64_helper(__m512, ...); +__m512 x64; +void f64() { + f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i); + f64_helper(x64, x64, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0i); +} Index: test/OpenMP/simd_metadata.c =================================================================== --- test/OpenMP/simd_metadata.c +++ test/OpenMP/simd_metadata.c @@ -1,5 +1,6 @@ // RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86 // RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -target-feature +avx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX +// RUN: %clang_cc1 -fopenmp -triple x86_64-unknown-unknown -target-feature +avx512f -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=X86-AVX512 // RUN: %clang_cc1 -fopenmp -triple powerpc64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=PPC // RUN: %clang_cc1 -fopenmp -triple powerpc64-unknown-unknown -target-abi elfv1-qpx -emit-llvm %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=PPC-QPX @@ -16,6 +17,7 @@ // X86-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 // X86-AVX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 31 +// X86-AVX512-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 63 // PPC-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 // PPC-QPX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 @@ -25,6 +27,7 @@ // X86-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15 // X86-AVX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31 +// X86-AVX512-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 63 // PPC-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15 // PPC-QPX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31