Index: clang/lib/Basic/Targets/X86.h =================================================================== --- clang/lib/Basic/Targets/X86.h +++ clang/lib/Basic/Targets/X86.h @@ -302,12 +302,14 @@ DiagnosticsEngine &Diags) override; StringRef getABI() const override { - if (getTriple().getArch() == llvm::Triple::x86_64 && SSELevel >= AVX512F) + llvm::Triple::ArchType Arch = getTriple().getArch(); + if ((Arch == llvm::Triple::x86_64 || Arch == llvm::Triple::x86) && + SSELevel >= AVX512F) return "avx512"; - if (getTriple().getArch() == llvm::Triple::x86_64 && SSELevel >= AVX) + if ((Arch == llvm::Triple::x86_64 || Arch == llvm::Triple::x86) && + SSELevel >= AVX) return "avx"; - if (getTriple().getArch() == llvm::Triple::x86 && - MMX3DNowLevel == NoMMX3DNow) + if (Arch == llvm::Triple::x86 && MMX3DNowLevel == NoMMX3DNow) return "no-mmx"; return ""; } Index: clang/lib/CodeGen/TargetInfo.cpp =================================================================== --- clang/lib/CodeGen/TargetInfo.cpp +++ clang/lib/CodeGen/TargetInfo.cpp @@ -1023,6 +1023,9 @@ // X86-32 ABI Implementation //===----------------------------------------------------------------------===// +/// The AVX ABI level for X86 targets. +enum class X86AVXABILevel { None, AVX, AVX512 }; + /// Similar to llvm::CCState, but for Clang. 
struct CCState { CCState(CGFunctionInfo &FI) @@ -1053,7 +1056,9 @@ bool IsWin32StructABI; bool IsSoftFloatABI; bool IsMCUABI; + bool IsLinuxABI; unsigned DefaultNumRegisterParameters; + X86AVXABILevel AVXLevel; static bool isRegisterSize(unsigned Size) { return (Size == 8 || Size == 16 || Size == 32 || Size == 64); @@ -1112,13 +1117,15 @@ X86_32ABIInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI, - unsigned NumRegisterParameters, bool SoftFloatABI) - : SwiftABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI), - IsRetSmallStructInRegABI(RetSmallStructInRegABI), - IsWin32StructABI(Win32StructABI), - IsSoftFloatABI(SoftFloatABI), - IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()), - DefaultNumRegisterParameters(NumRegisterParameters) {} + unsigned NumRegisterParameters, bool SoftFloatABI, + X86AVXABILevel AVXLevel) + : SwiftABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI), + IsRetSmallStructInRegABI(RetSmallStructInRegABI), + IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI), + IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()), + IsLinuxABI(CGT.getTarget().getTriple().isOSLinux()), + DefaultNumRegisterParameters(NumRegisterParameters), + AVXLevel(AVXLevel) {} bool shouldPassIndirectlyForSwift(ArrayRef scalars, bool asReturnValue) const override { @@ -1139,10 +1146,11 @@ public: X86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI, - unsigned NumRegisterParameters, bool SoftFloatABI) + unsigned NumRegisterParameters, bool SoftFloatABI, + X86AVXABILevel AVXLevel) : TargetCodeGenInfo(new X86_32ABIInfo( CGT, DarwinVectorABI, RetSmallStructInRegABI, Win32StructABI, - NumRegisterParameters, SoftFloatABI)) {} + NumRegisterParameters, SoftFloatABI, AVXLevel)) {} static bool isStructReturnInRegABI( const llvm::Triple &Triple, const CodeGenOptions &Opts); @@ -1538,6 +1546,29 @@ if (Align <= MinABIStackAlignInBytes) return 0; // Use default 
alignment. + if (IsLinuxABI) { + // Exclude other System V OS (e.g. Darwin, PS4 and FreeBSD) since we don't + // want to spend any effort dealing with the ramifications of ABI breaks. + // If the target doesn't support avx, return 16. + // If the target supports avx or avx512, __m256 will align to 32 bytes. + // __m512 will align to 64 bytes when the target supports avx512, align to + // 32 bytes when the target supports avx and 16 bytes otherwise. + if (Ty->getAs()) { + int TypeSize = getContext().getTypeSize(Ty); + if (TypeSize == 128) + return Align; + else if (TypeSize == 256) + return (AVXLevel == X86AVXABILevel::AVX || + AVXLevel == X86AVXABILevel::AVX512) + ? Align + : 16; + else + return AVXLevel == X86AVXABILevel::AVX512 + ? Align + : AVXLevel == X86AVXABILevel::AVX ? 32 : 16; + } else + return MinABIStackAlignInBytes; + } // On non-Darwin, the stack type alignment is always 4. if (!IsDarwinVectorABI) { // Set explicit alignment, since we may need to realign the top. @@ -2086,12 +2117,6 @@ namespace { -/// The AVX ABI level for X86 targets. -enum class X86AVXABILevel { - None, - AVX, - AVX512 -}; /// \p returns the size in bits of the largest (native) vector for \p AVXLevel. 
static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) { @@ -2432,11 +2457,12 @@ class WinX86_32TargetCodeGenInfo : public X86_32TargetCodeGenInfo { public: - WinX86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, - bool DarwinVectorABI, bool RetSmallStructInRegABI, bool Win32StructABI, - unsigned NumRegisterParameters) - : X86_32TargetCodeGenInfo(CGT, DarwinVectorABI, RetSmallStructInRegABI, - Win32StructABI, NumRegisterParameters, false) {} + WinX86_32TargetCodeGenInfo(CodeGen::CodeGenTypes &CGT, bool DarwinVectorABI, + bool RetSmallStructInRegABI, bool Win32StructABI, + unsigned NumRegisterParameters) + : X86_32TargetCodeGenInfo(CGT, DarwinVectorABI, RetSmallStructInRegABI, + Win32StructABI, NumRegisterParameters, false, + X86AVXABILevel::None) {} void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &CGM) const override; @@ -10327,6 +10353,12 @@ X86_32TargetCodeGenInfo::isStructReturnInRegABI(Triple, CodeGenOpts); bool IsWin32FloatStructABI = Triple.isOSWindows() && !Triple.isOSCygMing(); + StringRef ABI = getTarget().getABI(); + X86AVXABILevel AVXLevel = + (ABI == "avx512" + ? X86AVXABILevel::AVX512 + : ABI == "avx" ? 
X86AVXABILevel::AVX : X86AVXABILevel::None); + if (Triple.getOS() == llvm::Triple::Win32) { return SetCGInfo(new WinX86_32TargetCodeGenInfo( Types, IsDarwinVectorABI, RetSmallStructInRegABI, @@ -10335,7 +10367,7 @@ return SetCGInfo(new X86_32TargetCodeGenInfo( Types, IsDarwinVectorABI, RetSmallStructInRegABI, IsWin32FloatStructABI, CodeGenOpts.NumRegisterParameters, - CodeGenOpts.FloatABI == "soft")); + CodeGenOpts.FloatABI == "soft", AVXLevel)); } } Index: clang/test/CodeGen/x86_32-align-linux-avx2.c =================================================================== --- /dev/null +++ clang/test/CodeGen/x86_32-align-linux-avx2.c @@ -0,0 +1,46 @@ +// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -target-feature +avx -emit-llvm -o %t %s +// RUN: FileCheck < %t %s + +#include + +// CHECK-LABEL: define void @testm128 +// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4 +// CHECK-NEXT: %0 = ptrtoint i8* %argp.cur to i32 +// CHECK-NEXT: %1 = add i32 %0, 15 +// CHECK-NEXT: %2 = and i32 %1, -16 +// CHECK-NEXT: %argp.cur.aligned = inttoptr i32 %2 to i8* +void testm128(int argCount, ...) { + __m128 res; + __builtin_va_list args; + __builtin_va_start(args, argCount); + res = __builtin_va_arg(args, __m128); + __builtin_va_end(args); +} + +// CHECK-LABEL: define void @testm256 +// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4 +// CHECK-NEXT: %0 = ptrtoint i8* %argp.cur to i32 +// CHECK-NEXT: %1 = add i32 %0, 31 +// CHECK-NEXT: %2 = and i32 %1, -32 +// CHECK-NEXT: %argp.cur.aligned = inttoptr i32 %2 to i8* +void testm256(int argCount, ...) 
{ + __m256 res; + __builtin_va_list args; + __builtin_va_start(args, argCount); + res = __builtin_va_arg(args, __m256); + __builtin_va_end(args); +} + +// CHECK-LABEL: define void @testm512 +// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4 +// CHECK-NEXT: %0 = ptrtoint i8* %argp.cur to i32 +// CHECK-NEXT: %1 = add i32 %0, 31 +// CHECK-NEXT: %2 = and i32 %1, -32 +// CHECK-NEXT: %argp.cur.aligned = inttoptr i32 %2 to i8* +void testm512(int argCount, ...) { + __m512 res; + __builtin_va_list args; + __builtin_va_start(args, argCount); + res = __builtin_va_arg(args, __m512); + __builtin_va_end(args); +} Index: clang/test/CodeGen/x86_32-align-linux-avx512f.c =================================================================== --- /dev/null +++ clang/test/CodeGen/x86_32-align-linux-avx512f.c @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -target-feature +avx512f -emit-llvm -o %t %s +// RUN: FileCheck < %t %s + +#include + +// CHECK-LABEL: define void @testm512 +// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4 +// CHECK-NEXT: %0 = ptrtoint i8* %argp.cur to i32 +// CHECK-NEXT: %1 = add i32 %0, 63 +// CHECK-NEXT: %2 = and i32 %1, -64 +// CHECK-NEXT: %argp.cur.aligned = inttoptr i32 %2 to i8* +void testm512(int argCount, ...) 
{ + __m512 res; + __builtin_va_list args; + __builtin_va_start(args, argCount); + res = __builtin_va_arg(args, __m512); + __builtin_va_end(args); +} Index: clang/test/CodeGen/x86_32-align-linux.c =================================================================== --- /dev/null +++ clang/test/CodeGen/x86_32-align-linux.c @@ -0,0 +1,46 @@ +// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -emit-llvm -o %t %s +// RUN: FileCheck < %t %s + +#include + +// CHECK-LABEL: define void @testm128 +// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4 +// CHECK-NEXT: %0 = ptrtoint i8* %argp.cur to i32 +// CHECK-NEXT: %1 = add i32 %0, 15 +// CHECK-NEXT: %2 = and i32 %1, -16 +// CHECK-NEXT: %argp.cur.aligned = inttoptr i32 %2 to i8* +void testm128(int argCount, ...) { + __m128 res; + __builtin_va_list args; + __builtin_va_start(args, argCount); + res = __builtin_va_arg(args, __m128); + __builtin_va_end(args); +} + +// CHECK-LABEL: define void @testm256 +// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4 +// CHECK-NEXT: %0 = ptrtoint i8* %argp.cur to i32 +// CHECK-NEXT: %1 = add i32 %0, 15 +// CHECK-NEXT: %2 = and i32 %1, -16 +// CHECK-NEXT: %argp.cur.aligned = inttoptr i32 %2 to i8* +void testm256(int argCount, ...) { + __m256 res; + __builtin_va_list args; + __builtin_va_start(args, argCount); + res = __builtin_va_arg(args, __m256); + __builtin_va_end(args); +} + +// CHECK-LABEL: define void @testm512 +// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4 +// CHECK-NEXT: %0 = ptrtoint i8* %argp.cur to i32 +// CHECK-NEXT: %1 = add i32 %0, 15 +// CHECK-NEXT: %2 = and i32 %1, -16 +// CHECK-NEXT: %argp.cur.aligned = inttoptr i32 %2 to i8* +void testm512(int argCount, ...) { + __m512 res; + __builtin_va_list args; + __builtin_va_start(args, argCount); + res = __builtin_va_arg(args, __m512); + __builtin_va_end(args); +}