Index: clang/lib/CodeGen/TargetInfo.cpp
===================================================================
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -1091,6 +1091,7 @@
   bool IsWin32StructABI;
   bool IsSoftFloatABI;
   bool IsMCUABI;
+  bool IsLinuxABI;
   unsigned DefaultNumRegisterParameters;
 
   static bool isRegisterSize(unsigned Size) {
@@ -1117,11 +1118,13 @@
   ABIArgInfo getIndirectReturnResult(QualType Ty, CCState &State) const;
 
   /// Return the alignment to use for the given type on the stack.
-  unsigned getTypeStackAlignInBytes(QualType Ty, unsigned Align) const;
+  unsigned getTypeStackAlignInBytes(QualType Ty, unsigned Align,
+                                    bool isNamedArg) const;
 
   Class classify(QualType Ty) const;
   ABIArgInfo classifyReturnType(QualType RetTy, CCState &State) const;
-  ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State) const;
+  ABIArgInfo classifyArgumentType(QualType RetTy, CCState &State,
+                                  bool isNamedArg) const;
 
   /// Updates the number of available free registers, returns
   /// true if any registers were allocated.
@@ -1153,9 +1156,9 @@
                 unsigned NumRegisterParameters, bool SoftFloatABI)
     : SwiftABIInfo(CGT), IsDarwinVectorABI(DarwinVectorABI),
       IsRetSmallStructInRegABI(RetSmallStructInRegABI),
-      IsWin32StructABI(Win32StructABI),
-      IsSoftFloatABI(SoftFloatABI),
+      IsWin32StructABI(Win32StructABI), IsSoftFloatABI(SoftFloatABI),
       IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
+      IsLinuxABI(CGT.getTarget().getTriple().isOSLinux()),
       DefaultNumRegisterParameters(NumRegisterParameters) {}
 
   bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
@@ -1573,13 +1576,22 @@
   return false;
 }
 
-unsigned X86_32ABIInfo::getTypeStackAlignInBytes(QualType Ty,
-                                                 unsigned Align) const {
+unsigned X86_32ABIInfo::getTypeStackAlignInBytes(QualType Ty, unsigned Align,
+                                                 bool isNamedArg) const {
   // Otherwise, if the alignment is less than or equal to the minimum ABI
   // alignment, just use the default; the backend will handle this.
   if (Align <= MinABIStackAlignInBytes)
     return 0; // Use default alignment.
 
+  if (IsLinuxABI && !isNamedArg) {
+    // Exclude other System V OS (e.g. Darwin, PS4 and FreeBSD) since we don't
+    // want to spend any effort dealing with the ramifications of ABI breaks.
+    // If the vector type is __m128/__m256/__m512, return the type's alignment.
+    unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8;
+    if (Ty->isVectorType() &&
+        (TypeAlign == 16 || TypeAlign == 32 || TypeAlign == 64))
+      return TypeAlign;
+  }
   // On non-Darwin, the stack type alignment is always 4.
   if (!IsDarwinVectorABI) {
     // Set explicit alignment, since we may need to realign the top.
@@ -1607,7 +1619,7 @@
 
   // Compute the byval alignment.
   unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8;
-  unsigned StackAlign = getTypeStackAlignInBytes(Ty, TypeAlign);
+  unsigned StackAlign = getTypeStackAlignInBytes(Ty, TypeAlign, /*isNamedArg*/true);
   if (StackAlign == 0)
     return ABIArgInfo::getIndirect(CharUnits::fromQuantity(4), /*ByVal=*/true);
 
@@ -1738,8 +1750,8 @@
   }
 }
 
-ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
-                                               CCState &State) const {
+ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty, CCState &State,
+                                               bool isNamedArg) const {
   // FIXME: Set alignment on indirect arguments.
   bool IsFastCall = State.CC == llvm::CallingConv::X86_FastCall;
   bool IsRegCall = State.CC == llvm::CallingConv::X86_RegCall;
@@ -1849,6 +1861,19 @@
     if (IsX86_MMXType(CGT.ConvertType(Ty)))
       return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), 64));
 
+    if (IsLinuxABI && !isNamedArg) {
+      // Exclude other System V OS (e.g. Darwin, PS4 and FreeBSD) since we don't
+      // want to spend any effort dealing with the ramifications of ABI breaks.
+      // According to i386 System V ABI, if parameters of type __m256 are
+      // required to be passed on the stack, the stack pointer must be aligned
+      // to 32 bytes. If parameters of type __m512 are required to be passed on
+      // the stack, the stack pointer must be aligned to 64 bytes. This change
+      // will only apply to varargs function calls.
+      unsigned Align = getContext().getTypeAlign(Ty) / 8;
+      if (Align == 16 || Align == 32 || Align == 64)
+        return ABIArgInfo::getIndirect(CharUnits::fromQuantity(Align),
+                                       /*ByVal*/true);
+    }
     return ABIArgInfo::getDirect();
   }
 
@@ -1924,12 +1949,14 @@
 
   bool UsedInAlloca = false;
   MutableArrayRef<CGFunctionInfoArgInfo> Args = FI.arguments();
+  unsigned NumRequiredArgs = FI.getNumRequiredArgs();
   for (int I = 0, E = Args.size(); I < E; ++I) {
     // Skip arguments that have already been assigned.
     if (State.IsPreassigned.test(I))
       continue;
 
-    Args[I].info = classifyArgumentType(Args[I].type, State);
+    Args[I].info = classifyArgumentType(Args[I].type, State,
+                                        /*isNamedArg*/ I < NumRequiredArgs);
     UsedInAlloca |= (Args[I].info.getKind() == ABIArgInfo::InAlloca);
   }
 
@@ -2044,8 +2071,8 @@
   //
   // Just messing with TypeInfo like this works because we never pass
   // anything indirectly.
-  TypeInfo.second = CharUnits::fromQuantity(
-                getTypeStackAlignInBytes(Ty, TypeInfo.second.getQuantity()));
+  TypeInfo.second = CharUnits::fromQuantity(getTypeStackAlignInBytes(
+      Ty, TypeInfo.second.getQuantity(), /*isNamedArg*/false));
 
   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, /*Indirect*/ false,
                           TypeInfo, CharUnits::fromQuantity(4),
Index: clang/test/CodeGen/x86_32-align-linux.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/x86_32-align-linux.c
@@ -0,0 +1,60 @@
+// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -target-feature +avx -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -target-feature +avx512f -emit-llvm -o - %s | FileCheck %s
+
+#include <immintrin.h>
+
+// CHECK-LABEL: define{{.*}} void @testm128
+// CHECK: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT: %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT: %1 = add i32 %0, 15
+// CHECK-NEXT: %2 = and i32 %1, -16
+// CHECK-NEXT: %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm128(int argCount, ...) {
+  __m128 res;
+  __builtin_va_list args;
+  __builtin_va_start(args, argCount);
+  res = __builtin_va_arg(args, __m128);
+  __builtin_va_end(args);
+}
+
+// CHECK-LABEL: define{{.*}} void @testm256
+// CHECK: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT: %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT: %1 = add i32 %0, 31
+// CHECK-NEXT: %2 = and i32 %1, -32
+// CHECK-NEXT: %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm256(int argCount, ...) {
+  __m256 res;
+  __builtin_va_list args;
+  __builtin_va_start(args, argCount);
+  res = __builtin_va_arg(args, __m256);
+  __builtin_va_end(args);
+}
+
+// CHECK-LABEL: define{{.*}} void @testm512
+// CHECK: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT: %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT: %1 = add i32 %0, 63
+// CHECK-NEXT: %2 = and i32 %1, -64
+// CHECK-NEXT: %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm512(int argCount, ...) {
+  __m512 res;
+  __builtin_va_list args;
+  __builtin_va_start(args, argCount);
+  res = __builtin_va_arg(args, __m512);
+  __builtin_va_end(args);
+}
+
+// CHECK-LABEL: define{{.*}} void @testPastArguments
+// CHECK: call void (i32, ...) @testm128(i32 1, <4 x float>* byval(<4 x float>) align 16
+// CHECK: call void (i32, ...) @testm256(i32 1, <8 x float>* byval(<8 x float>) align 32
+// CHECK: call void (i32, ...) @testm512(i32 1, <16 x float>* byval(<16 x float>) align 64
+void testPastArguments(void) {
+  __m128 a;
+  __m256 b;
+  __m512 c;
+  testm128(1, a);
+  testm256(1, b);
+  testm512(1, c);
+}
Index: clang/test/CodeGen/x86_32-arguments-linux.c
===================================================================
--- clang/test/CodeGen/x86_32-arguments-linux.c
+++ clang/test/CodeGen/x86_32-arguments-linux.c
@@ -14,10 +14,10 @@
 // CHECK: i32 %{{.*}}, %struct.s56_0* byval(%struct.s56_0) align 4 %{{[^ ]*}},
 // CHECK: i64 %{{[^ ]*}}, %struct.s56_1* byval(%struct.s56_1) align 4 %{{[^ ]*}},
 // CHECK: <1 x double> %{{[^ ]*}}, %struct.s56_2* byval(%struct.s56_2) align 4 %{{[^ ]*}},
-// CHECK: <4 x i32> %{{[^ ]*}}, %struct.s56_3* byval(%struct.s56_3) align 4 %{{[^ ]*}},
-// CHECK: <2 x double> %{{[^ ]*}}, %struct.s56_4* byval(%struct.s56_4) align 4 %{{[^ ]*}},
-// CHECK: <8 x i32> %{{[^ ]*}}, %struct.s56_5* byval(%struct.s56_5) align 4 %{{[^ ]*}},
-// CHECK: <4 x double> %{{[^ ]*}}, %struct.s56_6* byval(%struct.s56_6) align 4 %{{[^ ]*}})
+// CHECK: (<4 x i32>) align 16 %{{[^ ]*}}, %struct.s56_3* byval(%struct.s56_3) align 4 %{{[^ ]*}},
+// CHECK: (<2 x double>) align 16 %{{[^ ]*}}, %struct.s56_4* byval(%struct.s56_4) align 4 %{{[^ ]*}},
+// CHECK: (<8 x i32>) align 32 %{{[^ ]*}}, %struct.s56_5* byval(%struct.s56_5) align 4 %{{[^ ]*}},
+// CHECK: (<4 x double>) align 32 %{{[^ ]*}}, %struct.s56_6* byval(%struct.s56_6) align 4 %{{[^ ]*}})
 // CHECK: }
 //
 // [i386] clang misaligns long double in structures