Index: clang/lib/CodeGen/TargetInfo.cpp
===================================================================
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -1053,6 +1053,7 @@
   bool IsWin32StructABI;
   bool IsSoftFloatABI;
   bool IsMCUABI;
+  bool IsLinuxABI;
   unsigned DefaultNumRegisterParameters;
 
   static bool isRegisterSize(unsigned Size) {
@@ -1118,6 +1119,7 @@
       IsWin32StructABI(Win32StructABI),
       IsSoftFloatABI(SoftFloatABI),
       IsMCUABI(CGT.getTarget().getTriple().isOSIAMCU()),
+      IsLinuxABI(CGT.getTarget().getTriple().isOSLinux()),
       DefaultNumRegisterParameters(NumRegisterParameters) {}
 
   bool shouldPassIndirectlyForSwift(ArrayRef<llvm::Type*> scalars,
@@ -1538,6 +1540,15 @@
   if (Align <= MinABIStackAlignInBytes)
     return 0; // Use default alignment.
 
+  if (IsLinuxABI) {
+    // Exclude other System V OS (e.g. Darwin, PS4 and FreeBSD) since we don't
+    // want to spend any effort dealing with the ramifications of ABI breaks.
+    // If the vector type is __m128/__m256/__m512, return its natural alignment.
+    unsigned TypeAlign = getContext().getTypeAlign(Ty) / 8;
+    if (Ty->getAs<VectorType>() &&
+        (TypeAlign == 16 || TypeAlign == 32 || TypeAlign == 64))
+      return TypeAlign;
+  }
   // On non-Darwin, the stack type alignment is always 4.
   if (!IsDarwinVectorABI) {
     // Set explicit alignment, since we may need to realign the top.
@@ -1807,6 +1818,17 @@
     if (IsX86_MMXType(CGT.ConvertType(Ty)))
       return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), 64));
 
+    if (IsLinuxABI) {
+      // Exclude other System V OS (e.g. Darwin, PS4 and FreeBSD) since we don't
+      // want to spend any effort dealing with the ramifications of ABI breaks.
+      // According to i386 System V ABI, if parameters of type __m256 are required
+      // to be passed on the stack, the stack pointer must be aligned to 32 byte.
+      // If parameters of type __m512 are required to be passed on the stack,
+      // the stack pointer must be aligned to 64 byte.
+      unsigned Align = getContext().getTypeAlign(Ty) / 8;
+      if (Align == 16 || Align == 32 || Align == 64)
+        return ABIArgInfo::getIndirect(CharUnits::fromQuantity(Align));
+    }
     return ABIArgInfo::getDirect();
   }
 
Index: clang/test/CodeGen/x86-vector-width.c
===================================================================
--- clang/test/CodeGen/x86-vector-width.c
+++ clang/test/CodeGen/x86-vector-width.c
@@ -52,10 +52,11 @@
 // CHECK: {{.*}}@goo{{.*}} #1
 // CHECK: {{.*}}@hoo{{.*}} #0
 // CHECK: {{.*}}@joo{{.*}} #1
-// CHECK: {{.*}}@koo{{.*}} #0
-// CHECK: {{.*}}@loo{{.*}} #1
-// CHECK: {{.*}}@moo{{.*}} #0
-// CHECK: {{.*}}@noo{{.*}} #1
+// CHECK: {{.*}}@koo{{.*}} #3
+// CHECK: {{.*}}@loo{{.*}} #3
+// CHECK: {{.*}}@moo{{.*}} #3
+// CHECK: {{.*}}@noo{{.*}} #3
 
 // CHECK: #0 = {{.*}}"min-legal-vector-width"="128"
 // CHECK: #1 = {{.*}}"min-legal-vector-width"="256"
+// CHECK: #3 = {{.*}}"min-legal-vector-width"="0"
Index: clang/test/CodeGen/x86_32-align-linux.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/x86_32-align-linux.c
@@ -0,0 +1,60 @@
+// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -target-feature +avx -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -w -fblocks -ffreestanding -triple i386-pc-linux-gnu -target-feature +avx512f -emit-llvm -o - %s | FileCheck %s
+
+#include <immintrin.h>
+
+// CHECK-LABEL: define void @testm128
+// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT: %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT: %1 = add i32 %0, 15
+// CHECK-NEXT: %2 = and i32 %1, -16
+// CHECK-NEXT: %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm128(int argCount, ...) {
+  __m128 res;
+  __builtin_va_list args;
+  __builtin_va_start(args, argCount);
+  res = __builtin_va_arg(args, __m128);
+  __builtin_va_end(args);
+}
+
+// CHECK-LABEL: define void @testm256
+// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT: %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT: %1 = add i32 %0, 31
+// CHECK-NEXT: %2 = and i32 %1, -32
+// CHECK-NEXT: %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm256(int argCount, ...) {
+  __m256 res;
+  __builtin_va_list args;
+  __builtin_va_start(args, argCount);
+  res = __builtin_va_arg(args, __m256);
+  __builtin_va_end(args);
+}
+
+// CHECK-LABEL: define void @testm512
+// CHECK-LABEL: %argp.cur = load i8*, i8** %args, align 4
+// CHECK-NEXT: %0 = ptrtoint i8* %argp.cur to i32
+// CHECK-NEXT: %1 = add i32 %0, 63
+// CHECK-NEXT: %2 = and i32 %1, -64
+// CHECK-NEXT: %argp.cur.aligned = inttoptr i32 %2 to i8*
+void testm512(int argCount, ...) {
+  __m512 res;
+  __builtin_va_list args;
+  __builtin_va_start(args, argCount);
+  res = __builtin_va_arg(args, __m512);
+  __builtin_va_end(args);
+}
+
+// CHECK-LABEL: define dso_local void @testPastArguments
+// CHECK: call void (i32, ...) @testm128(i32 1, <4 x float>* byval(<4 x float>) align 16
+// CHECK: call void (i32, ...) @testm256(i32 1, <8 x float>* byval(<8 x float>) align 32
+// CHECK: call void (i32, ...) @testm512(i32 1, <16 x float>* byval(<16 x float>) align 64
+void testPastArguments(void) {
+  __m128 a;
+  __m256 b;
+  __m512 c;
+  testm128(1, a);
+  testm256(1, b);
+  testm512(1, c);
+}
Index: clang/test/CodeGen/x86_32-arguments-linux.c
===================================================================
--- clang/test/CodeGen/x86_32-arguments-linux.c
+++ clang/test/CodeGen/x86_32-arguments-linux.c
@@ -5,19 +5,19 @@
 // CHECK: i8 signext %a0, %struct.s56_0* byval(%struct.s56_0) align 4 %a1,
 // CHECK: i64 %a2.coerce, %struct.s56_1* byval(%struct.s56_1) align 4 %0,
 // CHECK: <1 x double> %a4, %struct.s56_2* byval(%struct.s56_2) align 4 %1,
-// CHECK: <4 x i32> %a6, %struct.s56_3* byval(%struct.s56_3) align 4 %2,
-// CHECK: <2 x double> %a8, %struct.s56_4* byval(%struct.s56_4) align 4 %3,
-// CHECK: <8 x i32> %a10, %struct.s56_5* byval(%struct.s56_5) align 4 %4,
-// CHECK: <4 x double> %a12, %struct.s56_6* byval(%struct.s56_6) align 4 %5)
+// CHECK: <4 x i32>* byval(<4 x i32>) align 16 %2, %struct.s56_3* byval(%struct.s56_3) align 4 %3,
+// CHECK: <2 x double>* byval(<2 x double>) align 16 %4, %struct.s56_4* byval(%struct.s56_4) align 4 %5,
+// CHECK: <8 x i32>* byval(<8 x i32>) align 32 %6, %struct.s56_5* byval(%struct.s56_5) align 4 %7,
+// CHECK: <4 x double>* byval(<4 x double>) align 32 %8, %struct.s56_6* byval(%struct.s56_6) align 4 %9)
 
 // CHECK: call void (i32, ...) @f56_0(i32 1,
 // CHECK: i32 %{{.*}}, %struct.s56_0* byval(%struct.s56_0) align 4 %{{[^ ]*}},
 // CHECK: i64 %{{[^ ]*}}, %struct.s56_1* byval(%struct.s56_1) align 4 %{{[^ ]*}},
 // CHECK: <1 x double> %{{[^ ]*}}, %struct.s56_2* byval(%struct.s56_2) align 4 %{{[^ ]*}},
-// CHECK: <4 x i32> %{{[^ ]*}}, %struct.s56_3* byval(%struct.s56_3) align 4 %{{[^ ]*}},
-// CHECK: <2 x double> %{{[^ ]*}}, %struct.s56_4* byval(%struct.s56_4) align 4 %{{[^ ]*}},
-// CHECK: <8 x i32> %{{[^ ]*}}, %struct.s56_5* byval(%struct.s56_5) align 4 %{{[^ ]*}},
-// CHECK: <4 x double> %{{[^ ]*}}, %struct.s56_6* byval(%struct.s56_6) align 4 %{{[^ ]*}})
+// CHECK: <4 x i32>* byval(<4 x i32>) align 16 %{{[^ ]*}}, %struct.s56_3* byval(%struct.s56_3) align 4 %{{[^ ]*}},
+// CHECK: <2 x double>* byval(<2 x double>) align 16 %{{[^ ]*}}, %struct.s56_4* byval(%struct.s56_4) align 4 %{{[^ ]*}},
+// CHECK: <8 x i32>* byval(<8 x i32>) align 32 %{{[^ ]*}}, %struct.s56_5* byval(%struct.s56_5) align 4 %{{[^ ]*}},
+// CHECK: <4 x double>* byval(<4 x double>) align 32 %{{[^ ]*}}, %struct.s56_6* byval(%struct.s56_6) align 4 %{{[^ ]*}})
 // CHECK: }
 //
 // [i386] clang misaligns long double in structures
Index: clang/test/CodeGen/x86_32-arguments-nommx.c
===================================================================
--- clang/test/CodeGen/x86_32-arguments-nommx.c
+++ clang/test/CodeGen/x86_32-arguments-nommx.c
@@ -8,4 +8,4 @@
 // but SSE2 vectors should still go into an SSE2 register
 typedef int __attribute__((vector_size (16))) i32v4;
 int b(i32v4 x) { return x[0]; }
-// CHECK-LABEL: define i32 @b(<4 x i32> %x)
+// CHECK-LABEL: define i32 @b(<4 x i32>* byval(<4 x i32>) align 16 %0)