Skip to content

Commit 6c8a34e

Browse files
committed Sep 6, 2019
[X86] Prevent passing vectors of __int128 as <X x i128> in llvm IR
As far as I can tell, gcc passes 256/512 bit vectors of __int128 in memory, and passes a vector of one __int128 in an xmm register. The backend considers <X x i128> an illegal type and will scalarize any arguments with that type, so we need to coerce the argument types in the frontend to avoid the illegal type. I'm restricting this change to Linux and NetBSD based on how similar ABI changes have been handled in the past. PS4, FreeBSD, and Darwin are unaffected. I've also added a new -fclang-abi-compat version to restore the old behavior. This issue was identified in PR42607. Even with the types changed, though, we still seem to be doing some unnecessary stack realignment. llvm-svn: 371169
1 parent 890b551 commit 6c8a34e

File tree

5 files changed

+58
-6
lines changed

5 files changed

+58
-6
lines changed
 

‎clang/docs/ReleaseNotes.rst

+4-1
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,10 @@ OpenCL C Language Changes in Clang
129129
ABI Changes in Clang
130130
--------------------
131131

132-
- ...
132+
- gcc passes vectors of __int128 in memory on X86-64. Clang historically
133+
broke the vectors into multiple scalars using two 64-bit values for each
134+
element. Clang now matches the gcc behavior on Linux and NetBSD. You can
135+
switch back to the old ABI behavior with the flag: -fclang-abi-compat=9.0.
133136

134137
OpenMP Support in Clang
135138
-----------------------

‎clang/include/clang/Basic/LangOptions.h

+6
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,12 @@ class LangOptions : public LangOptionsBase {
138138
/// rather than returning the required alignment.
139139
Ver7,
140140

141+
/// Attempt to be ABI-compatible with code generated by Clang 9.0.x
142+
/// (SVN r351319). This causes vectors of __int128 to be broken into
143+
/// multiple scalars instead of being passed in memory on x86_64 on Linux
144+
/// and NetBSD.
145+
Ver9,
146+
141147
/// Conform to the underlying platform's C and C++ ABIs as closely
142148
/// as we can.
143149
Latest

‎clang/lib/CodeGen/TargetInfo.cpp

+40-2
Original file line numberDiff line numberDiff line change
@@ -2180,6 +2180,17 @@ class X86_64ABIInfo : public SwiftABIInfo {
21802180
return true;
21812181
}
21822182

2183+
// GCC classifies vectors of __int128 as memory.
2184+
bool passInt128VectorsInMem() const {
2185+
// Clang <= 9.0 did not do this.
2186+
if (getContext().getLangOpts().getClangABICompat() <=
2187+
LangOptions::ClangABI::Ver9)
2188+
return false;
2189+
2190+
const llvm::Triple &T = getTarget().getTriple();
2191+
return T.isOSLinux() || T.isOSNetBSD();
2192+
}
2193+
21832194
X86AVXABILevel AVXLevel;
21842195
// Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on
21852196
// 64-bit hardware.
@@ -2660,6 +2671,14 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase,
26602671
Hi = Lo;
26612672
} else if (Size == 128 ||
26622673
(isNamedArg && Size <= getNativeVectorSizeForAVXABI(AVXLevel))) {
2674+
QualType ElementType = VT->getElementType();
2675+
2676+
// gcc passes 256 and 512 bit <X x __int128> vectors in memory. :(
2677+
if (passInt128VectorsInMem() && Size != 128 &&
2678+
(ElementType->isSpecificBuiltinType(BuiltinType::Int128) ||
2679+
ElementType->isSpecificBuiltinType(BuiltinType::UInt128)))
2680+
return;
2681+
26632682
// Arguments of 256-bits are split into four eightbyte chunks. The
26642683
// least significant one belongs to class SSE and all the others to class
26652684
// SSEUP. The original Lo and Hi design considers that types can't be
@@ -2902,6 +2921,11 @@ bool X86_64ABIInfo::IsIllegalVectorType(QualType Ty) const {
29022921
unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel);
29032922
if (Size <= 64 || Size > LargestVector)
29042923
return true;
2924+
QualType EltTy = VecTy->getElementType();
2925+
if (passInt128VectorsInMem() &&
2926+
(EltTy->isSpecificBuiltinType(BuiltinType::Int128) ||
2927+
EltTy->isSpecificBuiltinType(BuiltinType::UInt128)))
2928+
return true;
29052929
}
29062930

29072931
return false;
@@ -2976,14 +3000,28 @@ llvm::Type *X86_64ABIInfo::GetByteVectorType(QualType Ty) const {
29763000
Ty = QualType(InnerTy, 0);
29773001

29783002
llvm::Type *IRType = CGT.ConvertType(Ty);
2979-
if (isa<llvm::VectorType>(IRType) ||
2980-
IRType->getTypeID() == llvm::Type::FP128TyID)
3003+
if (isa<llvm::VectorType>(IRType)) {
3004+
// Don't pass vXi128 vectors in their native type, the backend can't
3005+
// legalize them.
3006+
if (passInt128VectorsInMem() &&
3007+
IRType->getVectorElementType()->isIntegerTy(128)) {
3008+
// Use a vXi64 vector.
3009+
uint64_t Size = getContext().getTypeSize(Ty);
3010+
return llvm::VectorType::get(llvm::Type::getInt64Ty(getVMContext()),
3011+
Size / 64);
3012+
}
3013+
3014+
return IRType;
3015+
}
3016+
3017+
if (IRType->getTypeID() == llvm::Type::FP128TyID)
29813018
return IRType;
29823019

29833020
// We couldn't find the preferred IR vector type for 'Ty'.
29843021
uint64_t Size = getContext().getTypeSize(Ty);
29853022
assert((Size == 128 || Size == 256 || Size == 512) && "Invalid type found!");
29863023

3024+
29873025
// Return a LLVM IR vector type based on the size of 'Ty'.
29883026
return llvm::VectorType::get(llvm::Type::getDoubleTy(getVMContext()),
29893027
Size / 64);

‎clang/lib/Frontend/CompilerInvocation.cpp

+2
Original file line numberDiff line numberDiff line change
@@ -3164,6 +3164,8 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
31643164
Opts.setClangABICompat(LangOptions::ClangABI::Ver6);
31653165
else if (Major <= 7)
31663166
Opts.setClangABICompat(LangOptions::ClangABI::Ver7);
3167+
else if (Major <= 9)
3168+
Opts.setClangABICompat(LangOptions::ClangABI::Ver9);
31673169
} else if (Ver != "latest") {
31683170
Diags.Report(diag::err_drv_invalid_value)
31693171
<< A->getAsString(Args) << A->getValue();

‎clang/test/CodeGen/x86-vec-i128.c

+6-3
Original file line numberDiff line numberDiff line change
@@ -3,24 +3,27 @@
33
// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -target-feature +sse2 -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,MEM256ALIGN16,MEM512ALIGN16
44
// RUN: %clang_cc1 -triple x86_64-scei-ps4 %s -target-feature +sse2 -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,MEM256ALIGN32,MEM512ALIGN64
55
// RUN: %clang_cc1 -triple x86_64-unknown-freebsd10.0 %s -target-feature +sse2 -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,MEM256ALIGN32,MEM512ALIGN64
6+
// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +sse2 -S -emit-llvm -o - -fclang-abi-compat=9 | FileCheck %s --check-prefixes=CLANG9ABI128,MEM256ALIGN32,MEM512ALIGN64
67

78
// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG10ABI128,CLANG10ABI256,MEM512ALIGN64
89
// RUN: %clang_cc1 -triple x86_64-netbsd %s -target-feature +avx -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG10ABI128,CLANG10ABI256,MEM512ALIGN64
910
// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -target-feature +avx -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,MEM512ALIGN32
1011
// RUN: %clang_cc1 -triple x86_64-scei-ps4 %s -target-feature +avx -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,MEM512ALIGN64
1112
// RUN: %clang_cc1 -triple x86_64-unknown-freebsd10.0 %s -target-feature +avx -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,MEM512ALIGN64
13+
// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx -S -emit-llvm -o - -fclang-abi-compat=9 | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,MEM512ALIGN64
1214

1315
// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx512f -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG10ABI128,CLANG10ABI256,CLANG10ABI512
1416
// RUN: %clang_cc1 -triple x86_64-netbsd %s -target-feature +avx512f -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG10ABI128,CLANG10ABI256,CLANG10ABI512
1517
// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -target-feature +avx512f -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,CLANG9ABI512
1618
// RUN: %clang_cc1 -triple x86_64-scei-ps4 %s -target-feature +avx512f -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,CLANG9ABI512
1719
// RUN: %clang_cc1 -triple x86_64-unknown-freebsd10.0 %s -target-feature +avx512f -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,CLANG9ABI512
20+
// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx512f -S -emit-llvm -o - -fclang-abi-compat=9 | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,CLANG9ABI512
1821

1922
typedef unsigned long long v16u64 __attribute__((vector_size(16)));
2023
typedef unsigned __int128 v16u128 __attribute__((vector_size(16)));
2124

2225
v16u64 test_v16u128(v16u64 a, v16u128 b) {
23-
// CLANG10ABI128: define <2 x i64> @test_v16u128(<2 x i64> %{{.*}}, <1 x i128> %{{.*}})
26+
// CLANG10ABI128: define <2 x i64> @test_v16u128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
2427
// CLANG9ABI128: define <2 x i64> @test_v16u128(<2 x i64> %{{.*}}, <1 x i128> %{{.*}})
2528
return a + (v16u64)b;
2629
}
@@ -31,7 +34,7 @@ typedef unsigned __int128 v32u128 __attribute__((vector_size(32)));
3134
v32u64 test_v32u128(v32u64 a, v32u128 b) {
3235
// MEM256ALIGN16: define <4 x i64> @test_v32u128(<4 x i64>* byval(<4 x i64>) align 16 %{{.*}}, <2 x i128>* byval(<2 x i128>) align 16 %{{.*}})
3336
// MEM256ALIGN32: define <4 x i64> @test_v32u128(<4 x i64>* byval(<4 x i64>) align 32 %{{.*}}, <2 x i128>* byval(<2 x i128>) align 32 %{{.*}})
34-
// CLANG10ABI256: define <4 x i64> @test_v32u128(<4 x i64> %{{.*}}, <2 x i128> %{{.*}})
37+
// CLANG10ABI256: define <4 x i64> @test_v32u128(<4 x i64> %{{.*}}, <2 x i128>* byval(<2 x i128>) align 32 %{{.*}})
3538
// CLANG9ABI256: define <4 x i64> @test_v32u128(<4 x i64> %{{.*}}, <2 x i128> %{{.*}})
3639
return a + (v32u64)b;
3740
}
@@ -43,7 +46,7 @@ v64u64 test_v64u128(v64u64 a, v64u128 b) {
4346
// MEM512ALIGN16: define <8 x i64> @test_v64u128(<8 x i64>* byval(<8 x i64>) align 16 %{{.*}}, <4 x i128>* byval(<4 x i128>) align 16 %{{.*}})
4447
// MEM512ALIGN32: define <8 x i64> @test_v64u128(<8 x i64>* byval(<8 x i64>) align 32 %{{.*}}, <4 x i128>* byval(<4 x i128>) align 32 %{{.*}})
4548
// MEM512ALIGN64: define <8 x i64> @test_v64u128(<8 x i64>* byval(<8 x i64>) align 64 %{{.*}}, <4 x i128>* byval(<4 x i128>) align 64 %{{.*}})
46-
// CLANG10ABI512: define <8 x i64> @test_v64u128(<8 x i64> %{{.*}}, <4 x i128> %{{.*}})
49+
// CLANG10ABI512: define <8 x i64> @test_v64u128(<8 x i64> %{{.*}}, <4 x i128>* byval(<4 x i128>) align 64 %{{.*}})
4750
// CLANG9ABI512: define <8 x i64> @test_v64u128(<8 x i64> %{{.*}}, <4 x i128> %{{.*}})
4851
return a + (v64u64)b;
4952
}

0 commit comments

Comments
 (0)