diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -125,7 +125,10 @@ ABI Changes in Clang -------------------- -- ... +- gcc passes vectors of __int128 in memory on X86-64. Clang historically + broke the vectors into multiple scalars using two 64-bit values for each + element. Clang now matches the gcc behavior on Linux and NetBSD. You can + switch back to the old ABI behavior with flag: -fclang-abi-compat=9.0. OpenMP Support in Clang ----------------------- diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -138,6 +138,12 @@ /// rather than returning the required alignment. Ver7, + /// Attempt to be ABI-compatible with code generated by Clang 9.0.x + /// (SVN r351319). This causes vectors of __int128 to be passed in multiple + /// scalar registers instead of in memory on x86_64 on Linux and + /// NetBSD. + Ver9, + /// Conform to the underlying platform's C and C++ ABIs as closely /// as we can. Latest diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -2177,6 +2177,17 @@ return true; } + // GCC classifies vectors of __int128 as memory. + bool passInt128VectorsInMem() const { + // Clang <= 9.0 did not do this. + if (getContext().getLangOpts().getClangABICompat() <= + LangOptions::ClangABI::Ver9) + return false; + + const llvm::Triple &T = getTarget().getTriple(); + return T.isOSLinux() || T.isOSNetBSD(); + } + X86AVXABILevel AVXLevel; // Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on // 64-bit hardware. 
@@ -2657,6 +2668,14 @@ Hi = Lo; } else if (Size == 128 || (isNamedArg && Size <= getNativeVectorSizeForAVXABI(AVXLevel))) { + QualType ElementType = VT->getElementType(); + + // gcc passes 256 and 512 bit <X x __int128> vectors in memory. :( + if (passInt128VectorsInMem() && Size != 128 && + (ElementType->isSpecificBuiltinType(BuiltinType::Int128) || + ElementType->isSpecificBuiltinType(BuiltinType::UInt128))) + return; + // Arguments of 256-bits are split into four eightbyte chunks. The // least significant one belongs to class SSE and all the others to class // SSEUP. The original Lo and Hi design considers that types can't be @@ -2899,6 +2918,11 @@ unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel); if (Size <= 64 || Size > LargestVector) return true; + QualType EltTy = VecTy->getElementType(); + if (passInt128VectorsInMem() && + (EltTy->isSpecificBuiltinType(BuiltinType::Int128) || + EltTy->isSpecificBuiltinType(BuiltinType::UInt128))) + return true; } return false; @@ -2973,14 +2997,28 @@ Ty = QualType(InnerTy, 0); llvm::Type *IRType = CGT.ConvertType(Ty); - if (isa<llvm::VectorType>(IRType) || - IRType->getTypeID() == llvm::Type::FP128TyID) + if (isa<llvm::VectorType>(IRType)) { + // Don't pass vXi128 vectors in their native type, the backend can't + // legalize them. + if (passInt128VectorsInMem() && + IRType->getVectorElementType()->isIntegerTy(128)) { + // Use a vXi64 vector. + uint64_t Size = getContext().getTypeSize(Ty); + return llvm::VectorType::get(llvm::Type::getInt64Ty(getVMContext()), + Size / 64); + } + + return IRType; + } + + if (IRType->getTypeID() == llvm::Type::FP128TyID) return IRType; // We couldn't find the preferred IR vector type for 'Ty'. uint64_t Size = getContext().getTypeSize(Ty); assert((Size == 128 || Size == 256 || Size == 512) && "Invalid type found!"); + // Return a LLVM IR vector type based on the size of 'Ty'. 
return llvm::VectorType::get(llvm::Type::getDoubleTy(getVMContext()), Size / 64); diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3129,6 +3129,8 @@ Opts.setClangABICompat(LangOptions::ClangABI::Ver6); else if (Major <= 7) Opts.setClangABICompat(LangOptions::ClangABI::Ver7); + else if (Major <= 9) + Opts.setClangABICompat(LangOptions::ClangABI::Ver9); } else if (Ver != "latest") { Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); diff --git a/clang/test/CodeGen/pr42607.c b/clang/test/CodeGen/pr42607.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/pr42607.c @@ -0,0 +1,52 @@ +// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +sse2 -S -emit-llvm -o - | FileCheck %s --check-prefixes=NEWABI128,MEM256ALIGN32,MEM512ALIGN64 +// RUN: %clang_cc1 -triple x86_64-netbsd %s -target-feature +sse2 -S -emit-llvm -o - | FileCheck %s --check-prefixes=NEWABI128,MEM256ALIGN32,MEM512ALIGN64 +// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -target-feature +sse2 -S -emit-llvm -o - | FileCheck %s --check-prefixes=OLDABI128,MEM256ALIGN16,MEM512ALIGN16 +// RUN: %clang_cc1 -triple x86_64-scei-ps4 %s -target-feature +sse2 -S -emit-llvm -o - | FileCheck %s --check-prefixes=OLDABI128,MEM256ALIGN32,MEM512ALIGN64 +// RUN: %clang_cc1 -triple x86_64-unknown-freebsd10.0 %s -target-feature +sse2 -S -emit-llvm -o - | FileCheck %s --check-prefixes=OLDABI128,MEM256ALIGN32,MEM512ALIGN64 +// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +sse2 -S -emit-llvm -o - -fclang-abi-compat=9 | FileCheck %s --check-prefixes=OLDABI128,MEM256ALIGN32,MEM512ALIGN64 + +// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx -S -emit-llvm -o - | FileCheck %s --check-prefixes=NEWABI128,NEWABI256,MEM512ALIGN64 +// RUN: %clang_cc1 -triple x86_64-netbsd %s -target-feature +avx -S -emit-llvm 
-o - | FileCheck %s --check-prefixes=NEWABI128,NEWABI256,MEM512ALIGN64 +// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -target-feature +avx -S -emit-llvm -o - | FileCheck %s --check-prefixes=OLDABI128,OLDABI256,MEM512ALIGN32 +// RUN: %clang_cc1 -triple x86_64-scei-ps4 %s -target-feature +avx -S -emit-llvm -o - | FileCheck %s --check-prefixes=OLDABI128,OLDABI256,MEM512ALIGN64 +// RUN: %clang_cc1 -triple x86_64-unknown-freebsd10.0 %s -target-feature +avx -S -emit-llvm -o - | FileCheck %s --check-prefixes=OLDABI128,OLDABI256,MEM512ALIGN64 +// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx -S -emit-llvm -o - -fclang-abi-compat=9 | FileCheck %s --check-prefixes=OLDABI128,OLDABI256,MEM512ALIGN64 + +// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx512f -S -emit-llvm -o - | FileCheck %s --check-prefixes=NEWABI128,NEWABI256,NEWABI512 +// RUN: %clang_cc1 -triple x86_64-netbsd %s -target-feature +avx512f -S -emit-llvm -o - | FileCheck %s --check-prefixes=NEWABI128,NEWABI256,NEWABI512 +// RUN: %clang_cc1 -triple x86_64-apple-darwin %s -target-feature +avx512f -S -emit-llvm -o - | FileCheck %s --check-prefixes=OLDABI128,OLDABI256,OLDABI512 +// RUN: %clang_cc1 -triple x86_64-scei-ps4 %s -target-feature +avx512f -S -emit-llvm -o - | FileCheck %s --check-prefixes=OLDABI128,OLDABI256,OLDABI512 +// RUN: %clang_cc1 -triple x86_64-unknown-freebsd10.0 %s -target-feature +avx512f -S -emit-llvm -o - | FileCheck %s --check-prefixes=OLDABI128,OLDABI256,OLDABI512 +// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx512f -S -emit-llvm -o - -fclang-abi-compat=9 | FileCheck %s --check-prefixes=OLDABI128,OLDABI256,OLDABI512 + +typedef unsigned long long v16u64 __attribute__((vector_size(16))); +typedef unsigned __int128 v16u128 __attribute__((vector_size(16))); + +v16u64 test_v16u128(v16u64 a, v16u128 b) { +// NEWABI128: define <2 x i64> @test_v16u128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) +// OLDABI128: define <2 x i64> @test_v16u128(<2 x 
i64> %{{.*}}, <1 x i128> %{{.*}}) + return a + (v16u64)b; +} + +typedef unsigned long long v32u64 __attribute__((vector_size(32))); +typedef unsigned __int128 v32u128 __attribute__((vector_size(32))); + +v32u64 test_v32u128(v32u64 a, v32u128 b) { +// MEM256ALIGN16: define <4 x i64> @test_v32u128(<4 x i64>* byval(<4 x i64>) align 16 %{{.*}}, <2 x i128>* byval(<2 x i128>) align 16 %{{.*}}) +// MEM256ALIGN32: define <4 x i64> @test_v32u128(<4 x i64>* byval(<4 x i64>) align 32 %{{.*}}, <2 x i128>* byval(<2 x i128>) align 32 %{{.*}}) +// NEWABI256: define <4 x i64> @test_v32u128(<4 x i64> %{{.*}}, <2 x i128>* byval(<2 x i128>) align 32 %{{.*}}) +// OLDABI256: define <4 x i64> @test_v32u128(<4 x i64> %{{.*}}, <2 x i128> %{{.*}}) + return a + (v32u64)b; +} + +typedef unsigned long long v64u64 __attribute__((vector_size(64))); +typedef unsigned __int128 v64u128 __attribute__((vector_size(64))); + +v64u64 test_v64u128(v64u64 a, v64u128 b) { +// MEM512ALIGN16: define <8 x i64> @test_v64u128(<8 x i64>* byval(<8 x i64>) align 16 %{{.*}}, <4 x i128>* byval(<4 x i128>) align 16 %{{.*}}) +// MEM512ALIGN32: define <8 x i64> @test_v64u128(<8 x i64>* byval(<8 x i64>) align 32 %{{.*}}, <4 x i128>* byval(<4 x i128>) align 32 %{{.*}}) +// MEM512ALIGN64: define <8 x i64> @test_v64u128(<8 x i64>* byval(<8 x i64>) align 64 %{{.*}}, <4 x i128>* byval(<4 x i128>) align 64 %{{.*}}) +// NEWABI512: define <8 x i64> @test_v64u128(<8 x i64> %{{.*}}, <4 x i128>* byval(<4 x i128>) align 64 %{{.*}}) +// OLDABI512: define <8 x i64> @test_v64u128(<8 x i64> %{{.*}}, <4 x i128> %{{.*}}) + return a + (v64u64)b; +}