Index: clang/docs/ReleaseNotes.rst =================================================================== --- clang/docs/ReleaseNotes.rst +++ clang/docs/ReleaseNotes.rst @@ -129,7 +129,10 @@ ABI Changes in Clang -------------------- -- ... +- gcc passes vectors of __int128 in memory on X86-64. Clang historically + broke the vectors into multiple scalars using two 64-bit values for each + element. Clang now matches the gcc behavior on Linux and NetBSD. You can + switch back to old API behavior with flag: -fclang-abi-compat=9.0. OpenMP Support in Clang ----------------------- Index: clang/include/clang/Basic/LangOptions.h =================================================================== --- clang/include/clang/Basic/LangOptions.h +++ clang/include/clang/Basic/LangOptions.h @@ -138,6 +138,12 @@ /// rather than returning the required alignment. Ver7, + /// Attempt to be ABI-compatible with code generated by Clang 9.0.x + /// (SVN r351319). This causes vectors of __int128 to be passed in memory + /// instead of passing in multiple scalar registers on x86_64 on Linux and + /// NetBSD. + Ver9, + /// Conform to the underlying platform's C and C++ ABIs as closely /// as we can. Latest Index: clang/lib/CodeGen/TargetInfo.cpp =================================================================== --- clang/lib/CodeGen/TargetInfo.cpp +++ clang/lib/CodeGen/TargetInfo.cpp @@ -2180,6 +2180,17 @@ return true; } + // GCC classifies vectors of __int128 as memory. + bool passInt128VectorsInMem() const { + // Clang <= 9.0 did not do this. + if (getContext().getLangOpts().getClangABICompat() <= + LangOptions::ClangABI::Ver9) + return false; + + const llvm::Triple &T = getTarget().getTriple(); + return T.isOSLinux() || T.isOSNetBSD(); + } + X86AVXABILevel AVXLevel; // Some ABIs (e.g. X32 ABI and Native Client OS) use 32 bit pointers on // 64-bit hardware. @@ -2660,6 +2671,14 @@ Hi = Lo; } else if (Size == 128 || (isNamedArg && Size <= getNativeVectorSizeForAVXABI(AVXLevel))) { + QualType ElementType = VT->getElementType(); + + // gcc passes 256 and 512 bit vectors in memory. :( + if (passInt128VectorsInMem() && Size != 128 && + (ElementType->isSpecificBuiltinType(BuiltinType::Int128) || + ElementType->isSpecificBuiltinType(BuiltinType::UInt128))) + return; + // Arguments of 256-bits are split into four eightbyte chunks. The // least significant one belongs to class SSE and all the others to class // SSEUP. The original Lo and Hi design considers that types can't be @@ -2902,6 +2921,11 @@ unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel); if (Size <= 64 || Size > LargestVector) return true; + QualType EltTy = VecTy->getElementType(); + if (passInt128VectorsInMem() && + (EltTy->isSpecificBuiltinType(BuiltinType::Int128) || + EltTy->isSpecificBuiltinType(BuiltinType::UInt128))) + return true; } return false; @@ -2976,14 +3000,28 @@ Ty = QualType(InnerTy, 0); llvm::Type *IRType = CGT.ConvertType(Ty); - if (isa(IRType) || - IRType->getTypeID() == llvm::Type::FP128TyID) + if (isa(IRType)) { + // Don't pass vXi128 vectors in their native type, the backend can't + // legalize them. + if (passInt128VectorsInMem() && + IRType->getVectorElementType()->isIntegerTy(128)) { + // Use a vXi64 vector. + uint64_t Size = getContext().getTypeSize(Ty); + return llvm::VectorType::get(llvm::Type::getInt64Ty(getVMContext()), + Size / 64); + } + + return IRType; + } + + if (IRType->getTypeID() == llvm::Type::FP128TyID) return IRType; // We couldn't find the preferred IR vector type for 'Ty'. uint64_t Size = getContext().getTypeSize(Ty); assert((Size == 128 || Size == 256 || Size == 512) && "Invalid type found!"); + // Return a LLVM IR vector type based on the size of 'Ty'. return llvm::VectorType::get(llvm::Type::getDoubleTy(getVMContext()), Size / 64); Index: clang/lib/Frontend/CompilerInvocation.cpp =================================================================== --- clang/lib/Frontend/CompilerInvocation.cpp +++ clang/lib/Frontend/CompilerInvocation.cpp @@ -3164,6 +3164,8 @@ Opts.setClangABICompat(LangOptions::ClangABI::Ver6); else if (Major <= 7) Opts.setClangABICompat(LangOptions::ClangABI::Ver7); + else if (Major <= 9) + Opts.setClangABICompat(LangOptions::ClangABI::Ver9); } else if (Ver != "latest") { Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << A->getValue(); Index: clang/test/CodeGen/x86-vec-i128.c =================================================================== --- clang/test/CodeGen/x86-vec-i128.c +++ clang/test/CodeGen/x86-vec-i128.c @@ -3,24 +3,27 @@ // RUN: %clang_cc1 -triple x86_64-apple-darwin %s -target-feature +sse2 -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,MEM256ALIGN16,MEM512ALIGN16 // RUN: %clang_cc1 -triple x86_64-scei-ps4 %s -target-feature +sse2 -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,MEM256ALIGN32,MEM512ALIGN64 // RUN: %clang_cc1 -triple x86_64-unknown-freebsd10.0 %s -target-feature +sse2 -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,MEM256ALIGN32,MEM512ALIGN64 +// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +sse2 -S -emit-llvm -o - -fclang-abi-compat=9 | FileCheck %s --check-prefixes=CLANG9ABI128,MEM256ALIGN32,MEM512ALIGN64 // RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG10ABI128,CLANG10ABI256,MEM512ALIGN64 // RUN: %clang_cc1 -triple x86_64-netbsd %s -target-feature +avx -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG10ABI128,CLANG10ABI256,MEM512ALIGN64 // RUN: %clang_cc1 -triple x86_64-apple-darwin %s -target-feature +avx -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,MEM512ALIGN32 // RUN: %clang_cc1 -triple x86_64-scei-ps4 %s -target-feature +avx -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,MEM512ALIGN64 // RUN: %clang_cc1 -triple x86_64-unknown-freebsd10.0 %s -target-feature +avx -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,MEM512ALIGN64 +// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx -S -emit-llvm -o - -fclang-abi-compat=9 | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,MEM512ALIGN64 // RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx512f -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG10ABI128,CLANG10ABI256,CLANG10ABI512 // RUN: %clang_cc1 -triple x86_64-netbsd %s -target-feature +avx512f -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG10ABI128,CLANG10ABI256,CLANG10ABI512 // RUN: %clang_cc1 -triple x86_64-apple-darwin %s -target-feature +avx512f -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,CLANG9ABI512 // RUN: %clang_cc1 -triple x86_64-scei-ps4 %s -target-feature +avx512f -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,CLANG9ABI512 // RUN: %clang_cc1 -triple x86_64-unknown-freebsd10.0 %s -target-feature +avx512f -S -emit-llvm -o - | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,CLANG9ABI512 +// RUN: %clang_cc1 -triple x86_64-linux-gnu %s -target-feature +avx512f -S -emit-llvm -o - -fclang-abi-compat=9 | FileCheck %s --check-prefixes=CLANG9ABI128,CLANG9ABI256,CLANG9ABI512 typedef unsigned long long v16u64 __attribute__((vector_size(16))); typedef unsigned __int128 v16u128 __attribute__((vector_size(16))); v16u64 test_v16u128(v16u64 a, v16u128 b) { -// CLANG10ABI128: define <2 x i64> @test_v16u128(<2 x i64> %{{.*}}, <1 x i128> %{{.*}}) +// CLANG10ABI128: define <2 x i64> @test_v16u128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) // CLANG9ABI128: define <2 x i64> @test_v16u128(<2 x i64> %{{.*}}, <1 x i128> %{{.*}}) return a + (v16u64)b; } @@ -31,7 +34,7 @@ v32u64 test_v32u128(v32u64 a, v32u128 b) { // MEM256ALIGN16: define <4 x i64> @test_v32u128(<4 x i64>* byval(<4 x i64>) align 16 %{{.*}}, <2 x i128>* byval(<2 x i128>) align 16 %{{.*}}) // MEM256ALIGN32: define <4 x i64> @test_v32u128(<4 x i64>* byval(<4 x i64>) align 32 %{{.*}}, <2 x i128>* byval(<2 x i128>) align 32 %{{.*}}) -// CLANG10ABI256: define <4 x i64> @test_v32u128(<4 x i64> %{{.*}}, <2 x i128> %{{.*}}) +// CLANG10ABI256: define <4 x i64> @test_v32u128(<4 x i64> %{{.*}}, <2 x i128>* byval(<2 x i128>) align 32 %{{.*}}) // CLANG9ABI256: define <4 x i64> @test_v32u128(<4 x i64> %{{.*}}, <2 x i128> %{{.*}}) return a + (v32u64)b; } @@ -43,7 +46,7 @@ // MEM512ALIGN16: define <8 x i64> @test_v64u128(<8 x i64>* byval(<8 x i64>) align 16 %{{.*}}, <4 x i128>* byval(<4 x i128>) align 16 %{{.*}}) // MEM512ALIGN32: define <8 x i64> @test_v64u128(<8 x i64>* byval(<8 x i64>) align 32 %{{.*}}, <4 x i128>* byval(<4 x i128>) align 32 %{{.*}}) // MEM512ALIGN64: define <8 x i64> @test_v64u128(<8 x i64>* byval(<8 x i64>) align 64 %{{.*}}, <4 x i128>* byval(<4 x i128>) align 64 %{{.*}}) -// CLANG10ABI512: define <8 x i64> @test_v64u128(<8 x i64> %{{.*}}, <4 x i128> %{{.*}}) +// CLANG10ABI512: define <8 x i64> @test_v64u128(<8 x i64> %{{.*}}, <4 x i128>* byval(<4 x i128>) align 64 %{{.*}}) // CLANG9ABI512: define <8 x i64> @test_v64u128(<8 x i64> %{{.*}}, <4 x i128> %{{.*}}) return a + (v64u64)b; }