Index: include/clang/Basic/TargetInfo.h =================================================================== --- include/clang/Basic/TargetInfo.h +++ include/clang/Basic/TargetInfo.h @@ -559,6 +559,13 @@ return ComplexLongDoubleUsesFP2Ret; } + /// Check whether llvm intrinsics such as llvm.convert.to.fp16 should be used + /// to convert to and from __fp16. This function should be removed once all + /// targets stop using the conversion intrinsics. + virtual bool useFP16ConversionIntrinsics() const { + return true; + } + /// \brief Specify if mangling based on address space map should be used or /// not for language specific address spaces bool useAddressSpaceMapMangling() const { Index: lib/Basic/Targets/AArch64.h =================================================================== --- lib/Basic/Targets/AArch64.h +++ lib/Basic/Targets/AArch64.h @@ -48,6 +48,10 @@ bool isValidCPUName(StringRef Name) const override; bool setCPU(const std::string &Name) override; + bool useFP16ConversionIntrinsics() const override { + return false; + } + void getTargetDefinesARMV81A(const LangOptions &Opts, MacroBuilder &Builder) const; void getTargetDefinesARMV82A(const LangOptions &Opts, Index: lib/Basic/Targets/ARM.h =================================================================== --- lib/Basic/Targets/ARM.h +++ lib/Basic/Targets/ARM.h @@ -126,6 +126,10 @@ bool setFPMath(StringRef Name) override; + bool useFP16ConversionIntrinsics() const override { + return false; + } + void getTargetDefinesARMV81A(const LangOptions &Opts, MacroBuilder &Builder) const; Index: lib/Basic/Targets/X86.h =================================================================== --- lib/Basic/Targets/X86.h +++ lib/Basic/Targets/X86.h @@ -361,6 +361,10 @@ return ""; } + bool useFP16ConversionIntrinsics() const override { + return false; + } + void getTargetDefines(const LangOptions &Opts, MacroBuilder &Builder) const override; Index: lib/CodeGen/CGExprConstant.cpp =================================================================== --- lib/CodeGen/CGExprConstant.cpp +++ lib/CodeGen/CGExprConstant.cpp @@ -1825,7 +1825,7 @@ const llvm::APFloat &Init = Value.getFloat(); if (&Init.getSemantics() == &llvm::APFloat::IEEEhalf() && !CGM.getContext().getLangOpts().NativeHalfType && - !CGM.getContext().getLangOpts().HalfArgsAndReturns) + CGM.getContext().getTargetInfo().useFP16ConversionIntrinsics()) return llvm::ConstantInt::get(CGM.getLLVMContext(), Init.bitcastToAPInt()); else Index: lib/CodeGen/CGExprScalar.cpp =================================================================== --- lib/CodeGen/CGExprScalar.cpp +++ lib/CodeGen/CGExprScalar.cpp @@ -951,7 +951,7 @@ if (SrcType->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) { // Cast to FP using the intrinsic if the half type itself isn't supported. if (DstTy->isFloatingPointTy()) { - if (!CGF.getContext().getLangOpts().HalfArgsAndReturns) + if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) return Builder.CreateCall( CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16, DstTy), Src); @@ -959,7 +959,7 @@ // Cast to other types through float, using either the intrinsic or FPExt, // depending on whether the half type itself is supported // (as opposed to operations on half, available with NativeHalfType). - if (!CGF.getContext().getLangOpts().HalfArgsAndReturns) { + if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) { Src = Builder.CreateCall( CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16, CGF.CGM.FloatTy), @@ -1068,7 +1068,7 @@ if (SrcTy->isFloatingPointTy()) { // Use the intrinsic if the half type itself isn't supported // (as opposed to operations on half, available with NativeHalfType). - if (!CGF.getContext().getLangOpts().HalfArgsAndReturns) + if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) return Builder.CreateCall( CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16, SrcTy), Src); // If the half type is supported, just use an fptrunc. @@ -1104,7 +1104,7 @@ } if (DstTy != ResTy) { - if (!CGF.getContext().getLangOpts().HalfArgsAndReturns) { + if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) { assert(ResTy->isIntegerTy(16) && "Only half FP requires extra conversion"); Res = Builder.CreateCall( CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16, CGF.CGM.FloatTy), @@ -2028,7 +2028,7 @@ if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) { // Another special case: half FP increment should be done via float - if (!CGF.getContext().getLangOpts().HalfArgsAndReturns) { + if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) { value = Builder.CreateCall( CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_from_fp16, CGF.CGM.FloatTy), @@ -2063,7 +2063,7 @@ value = Builder.CreateFAdd(value, amt, isInc ? "inc" : "dec"); if (type->isHalfType() && !CGF.getContext().getLangOpts().NativeHalfType) { - if (!CGF.getContext().getLangOpts().HalfArgsAndReturns) { + if (CGF.getContext().getTargetInfo().useFP16ConversionIntrinsics()) { value = Builder.CreateCall( CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16, CGF.CGM.FloatTy), Index: lib/CodeGen/CodeGenTypes.cpp =================================================================== --- lib/CodeGen/CodeGenTypes.cpp +++ lib/CodeGen/CodeGenTypes.cpp @@ -451,10 +451,10 @@ case BuiltinType::Half: // Half FP can either be storage-only (lowered to i16) or native. - ResultType = - getTypeForFormat(getLLVMContext(), Context.getFloatTypeSemantics(T), - Context.getLangOpts().NativeHalfType || - Context.getLangOpts().HalfArgsAndReturns); + ResultType = getTypeForFormat( + getLLVMContext(), Context.getFloatTypeSemantics(T), + Context.getLangOpts().NativeHalfType || + !Context.getTargetInfo().useFP16ConversionIntrinsics()); break; case BuiltinType::Float: case BuiltinType::Double: Index: lib/Sema/SemaExpr.cpp =================================================================== --- lib/Sema/SemaExpr.cpp +++ lib/Sema/SemaExpr.cpp @@ -11565,7 +11565,8 @@ static bool needsConversionOfHalfVec(bool OpRequiresConversion, ASTContext &Ctx, QualType SrcType) { return OpRequiresConversion && !Ctx.getLangOpts().NativeHalfType && - Ctx.getLangOpts().HalfArgsAndReturns && isVector(SrcType, Ctx.HalfTy); + !Ctx.getTargetInfo().useFP16ConversionIntrinsics() && + isVector(SrcType, Ctx.HalfTy); } /// CreateBuiltinBinOp - Creates a new built-in binary operation with Index: test/CodeGen/fp16-ops.c =================================================================== --- test/CodeGen/fp16-ops.c +++ test/CodeGen/fp16-ops.c @@ -1,8 +1,9 @@ // REQUIRES: arm-registered-target -// RUN: %clang_cc1 -emit-llvm -o - -triple arm-none-linux-gnueabi %s | FileCheck %s --check-prefix=NOHALF --check-prefix=CHECK -// RUN: %clang_cc1 -emit-llvm -o - -triple aarch64-none-linux-gnueabi %s | FileCheck %s --check-prefix=NOHALF --check-prefix=CHECK -// RUN: %clang_cc1 -emit-llvm -o - -triple arm-none-linux-gnueabi -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=HALF --check-prefix=CHECK -// RUN: %clang_cc1 -emit-llvm -o - -triple aarch64-none-linux-gnueabi -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=HALF --check-prefix=CHECK +// RUN: %clang_cc1 -emit-llvm -o - -triple arm-none-linux-gnueabi %s | FileCheck %s --check-prefix=NOTNATIVE --check-prefix=CHECK +// RUN: %clang_cc1 -emit-llvm -o - -triple aarch64-none-linux-gnueabi %s | FileCheck %s --check-prefix=NOTNATIVE --check-prefix=CHECK +// RUN: %clang_cc1 -emit-llvm -o - -triple x86_64-linux-gnu %s | FileCheck %s --check-prefix=NOTNATIVE --check-prefix=CHECK +// RUN: %clang_cc1 -emit-llvm -o - -triple arm-none-linux-gnueabi -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=NOTNATIVE --check-prefix=CHECK +// RUN: %clang_cc1 -emit-llvm -o - -triple aarch64-none-linux-gnueabi -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=NOTNATIVE --check-prefix=CHECK // RUN: %clang_cc1 -emit-llvm -o - -triple arm-none-linux-gnueabi -fnative-half-type %s \ // RUN: | FileCheck %s --check-prefix=NATIVE-HALF // RUN: %clang_cc1 -emit-llvm -o - -triple aarch64-none-linux-gnueabi -fnative-half-type %s \ @@ -16,20 +17,19 @@ volatile __fp16 h0 = 0.0, h1 = 1.0, h2; volatile float f0, f1, f2; volatile double d0; +short s0; void foo(void) { // CHECK-LABEL: define void @foo() // Check unary ops - // NOHALF: [[F16TOF32:call float @llvm.convert.from.fp16.f32]] - // HALF: [[F16TOF32:fpext half]] + // NOTNATIVE: [[F16TOF32:fpext half]] // CHECK: fptoui float // NATIVE-HALF: fptoui half test = (h0); // CHECK: uitofp i32 - // NOHALF: [[F32TOF16:call i16 @llvm.convert.to.fp16.f32]] - // HALF: [[F32TOF16:fptrunc float]] + // NOTNATIVE: [[F32TOF16:fptrunc float]] // NATIVE-HALF: uitofp i32 {{.*}} to half h0 = (test); // CHECK: [[F16TOF32]] @@ -38,8 +38,7 @@ test = (!h1); // CHECK: [[F16TOF32]] // CHECK: fsub float - // NOHALF: [[F32TOF16]] - // HALF: [[F32TOF16]] + // NOTNATIVE: [[F32TOF16]] // NATIVE-HALF: fsub half h1 = -h1; // CHECK: [[F16TOF32]] @@ -76,8 +75,6 @@ // NATIVE-HALF: fmul half h1 = h0 * h2; // CHECK: [[F16TOF32]] - // NOHALF: [[F32TOF16]] - // NOHALF: [[F16TOF32]] // CHECK: fmul float // CHECK: [[F32TOF16]] // NATIVE-HALF: fmul half @@ -107,7 +104,6 @@ // NATIVE-HALF: fdiv half h1 = (h0 / h2); // CHECK: [[F16TOF32]] - // NOHALF: [[F16TOF32]] // CHECK: fdiv float // CHECK: [[F32TOF16]] // NATIVE-HALF: fdiv half @@ -137,7 +133,6 @@ // NATIVE-HALF: fadd half h1 = (h2 + h0); // CHECK: [[F16TOF32]] - // NOHALF: [[F16TOF32]] // CHECK: fadd float // CHECK: [[F32TOF16]] // NATIVE-HALF: fadd half @@ -167,7 +162,6 @@ // NATIVE-HALF: fsub half h1 = (h2 - h0); // CHECK: [[F16TOF32]] - // NOHALF: [[F16TOF32]] // CHECK: fsub float // CHECK: [[F32TOF16]] // NATIVE-HALF: fsub half @@ -196,7 +190,6 @@ // NATIVE-HALF: fcmp olt half test = (h2 < h0); // CHECK: [[F16TOF32]] - // NOHALF: [[F16TOF32]] // CHECK: fcmp olt float // NATIVE-HALF: fcmp olt half test = (h2 < (__fp16)42.0); @@ -225,7 +218,6 @@ // NATIVE-HALF: fcmp ogt half test = (h0 > h2); // CHECK: [[F16TOF32]] - // NOHALF: [[F16TOF32]] // CHECK: fcmp ogt float // NATIVE-HALF: fcmp ogt half test = ((__fp16)42.0 > h2); @@ -254,7 +246,6 @@ // NATIVE-HALF: fcmp ole half test = (h2 <= h0); // CHECK: [[F16TOF32]] - // NOHALF: [[F16TOF32]] // CHECK: fcmp ole float // NATIVE-HALF: fcmp ole half test = (h2 <= (__fp16)42.0); @@ -284,7 +275,6 @@ // NATIVE-HALF: fcmp oge half test = (h0 >= h2); // CHECK: [[F16TOF32]] - // NOHALF: [[F16TOF32]] // CHECK: fcmp oge float // NATIVE-HALF: fcmp oge half test = (h0 >= (__fp16)-2.0); @@ -313,7 +303,6 @@ // NATIVE-HALF: fcmp oeq half test = (h1 == h2); // CHECK: [[F16TOF32]] - // NOHALF: [[F16TOF32]] // CHECK: fcmp oeq float // NATIVE-HALF: fcmp oeq half test = (h1 == (__fp16)1.0); @@ -342,7 +331,6 @@ // NATIVE-HALF: fcmp une half test = (h1 != h2); // CHECK: [[F16TOF32]] - // NOHALF: [[F16TOF32]] // CHECK: fcmp une float // NATIVE-HALF: fcmp une half test = (h1 != (__fp16)1.0); @@ -374,8 +362,7 @@ h1 = (h1 ? h2 : h0); // Check assignments (inc. compound) h0 = h1; - // NOHALF: [[F32TOF16]] - // HALF: store {{.*}} half 0xHC000 + // NOTNATIVE: store {{.*}} half 0xHC000 // NATIVE-HALF: store {{.*}} half 0xHC000 h0 = (__fp16)-2.0f; // CHECK: [[F32TOF16]] @@ -398,7 +385,6 @@ // NATIVE-HALF: fadd half h0 += h1; // CHECK: [[F16TOF32]] - // NOHALF: [[F16TOF32]] // CHECK: fadd float // CHECK: [[F32TOF16]] // NATIVE-HALF: fadd half @@ -433,7 +419,6 @@ // NATIVE-HALF: fsub half h0 -= h1; // CHECK: [[F16TOF32]] - // NOHALF: [[F16TOF32]] // CHECK: fsub float // CHECK: [[F32TOF16]] // NATIVE-HALF: fsub half @@ -468,7 +453,6 @@ // NATIVE-HALF: fmul half h0 *= h1; // CHECK: [[F16TOF32]] - // NOHALF: [[F16TOF32]] // CHECK: fmul float // CHECK: [[F32TOF16]] // NATIVE-HALF: fmul half @@ -503,7 +487,6 @@ // NATIVE-HALF: fdiv half h0 /= h1; // CHECK: [[F16TOF32]] - // NOHALF: [[F16TOF32]] // CHECK: fdiv float // CHECK: [[F32TOF16]] // NATIVE-HALF: fdiv half @@ -532,27 +515,29 @@ h0 /= i0; // Check conversions to/from double - // NOHALF: call i16 @llvm.convert.to.fp16.f64( - // HALF: fptrunc double {{.*}} to half + // NOTNATIVE: fptrunc double {{.*}} to half // NATIVE-HALF: fptrunc double {{.*}} to half h0 = d0; // CHECK: [[MID:%.*]] = fptrunc double {{%.*}} to float - // NOHALF: call i16 @llvm.convert.to.fp16.f32(float [[MID]]) - // HALF: fptrunc float [[MID]] to half + // NOTNATIVE: fptrunc float [[MID]] to half // NATIVE-HALF: [[MID:%.*]] = fptrunc double {{%.*}} to float // NATIVE-HALF: fptrunc float {{.*}} to half h0 = (float)d0; - // NOHALF: call double @llvm.convert.from.fp16.f64( - // HALF: fpext half {{.*}} to double + // NOTNATIVE: fpext half {{.*}} to double // NATIVE-HALF: fpext half {{.*}} to double d0 = h0; - // NOHALF: [[MID:%.*]] = call float @llvm.convert.from.fp16.f32( - // HALF: [[MID:%.*]] = fpext half {{.*}} to float + // NOTNATIVE: [[MID:%.*]] = fpext half {{.*}} to float // CHECK: fpext float [[MID]] to double // NATIVE-HALF: [[MID:%.*]] = fpext half {{.*}} to float // NATIVE-HALF: fpext float [[MID]] to double d0 = (float)h0; + + // NOTNATIVE: [[V1:%.*]] = load i16, i16* @s0 + // NOTNATIVE: [[CONV:%.*]] = sitofp i16 [[V1]] to float + // NOTNATIVE: [[TRUNC:%.*]] = fptrunc float [[CONV]] to half + // NOTNATIVE: store volatile half [[TRUNC]], half* @h0 + h0 = s0; } Index: test/CodeGen/fp16vec-ops.c =================================================================== --- test/CodeGen/fp16vec-ops.c +++ test/CodeGen/fp16vec-ops.c @@ -1,6 +1,7 @@ // REQUIRES: arm-registered-target // RUN: %clang_cc1 -triple arm64-apple-ios9 -emit-llvm -o - -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=CHECK // RUN: %clang_cc1 -triple armv7-apple-ios9 -emit-llvm -o - -fallow-half-arguments-and-returns %s | FileCheck %s --check-prefix=CHECK +// RUN: %clang_cc1 -triple x86_64-apple-macos10.13 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK typedef __fp16 half4 __attribute__ ((vector_size (8))); typedef short short4 __attribute__ ((vector_size (8))); Index: test/CodeGenCXX/float16-declarations.cpp =================================================================== --- test/CodeGenCXX/float16-declarations.cpp +++ test/CodeGenCXX/float16-declarations.cpp @@ -11,16 +11,14 @@ // CHECK-DAG: @_ZN12_GLOBAL__N_13f1nE = internal global half 0xH0000, align 2 _Float16 f2n = 33.f16; -// CHECK-AARCH64-DAG: @_ZN12_GLOBAL__N_13f2nE = internal global half 0xH5020, align 2 -// CHECK-X86-DAG: @_ZN12_GLOBAL__N_13f2nE = internal global i16 20512, align 2 +// CHECK-DAG: @_ZN12_GLOBAL__N_13f2nE = internal global half 0xH5020, align 2 _Float16 arr1n[10]; // CHECK-AARCH64-DAG: @_ZN12_GLOBAL__N_15arr1nE = internal global [10 x half] zeroinitializer, align 2 // CHECK-X86-DAG: @_ZN12_GLOBAL__N_15arr1nE = internal global [10 x half] zeroinitializer, align 16 _Float16 arr2n[] = { 1.2, 3.0, 3.e4 }; -// CHECK-AARCH64-DAG: @_ZN12_GLOBAL__N_15arr2nE = internal global [3 x half] [half 0xH3CCD, half 0xH4200, half 0xH7753], align 2 -// CHECK-X86-DAG: @_ZN12_GLOBAL__N_15arr2nE = internal global [3 x i16] [i16 15565, i16 16896, i16 30547], align 2 +// CHECK-DAG: @_ZN12_GLOBAL__N_15arr2nE = internal global [3 x half] [half 0xH3CCD, half 0xH4200, half 0xH7753], align 2 const volatile _Float16 func1n(const _Float16 &arg) { return arg + f2n + arr1n[4] - arr2n[1]; @@ -35,16 +33,14 @@ // CHECK-X86-DAG: @f1f = global half 0xH0000, align 2 _Float16 f2f = 32.4; -// CHECK-AARCH64-DAG: @f2f = global half 0xH500D, align 2 -// CHECK-X86-DAG: @f2f = global i16 20493, align 2 +// CHECK-DAG: @f2f = global half 0xH500D, align 2 _Float16 arr1f[10]; // CHECK-AARCH64-DAG: @arr1f = global [10 x half] zeroinitializer, align 2 // CHECK-X86-DAG: @arr1f = global [10 x half] zeroinitializer, align 16 _Float16 arr2f[] = { -1.2, -3.0, -3.e4 }; -// CHECK-AARCH64-DAG: @arr2f = global [3 x half] [half 0xHBCCD, half 0xHC200, half 0xHF753], align 2 -// CHECK-X86-DAG: @arr2f = global [3 x i16] [i16 -17203, i16 -15872, i16 -2221], align 2 +// CHECK-DAG: @arr2f = global [3 x half] [half 0xHBCCD, half 0xHC200, half 0xHF753], align 2 _Float16 func1f(_Float16 arg); @@ -110,11 +106,9 @@ // CHECK-DAG: call void @_ZN2C1C2EDF16_(%class.C1* %{{.*}}, half %{{.*}}) S1<_Float16> s1 = { 132.f16 }; -// CHECK-AARCH64-DAG: @_ZZ4mainE2s1 = private unnamed_addr constant %struct.S1 { half 0xH5820 }, align 2 -// CHECK-X86-DAG: @_ZZ4mainE2s1 = private unnamed_addr constant { i16 } { i16 22560 }, align 2 +// CHECK-DAG: @_ZZ4mainE2s1 = private unnamed_addr constant %struct.S1 { half 0xH5820 }, align 2 // CHECK-DAG: [[S1:%[0-9]+]] = bitcast %struct.S1* %{{.*}} to i8* -// CHECK-AARCH64-DAG: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[S1]], i8* bitcast (%struct.S1* @_ZZ4mainE2s1 to i8*), i64 2, i32 2, i1 false) -// CHECK-X86-DAG: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* bitcast ({ i16 }* @_ZZ4mainE2s1 to i8*), i64 2, i32 2, i1 false) +// CHECK-DAG: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[S1]], i8* bitcast (%struct.S1* @_ZZ4mainE2s1 to i8*), i64 2, i32 2, i1 false) _Float16 f4l = func1n(f1l) + func1f(f2l) + c1.func1c(f3l) + c1.func2c(f1l) + func1t(f1l) + s1.mem2 - f1n + f2n; @@ -129,8 +123,7 @@ // CHECK-DAG: store half [[INC]], half* %{{.*}}, align 2 _Float16 arr1l[] = { -1.f16, -0.f16, -11.f16 }; -// CHECK-AARCH64-DAG: @_ZZ4mainE5arr1l = private unnamed_addr constant [3 x half] [half 0xHBC00, half 0xH8000, half 0xHC980], align 2 -// CHECK-X86-DAG: @_ZZ4mainE5arr1l = private unnamed_addr constant [3 x i16] [i16 -17408, i16 -32768, i16 -13952], align 2 +// CHECK-DAG: @_ZZ4mainE5arr1l = private unnamed_addr constant [3 x half] [half 0xHBC00, half 0xH8000, half 0xHC980], align 2 float cvtf = f2n; //CHECK-DAG: [[H2F:%[a-z0-9]+]] = fpext half {{%[0-9]+}} to float Index: test/CodeGenCXX/fp16-mangle.cpp =================================================================== --- test/CodeGenCXX/fp16-mangle.cpp +++ test/CodeGenCXX/fp16-mangle.cpp @@ -4,9 +4,9 @@ template struct S { static int i; }; template <> int S<__fp16, __fp16>::i = 3; -// CHECK-LABEL: define void @_Z1fPDh(i16* %x) +// CHECK-LABEL: define void @_Z1fPDh(half* %x) void f (__fp16 *x) { } -// CHECK-LABEL: define void @_Z1gPDhS_(i16* %x, i16* %y) +// CHECK-LABEL: define void @_Z1gPDhS_(half* %x, half* %y) void g (__fp16 *x, __fp16 *y) { }