Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -2532,11 +2532,14 @@ case Builtin::BI__builtin_sqrtf: case Builtin::BI__builtin_sqrtf16: case Builtin::BI__builtin_sqrtl: - case Builtin::BI__builtin_sqrtf128: - return RValue::get(emitUnaryMaybeConstrainedFPBuiltin(*this, E, - Intrinsic::sqrt, - Intrinsic::experimental_constrained_sqrt)); - + case Builtin::BI__builtin_sqrtf128: { + llvm::Value *Call = emitUnaryMaybeConstrainedFPBuiltin( + *this, E, + Intrinsic::sqrt, + Intrinsic::experimental_constrained_sqrt); + SetSqrtOrDivFPAccuracy(Call); + return RValue::get(Call); + } case Builtin::BItrunc: case Builtin::BItruncf: case Builtin::BItruncl: Index: clang/lib/CodeGen/CGExpr.cpp =================================================================== --- clang/lib/CodeGen/CGExpr.cpp +++ clang/lib/CodeGen/CGExpr.cpp @@ -5585,6 +5585,24 @@ cast(Val)->setMetadata(llvm::LLVMContext::MD_fpmath, Node); } +void CodeGenFunction::SetSqrtOrDivFPAccuracy(llvm::Value *Val) { + llvm::Type *EltTy = Val->getType()->getScalarType(); + if (!EltTy->isFloatTy()) + return; + + if ((getLangOpts().OpenCL && + !CGM.getCodeGenOpts().OpenCLCorrectlyRoundedDivSqrt) || + (getLangOpts().HIP && getLangOpts().CUDAIsDevice && + !CGM.getCodeGenOpts().HIPCorrectlyRoundedDivSqrt)) { + // OpenCL v1.1 s7.4: minimum accuracy of single precision / is 2.5ulp + // OpenCL v1.2 s5.6.4.2: The -cl-fp32-correctly-rounded-divide-sqrt + // build option allows an application to specify that single precision + // floating-point divide (x/y and 1/x) and sqrt used in the program + // source are correctly rounded. + SetFPAccuracy(Val, 2.5); + } +} + namespace { struct LValueOrRValue { LValue LV; Index: clang/lib/CodeGen/CGExprScalar.cpp =================================================================== --- clang/lib/CodeGen/CGExprScalar.cpp +++ clang/lib/CodeGen/CGExprScalar.cpp @@ -3478,21 +3478,7 @@ llvm::Value *Val; CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Ops.FPFeatures); Val = Builder.CreateFDiv(Ops.LHS, Ops.RHS, "div"); - if ((CGF.getLangOpts().OpenCL && - !CGF.CGM.getCodeGenOpts().OpenCLCorrectlyRoundedDivSqrt) || - (CGF.getLangOpts().HIP && CGF.getLangOpts().CUDAIsDevice && - !CGF.CGM.getCodeGenOpts().HIPCorrectlyRoundedDivSqrt)) { - // OpenCL v1.1 s7.4: minimum accuracy of single precision / is 2.5ulp - // OpenCL v1.2 s5.6.4.2: The -cl-fp32-correctly-rounded-divide-sqrt - // build option allows an application to specify that single precision - // floating-point divide (x/y and 1/x) and sqrt used in the program - // source are correctly rounded. - llvm::Type *ValTy = Val->getType(); - if (ValTy->isFloatTy() || - (isa(ValTy) && - cast(ValTy)->getElementType()->isFloatTy())) - CGF.SetFPAccuracy(Val, 2.5); - } + CGF.SetSqrtOrDivFPAccuracy(Val); return Val; } else if (Ops.isFixedPointOp()) Index: clang/lib/CodeGen/CodeGenFunction.h =================================================================== --- clang/lib/CodeGen/CodeGenFunction.h +++ clang/lib/CodeGen/CodeGenFunction.h @@ -4688,6 +4688,10 @@ /// point operation, expressed as the maximum relative error in ulp. void SetFPAccuracy(llvm::Value *Val, float Accuracy); + /// SetFPAccuracy - Set the minimum required accuracy of the given fdiv or + /// sqrt operation based on CodeGenOpts. + void SetSqrtOrDivFPAccuracy(llvm::Value *Val); + /// Set the codegen fast-math flags. void SetFastMathFlags(FPOptions FPFeatures); Index: clang/test/CodeGenCUDA/correctly-rounded-div.cu =================================================================== --- clang/test/CodeGenCUDA/correctly-rounded-div.cu +++ clang/test/CodeGenCUDA/correctly-rounded-div.cu @@ -32,4 +32,18 @@ return a / b; } -// NCRDIV: ![[MD]] = !{float 2.500000e+00} +// COMMON-LABEL: @_Z12spscalarsqrt +// NCRDIV: call contract float @llvm.sqrt.f32(float %{{.+}}), !fpmath ![[MD:[0-9]+]] +// CRDIV: call contract float @llvm.sqrt.f32(float %{{.+}}){{$}} +__device__ float spscalarsqrt(float a) { + return __builtin_sqrtf(a); +} + +// COMMON-LABEL: @_Z12dpscalarsqrt +// COMMON: call contract double @llvm.sqrt.f64(double %{{.+}}){{$}} +// COMMON-NOT: !fpmath +__device__ double dpscalarsqrt(double a) { + return __builtin_sqrt(a); +} + +// NCRSQRT: ![[MD]] = !{float 2.500000e+00} Index: clang/test/CodeGenOpenCL/fpmath.cl =================================================================== --- clang/test/CodeGenOpenCL/fpmath.cl +++ clang/test/CodeGenOpenCL/fpmath.cl @@ -21,6 +21,13 @@ return a / b; } +float spscalarsqrt(float a) { + // CHECK-LABEL: @spscalarsqrt + // NODIVOPT: call float @llvm.sqrt.f32(float %{{.+}}), !fpmath ![[MD:[0-9]+]] + // DIVOPT: call float @llvm.sqrt.f32(float %{{.+}}){{$}} + return __builtin_sqrtf(a); +} + #if __OPENCL_C_VERSION__ >=120 void printf(constant char* fmt, ...); @@ -34,11 +41,26 @@ #ifndef NOFP64 #pragma OPENCL EXTENSION cl_khr_fp64 : enable +typedef __attribute__(( ext_vector_type(4) )) double double4; + double dpscalardiv(double a, double b) { // CHECK: @dpscalardiv // CHECK-NOT: !fpmath return a / b; } + +double4 dpvectordiv(double4 a, double4 b) { + // CHECK: @dpvectordiv + // CHECK-NOT: !fpmath + return a / b; +} + +double dpscalarsqrt(double a) { + // CHECK-LABEL: @dpscalarsqrt + // CHECK: call double @llvm.sqrt.f64(double %{{.+}}){{$}} + return __builtin_sqrt(a); +} + #endif // NODIVOPT: ![[MD]] = !{float 2.500000e+00}