Index: cfe/trunk/include/clang/Driver/Options.td =================================================================== --- cfe/trunk/include/clang/Driver/Options.td +++ cfe/trunk/include/clang/Driver/Options.td @@ -389,6 +389,8 @@ HelpText<"OpenCL language standard to compile for.">; def cl_denorms_are_zero : Flag<["-"], "cl-denorms-are-zero">, Group, Flags<[CC1Option]>, HelpText<"OpenCL only. Allow denormals to be flushed to zero.">; +def cl_fp32_correctly_rounded_divide_sqrt : Flag<["-"], "cl-fp32-correctly-rounded-divide-sqrt">, Group, Flags<[CC1Option]>, + HelpText<"OpenCL only. Specify that single precision floating-point divide and sqrt used in the program source are correctly rounded.">; def client__name : JoinedOrSeparate<["-"], "client_name">; def combine : Flag<["-", "--"], "combine">, Flags<[DriverOption, Unsupported]>; def compatibility__version : JoinedOrSeparate<["-"], "compatibility_version">; Index: cfe/trunk/include/clang/Frontend/CodeGenOptions.def =================================================================== --- cfe/trunk/include/clang/Frontend/CodeGenOptions.def +++ cfe/trunk/include/clang/Frontend/CodeGenOptions.def @@ -111,6 +111,7 @@ ///< Disables use of the inline keyword. CODEGENOPT(NoNaNsFPMath , 1, 0) ///< Assume FP arguments, results not NaN. CODEGENOPT(FlushDenorm , 1, 0) ///< Allow FP denorm numbers to be flushed to zero +CODEGENOPT(CorrectlyRoundedDivSqrt, 1, 0) ///< -cl-fp32-correctly-rounded-divide-sqrt CODEGENOPT(NoZeroInitializedInBSS , 1, 0) ///< -fno-zero-initialized-in-bss. /// \brief Method of Objective-C dispatch to use. ENUM_CODEGENOPT(ObjCDispatchMethod, ObjCDispatchMethodKind, 2, Legacy) Index: cfe/trunk/lib/CodeGen/CGCall.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CGCall.cpp +++ cfe/trunk/lib/CodeGen/CGCall.cpp @@ -1734,6 +1734,9 @@ llvm::utostr(CodeGenOpts.SSPBufferSize)); FuncAttrs.addAttribute("no-signed-zeros-fp-math", llvm::toStringRef(CodeGenOpts.NoSignedZeros)); + FuncAttrs.addAttribute( + "correctly-rounded-divide-sqrt-fp-math", + llvm::toStringRef(CodeGenOpts.CorrectlyRoundedDivSqrt)); if (CodeGenOpts.StackRealignment) FuncAttrs.addAttribute("stackrealign"); Index: cfe/trunk/lib/CodeGen/CGExprScalar.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CGExprScalar.cpp +++ cfe/trunk/lib/CodeGen/CGExprScalar.cpp @@ -2276,8 +2276,13 @@ if (Ops.LHS->getType()->isFPOrFPVectorTy()) { llvm::Value *Val = Builder.CreateFDiv(Ops.LHS, Ops.RHS, "div"); - if (CGF.getLangOpts().OpenCL) { - // OpenCL 1.1 7.4: minimum accuracy of single precision / is 2.5ulp + if (CGF.getLangOpts().OpenCL && + !CGF.CGM.getCodeGenOpts().CorrectlyRoundedDivSqrt) { + // OpenCL v1.1 s7.4: minimum accuracy of single precision / is 2.5ulp + // OpenCL v1.2 s5.6.4.2: The -cl-fp32-correctly-rounded-divide-sqrt + // build option allows an application to specify that single precision + // floating-point divide (x/y and 1/x) and sqrt used in the program + // source are correctly rounded. llvm::Type *ValTy = Val->getType(); if (ValTy->isFloatTy() || (isa(ValTy) && Index: cfe/trunk/lib/Driver/Tools.cpp =================================================================== --- cfe/trunk/lib/Driver/Tools.cpp +++ cfe/trunk/lib/Driver/Tools.cpp @@ -5282,6 +5282,9 @@ if (Args.getLastArg(options::OPT_cl_denorms_are_zero)) { CmdArgs.push_back("-cl-denorms-are-zero"); } + if (Args.getLastArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt)) { + CmdArgs.push_back("-cl-fp32-correctly-rounded-divide-sqrt"); + } // Forward -f options with positive and negative forms; we translate // these by hand. Index: cfe/trunk/lib/Frontend/CompilerInvocation.cpp =================================================================== --- cfe/trunk/lib/Frontend/CompilerInvocation.cpp +++ cfe/trunk/lib/Frontend/CompilerInvocation.cpp @@ -572,6 +572,8 @@ Opts.NoSignedZeros = (Args.hasArg(OPT_fno_signed_zeros) || Args.hasArg(OPT_cl_no_signed_zeros)); Opts.FlushDenorm = Args.hasArg(OPT_cl_denorms_are_zero); + Opts.CorrectlyRoundedDivSqrt = + Args.hasArg(OPT_cl_fp32_correctly_rounded_divide_sqrt); Opts.ReciprocalMath = Args.hasArg(OPT_freciprocal_math); Opts.NoZeroInitializedInBSS = Args.hasArg(OPT_mno_zero_initialized_in_bss); Opts.BackendOptions = Args.getAllArgValues(OPT_backend_option); Index: cfe/trunk/test/CodeGenOpenCL/fpmath.cl =================================================================== --- cfe/trunk/test/CodeGenOpenCL/fpmath.cl +++ cfe/trunk/test/CodeGenOpenCL/fpmath.cl @@ -1,16 +1,23 @@ -// RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown | FileCheck %s +// RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown | FileCheck --check-prefix=CHECK --check-prefix=NODIVOPT %s +// RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown -cl-fp32-correctly-rounded-divide-sqrt | FileCheck --check-prefix=CHECK --check-prefix=DIVOPT %s typedef __attribute__(( ext_vector_type(4) )) float float4; float spscalardiv(float a, float b) { // CHECK: @spscalardiv - // CHECK: fdiv{{.*}}, !fpmath ![[MD:[0-9]+]] + // CHECK: #[[ATTR:[0-9]+]] + // CHECK: fdiv{{.*}}, + // NODIVOPT: !fpmath ![[MD:[0-9]+]] + // DIVOPT-NOT: !fpmath ![[MD:[0-9]+]] return a / b; } float4 spvectordiv(float4 a, float4 b) { // CHECK: @spvectordiv - // CHECK: fdiv{{.*}}, !fpmath ![[MD]] + // CHECK: #[[ATTR]] + // CHECK: fdiv{{.*}}, + // NODIVOPT: !fpmath ![[MD]] + // DIVOPT-NOT: !fpmath ![[MD]] return a / b; } @@ -18,8 +25,13 @@ double dpscalardiv(double a, double b) { // CHECK: @dpscalardiv + // CHECK: #[[ATTR]] // CHECK-NOT: !fpmath return a / b; } -// CHECK: ![[MD]] = !{float 2.500000e+00} +// CHECK: attributes #[[ATTR]] = { +// NODIVOPT: "correctly-rounded-divide-sqrt-fp-math"="false" +// DIVOPT: "correctly-rounded-divide-sqrt-fp-math"="true" +// CHECK: } +// NODIVOPT: ![[MD]] = !{float 2.500000e+00} Index: cfe/trunk/test/Driver/opencl.cl =================================================================== --- cfe/trunk/test/Driver/opencl.cl +++ cfe/trunk/test/Driver/opencl.cl @@ -12,6 +12,7 @@ // RUN: %clang -S -### -cl-mad-enable %s 2>&1 | FileCheck --check-prefix=CHECK-MAD-ENABLE %s // RUN: %clang -S -### -cl-no-signed-zeros %s 2>&1 | FileCheck --check-prefix=CHECK-NO-SIGNED-ZEROS %s // RUN: %clang -S -### -cl-denorms-are-zero %s 2>&1 | FileCheck --check-prefix=CHECK-DENORMS-ARE-ZERO %s +// RUN: %clang -S -### -cl-fp32-correctly-rounded-divide-sqrt %s 2>&1 | FileCheck --check-prefix=CHECK-ROUND-DIV %s // RUN: not %clang -cl-std=c99 -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-C99 %s // RUN: not %clang -cl-std=invalid -DOPENCL %s 2>&1 | FileCheck --check-prefix=CHECK-INVALID %s @@ -29,6 +30,7 @@ // CHECK-MAD-ENABLE: "-cc1" {{.*}} "-cl-mad-enable" // CHECK-NO-SIGNED-ZEROS: "-cc1" {{.*}} "-cl-no-signed-zeros" // CHECK-DENORMS-ARE-ZERO: "-cc1" {{.*}} "-cl-denorms-are-zero" +// CHECK-ROUND-DIV: "-cc1" {{.*}} "-cl-fp32-correctly-rounded-divide-sqrt" // CHECK-C99: error: invalid value 'c99' in '-cl-std=c99' // CHECK-INVALID: error: invalid value 'invalid' in '-cl-std=invalid'