Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -939,6 +939,13 @@ LangOpts<"HIPUseNewLaunchAPI">, DefaultFalse, PosFlag, NegFlag, BothFlags<[], " new kernel launching API for HIP">>; +defm hip_fp32_correctly_rounded_divide_sqrt : BoolFOption<"hip-fp32-correctly-rounded-divide-sqrt", + CodeGenOpts<"CorrectlyRoundedDivSqrt">, DefaultTrue, + PosFlag, + NegFlag, + BothFlags<[], " that single precision floating-point divide and sqrt used in " + "the program source are correctly rounded (HIP device compilation only)">>, + ShouldParseIf; defm gpu_allow_device_init : BoolFOption<"gpu-allow-device-init", LangOpts<"GPUAllowDeviceInit">, DefaultFalse, PosFlag, NegFlag, Index: clang/lib/CodeGen/CGExprScalar.cpp =================================================================== --- clang/lib/CodeGen/CGExprScalar.cpp +++ clang/lib/CodeGen/CGExprScalar.cpp @@ -3216,7 +3216,8 @@ llvm::Value *Val; CodeGenFunction::CGFPOptionsRAII FPOptsRAII(CGF, Ops.FPFeatures); Val = Builder.CreateFDiv(Ops.LHS, Ops.RHS, "div"); - if (CGF.getLangOpts().OpenCL && + if ((CGF.getLangOpts().OpenCL || + (CGF.getLangOpts().HIP && CGF.getLangOpts().CUDAIsDevice)) && !CGF.CGM.getCodeGenOpts().CorrectlyRoundedDivSqrt) { // OpenCL v1.1 s7.4: minimum accuracy of single precision / is 2.5ulp // OpenCL v1.2 s5.6.4.2: The -cl-fp32-correctly-rounded-divide-sqrt Index: clang/lib/Driver/ToolChains/HIP.cpp =================================================================== --- clang/lib/Driver/ToolChains/HIP.cpp +++ clang/lib/Driver/ToolChains/HIP.cpp @@ -404,11 +404,17 @@ bool DAZ = DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero, options::OPT_fno_gpu_flush_denormals_to_zero, getDefaultDenormsAreZeroForTarget(Kind)); - // TODO: Check standard C++ flags? - bool FiniteOnly = false; - bool UnsafeMathOpt = false; - bool FastRelaxedMath = false; - bool CorrectSqrt = true; + bool FiniteOnly = + DriverArgs.hasFlag(options::OPT_ffinite_math_only, + options::OPT_fno_finite_math_only, false); + bool UnsafeMathOpt = + DriverArgs.hasFlag(options::OPT_funsafe_math_optimizations, + options::OPT_fno_unsafe_math_optimizations, false); + bool FastRelaxedMath = DriverArgs.hasFlag( + options::OPT_ffast_math, options::OPT_fno_fast_math, false); + bool CorrectSqrt = DriverArgs.hasFlag( + options::OPT_fhip_fp32_correctly_rounded_divide_sqrt, + options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt); bool Wave64 = isWave64(DriverArgs, Kind); if (DriverArgs.hasFlag(options::OPT_fgpu_sanitize, Index: clang/test/Driver/hip-device-libs.hip =================================================================== --- clang/test/Driver/hip-device-libs.hip +++ clang/test/Driver/hip-device-libs.hip @@ -113,6 +113,38 @@ // RUN: %S/Inputs/hip_multiple_inputs/b.hip \ // RUN: 2>&1 | FileCheck %s --check-prefixes=ALL,INST +// Test -fast-math +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx900 \ +// RUN: -ffast-math \ +// RUN: --rocm-path=%S/Inputs/rocm \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=FAST + +// Test -ffinite-math-only +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx900 \ +// RUN: -ffinite-math-only \ +// RUN: --rocm-path=%S/Inputs/rocm \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=FINITE + +// Test -funsafe-math-optimizations +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx900 \ +// RUN: -funsafe-math-optimizations \ +// RUN: --rocm-path=%S/Inputs/rocm \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=UNSAFE + +// Test -fno-hip-fp32-correctly-rounded-divide-sqrt +// RUN: %clang -### -target x86_64-linux-gnu \ +// RUN: --cuda-gpu-arch=gfx900 \ +// RUN: -fno-hip-fp32-correctly-rounded-divide-sqrt \ +// RUN: --rocm-path=%S/Inputs/rocm \ +// RUN: %S/Inputs/hip_multiple_inputs/b.hip \ +// RUN: 2>&1 | FileCheck %s --check-prefixes=DIVSQRT + // ALL-NOT: error: // ALL: {{"[^"]*clang[^"]*"}} // ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}hip.bc" @@ -128,3 +160,23 @@ // ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_wavefrontsize64_on.bc" // ALL-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_isa_version_{{[0-9]+}}.bc" // INST-SAME: "-mlink-builtin-bitcode" "{{.*}}instrument.bc" + +// FAST: "-mlink-builtin-bitcode" "{{.*}}oclc_daz_opt_off.bc" +// FAST-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_on.bc" +// FAST-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_finite_only_on.bc" +// FAST-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_correctly_rounded_sqrt_on.bc" + +// FINITE: "-mlink-builtin-bitcode" "{{.*}}oclc_daz_opt_off.bc" +// FINITE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_off.bc" +// FINITE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_finite_only_on.bc" +// FINITE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_correctly_rounded_sqrt_on.bc" + +// UNSAFE: "-mlink-builtin-bitcode" "{{.*}}oclc_daz_opt_off.bc" +// UNSAFE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_on.bc" +// UNSAFE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_finite_only_off.bc" +// UNSAFE-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_correctly_rounded_sqrt_on.bc" + +// DIVSQRT: "-mlink-builtin-bitcode" "{{.*}}oclc_daz_opt_off.bc" +// DIVSQRT-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_unsafe_math_off.bc" +// DIVSQRT-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_finite_only_off.bc" +// DIVSQRT-SAME: "-mlink-builtin-bitcode" "{{.*}}oclc_correctly_rounded_sqrt_off.bc"