Index: lib/Frontend/CompilerInvocation.cpp
===================================================================
--- lib/Frontend/CompilerInvocation.cpp
+++ lib/Frontend/CompilerInvocation.cpp
@@ -1486,6 +1486,11 @@
   Opts.CUDA = IK == IK_CUDA || IK == IK_PreprocessedCuda ||
               LangStd == LangStandard::lang_cuda;
 
+  // CUDA fuses multiply/add by default; an explicit -ffp-contract=off still
+  // disables it (see test/CodeGenCUDA/fp-contract.cu).
+  if (Opts.CUDA)
+    Opts.DefaultFPContract = 1;
+
   // OpenCL and C++ both have bool, true, false keywords.
   Opts.Bool = Opts.OpenCL || Opts.CPlusPlus;
 
Index: test/CodeGenCUDA/fp-contract.cu
===================================================================
--- /dev/null
+++ test/CodeGenCUDA/fp-contract.cu
@@ -0,0 +1,34 @@
+// REQUIRES: x86-registered-target
+// REQUIRES: nvptx-registered-target
+
+// By default we should fuse multiply/add into llvm.fmuladd intrinsic
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -emit-llvm \
+// RUN:   -disable-llvm-passes -o - %s | FileCheck -check-prefix ENABLED %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \
+// RUN:   -disable-llvm-passes -o - %s | FileCheck -check-prefix ENABLED %s
+
+// Explicit -ffp-contract=off should disable instruction fusing.
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -emit-llvm \
+// RUN:   -ffp-contract=off -disable-llvm-passes -o - %s \
+// RUN:   | FileCheck -check-prefix DISABLED %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \
+// RUN:   -ffp-contract=off -disable-llvm-passes -o - %s \
+// RUN:   | FileCheck -check-prefix DISABLED %s
+
+// Explicit -ffp-contract=fast lets LLVM do the fusing, so no fusing in clang.
+// RUN: %clang_cc1 -fcuda-is-device -triple nvptx-nvidia-cuda -emit-llvm \
+// RUN:   -ffp-contract=fast -disable-llvm-passes -o - %s \
+// RUN:   | FileCheck -check-prefix DISABLED %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm \
+// RUN:   -ffp-contract=fast -disable-llvm-passes -o - %s \
+// RUN:   | FileCheck -check-prefix DISABLED %s
+
+#include "Inputs/cuda.h"
+
+__host__ __device__ float func(float a, float b, float c) { return a + b * c; }
+// ENABLED:       call float @llvm.fmuladd.f32
+// ENABLED-NEXT:  ret
+
+// DISABLED:      fmul float
+// DISABLED-NEXT: fadd float
+// DISABLED-NEXT: ret