Index: llvm/include/llvm/CodeGen/SelectionDAG.h
===================================================================
--- llvm/include/llvm/CodeGen/SelectionDAG.h
+++ llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1093,6 +1093,21 @@
   SDValue simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y,
                           SDNodeFlags Flags);
 
+  /// Return \p V expressed in the floating-point semantics \p FPSemantic.
+  ///
+  /// When \p V already uses \p FPSemantic it is returned unchanged; otherwise
+  /// it is converted with round-to-nearest-ties-to-even. Whether the
+  /// conversion lost information is not reported to the caller.
+  static APFloat convertToSemantic(const fltSemantics &FPSemantic, APFloat V) {
+    if (&FPSemantic != &V.getSemantics()) {
+      // Convert straight to the requested semantics rather than special-casing
+      // double/single/half, so no format is silently left unconverted.
+      bool LosesInfo;
+      V.convert(FPSemantic, APFloat::rmNearestTiesToEven, &LosesInfo);
+    }
+    return V;
+  }
+
   /// VAArg produces a result and token chain, and takes a pointer
   /// and a source value as input.
   SDValue getVAArg(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr,
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5130,9 +5130,20 @@
     case ISD::FSUB:
       C1.subtract(C2, APFloat::rmNearestTiesToEven);
       return getConstantFP(C1, DL, VT);
-    case ISD::FMUL:
-      C1.multiply(C2, APFloat::rmNearestTiesToEven);
-      return getConstantFP(C1, DL, VT);
+    case ISD::FMUL: {
+      // Multiply in the semantics of the result type.
+      const fltSemantics &FPSemantic = EVTToAPFloatSemantics(VT);
+      APFloat LHS = convertToSemantic(FPSemantic, N1CFP->getValueAPF());
+      APFloat RHS = convertToSemantic(FPSemantic, N2CFP->getValueAPF());
+      APFloat::opStatus Status =
+          LHS.multiply(RHS, APFloat::rmNearestTiesToEven);
+      // Fold only when the product is exact, unless unsafe FP math is enabled;
+      // otherwise leave the switch without folding.
+      if ((Status & APFloat::opInexact) == 0 ||
+          getTarget().Options.UnsafeFPMath)
+        return getConstantFP(LHS, DL, VT);
+      break;
+    }
     case ISD::FDIV:
       C1.divide(C2, APFloat::rmNearestTiesToEven);
       return getConstantFP(C1, DL, VT);
Index: 
llvm/test/CodeGen/AArch64/select_const.ll =================================================================== --- llvm/test/CodeGen/AArch64/select_const.ll +++ llvm/test/CodeGen/AArch64/select_const.ll @@ -553,10 +553,12 @@ ; CHECK-NEXT: adrp x8, .LCPI45_0 ; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI45_0] ; CHECK-NEXT: mov x8, #7378697629483820646 -; CHECK-NEXT: movk x8, #49204, lsl #48 ; CHECK-NEXT: tst w0, #0x1 -; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: fmov d1, #-4.00000000 +; CHECK-NEXT: movk x8, #16404, lsl #48 ; CHECK-NEXT: fcsel d0, d1, d0, ne +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: fmul d0, d0, d1 ; CHECK-NEXT: ret %sel = select i1 %cond, double -4.0, double 23.3 %bo = fmul double %sel, 5.1 Index: llvm/test/CodeGen/AMDGPU/llvm.sin.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.sin.ll +++ llvm/test/CodeGen/AMDGPU/llvm.sin.ll @@ -53,11 +53,10 @@ ; FUNC-LABEL: {{^}}fmf_sin_3x_f32: ; GCN-NOT: v_add_f32 -; GCN: 0x3ef47644 -; GCN: v_mul_f32 +; GCN: v_mul_f32_e32 ; SICIVI: v_fract_f32 ; GFX9-NOT: v_fract_f32 -; GCN: v_sin_f32 +; GCN: v_sin_f32_e32 ; GCN-NOT: v_sin_f32 define amdgpu_kernel void @fmf_sin_3x_f32(float addrspace(1)* %out, float %x) #1 { %y = fmul reassoc float 3.0, %x @@ -140,10 +139,9 @@ ; FUNC-LABEL: {{^}}fmf_sin_cancel_f32: ; GCN-NOT: v_add_f32 -; GCN-NOT: v_mul_f32 ; SICIVI: v_fract_f32 ; GFX9-NOT: v_fract_f32 -; GCN: v_sin_f32 +; GCN: v_sin_f32_e32 ; GCN-NOT: v_sin_f32 define amdgpu_kernel void @fmf_sin_cancel_f32(float addrspace(1)* %out, float %x) #1 { %y = fmul reassoc float 0x401921FB60000000, %x Index: llvm/test/CodeGen/PowerPC/select_const.ll =================================================================== --- llvm/test/CodeGen/PowerPC/select_const.ll +++ llvm/test/CodeGen/PowerPC/select_const.ll @@ -780,31 +780,22 @@ } define double @sel_constants_fmul_constant(i1 %cond) { -; ISEL-LABEL: sel_constants_fmul_constant: -; ISEL: # %bb.0: -; ISEL-NEXT: andi. 
3, 3, 1 -; ISEL-NEXT: addis 4, 2, .LCPI45_0@toc@ha -; ISEL-NEXT: addis 3, 2, .LCPI45_1@toc@ha -; ISEL-NEXT: addi 4, 4, .LCPI45_0@toc@l -; ISEL-NEXT: addi 3, 3, .LCPI45_1@toc@l -; ISEL-NEXT: iselgt 3, 3, 4 -; ISEL-NEXT: lfdx 1, 0, 3 -; ISEL-NEXT: blr -; -; NO_ISEL-LABEL: sel_constants_fmul_constant: -; NO_ISEL: # %bb.0: -; NO_ISEL-NEXT: andi. 3, 3, 1 -; NO_ISEL-NEXT: addis 4, 2, .LCPI45_0@toc@ha -; NO_ISEL-NEXT: addis 3, 2, .LCPI45_1@toc@ha -; NO_ISEL-NEXT: addi 4, 4, .LCPI45_0@toc@l -; NO_ISEL-NEXT: addi 3, 3, .LCPI45_1@toc@l -; NO_ISEL-NEXT: bc 12, 1, .LBB45_2 -; NO_ISEL-NEXT: # %bb.1: -; NO_ISEL-NEXT: ori 3, 4, 0 -; NO_ISEL-NEXT: b .LBB45_2 -; NO_ISEL-NEXT: .LBB45_2: -; NO_ISEL-NEXT: lfdx 1, 0, 3 -; NO_ISEL-NEXT: blr +; ALL-LABEL: sel_constants_fmul_constant: +; ALL: # %bb.0: +; ALL-NEXT: andi. 3, 3, 1 +; ALL-NEXT: bc 12, 1, .LBB45_2 +; ALL-NEXT: # %bb.1: +; ALL-NEXT: addis 3, 2, .LCPI45_0@toc@ha +; ALL-NEXT: lfd 0, .LCPI45_0@toc@l(3) +; ALL-NEXT: b .LBB45_3 +; ALL-NEXT: .LBB45_2: +; ALL-NEXT: addis 3, 2, .LCPI45_1@toc@ha +; ALL-NEXT: lfs 0, .LCPI45_1@toc@l(3) +; ALL-NEXT: .LBB45_3: +; ALL-NEXT: addis 3, 2, .LCPI45_2@toc@ha +; ALL-NEXT: lfd 1, .LCPI45_2@toc@l(3) +; ALL-NEXT: xsmuldp 1, 0, 1 +; ALL-NEXT: blr %sel = select i1 %cond, double -4.0, double 23.3 %bo = fmul double %sel, 5.1 ret double %bo Index: llvm/test/CodeGen/X86/gpu-codegen-combine-fmul-to-cst_inexact.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/gpu-codegen-combine-fmul-to-cst_inexact.ll @@ -0,0 +1,19 @@ +; RUN: llc -mtriple=i686-- -global-isel=0 -stop-after=finalize-isel < %s | FileCheck %s --check-prefix=CHECK-SAFE +; RUN: llc -mtriple=i686-- -global-isel=0 -enable-unsafe-fp-math -stop-after=finalize-isel < %s | FileCheck %s --check-prefix=CHECK-UNSAFE +; Check the different situations for constant folding inexact expressions. +; 1. 
fmul c1, c2 => c3 +; +; Note: given the inexact result, only Unsafe or tested const values which are not +; inexact propagate. + +; Check that pattern #1 applies with Unsafe +define float @inexactMul(float %arg) { +; CHECK-SAFE-LABEL: @inexactMul( +; CHECK-SAFE: MUL_Fp32 +; CHECK-UNSAFE-LABEL: @inexactMul( +; CHECK-UNSAFE-NOT: MUL_Fp32 +; + %mul = fmul float 0x3FC99999A0000000, 0x3FC99999A0000000 + %res = fadd float %mul, %arg + ret float %res +} Index: llvm/test/CodeGen/X86/select_const.ll =================================================================== --- llvm/test/CodeGen/X86/select_const.ll +++ llvm/test/CodeGen/X86/select_const.ll @@ -466,10 +466,12 @@ ; CHECK-NEXT: testb $1, %dil ; CHECK-NEXT: jne .LBB37_1 ; CHECK-NEXT: # %bb.2: -; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1.1883E+2,3.4539999999999999E+1] +; CHECK-NEXT: movapd {{.*#+}} xmm0 = [2.3300000000000001E+1,1.1E+1] +; CHECK-NEXT: mulpd {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB37_1: -; CHECK-NEXT: movaps {{.*#+}} xmm0 = [-2.0399999999999999E+1,3.768E+1] +; CHECK-NEXT: movapd {{.*#+}} xmm0 = [-4.0E+0,1.2E+1] +; CHECK-NEXT: mulpd {{.*}}(%rip), %xmm0 ; CHECK-NEXT: retq %sel = select i1 %cond, <2 x double> , <2 x double> %bo = fmul <2 x double> %sel,