diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -8223,22 +8223,24 @@
   SDValue Arg = Op.getOperand(0);
   SDValue TrigVal;
 
-  // TODO: Should this propagate fast-math-flags?
+  // Propagate fast-math flags so that the multiply we introduce can be folded
+  // if Arg is already the result of a multiply by a constant.
+  SDNodeFlags Flags = Op->getFlags();
 
   SDValue OneOver2Pi = DAG.getConstantFP(0.5 * numbers::inv_pi, DL, VT);
 
   if (Subtarget->hasTrigReducedRange()) {
-    SDValue MulVal = DAG.getNode(ISD::FMUL, DL, VT, Arg, OneOver2Pi);
-    TrigVal = DAG.getNode(AMDGPUISD::FRACT, DL, VT, MulVal);
+    SDValue MulVal = DAG.getNode(ISD::FMUL, DL, VT, Arg, OneOver2Pi, Flags);
+    TrigVal = DAG.getNode(AMDGPUISD::FRACT, DL, VT, MulVal, Flags);
   } else {
-    TrigVal = DAG.getNode(ISD::FMUL, DL, VT, Arg, OneOver2Pi);
+    TrigVal = DAG.getNode(ISD::FMUL, DL, VT, Arg, OneOver2Pi, Flags);
   }
 
   switch (Op.getOpcode()) {
   case ISD::FCOS:
-    return DAG.getNode(AMDGPUISD::COS_HW, SDLoc(Op), VT, TrigVal);
+    return DAG.getNode(AMDGPUISD::COS_HW, SDLoc(Op), VT, TrigVal, Flags);
   case ISD::FSIN:
-    return DAG.getNode(AMDGPUISD::SIN_HW, SDLoc(Op), VT, TrigVal);
+    return DAG.getNode(AMDGPUISD::SIN_HW, SDLoc(Op), VT, TrigVal, Flags);
   default:
     llvm_unreachable("Wrong trig opcode");
   }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.sin.ll b/llvm/test/CodeGen/AMDGPU/llvm.sin.ll
--- a/llvm/test/CodeGen/AMDGPU/llvm.sin.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.sin.ll
@@ -51,6 +51,22 @@
   ret void
 }
 
+; reassoc on both fmul and sin: 3.0 folds into 1/(2*pi), giving 3/(2*pi).
+; FUNC-LABEL: {{^}}fmf_sin_3x_f32:
+; GCN-NOT: v_add_f32
+; GCN: 0x3ef47644
+; GCN: v_mul_f32
+; SICIVI: v_fract_f32
+; GFX9-NOT: v_fract_f32
+; GCN: v_sin_f32
+; GCN-NOT: v_sin_f32
+define amdgpu_kernel void @fmf_sin_3x_f32(float addrspace(1)* %out, float %x) #1 {
+  %y = fmul reassoc float 3.0, %x
+  %sin = call reassoc float @llvm.sin.f32(float %y)
+  store float %sin, float addrspace(1)* %out
+  ret void
+}
+
 ; FUNC-LABEL: {{^}}safe_sin_2x_f32:
 ; GCN: v_add_f32
 ; GCN: v_mul_f32
@@ -80,6 +96,67 @@
   ret void
 }
 
+; reassoc on both fmul and sin: 2.0 folds into 1/(2*pi), giving 1/pi.
+; FUNC-LABEL: {{^}}fmf_sin_2x_f32:
+; GCN-NOT: v_add_f32
+; GCN: 0x3ea2f983
+; GCN: v_mul_f32
+; SICIVI: v_fract_f32
+; GFX9-NOT: v_fract_f32
+; GCN: v_sin_f32
+; GCN-NOT: v_sin_f32
+define amdgpu_kernel void @fmf_sin_2x_f32(float addrspace(1)* %out, float %x) #1 {
+  %y = fmul reassoc float 2.0, %x
+  %sin = call reassoc float @llvm.sin.f32(float %y)
+  store float %sin, float addrspace(1)* %out
+  ret void
+}
+
+; No fast-math flags: the 2*pi multiply and the 1/(2*pi) multiply both stay.
+; FUNC-LABEL: {{^}}safe_sin_cancel_f32:
+; GCN: v_mul_f32
+; GCN: v_mul_f32
+; SICIVI: v_fract_f32
+; GFX9-NOT: v_fract_f32
+; GCN: v_sin_f32
+; GCN-NOT: v_sin_f32
+define amdgpu_kernel void @safe_sin_cancel_f32(float addrspace(1)* %out, float %x) #1 {
+  %y = fmul float 0x401921FB60000000, %x
+  %sin = call float @llvm.sin.f32(float %y)
+  store float %sin, float addrspace(1)* %out
+  ret void
+}
+
+; Under attribute group #2 (presumably unsafe-fp-math) the multiplies cancel.
+; FUNC-LABEL: {{^}}unsafe_sin_cancel_f32:
+; GCN-NOT: v_add_f32
+; GCN-NOT: v_mul_f32
+; SICIVI: v_fract_f32
+; GFX9-NOT: v_fract_f32
+; GCN: v_sin_f32
+; GCN-NOT: v_sin_f32
+define amdgpu_kernel void @unsafe_sin_cancel_f32(float addrspace(1)* %out, float %x) #2 {
+  %y = fmul float 0x401921FB60000000, %x
+  %sin = call float @llvm.sin.f32(float %y)
+  store float %sin, float addrspace(1)* %out
+  ret void
+}
+
+; reassoc on both fmul and sin: 2*pi * 1/(2*pi) cancels, leaving no multiply.
+; FUNC-LABEL: {{^}}fmf_sin_cancel_f32:
+; GCN-NOT: v_add_f32
+; GCN-NOT: v_mul_f32
+; SICIVI: v_fract_f32
+; GFX9-NOT: v_fract_f32
+; GCN: v_sin_f32
+; GCN-NOT: v_sin_f32
+define amdgpu_kernel void @fmf_sin_cancel_f32(float addrspace(1)* %out, float %x) #1 {
+  %y = fmul reassoc float 0x401921FB60000000, %x
+  %sin = call reassoc float @llvm.sin.f32(float %y)
+  store float %sin, float addrspace(1)* %out
+  ret void
+}
+
 ; FUNC-LABEL: {{^}}sin_v4f32:
 ; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}
 ; EG: SIN * T{{[0-9]+\.[XYZW], PV\.[XYZW]}}