Index: llvm/lib/Target/AMDGPU/SIFoldOperands.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1771,10 +1771,12 @@ if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI)) CurrentKnownM0Val = nullptr; + // In IEEE mode, only attempt the omod fold when the instruction carries + // the no-NaNs flag (FmNoNans). NOTE(review): confirm HW applies omod. // TODO: Omod might be OK if there is NSZ only on the source // instruction, and not the omod multiply. - if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) || - !tryFoldOMod(MI)) + if ((IsIEEEMode && !MI.getFlag(MachineInstr::FmNoNans)) || + (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) || !tryFoldOMod(MI)) tryFoldClamp(MI); continue; Index: llvm/test/CodeGen/AMDGPU/omod.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/omod.ll +++ llvm/test/CodeGen/AMDGPU/omod.ll @@ -105,6 +105,15 @@ ret void } +; GCN-LABEL: {{^}}v_omod_div2_f64_nnan: +; GCN: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 1.0 div:2{{$}} +define amdgpu_kernel void @v_omod_div2_f64_nnan(double %a) #5 { + %add = fadd nnan nsz double %a, 1.0 + %div2 = fmul nnan nsz double %add, 0.5 + store double %div2, double addrspace(1)* undef + ret void +} + ; GCN-LABEL: {{^}}v_omod_mul2_f32: ; GCN: v_add_f32_e64 v{{[0-9]+}}, v0, 1.0 mul:2{{$}} define amdgpu_ps void @v_omod_mul2_f32(float %a) #0 { @@ -123,6 +132,15 @@ ret void } +; GCN-LABEL: {{^}}v_omod_mul2_f64_nnan: +; GCN: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 1.0 mul:2{{$}} +define amdgpu_kernel void @v_omod_mul2_f64_nnan(double %a) #5 { + %add = fadd nnan nsz double %a, 1.0 + %div2 = fmul nnan nsz double %add, 2.0 + store double %div2, double addrspace(1)* undef + ret void +} + ; GCN-LABEL: {{^}}v_omod_mul4_f32: ; GCN: v_add_f32_e64 v{{[0-9]+}}, v0, 1.0 mul:4{{$}} define amdgpu_ps void @v_omod_mul4_f32(float %a) #0 { @@ -141,6 +159,15 
@@ ret void } +; GCN-LABEL: {{^}}v_omod_mul4_f64_nnan: +; GCN: v_add_f64 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 1.0 mul:4{{$}} +define amdgpu_kernel void @v_omod_mul4_f64_nnan(double %a) #5 { + %add = fadd nnan nsz double %a, 1.0 + %div2 = fmul nnan nsz double %add, 4.0 + store double %div2, double addrspace(1)* undef + ret void +} + ; GCN-LABEL: {{^}}v_omod_mul4_multi_use_f32: ; GCN: v_add_f32_e32 [[ADD:v[0-9]+]], 1.0, v0{{$}} ; GCN: v_mul_f32_e32 v{{[0-9]+}}, 4.0, [[ADD]]{{$}}