diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -822,7 +822,7 @@ Register Denom = MI.getOperand(4).getReg(); unsigned ChooseDenom = MI.getOperand(5).getImm(); - Register Src0 = ChooseDenom != 0 ? Numer : Denom; + Register Src0 = ChooseDenom == 0 ? Numer : Denom; auto MIB = BuildMI(*MBB, &MI, DL, TII.get(Opc), Dst0) .addDef(Dst1) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -6007,7 +6007,7 @@ // intrinsic has the numerator as the first operand to match a normal // division operation. - SDValue Src0 = Param->isAllOnesValue() ? Numerator : Denominator; + SDValue Src0 = !Param->isAllOnesValue() ? Numerator : Denominator; return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, Op->getVTList(), Src0, Denominator, Numerator); diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll @@ -18,7 +18,7 @@ ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:4 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_div_scale_f32 v0, s[2:3], v0, v0, v2 +; GFX7-NEXT: v_div_scale_f32 v0, s[2:3], v2, v0, v2 ; GFX7-NEXT: s_mov_b64 s[2:3], s[6:7] ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm @@ -38,7 +38,7 @@ ; GFX8-NEXT: flat_load_dword v0, v[0:1] ; GFX8-NEXT: flat_load_dword v1, v[2:3] ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_div_scale_f32 v2, s[2:3], v1, v1, v0 +; GFX8-NEXT: v_div_scale_f32 v2, s[2:3], v0, v1, v0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_store_dword v[0:1], v2 @@ -60,7 +60,7 @@ ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: global_load_dword v1, v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v2, s2, v1, v1, v0 +; GFX10-NEXT: v_div_scale_f32 v2, s2, v0, v1, v0 ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: global_store_dword v[0:1], v2, off @@ -92,7 +92,7 @@ ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 offset:4 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_div_scale_f32 v0, s[2:3], v2, v0, v2 +; GFX7-NEXT: v_div_scale_f32 v0, s[2:3], v0, v0, v2 ; GFX7-NEXT: s_mov_b64 s[2:3], s[6:7] ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm @@ -112,7 +112,7 @@ ; GFX8-NEXT: flat_load_dword v0, v[0:1] ; GFX8-NEXT: flat_load_dword v1, v[2:3] ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_div_scale_f32 v2, s[2:3], v0, v1, v0 +; GFX8-NEXT: v_div_scale_f32 v2, s[2:3], v1, v1, v0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_store_dword v[0:1], v2 @@ -134,7 +134,7 @@ ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: global_load_dword v1, v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v2, s2, v0, v1, v0 +; GFX10-NEXT: v_div_scale_f32 v2, s2, v1, v1, v0 ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: global_store_dword v[0:1], v2, off @@ -169,7 +169,7 @@ ; GFX7-NEXT: flat_load_dwordx2 v[0:1], v[0:1] ; GFX7-NEXT: flat_load_dwordx2 v[2:3], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_div_scale_f64 v[0:1], s[2:3], v[2:3], v[2:3], v[0:1] +; GFX7-NEXT: v_div_scale_f64 v[0:1], s[2:3], v[0:1], v[2:3], v[0:1] ; GFX7-NEXT: v_mov_b32_e32 v3, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s0 ; GFX7-NEXT: flat_store_dwordx2 v[2:3], v[0:1] @@ -191,7 +191,7 @@ ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] ; GFX8-NEXT: flat_load_dwordx2 v[2:3], v[2:3] ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_div_scale_f64 v[0:1], s[2:3], v[2:3], v[2:3], v[0:1] +; GFX8-NEXT: v_div_scale_f64 v[0:1], s[2:3], v[0:1], v[2:3], v[0:1] ; GFX8-NEXT: v_mov_b32_e32 v3, s1 ; GFX8-NEXT: v_mov_b32_e32 v2, s0 ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] @@ -214,7 +214,7 @@ ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: global_load_dwordx2 v[2:3], v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f64 v[0:1], s2, v[2:3], v[2:3], v[0:1] +; GFX10-NEXT: v_div_scale_f64 v[0:1], s2, v[0:1], v[2:3], v[0:1] ; GFX10-NEXT: v_mov_b32_e32 v3, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s0 ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off @@ -249,7 +249,7 @@ ; GFX7-NEXT: flat_load_dwordx2 v[0:1], v[0:1] ; GFX7-NEXT: flat_load_dwordx2 v[2:3], v[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_div_scale_f64 v[0:1], s[2:3], v[0:1], v[2:3], v[0:1] +; GFX7-NEXT: v_div_scale_f64 v[0:1], s[2:3], v[2:3], v[2:3], v[0:1] ; GFX7-NEXT: v_mov_b32_e32 v3, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s0 ; GFX7-NEXT: flat_store_dwordx2 v[2:3], v[0:1] @@ -271,7 +271,7 @@ ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] ; GFX8-NEXT: flat_load_dwordx2 v[2:3], v[2:3] ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_div_scale_f64 v[0:1], s[2:3], v[0:1], v[2:3], v[0:1] +; GFX8-NEXT: v_div_scale_f64 v[0:1], s[2:3], v[2:3], v[2:3], v[0:1] ; GFX8-NEXT: v_mov_b32_e32 v3, s1 ; GFX8-NEXT: v_mov_b32_e32 v2, s0 ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] @@ -294,7 +294,7 @@ ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: global_load_dwordx2 v[2:3], v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f64 v[0:1], s2, v[0:1], v[2:3], v[0:1] +; GFX10-NEXT: v_div_scale_f64 v[0:1], s2, v[2:3], v[2:3], v[0:1] ; GFX10-NEXT: v_mov_b32_e32 v3, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s0 ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off @@ -327,7 +327,7 @@ ; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_div_scale_f32 v0, s[0:1], v0, v0, s8 +; GFX7-NEXT: v_div_scale_f32 v0, s[0:1], s8, v0, s8 ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX7-NEXT: s_endpgm ; @@ -344,7 +344,7 @@ ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v3, v1, vcc ; GFX8-NEXT: flat_load_dword v0, v[0:1] ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_div_scale_f32 v2, s[0:1], v0, v0, s0 +; GFX8-NEXT: v_div_scale_f32 v2, s[0:1], s0, v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v0, s4 ; GFX8-NEXT: v_mov_b32_e32 v1, s5 ; GFX8-NEXT: flat_store_dword v[0:1], v2 @@ -364,7 +364,7 @@ ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v2, s0, v0, v0, s0 +; GFX10-NEXT: v_div_scale_f32 v2, s0, s0, v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-NEXT: global_store_dword v[0:1], v2, off @@ -395,7 +395,7 @@ ; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_div_scale_f32 v0, s[0:1], s8, v0, s8 +; GFX7-NEXT: v_div_scale_f32 v0, s[0:1], v0, v0, s8 ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX7-NEXT: s_endpgm ; @@ -412,7 +412,7 @@ ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v3, v1, vcc ; GFX8-NEXT: flat_load_dword v0, v[0:1] ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_div_scale_f32 v2, s[0:1], s0, v0, s0 +; GFX8-NEXT: v_div_scale_f32 v2, s[0:1], v0, v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v0, s4 ; GFX8-NEXT: v_mov_b32_e32 v1, s5 ; GFX8-NEXT: flat_store_dword v[0:1], v2 @@ -432,7 +432,7 @@ ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v2, s0, s0, v0, s0 +; GFX10-NEXT: v_div_scale_f32 v2, s0, v0, v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-NEXT: global_store_dword v[0:1], v2, off @@ -463,7 +463,7 @@ ; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_div_scale_f32 v0, s[0:1], s8, s8, v0 +; GFX7-NEXT: v_div_scale_f32 v0, s[0:1], v0, s8, v0 ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX7-NEXT: s_endpgm ; @@ -480,7 +480,7 @@ ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v3, v1, vcc ; GFX8-NEXT: flat_load_dword v0, v[0:1] ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_div_scale_f32 v2, s[0:1], s0, s0, v0 +; GFX8-NEXT: v_div_scale_f32 v2, s[0:1], v0, s0, v0 ; GFX8-NEXT: v_mov_b32_e32 v0, s4 ; GFX8-NEXT: v_mov_b32_e32 v1, s5 ; GFX8-NEXT: flat_store_dword v[0:1], v2 @@ -500,7 +500,7 @@ ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v2, s0, s0, s0, v0 +; GFX10-NEXT: v_div_scale_f32 v2, s0, v0, s0, v0 ; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-NEXT: global_store_dword v[0:1], v2, off @@ -531,7 +531,7 @@ ; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_div_scale_f32 v0, s[0:1], v0, s8, v0 +; GFX7-NEXT: v_div_scale_f32 v0, s[0:1], s8, s8, v0 ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX7-NEXT: s_endpgm ; @@ -548,7 +548,7 @@ ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v3, v1, vcc ; GFX8-NEXT: flat_load_dword v0, v[0:1] ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_div_scale_f32 v2, s[0:1], v0, s0, v0 +; GFX8-NEXT: v_div_scale_f32 v2, s[0:1], s0, s0, v0 ; GFX8-NEXT: v_mov_b32_e32 v0, s4 ; GFX8-NEXT: v_mov_b32_e32 v1, s5 ; GFX8-NEXT: flat_store_dword v[0:1], v2 @@ -568,7 +568,7 @@ ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v2, s0, v0, s0, v0 +; GFX10-NEXT: v_div_scale_f32 v2, s0, s0, s0, v0 ; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-NEXT: global_store_dword v[0:1], v2, off @@ -600,7 +600,7 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s4 ; GFX7-NEXT: v_mov_b32_e32 v3, s5 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_div_scale_f64 v[0:1], s[0:1], v[0:1], v[0:1], s[0:1] +; GFX7-NEXT: v_div_scale_f64 v[0:1], s[0:1], s[0:1], v[0:1], s[0:1] ; GFX7-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX7-NEXT: s_endpgm ; @@ -619,7 +619,7 @@ ; GFX8-NEXT: v_mov_b32_e32 v2, s4 ; GFX8-NEXT: v_mov_b32_e32 v3, s5 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_div_scale_f64 v[0:1], s[0:1], v[0:1], v[0:1], s[0:1] +; GFX8-NEXT: v_div_scale_f64 v[0:1], s[0:1], s[0:1], v[0:1], s[0:1] ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX8-NEXT: s_endpgm ; @@ -639,7 +639,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v3, s5 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f64 v[0:1], s0, v[0:1], v[0:1], s[0:1] +; GFX10-NEXT: v_div_scale_f64 v[0:1], s0, s[0:1], v[0:1], s[0:1] ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -669,7 +669,7 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s4 ; GFX7-NEXT: v_mov_b32_e32 v3, s5 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_div_scale_f64 v[0:1], s[0:1], s[0:1], v[0:1], s[0:1] +; GFX7-NEXT: v_div_scale_f64 v[0:1], s[0:1], v[0:1], v[0:1], s[0:1] ; GFX7-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX7-NEXT: s_endpgm ; @@ -688,7 +688,7 @@ ; GFX8-NEXT: v_mov_b32_e32 v2, s4 ; GFX8-NEXT: v_mov_b32_e32 v3, s5 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_div_scale_f64 v[0:1], s[0:1], s[0:1], v[0:1], s[0:1] +; GFX8-NEXT: v_div_scale_f64 v[0:1], s[0:1], v[0:1], v[0:1], s[0:1] ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX8-NEXT: s_endpgm ; @@ -708,7 +708,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v3, s5 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f64 v[0:1], s0, s[0:1], v[0:1], s[0:1] +; GFX10-NEXT: v_div_scale_f64 v[0:1], s0, v[0:1], v[0:1], s[0:1] ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -738,7 +738,7 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s4 ; GFX7-NEXT: v_mov_b32_e32 v3, s5 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_div_scale_f64 v[0:1], s[0:1], s[0:1], s[0:1], v[0:1] +; GFX7-NEXT: v_div_scale_f64 v[0:1], s[0:1], v[0:1], s[0:1], v[0:1] ; GFX7-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX7-NEXT: s_endpgm ; @@ -757,7 +757,7 @@ ; GFX8-NEXT: v_mov_b32_e32 v2, s4 ; GFX8-NEXT: v_mov_b32_e32 v3, s5 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_div_scale_f64 v[0:1], s[0:1], s[0:1], s[0:1], v[0:1] +; GFX8-NEXT: v_div_scale_f64 v[0:1], s[0:1], v[0:1], s[0:1], v[0:1] ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX8-NEXT: s_endpgm ; @@ -777,7 +777,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v3, s5 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f64 v[0:1], s0, s[0:1], s[0:1], v[0:1] +; GFX10-NEXT: v_div_scale_f64 v[0:1], s0, v[0:1], s[0:1], v[0:1] ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -807,7 +807,7 @@ ; GFX7-NEXT: v_mov_b32_e32 v2, s4 ; GFX7-NEXT: v_mov_b32_e32 v3, s5 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_div_scale_f64 v[0:1], s[0:1], v[0:1], s[0:1], v[0:1] +; GFX7-NEXT: v_div_scale_f64 v[0:1], s[0:1], s[0:1], s[0:1], v[0:1] ; GFX7-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX7-NEXT: s_endpgm ; @@ -826,7 +826,7 @@ ; GFX8-NEXT: v_mov_b32_e32 v2, s4 ; GFX8-NEXT: v_mov_b32_e32 v3, s5 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_div_scale_f64 v[0:1], s[0:1], v[0:1], s[0:1], v[0:1] +; GFX8-NEXT: v_div_scale_f64 v[0:1], s[0:1], s[0:1], s[0:1], v[0:1] ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] ; GFX8-NEXT: s_endpgm ; @@ -846,7 +846,7 @@ ; GFX10-NEXT: v_mov_b32_e32 v3, s5 ; GFX10-NEXT: global_load_dwordx2 v[0:1], v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f64 v[0:1], s0, v[0:1], s[0:1], v[0:1] +; GFX10-NEXT: v_div_scale_f64 v[0:1], s0, s[0:1], s[0:1], v[0:1] ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off ; GFX10-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -870,7 +870,7 @@ ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 -; GFX7-NEXT: v_div_scale_f32 v0, s[0:1], v0, v0, s2 +; GFX7-NEXT: v_div_scale_f32 v0, s[0:1], s2, v0, s2 ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX7-NEXT: s_endpgm ; @@ -881,7 +881,7 @@ ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s3 -; GFX8-NEXT: v_div_scale_f32 v2, s[2:3], v0, v0, s2 +; GFX8-NEXT: v_div_scale_f32 v2, s[2:3], s2, v0, s2 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_store_dword v[0:1], v2 @@ -894,7 +894,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: ; implicit-def: $vcc_hi ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v2, s2, s3, s3, s2 +; GFX10-NEXT: v_div_scale_f32 v2, s2, s2, s3, s2 ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: global_store_dword v[0:1], v2, off @@ -915,7 +915,7 @@ ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s0 -; GFX7-NEXT: v_div_scale_f32 v0, s[0:1], s2, v0, s2 +; GFX7-NEXT: v_div_scale_f32 v0, s[0:1], v0, v0, s2 ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX7-NEXT: s_endpgm ; @@ -926,7 +926,7 @@ ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s3 -; GFX8-NEXT: v_div_scale_f32 v2, s[2:3], s2, v0, s2 +; GFX8-NEXT: v_div_scale_f32 v2, s[2:3], v0, v0, s2 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_store_dword v[0:1], v2 @@ -939,7 +939,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: ; implicit-def: $vcc_hi ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v2, s2, s2, s3, s2 +; GFX10-NEXT: v_div_scale_f32 v2, s2, s3, s3, s2 ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: global_store_dword v[0:1], v2, off @@ -959,7 +959,7 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 -; GFX7-NEXT: v_div_scale_f64 v[0:1], s[2:3], v[0:1], v[0:1], s[2:3] +; GFX7-NEXT: v_div_scale_f64 v[0:1], s[2:3], s[2:3], v[0:1], s[2:3] ; GFX7-NEXT: v_mov_b32_e32 v3, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s0 ; GFX7-NEXT: flat_store_dwordx2 v[2:3], v[0:1] @@ -973,7 +973,7 @@ ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s4 ; GFX8-NEXT: v_mov_b32_e32 v1, s5 -; GFX8-NEXT: v_div_scale_f64 v[0:1], s[2:3], v[0:1], v[0:1], s[2:3] +; GFX8-NEXT: v_div_scale_f64 v[0:1], s[2:3], s[2:3], v[0:1], s[2:3] ; GFX8-NEXT: v_mov_b32_e32 v3, s1 ; GFX8-NEXT: v_mov_b32_e32 v2, s0 ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] @@ -986,7 +986,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: ; implicit-def: $vcc_hi ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_div_scale_f64 v[0:1], s2, s[4:5], s[4:5], s[2:3] +; GFX10-NEXT: v_div_scale_f64 v[0:1], s2, s[2:3], s[4:5], s[2:3] ; GFX10-NEXT: v_mov_b32_e32 v3, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s0 ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off @@ -1006,7 +1006,7 @@ ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v0, s4 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 -; GFX7-NEXT: v_div_scale_f64 v[0:1], s[2:3], s[2:3], v[0:1], s[2:3] +; GFX7-NEXT: v_div_scale_f64 v[0:1], s[2:3], v[0:1], v[0:1], s[2:3] ; GFX7-NEXT: v_mov_b32_e32 v3, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s0 ; GFX7-NEXT: flat_store_dwordx2 v[2:3], v[0:1] @@ -1020,7 +1020,7 @@ ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s4 ; GFX8-NEXT: v_mov_b32_e32 v1, s5 -; GFX8-NEXT: v_div_scale_f64 v[0:1], s[2:3], s[2:3], v[0:1], s[2:3] +; GFX8-NEXT: v_div_scale_f64 v[0:1], s[2:3], v[0:1], v[0:1], s[2:3] ; GFX8-NEXT: v_mov_b32_e32 v3, s1 ; GFX8-NEXT: v_mov_b32_e32 v2, s0 ; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1] @@ -1033,7 +1033,7 @@ ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX10-NEXT: ; implicit-def: $vcc_hi ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_div_scale_f64 v[0:1], s2, s[2:3], s[4:5], s[2:3] +; GFX10-NEXT: v_div_scale_f64 v[0:1], s2, s[4:5], s[4:5], s[2:3] ; GFX10-NEXT: v_mov_b32_e32 v3, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s0 ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off @@ -1057,7 +1057,7 @@ ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_div_scale_f32 v0, s[2:3], v0, v0, 1.0 +; GFX7-NEXT: v_div_scale_f32 v0, s[2:3], 1.0, v0, 1.0 ; GFX7-NEXT: s_mov_b64 s[2:3], s[6:7] ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm @@ -1074,7 +1074,7 @@ ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v3, v1, vcc ; GFX8-NEXT: flat_load_dword v0, v[0:1] ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_div_scale_f32 v2, s[2:3], v0, v0, 1.0 +; GFX8-NEXT: v_div_scale_f32 v2, s[2:3], 1.0, v0, 1.0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_store_dword v[0:1], v2 @@ -1093,7 +1093,7 @@ ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v2, s2, v0, v0, 1.0 +; GFX10-NEXT: v_div_scale_f32 v2, s2, 1.0, v0, 1.0 ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: global_store_dword v[0:1], v2, off @@ -1121,7 +1121,7 @@ ; GFX7-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_div_scale_f32 v0, s[2:3], 2.0, 2.0, v0 +; GFX7-NEXT: v_div_scale_f32 v0, s[2:3], v0, 2.0, v0 ; GFX7-NEXT: s_mov_b64 s[2:3], s[6:7] ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm @@ -1138,7 +1138,7 @@ ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v3, v1, vcc ; GFX8-NEXT: flat_load_dword v0, v[0:1] ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_div_scale_f32 v2, s[2:3], 2.0, 2.0, v0 +; GFX8-NEXT: v_div_scale_f32 v2, s[2:3], v0, 2.0, v0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_store_dword v[0:1], v2 @@ -1157,7 +1157,7 @@ ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v3, v1, vcc_lo ; GFX10-NEXT: global_load_dword v0, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v2, s2, 2.0, 2.0, v0 +; GFX10-NEXT: v_div_scale_f32 v2, s2, v0, 2.0, v0 ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: global_store_dword v[0:1], v2, off @@ -1188,7 +1188,7 @@ ; GFX7-NEXT: s_waitcnt vmcnt(1) ; GFX7-NEXT: v_and_b32_e32 v1, 0x7fffffff, v2 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_div_scale_f32 v0, s[2:3], v0, v0, v1 +; GFX7-NEXT: v_div_scale_f32 v0, s[2:3], v1, v0, v1 ; GFX7-NEXT: s_mov_b64 s[2:3], s[6:7] ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm @@ -1210,7 +1210,7 @@ ; GFX8-NEXT: s_waitcnt vmcnt(1) lgkmcnt(1) ; GFX8-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_div_scale_f32 v2, s[2:3], v1, v1, v0 +; GFX8-NEXT: v_div_scale_f32 v2, s[2:3], v0, v1, v0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_store_dword v[0:1], v2 @@ -1234,7 +1234,7 @@ ; GFX10-NEXT: s_waitcnt vmcnt(1) ; GFX10-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v2, s2, v1, v1, v0 +; GFX10-NEXT: v_div_scale_f32 v2, s2, v0, v1, v0 ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: global_store_dword v[0:1], v2, off @@ -1269,7 +1269,7 @@ ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0 -; GFX7-NEXT: v_div_scale_f32 v0, s[2:3], v0, v0, v2 +; GFX7-NEXT: v_div_scale_f32 v0, s[2:3], v2, v0, v2 ; GFX7-NEXT: s_mov_b64 s[2:3], s[6:7] ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm @@ -1290,7 +1290,7 @@ ; GFX8-NEXT: flat_load_dword v1, v[2:3] ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1 -; GFX8-NEXT: v_div_scale_f32 v2, s[2:3], v1, v1, v0 +; GFX8-NEXT: v_div_scale_f32 v2, s[2:3], v0, v1, v0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 ; GFX8-NEXT: flat_store_dword v[0:1], v2 @@ -1313,7 +1313,7 @@ ; GFX10-NEXT: global_load_dword v1, v[2:3], off ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1 -; GFX10-NEXT: v_div_scale_f32 v2, s2, v1, v1, v0 +; GFX10-NEXT: v_div_scale_f32 v2, s2, v0, v1, v0 ; GFX10-NEXT: v_mov_b32_e32 v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: global_store_dword v[0:1], v2, off @@ -1339,7 +1339,7 @@ ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GFX7-NEXT: v_mov_b32_e32 v0, 0x41000000 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: v_div_scale_f32 v0, s[2:3], s0, s0, v0 +; GFX7-NEXT: v_div_scale_f32 v0, s[2:3], v0, s0, v0 ; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -1348,7 +1348,7 @@ ; GFX8-LABEL: test_div_scale_f32_val_undef_val: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, 0x41000000 -; GFX8-NEXT: v_div_scale_f32 v2, s[2:3], s0, s0, v0 +; GFX8-NEXT: v_div_scale_f32 v2, s[2:3], v0, s0, v0 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s0 @@ -1359,10 +1359,11 @@ ; GFX10-LABEL: test_div_scale_f32_val_undef_val: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX10-NEXT: v_mov_b32_e32 v0, 0x41000000 ; GFX10-NEXT: ; implicit-def: $vcc_hi ; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_div_scale_f32 v2, s2, v0, s0, v0 ; GFX10-NEXT: v_mov_b32_e32 v0, s0 -; GFX10-NEXT: v_div_scale_f32 v2, s2, s0, s0, 0x41000000 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: global_store_dword v[0:1], v2, off ; GFX10-NEXT: s_endpgm @@ -1378,7 +1379,7 @@ ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GFX7-NEXT: v_mov_b32_e32 v0, 0x41000000 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: v_div_scale_f32 v0, s[2:3], v0, v0, s0 +; GFX7-NEXT: v_div_scale_f32 v0, s[2:3], s0, v0, s0 ; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_mov_b32 s3, 0xf000 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -1387,7 +1388,7 @@ ; GFX8-LABEL: test_div_scale_f32_undef_val_val: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_mov_b32_e32 v0, 0x41000000 -; GFX8-NEXT: v_div_scale_f32 v2, s[2:3], v0, v0, s0 +; GFX8-NEXT: v_div_scale_f32 v2, s[2:3], s0, v0, s0 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v0, s0 @@ -1398,11 +1399,10 @@ ; GFX10-LABEL: test_div_scale_f32_undef_val_val: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX10-NEXT: v_mov_b32_e32 v0, 0x41000000 ; GFX10-NEXT: ; implicit-def: $vcc_hi ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_div_scale_f32 v2, s2, v0, v0, s0 ; GFX10-NEXT: v_mov_b32_e32 v0, s0 +; GFX10-NEXT: v_div_scale_f32 v2, s2, s0, 0x41000000, s0 ; GFX10-NEXT: v_mov_b32_e32 v1, s1 ; GFX10-NEXT: global_store_dword v[0:1], v2, off ; GFX10-NEXT: s_endpgm @@ -1455,7 +1455,7 @@ ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GFX7-NEXT: s_mov_b32 s2, 0 ; GFX7-NEXT: s_mov_b32 s3, 0x40200000 -; GFX7-NEXT: v_div_scale_f64 v[0:1], s[2:3], v[0:1], v[0:1], s[2:3] +; GFX7-NEXT: v_div_scale_f64 v[0:1], s[2:3], s[2:3], v[0:1], s[2:3] ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v3, s1 ; GFX7-NEXT: v_mov_b32_e32 v2, s0 @@ -1466,7 +1466,7 @@ ; GFX8: ; %bb.0: ; GFX8-NEXT: s_mov_b32 s2, 0 ; GFX8-NEXT: s_mov_b32 s3, 0x40200000 -; GFX8-NEXT: v_div_scale_f64 v[0:1], s[2:3], v[0:1], v[0:1], s[2:3] +; GFX8-NEXT: v_div_scale_f64 v[0:1], s[2:3], s[2:3], v[0:1], s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v3, s1 @@ -1481,7 +1481,7 @@ ; GFX10-NEXT: s_mov_b32 s3, 0x40200000 ; GFX10-NEXT: ; implicit-def: $vcc_hi ; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: v_div_scale_f64 v[0:1], s2, s[0:1], s[0:1], s[2:3] +; GFX10-NEXT: v_div_scale_f64 v[0:1], s2, s[2:3], s[0:1], s[2:3] ; GFX10-NEXT: v_mov_b32_e32 v3, s1 ; GFX10-NEXT: v_mov_b32_e32 v2, s0 ; GFX10-NEXT: global_store_dwordx2 v[2:3], v[0:1], off diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.div.scale.ll @@ -19,7 +19,7 @@ %a = load volatile float, float addrspace(1)* %gep.0, align 4 %b = load volatile float, float addrspace(1)* %gep.1, align 4 - %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone + %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 store float %result0, float addrspace(1)* %out, align 4 ret void @@ -39,7 +39,7 @@ %a = load volatile float, float addrspace(1)* %gep.0, align 4 %b = load volatile float, float addrspace(1)* %gep.1, align 4 - %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone + %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 store float %result0, float addrspace(1)* %out, align 4 ret void @@ -59,7 +59,7 @@ %a = load volatile double, double addrspace(1)* %gep.0, align 8 %b = load volatile double, double addrspace(1)* %gep.1, align 8 - %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone + %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone %result0 = extractvalue { double, i1 } %result, 0 store double %result0, double addrspace(1)* %out, align 8 ret void @@ -79,7 +79,7 @@ %a = load volatile double, double addrspace(1)* %gep.0, align 8 %b = load volatile double, double addrspace(1)* %gep.1, align 8 - %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone + %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone %result0 = extractvalue { double, i1 } %result, 0 store double %result0, double addrspace(1)* %out, align 8 ret void @@ -97,7 +97,7 @@ %b = load float, float addrspace(1)* %gep, align 4 - %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone + %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 store float %result0, float addrspace(1)* %out, align 4 ret void @@ -115,7 +115,7 @@ %b = load float, float addrspace(1)* %gep, align 4 - %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone + %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 store float %result0, float addrspace(1)* %out, align 4 ret void @@ -133,7 +133,7 @@ %a = load float, float addrspace(1)* %gep, align 4 - %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone + %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 store float %result0, float addrspace(1)* %out, align 4 ret void @@ -151,7 +151,7 @@ %a = load float, float addrspace(1)* %gep, align 4 - %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone + %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 store float %result0, float addrspace(1)* %out, align 4 ret void @@ -169,7 +169,7 @@ %b = load double, double addrspace(1)* %gep, align 8 - %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone + %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone %result0 = extractvalue { double, i1 } %result, 0 store double %result0, double addrspace(1)* %out, align 8 ret void @@ -187,7 +187,7 @@ %b = load double, double addrspace(1)* %gep, align 8 - %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone + %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone %result0 = extractvalue { double, i1 } %result, 0 store double %result0, double addrspace(1)* %out, align 8 ret void @@ -205,7 +205,7 @@ %a = load double, double addrspace(1)* %gep, align 8 - %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone + %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone %result0 = extractvalue { double, i1 } %result, 0 store double %result0, double addrspace(1)* %out, align 8 ret void @@ -223,7 +223,7 @@ %a = load double, double addrspace(1)* %gep, align 8 - %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone + %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone %result0 = extractvalue { double, i1 } %result, 0 store double %result0, double addrspace(1)* %out, align 8 ret void @@ -237,7 +237,7 @@ ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm define amdgpu_kernel void @test_div_scale_f32_all_scalar_1(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b) nounwind { - %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone + %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 store float %result0, float addrspace(1)* %out, align 4 ret void @@ -251,7 +251,7 @@ ; SI: buffer_store_dword [[RESULT0]] ; SI: s_endpgm define amdgpu_kernel void @test_div_scale_f32_all_scalar_2(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b) nounwind { - %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 true) nounwind readnone + %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b, i1 false) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 store float %result0, float addrspace(1)* %out, align 4 ret void @@ -266,7 +266,7 @@ ; SI: buffer_store_dwordx2 [[RESULT0]] ; SI: s_endpgm define amdgpu_kernel void @test_div_scale_f64_all_scalar_1(double addrspace(1)* %out, [8 x i32], double %a, [8 x i32], double %b) nounwind { - %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone + %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone %result0 = extractvalue { double, i1 } %result, 0 store double %result0, double addrspace(1)* %out, align 8 ret void @@ -281,7 +281,7 @@ ; SI: buffer_store_dwordx2 [[RESULT0]] ; SI: s_endpgm define amdgpu_kernel void @test_div_scale_f64_all_scalar_2(double addrspace(1)* %out, [8 x i32], double %a, [8 x i32], double %b) nounwind { - %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 true) nounwind readnone + %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double %a, double %b, i1 false) nounwind readnone %result0 = extractvalue { double, i1 } %result, 0 store double %result0, double addrspace(1)* %out, align 8 ret void @@ -297,7 +297,7 @@ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %a = load float, float addrspace(1)* %gep.0, align 4 - %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 1.0, float %a, i1 false) nounwind readnone + %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 1.0, float %a, i1 true) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 store float %result0, float addrspace(1)* %out, align 4 ret void @@ -313,7 +313,7 @@ %gep.0 = getelementptr float, float addrspace(1)* %in, i32 %tid %a = load float, float addrspace(1)* %gep.0, align 4 - %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float 2.0, i1 false) nounwind readnone + %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float 2.0, i1 true) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 store float %result0, float addrspace(1)* %out, align 4 ret void @@ -336,7 +336,7 @@ %a.fabs = call float @llvm.fabs.f32(float %a) nounwind readnone - %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a.fabs, float %b, i1 false) nounwind readnone + %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a.fabs, float %b, i1 true) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 store float %result0, float addrspace(1)* %out, align 4 ret void @@ -359,7 +359,7 @@ %b.fabs = call float @llvm.fabs.f32(float %b) nounwind readnone - %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b.fabs, i1 false) nounwind readnone + %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %a, float %b.fabs, i1 true) nounwind readnone %result0 = extractvalue { float, i1 } %result, 0 store float %result0, float addrspace(1)* %out, align 4 ret void @@ -369,7 +369,7 @@ ; SI: s_mov_b32 [[K:s[0-9]+]], 0x41000000 ; SI: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[K]], v{{[0-9]+}}, [[K]] define amdgpu_kernel void @test_div_scale_f32_val_undef_val(float addrspace(1)* %out) #0 { - %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.0, float undef, i1 false) + %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float 8.0, float undef, i1 true) %result0 = extractvalue { float, i1 } %result, 0 store float %result0, float addrspace(1)* %out, align 4 ret void @@ -379,7 +379,7 @@ ; SI: s_mov_b32 [[K:s[0-9]+]], 0x41000000 ; SI: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, [[K]], [[K]], v{{[0-9]+}} define amdgpu_kernel void @test_div_scale_f32_undef_val_val(float addrspace(1)* %out) #0 { - %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float 8.0, i1 false) + %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float 8.0, i1 true) %result0 = extractvalue { float, i1 } %result, 0 store float %result0, float addrspace(1)* %out, align 4 ret void @@ -389,7 +389,7 @@ ; SI-NOT: v0 ; SI: v_div_scale_f32 v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s0, s0, v0 define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(float addrspace(1)* %out) #0 { - %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float undef, i1 false) + %result = call { float, i1 } @llvm.amdgcn.div.scale.f32(float undef, float undef, i1 true) %result0 = extractvalue { float, i1 } %result, 0 store float %result0, float addrspace(1)* %out, align 4 ret void @@ -400,7 +400,7 @@ ; SI-DAG: s_mov_b32 s[[K_HI:[0-9]+]], 0x40200000 ; SI: v_div_scale_f64 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[K_LO]]:[[K_HI]]{{\]}}, v[0:1], s{{\[}}[[K_LO]]:[[K_HI]]{{\]}} define amdgpu_kernel void @test_div_scale_f64_val_undef_val(double addrspace(1)* %out) #0 { - %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double 8.0, double undef, i1 false) + %result = call { double, i1 } @llvm.amdgcn.div.scale.f64(double 8.0, double undef, i1 true) %result0 = extractvalue { double, i1 } %result, 0 store double %result0, double addrspace(1)* %out, align 8 ret void