diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -1072,15 +1072,14 @@ addPass(&SIFoldOperandsID); if (EnableDPPCombine) addPass(&GCNDPPCombineID); - addPass(&DeadMachineInstructionElimID); addPass(&SILoadStoreOptimizerID); if (EnableSDWAPeephole) { addPass(&SIPeepholeSDWAID); addPass(&EarlyMachineLICMID); addPass(&MachineCSEID); addPass(&SIFoldOperandsID); - addPass(&DeadMachineInstructionElimID); } + addPass(&DeadMachineInstructionElimID); addPass(createSIShrinkInstructionsPass()); } diff --git a/llvm/test/CodeGen/AMDGPU/srem64.ll b/llvm/test/CodeGen/AMDGPU/srem64.ll --- a/llvm/test/CodeGen/AMDGPU/srem64.ll +++ b/llvm/test/CodeGen/AMDGPU/srem64.ll @@ -2027,7 +2027,7 @@ ; GCN-IR-NEXT: v_subb_u32_e32 v6, vcc, 0, v11, vcc ; GCN-IR-NEXT: v_or_b32_e32 v4, v12, v4 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v12, 31, v6 -; GCN-IR-NEXT: v_and_b32_e32 v14, 0x8000, v12 +; GCN-IR-NEXT: v_and_b32_e32 v15, 0x8000, v12 ; GCN-IR-NEXT: v_and_b32_e32 v6, 1, v12 ; GCN-IR-NEXT: v_add_i32_e32 v12, vcc, 1, v8 ; GCN-IR-NEXT: v_or_b32_e32 v5, v13, v5 @@ -2037,9 +2037,9 @@ ; GCN-IR-NEXT: v_mov_b32_e32 v7, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v9, v13 ; GCN-IR-NEXT: v_mov_b32_e32 v13, v7 -; GCN-IR-NEXT: v_mov_b32_e32 v15, 0 -; GCN-IR-NEXT: v_sub_i32_e64 v10, s[4:5], v10, v14 -; GCN-IR-NEXT: v_subb_u32_e64 v11, s[4:5], v11, v15, s[4:5] +; GCN-IR-NEXT: v_mov_b32_e32 v14, 0 +; GCN-IR-NEXT: v_sub_i32_e64 v10, s[4:5], v10, v15 +; GCN-IR-NEXT: v_subb_u32_e64 v11, s[4:5], v11, v14, s[4:5] ; GCN-IR-NEXT: s_or_b64 s[8:9], vcc, s[8:9] ; GCN-IR-NEXT: v_mov_b32_e32 v12, v6 ; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[8:9] diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll --- a/llvm/test/CodeGen/AMDGPU/udiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll @@ -1377,25 +1377,25 @@ ; GCN-IR-NEXT: ; =>This Inner Loop Header: Depth=1 ; GCN-IR-NEXT: v_lshl_b64 v[7:8], v[7:8], 1 ; GCN-IR-NEXT: v_lshrrev_b32_e32 v4, 31, v3 -; GCN-IR-NEXT: v_or_b32_e32 v6, v7, v4 +; GCN-IR-NEXT: v_or_b32_e32 v7, v7, v4 ; GCN-IR-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 -; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, s12, v6 +; GCN-IR-NEXT: v_sub_i32_e32 v4, vcc, s12, v7 ; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, 0, v8, vcc ; GCN-IR-NEXT: v_or_b32_e32 v2, v9, v2 +; GCN-IR-NEXT: v_ashrrev_i32_e32 v9, 31, v4 +; GCN-IR-NEXT: v_and_b32_e32 v11, 0x8000, v9 +; GCN-IR-NEXT: v_and_b32_e32 v4, 1, v9 ; GCN-IR-NEXT: v_add_i32_e32 v9, vcc, 1, v0 -; GCN-IR-NEXT: v_ashrrev_i32_e32 v7, 31, v4 ; GCN-IR-NEXT: v_or_b32_e32 v3, v10, v3 ; GCN-IR-NEXT: v_addc_u32_e32 v10, vcc, 0, v1, vcc ; GCN-IR-NEXT: v_cmp_lt_u64_e32 vcc, v[9:10], v[0:1] -; GCN-IR-NEXT: v_and_b32_e32 v4, 1, v7 -; GCN-IR-NEXT: v_and_b32_e32 v7, 0x8000, v7 ; GCN-IR-NEXT: v_mov_b32_e32 v0, v9 ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v1, v10 ; GCN-IR-NEXT: v_mov_b32_e32 v10, v5 -; GCN-IR-NEXT: v_mov_b32_e32 v11, 0 -; GCN-IR-NEXT: v_sub_i32_e64 v7, s[4:5], v6, v7 -; GCN-IR-NEXT: v_subb_u32_e64 v8, s[4:5], v8, v11, s[4:5] +; GCN-IR-NEXT: v_mov_b32_e32 v6, 0 +; GCN-IR-NEXT: v_sub_i32_e64 v7, s[4:5], v7, v11 +; GCN-IR-NEXT: v_subb_u32_e64 v8, s[4:5], v8, v6, s[4:5] ; GCN-IR-NEXT: s_or_b64 s[8:9], vcc, s[8:9] ; GCN-IR-NEXT: v_mov_b32_e32 v9, v4 ; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[8:9] diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll --- a/llvm/test/CodeGen/AMDGPU/urem64.ll +++ b/llvm/test/CodeGen/AMDGPU/urem64.ll @@ -1409,7 +1409,7 @@ ; GCN-IR-NEXT: v_subb_u32_e32 v4, vcc, 0, v9, vcc ; GCN-IR-NEXT: v_or_b32_e32 v2, v10, v2 ; GCN-IR-NEXT: v_ashrrev_i32_e32 v10, 31, v4 -; GCN-IR-NEXT: v_and_b32_e32 v12, 0x8000, v10 +; GCN-IR-NEXT: v_and_b32_e32 v13, 0x8000, v10 ; GCN-IR-NEXT: v_and_b32_e32 v4, 1, v10 ; GCN-IR-NEXT: v_add_i32_e32 v10, vcc, 1, v6 ; GCN-IR-NEXT: v_or_b32_e32 v3, v11, v3 @@ -1419,9 +1419,9 @@ ; GCN-IR-NEXT: v_mov_b32_e32 v5, 0 ; GCN-IR-NEXT: v_mov_b32_e32 v7, v11 ; GCN-IR-NEXT: v_mov_b32_e32 v11, v5 -; GCN-IR-NEXT: v_mov_b32_e32 v13, 0 -; GCN-IR-NEXT: v_sub_i32_e64 v8, s[4:5], v8, v12 -; GCN-IR-NEXT: v_subb_u32_e64 v9, s[4:5], v9, v13, s[4:5] +; GCN-IR-NEXT: v_mov_b32_e32 v12, 0 +; GCN-IR-NEXT: v_sub_i32_e64 v8, s[4:5], v8, v13 +; GCN-IR-NEXT: v_subb_u32_e64 v9, s[4:5], v9, v12, s[4:5] ; GCN-IR-NEXT: s_or_b64 s[8:9], vcc, s[8:9] ; GCN-IR-NEXT: v_mov_b32_e32 v10, v4 ; GCN-IR-NEXT: s_andn2_b64 exec, exec, s[8:9]