diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -198,8 +198,6 @@ INITIALIZE_PASS_END(TwoAddressInstructionPass, DEBUG_TYPE, "Two-Address instruction pass", false, false) -static bool isPlainlyKilled(MachineInstr *MI, Register Reg, LiveIntervals *LIS); - /// Return the MachineInstr* if it is the single def of the Reg in current BB. static MachineInstr *getSingleDef(Register Reg, MachineBasicBlock *BB, const MachineRegisterInfo *MRI) { @@ -287,7 +285,7 @@ /// Test if the given register value, which is used by the /// given instruction, is killed by the given instruction. -static bool isPlainlyKilled(MachineInstr *MI, Register Reg, +static bool isPlainlyKilled(const MachineInstr *MI, Register Reg, LiveIntervals *LIS) { if (LIS && Reg.isVirtual() && !LIS->isNotInMIMap(*MI)) { // FIXME: Sometimes tryInstructionTransform() will add instructions and @@ -311,6 +309,12 @@ return MI->killsRegister(Reg); } +/// Test if the register used by the given operand is killed by the operand's +/// instruction: MO's kill flag first, else the (MI, Reg, LIS) overload above. +static bool isPlainlyKilled(const MachineOperand &MO, LiveIntervals *LIS) { + return MO.isKill() || isPlainlyKilled(MO.getParent(), MO.getReg(), LIS); +} + /// Test if the given register value, which is used by the given /// instruction, is killed by the given instruction. This looks through /// coalescable copies to see if the original value is potentially not killed. 
@@ -861,8 +865,7 @@ Defs.push_back(MOReg); else { Uses.push_back(MOReg); - if (MOReg != Reg && (MO.isKill() || - (LIS && isPlainlyKilled(MI, MOReg, LIS)))) + if (MOReg != Reg && isPlainlyKilled(MO, LIS)) Kills.push_back(MOReg); } } @@ -913,8 +916,7 @@ } else { if (regOverlapsSet(Defs, MOReg, TRI)) return false; - bool isKill = - MO.isKill() || (LIS && isPlainlyKilled(&OtherMI, MOReg, LIS)); + bool isKill = isPlainlyKilled(MO, LIS); if (MOReg != Reg && ((isKill && regOverlapsSet(Uses, MOReg, TRI)) || regOverlapsSet(Kills, MOReg, TRI))) // Don't want to extend other live ranges and update kills. @@ -1042,7 +1044,7 @@ continue; if (isDefTooClose(MOReg, DI->second, MI)) return false; - bool isKill = MO.isKill() || (LIS && isPlainlyKilled(KillMI, MOReg, LIS)); + bool isKill = isPlainlyKilled(MO, LIS); if (MOReg == Reg && !isKill) return false; Uses.push_back(MOReg); @@ -1084,8 +1086,7 @@ if (regOverlapsSet(Kills, MOReg, TRI)) // Don't want to extend other live ranges and update kills. return false; - if (&OtherMI != MI && MOReg == Reg && - !(MO.isKill() || (LIS && isPlainlyKilled(&OtherMI, MOReg, LIS)))) + if (&OtherMI != MI && MOReg == Reg && !isPlainlyKilled(MO, LIS)) // We can't schedule across a use of the register in question. 
return false; } else { diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-limit-duplane.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-limit-duplane.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-limit-duplane.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-limit-duplane.ll @@ -6,15 +6,14 @@ define <4 x i32> @test(ptr %arg1, ptr %arg2) { ; CHECK-LABEL: test: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov x8, #8 +; CHECK-NEXT: mov x8, #8 // =0x8 ; CHECK-NEXT: ptrue p0.s, vl8 -; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0, x8, lsl #2] +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x0] -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: add z2.s, z2.s, z2.s -; CHECK-NEXT: ext z0.b, z0.b, z1.b, #16 -; CHECK-NEXT: add z1.s, z1.s, z1.s +; CHECK-NEXT: add z1.s, z0.s, z0.s +; CHECK-NEXT: ext z0.b, z0.b, z0.b, #16 ; CHECK-NEXT: dup v0.4s, v0.s[2] +; CHECK-NEXT: add z2.s, z2.s, z2.s ; CHECK-NEXT: st1w { z1.s }, p0, [x0, x8, lsl #2] ; CHECK-NEXT: st1w { z2.s }, p0, [x0] ; CHECK-NEXT: ret @@ -30,15 +29,14 @@ define <2 x i32> @test2(ptr %arg1, ptr %arg2) { ; CHECK-LABEL: test2: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov x8, #8 +; CHECK-NEXT: mov x8, #8 // =0x8 ; CHECK-NEXT: ptrue p0.s, vl8 -; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0, x8, lsl #2] +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x0] -; CHECK-NEXT: mov z0.d, z1.d -; CHECK-NEXT: add z2.s, z2.s, z2.s -; CHECK-NEXT: ext z0.b, z0.b, z1.b, #24 -; CHECK-NEXT: add z1.s, z1.s, z1.s +; CHECK-NEXT: add z1.s, z0.s, z0.s +; CHECK-NEXT: ext z0.b, z0.b, z0.b, #24 ; CHECK-NEXT: dup v0.2s, v0.s[0] +; CHECK-NEXT: add z2.s, z2.s, z2.s ; CHECK-NEXT: st1w { z1.s }, p0, [x0, x8, lsl #2] ; CHECK-NEXT: st1w { z2.s }, p0, [x0] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll --- 
a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll @@ -29,17 +29,16 @@ define <2 x i32> @test2(ptr %arg1, ptr %arg2) { ; CHECK-LABEL: test2: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldp q2, q0, [x0, #32] -; CHECK-NEXT: ldp q4, q5, [x0] -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: add z3.s, z0.s, z0.s -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 -; CHECK-NEXT: mov z0.s, s1 -; CHECK-NEXT: add z1.s, z2.s, z2.s -; CHECK-NEXT: stp q1, q3, [x0, #32] -; CHECK-NEXT: add z1.s, z4.s, z4.s +; CHECK-NEXT: ldp q1, q0, [x0, #32] +; CHECK-NEXT: add z1.s, z1.s, z1.s +; CHECK-NEXT: ldp q2, q3, [x0] +; CHECK-NEXT: add z4.s, z0.s, z0.s +; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 +; CHECK-NEXT: mov z0.s, s0 +; CHECK-NEXT: stp q1, q4, [x0, #32] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 -; CHECK-NEXT: add z2.s, z5.s, z5.s +; CHECK-NEXT: add z1.s, z2.s, z2.s +; CHECK-NEXT: add z2.s, z3.s, z3.s ; CHECK-NEXT: stp q1, q2, [x0] ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll --- a/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll +++ b/llvm/test/CodeGen/AMDGPU/mad-mix-lo.ll @@ -1190,24 +1190,22 @@ ; GISEL-GFX900: ; %bb.0: ; GISEL-GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-GFX900-NEXT: v_mad_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] -; GISEL-GFX900-NEXT: v_mov_b32_e32 v4, v3 -; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v4, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; GISEL-GFX900-NEXT: v_max_f16_e64 v0, v3, v3 clamp -; GISEL-GFX900-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GISEL-GFX900-NEXT: v_max_f16_e64 v4, v3, v3 clamp +; GISEL-GFX900-NEXT: v_mad_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; GISEL-GFX900-NEXT: v_bfe_u32 v0, v4, 0, 16 ; GISEL-GFX900-NEXT: v_mov_b32_e32 v1, 0xffff0000 -; GISEL-GFX900-NEXT: v_and_or_b32 v0, v4, v1, v0 +; GISEL-GFX900-NEXT: v_and_or_b32 v0, v3, v1, v0 ; GISEL-GFX900-NEXT: 
s_setpc_b64 s[30:31] ; ; GISEL-GFX906-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo: ; GISEL-GFX906: ; %bb.0: ; GISEL-GFX906-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GISEL-GFX906-NEXT: v_fma_mixlo_f16 v3, v0, v1, v2 op_sel_hi:[1,1,1] -; GISEL-GFX906-NEXT: v_mov_b32_e32 v4, v3 -; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v4, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] -; GISEL-GFX906-NEXT: v_max_f16_e64 v0, v3, v3 clamp -; GISEL-GFX906-NEXT: v_bfe_u32 v0, v0, 0, 16 +; GISEL-GFX906-NEXT: v_max_f16_e64 v4, v3, v3 clamp +; GISEL-GFX906-NEXT: v_fma_mixhi_f16 v3, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] +; GISEL-GFX906-NEXT: v_bfe_u32 v0, v4, 0, 16 ; GISEL-GFX906-NEXT: v_mov_b32_e32 v1, 0xffff0000 -; GISEL-GFX906-NEXT: v_and_or_b32 v0, v4, v1, v0 +; GISEL-GFX906-NEXT: v_and_or_b32 v0, v3, v1, v0 ; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31] ; ; GISEL-VI-LABEL: v_mad_mix_v2f32_clamp_postcvt_lo: diff --git a/llvm/test/CodeGen/AVR/shift32.ll b/llvm/test/CodeGen/AVR/shift32.ll --- a/llvm/test/CodeGen/AVR/shift32.ll +++ b/llvm/test/CodeGen/AVR/shift32.ll @@ -525,10 +525,10 @@ ; CHECK-NEXT: sbc r18, r18 ; CHECK-NEXT: lsl r24 ; CHECK-NEXT: rol r25 +; CHECK-NEXT: mov r19, r18 ; CHECK-NEXT: mov r23, r18 ; CHECK-NEXT: rol r23 ; CHECK-NEXT: mov r22, r25 -; CHECK-NEXT: mov r19, r18 ; CHECK-NEXT: movw r24, r18 ; CHECK-NEXT: ret %res = ashr i32 %a, 22 diff --git a/llvm/test/CodeGen/X86/pr32284.ll b/llvm/test/CodeGen/X86/pr32284.ll --- a/llvm/test/CodeGen/X86/pr32284.ll +++ b/llvm/test/CodeGen/X86/pr32284.ll @@ -504,18 +504,18 @@ ; X86-NEXT: .cfi_def_cfa_register %ebp ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $8, %esp -; X86-NEXT: movl var_13, %ecx -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: testl %ecx, %ecx -; X86-NEXT: notl %ecx -; X86-NEXT: sete %al +; X86-NEXT: movl var_13, %eax +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: testl %eax, %eax +; X86-NEXT: notl %eax +; X86-NEXT: sete %cl ; X86-NEXT: movl var_16, %edx -; X86-NEXT: xorl %ecx, %edx -; X86-NEXT: andl %eax, %edx -; X86-NEXT: orl 
%ecx, %edx +; X86-NEXT: xorl %eax, %edx +; X86-NEXT: andl %ecx, %edx +; X86-NEXT: orl %eax, %edx ; X86-NEXT: movl %edx, (%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) -; X86-NEXT: movl %ecx, var_46 +; X86-NEXT: movl %eax, var_46 ; X86-NEXT: movl %ebp, %esp ; X86-NEXT: popl %ebp ; X86-NEXT: .cfi_def_cfa %esp, 4