diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp --- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -37,6 +37,15 @@ // ... // No clobber of %R0 // %R1 = COPY %R0 <<< Removed // +// or +// +// $R0 = OP ... +// ... // No read/clobber of $R0 and $R1 +// $R1 = COPY $R0 // $R0 is killed +// Replace $R0 with $R1 and remove the COPY +// $R1 = OP ... +// ... +// //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseMap.h" @@ -98,6 +107,28 @@ } } + /// Remove register from copy maps. + void invalidateRegister(unsigned Reg, const TargetRegisterInfo &TRI) { + // Since Reg might be a subreg of some registers, only invalidating Reg is not + // enough. We have to find the COPY that defines Reg or registers defined by Reg + // and invalidate all of them. + DenseSet<unsigned> RegsToInvalidate{Reg}; + for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) { + auto I = Copies.find(*RUI); + if (I != Copies.end()) { + if (MachineInstr *MI = I->second.MI) { + RegsToInvalidate.insert(MI->getOperand(0).getReg()); + RegsToInvalidate.insert(MI->getOperand(1).getReg()); + } + RegsToInvalidate.insert(I->second.DefRegs.begin(), + I->second.DefRegs.end()); + } + } + for (unsigned InvalidReg : RegsToInvalidate) + for (MCRegUnitIterator RUI(InvalidReg, &TRI); RUI.isValid(); ++RUI) + Copies.erase(*RUI); + } + /// Clobber a single register, removing it from the tracker's copy maps. 
void clobberRegister(unsigned Reg, const TargetRegisterInfo &TRI) { for (MCRegUnitIterator RUI(Reg, &TRI); RUI.isValid(); ++RUI) { @@ -151,6 +182,38 @@ return CI->second.MI; } + MachineInstr *findCopyDefViaUnit(unsigned RegUnit, + const TargetRegisterInfo &TRI) { + auto CI = Copies.find(RegUnit); + if (CI == Copies.end()) + return nullptr; + if (CI->second.DefRegs.size() != 1) + return nullptr; + MCRegUnitIterator RUI(CI->second.DefRegs[0], &TRI); + return findCopyForUnit(*RUI, TRI, true); + } + + MachineInstr *findAvailBackwardCopy(MachineInstr &I, unsigned Reg, + const TargetRegisterInfo &TRI) { + MCRegUnitIterator RUI(Reg, &TRI); + MachineInstr *AvailCopy = findCopyDefViaUnit(*RUI, TRI); + if (!AvailCopy || + !TRI.isSubRegisterEq(AvailCopy->getOperand(1).getReg(), Reg)) + return nullptr; + + Register AvailSrc = AvailCopy->getOperand(1).getReg(); + Register AvailDef = AvailCopy->getOperand(0).getReg(); + for (const MachineInstr &MI : + make_range(AvailCopy->getReverseIterator(), I.getReverseIterator())) + for (const MachineOperand &MO : MI.operands()) + if (MO.isRegMask()) + // FIXME: Shall we simultaneously invalidate AvailSrc or AvailDef? 
+ if (MO.clobbersPhysReg(AvailSrc) || MO.clobbersPhysReg(AvailDef)) + return nullptr; + + return AvailCopy; + } + MachineInstr *findAvailCopy(MachineInstr &DestCopy, unsigned Reg, const TargetRegisterInfo &TRI) { // We check the first RegUnit here, since we'll only be interested in the @@ -211,11 +274,16 @@ void ClobberRegister(unsigned Reg); void ReadRegister(unsigned Reg, MachineInstr &Reader, DebugType DT); - void CopyPropagateBlock(MachineBasicBlock &MBB); + void ForwardCopyPropagateBlock(MachineBasicBlock &MBB); + void BackwardCopyPropagateBlock(MachineBasicBlock &MBB); bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def); void forwardUses(MachineInstr &MI); + void propagateDefs(MachineInstr &MI); bool isForwardableRegClassCopy(const MachineInstr &Copy, const MachineInstr &UseI, unsigned UseIdx); + bool isBackwardPropagatableRegClassCopy(const MachineInstr &Copy, + const MachineInstr &UseI, + unsigned UseIdx); bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use); /// Candidates for deletion. @@ -313,6 +381,19 @@ return true; } +bool MachineCopyPropagation::isBackwardPropagatableRegClassCopy( + const MachineInstr &Copy, const MachineInstr &UseI, unsigned UseIdx) { + Register Def = Copy.getOperand(0).getReg(); + + if (const TargetRegisterClass *URC = + UseI.getRegClassConstraint(UseIdx, TII, TRI)) + return URC->contains(Def); + + // We don't process further if UseI is a COPY, since forward copy propagation + // should handle that. + return false; +} + /// Decide whether we should forward the source of \param Copy to its use in /// \param UseI based on the physical register class constraints of the opcode /// and avoiding introducing more cross-class COPYs. 
@@ -468,8 +549,9 @@ } } -void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { - LLVM_DEBUG(dbgs() << "MCP: CopyPropagateBlock " << MBB.getName() << "\n"); +void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { + LLVM_DEBUG(dbgs() << "MCP: ForwardCopyPropagateBlock " << MBB.getName() + << "\n"); for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) { MachineInstr *MI = &*I; @@ -647,6 +729,128 @@ Tracker.clear(); } +static bool isBackwardPropagatableCopy(MachineInstr &MI, + const MachineRegisterInfo &MRI) { + assert(MI.isCopy() && "MI is expected to be a COPY"); + Register Def = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + + if (MRI.isReserved(Def) || MRI.isReserved(Src)) + return false; + + return MI.getOperand(1).isRenamable() && MI.getOperand(1).isKill(); +} + +void MachineCopyPropagation::propagateDefs(MachineInstr &MI) { + if (!Tracker.hasAnyCopies()) + return; + + for (unsigned OpIdx = 0, OpEnd = MI.getNumOperands(); OpIdx != OpEnd; + ++OpIdx) { + MachineOperand &MODef = MI.getOperand(OpIdx); + + if (!MODef.isReg() || MODef.isUse()) + continue; + + // Ignore non-trivial cases. + if (MODef.isTied() || MODef.isUndef() || MODef.isImplicit()) + continue; + + // We only handle if the register comes from a vreg. 
+ if (!MODef.isRenamable()) + continue; + + MachineInstr *Copy = + Tracker.findAvailBackwardCopy(MI, MODef.getReg(), *TRI); + if (!Copy) + continue; + + Register Def = Copy->getOperand(0).getReg(); + Register Src = Copy->getOperand(1).getReg(); + + if (MODef.getReg() != Src) + continue; + + if (!isBackwardPropagatableRegClassCopy(*Copy, MI, OpIdx)) + continue; + + if (hasImplicitOverlap(MI, MODef)) + continue; + + LLVM_DEBUG(dbgs() << "MCP: Replacing " << printReg(MODef.getReg(), TRI) + << "\n with " << printReg(Def, TRI) << "\n in " + << MI << " from " << *Copy); + + MODef.setReg(Def); + MODef.setIsRenamable(Copy->getOperand(0).isRenamable()); + + LLVM_DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n"); + MaybeDeadCopies.insert(Copy); + Changed = true; + } +} + +void MachineCopyPropagation::BackwardCopyPropagateBlock( + MachineBasicBlock &MBB) { + LLVM_DEBUG(dbgs() << "MCP: BackwardCopyPropagateBlock " << MBB.getName() + << "\n"); + + for (MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend(); + I != E;) { + MachineInstr *MI = &*I; + ++I; + + // Ignore non-trivial COPYs. + if (MI->isCopy() && MI->getNumOperands() == 2 && + !TRI->regsOverlap(MI->getOperand(0).getReg(), + MI->getOperand(1).getReg())) { + + Register Def = MI->getOperand(0).getReg(); + Register Src = MI->getOperand(1).getReg(); + + // Unlike forward cp, we don't invoke propagateDefs here, + // just let forward cp do COPY-to-COPY propagation. + if (isBackwardPropagatableCopy(*MI, *MRI)) { + Tracker.invalidateRegister(Src, *TRI); + Tracker.invalidateRegister(Def, *TRI); + Tracker.trackCopy(MI, *TRI); + continue; + } + } + + // Invalidate any earlyclobber regs first. 
+ for (const MachineOperand &MO : MI->operands()) + if (MO.isReg() && MO.isEarlyClobber()) { + Register Reg = MO.getReg(); + if (!Reg) + continue; + Tracker.invalidateRegister(Reg, *TRI); + } + + propagateDefs(*MI); + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg()) + continue; + + if (!MO.getReg()) + continue; + + if (MO.isDef()) + Tracker.invalidateRegister(MO.getReg(), *TRI); + + if (MO.readsReg()) + Tracker.invalidateRegister(MO.getReg(), *TRI); + } + } + + for (auto *Copy : MaybeDeadCopies) + Copy->eraseFromParent(); + + MaybeDeadCopies.clear(); + CopyDbgUsers.clear(); + Tracker.clear(); +} + bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; @@ -657,8 +861,10 @@ TII = MF.getSubtarget().getInstrInfo(); MRI = &MF.getRegInfo(); - for (MachineBasicBlock &MBB : MF) - CopyPropagateBlock(MBB); + for (MachineBasicBlock &MBB : MF) { + BackwardCopyPropagateBlock(MBB); + ForwardCopyPropagateBlock(MBB); + } return Changed; } diff --git a/llvm/test/CodeGen/PowerPC/machine-backward-cp.mir b/llvm/test/CodeGen/PowerPC/machine-backward-cp.mir --- a/llvm/test/CodeGen/PowerPC/machine-backward-cp.mir +++ b/llvm/test/CodeGen/PowerPC/machine-backward-cp.mir @@ -11,8 +11,7 @@ body: | bb.0.entry: ; CHECK-LABEL: name: test0 - ; CHECK: renamable $x4 = LI8 1024 - ; CHECK: $x3 = COPY killed renamable $x4 + ; CHECK: $x3 = LI8 1024 ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3 renamable $x4 = LI8 1024 $x3 = COPY renamable killed $x4 @@ -28,8 +27,7 @@ body: | ; CHECK-LABEL: name: test1 ; CHECK: bb.0.entry: - ; CHECK: renamable $x5 = LI8 42 - ; CHECK: renamable $x4 = COPY killed renamable $x5 + ; CHECK: renamable $x4 = LI8 42 ; CHECK: B %bb.1 ; CHECK: bb.1: ; CHECK: liveins: $x4 @@ -139,8 +137,8 @@ ; CHECK-LABEL: name: iterative_deletion ; CHECK: liveins: $x5 - ; CHECK: renamable $x6 = ADDI8 killed renamable $x5, 1 - ; CHECK: $x3 = COPY $x6 + ; CHECK: renamable $x4 = ADDI8 killed 
renamable $x5, 1 + ; CHECK: $x3 = COPY $x4 ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3 renamable $x6 = ADDI8 renamable killed $x5, 1 renamable $x4 = COPY renamable killed $x6 @@ -160,8 +158,8 @@ ; CHECK-LABEL: name: Enter ; CHECK: liveins: $x4, $x7 ; CHECK: renamable $x5 = COPY killed renamable $x7 - ; CHECK: renamable $x6 = ADDI8 killed renamable $x4, 1 - ; CHECK: $x3 = ADD8 killed renamable $x5, $x6 + ; CHECK: renamable $x7 = ADDI8 killed renamable $x4, 1 + ; CHECK: $x3 = ADD8 killed renamable $x5, killed renamable $x7 ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3 renamable $x5 = COPY killed renamable $x7 renamable $x6 = ADDI8 killed renamable $x4, 1 @@ -181,10 +179,9 @@ ; CHECK-LABEL: name: foo ; CHECK: liveins: $x4, $x7 ; CHECK: renamable $x5 = COPY killed renamable $x7 - ; CHECK: renamable $x6 = ADDI8 renamable $x4, 1 - ; CHECK: renamable $x7 = COPY killed renamable $x6 - ; CHECK: renamable $x8 = ADDI8 killed $x4, 2 - ; CHECK: $x3 = ADD8 killed renamable $x5, $x8 + ; CHECK: renamable $x7 = ADDI8 renamable $x4, 1 + ; CHECK: renamable $x6 = ADDI8 killed $x4, 2 + ; CHECK: $x3 = ADD8 killed renamable $x5, killed renamable $x6 ; CHECK: $x3 = ADD8 $x3, killed renamable $x7 ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3 renamable $x5 = COPY killed renamable $x7 @@ -208,10 +205,10 @@ ; CHECK-LABEL: name: bar ; CHECK: liveins: $x4, $x7 ; CHECK: renamable $x5 = COPY killed renamable $x7 - ; CHECK: renamable $x6 = ADDI8 renamable $x4, 1 - ; CHECK: renamable $x8 = COPY $x6 - ; CHECK: renamable $x6 = ADDI8 renamable $x5, 2 - ; CHECK: $x3 = ADD8 killed renamable $x5, $x6 + ; CHECK: renamable $x7 = ADDI8 renamable $x4, 1 + ; CHECK: renamable $x8 = COPY killed renamable $x7 + ; CHECK: renamable $x7 = ADDI8 renamable $x5, 2 + ; CHECK: $x3 = ADD8 killed renamable $x5, killed renamable $x7 ; CHECK: $x3 = ADD8 $x3, killed renamable $x8 ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3 renamable $x5 = COPY killed 
renamable $x7 @@ -236,10 +233,9 @@ ; CHECK-LABEL: name: bogus ; CHECK: liveins: $x7 ; CHECK: renamable $x5 = COPY renamable $x7 - ; CHECK: renamable $x6 = ADDI8 $x7, 1 - ; CHECK: renamable $x7 = COPY $x6 + ; CHECK: renamable $x4 = ADDI8 $x7, 1 ; CHECK: renamable $x6 = ADDI8 renamable $x5, 2 - ; CHECK: $x3 = ADD8 $x7, killed renamable $x5 + ; CHECK: $x3 = ADD8 killed renamable $x4, killed renamable $x5 ; CHECK: $x3 = ADD8 $x3, killed renamable $x6 ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3 renamable $x5 = COPY killed renamable $x7 @@ -263,10 +259,10 @@ liveins: $x7 ; CHECK-LABEL: name: foobar ; CHECK: liveins: $x7 - ; CHECK: renamable $x6 = ADDI8 $x7, 1 - ; CHECK: renamable $x8 = COPY $x6 - ; CHECK: renamable $x6 = ADDI8 $x7, 2 - ; CHECK: $x3 = ADD8 $x6, $x7 + ; CHECK: renamable $x4 = ADDI8 $x7, 1 + ; CHECK: renamable $x8 = COPY killed renamable $x4 + ; CHECK: renamable $x4 = ADDI8 $x7, 2 + ; CHECK: $x3 = ADD8 killed renamable $x4, $x7 ; CHECK: $x3 = ADD8 $x3, killed renamable $x8 ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3 renamable $x5 = COPY killed renamable $x7 @@ -280,3 +276,22 @@ BLR8 implicit $lr8, implicit undef $rm, implicit $x3 ... + +--- +name: cross_call +alignment: 4 +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $x2, $x3, $x20 + ; CHECK-LABEL: name: cross_call + ; CHECK: liveins: $x2, $x3, $x20 + ; CHECK: renamable $x20 = LI8 1024 + ; CHECK: BL8_NOP @foo, csr_svr464_altivec, implicit-def $lr8, implicit $rm, implicit $x3, implicit-def $x3, implicit $x2 + ; CHECK: $x3 = COPY killed renamable $x20 + ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit $x3 + renamable $x20 = LI8 1024 + BL8_NOP @foo, csr_svr464_altivec, implicit-def $lr8, implicit $rm, implicit $x3, implicit-def $x3, implicit $x2 + $x3 = COPY renamable killed $x20 + BLR8 implicit $lr8, implicit undef $rm, implicit $x3 +... 
diff --git a/llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll b/llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll --- a/llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll +++ b/llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll @@ -26,8 +26,7 @@ ; CHECK-P9-NEXT: cmplwi r3, 2 ; CHECK-P9-NEXT: bge- cr0, .LBB0_6 ; CHECK-P9-NEXT: # %bb.3: # %land.lhs.true.1 -; CHECK-P9-NEXT: li r5, 0 -; CHECK-P9-NEXT: mr r3, r5 +; CHECK-P9-NEXT: li r3, 0 ; CHECK-P9-NEXT: blr ; CHECK-P9-NEXT: .LBB0_4: # %lor.lhs.false ; CHECK-P9-NEXT: cmplwi cr0, r4, 0 diff --git a/llvm/test/CodeGen/RISCV/alu64.ll b/llvm/test/CodeGen/RISCV/alu64.ll --- a/llvm/test/CodeGen/RISCV/alu64.ll +++ b/llvm/test/CodeGen/RISCV/alu64.ll @@ -224,8 +224,7 @@ ; RV32I-NEXT: srli a4, a0, 1 ; RV32I-NEXT: srl a3, a4, a3 ; RV32I-NEXT: or a1, a1, a3 -; RV32I-NEXT: sll a2, a0, a2 -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: sll a0, a0, a2 ; RV32I-NEXT: ret %1 = shl i64 %a, %b ret i64 %1 @@ -311,8 +310,7 @@ ; RV32I-NEXT: slli a4, a1, 1 ; RV32I-NEXT: sll a3, a4, a3 ; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: srl a2, a1, a2 -; RV32I-NEXT: mv a1, a2 +; RV32I-NEXT: srl a1, a1, a2 ; RV32I-NEXT: ret %1 = lshr i64 %a, %b ret i64 %1 diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll --- a/llvm/test/CodeGen/RISCV/shifts.ll +++ b/llvm/test/CodeGen/RISCV/shifts.ll @@ -23,8 +23,7 @@ ; RV32I-NEXT: slli a4, a1, 1 ; RV32I-NEXT: sll a3, a4, a3 ; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: srl a2, a1, a2 -; RV32I-NEXT: mv a1, a2 +; RV32I-NEXT: srl a1, a1, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: lshr64: @@ -114,8 +113,7 @@ ; RV32I-NEXT: srli a4, a0, 1 ; RV32I-NEXT: srl a3, a4, a3 ; RV32I-NEXT: or a1, a1, a3 -; RV32I-NEXT: sll a2, a0, a2 -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: sll a0, a0, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: shl64: @@ -191,8 +189,7 @@ ; RV64I-NEXT: slli a4, a1, 1 ; RV64I-NEXT: sll a3, a4, a3 ; RV64I-NEXT: or a0, a0, a3 -; RV64I-NEXT: srl a2, a1, a2 -; RV64I-NEXT: mv a1, 
a2 +; RV64I-NEXT: srl a1, a1, a2 ; RV64I-NEXT: ret %1 = lshr i128 %a, %b ret i128 %1 @@ -298,8 +295,7 @@ ; RV64I-NEXT: srli a4, a0, 1 ; RV64I-NEXT: srl a3, a4, a3 ; RV64I-NEXT: or a1, a1, a3 -; RV64I-NEXT: sll a2, a0, a2 -; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: sll a0, a0, a2 ; RV64I-NEXT: ret %1 = shl i128 %a, %b ret i128 %1 diff --git a/llvm/test/CodeGen/X86/fp128-i128.ll b/llvm/test/CodeGen/X86/fp128-i128.ll --- a/llvm/test/CodeGen/X86/fp128-i128.ll +++ b/llvm/test/CodeGen/X86/fp128-i128.ll @@ -496,9 +496,8 @@ ; AVX-NEXT: testl %ebp, %ebp ; AVX-NEXT: jle .LBB10_1 ; AVX-NEXT: # %bb.2: # %if.then -; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm2 ; AVX-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload -; AVX-NEXT: vmovaps %xmm1, %xmm2 ; AVX-NEXT: jmp .LBB10_3 ; AVX-NEXT: .LBB10_1: ; AVX-NEXT: vmovaps (%rsp), %xmm2 # 16-byte Reload diff --git a/llvm/test/CodeGen/X86/fshr.ll b/llvm/test/CodeGen/X86/fshr.ll --- a/llvm/test/CodeGen/X86/fshr.ll +++ b/llvm/test/CodeGen/X86/fshr.ll @@ -279,8 +279,7 @@ ; X86-SLOW-NEXT: orl %edi, %edx ; X86-SLOW-NEXT: movl %edx, (%esp) # 4-byte Spill ; X86-SLOW-NEXT: .LBB4_2: -; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-SLOW-NEXT: movl %ecx, %edx +; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-SLOW-NEXT: movl %ebx, %ecx ; X86-SLOW-NEXT: shrl %cl, %edx ; X86-SLOW-NEXT: movb %bl, %ah diff --git a/llvm/test/CodeGen/X86/i128-mul.ll b/llvm/test/CodeGen/X86/i128-mul.ll --- a/llvm/test/CodeGen/X86/i128-mul.ll +++ b/llvm/test/CodeGen/X86/i128-mul.ll @@ -88,9 +88,8 @@ ; X86-NEXT: movl 4(%eax,%ebp,8), %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %esi, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %edx, %edi -; X86-NEXT: mull %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: mull %edi ; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %ecx, %eax 
diff --git a/llvm/test/CodeGen/X86/mul-i512.ll b/llvm/test/CodeGen/X86/mul-i512.ll --- a/llvm/test/CodeGen/X86/mul-i512.ll +++ b/llvm/test/CodeGen/X86/mul-i512.ll @@ -153,9 +153,8 @@ ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: adcl $0, %edx ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[0-9]+}}(%esp), %esi -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl 8(%esi), %ebx +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl 8(%ecx), %ebx ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload diff --git a/llvm/test/CodeGen/X86/shift-i128.ll b/llvm/test/CodeGen/X86/shift-i128.ll --- a/llvm/test/CodeGen/X86/shift-i128.ll +++ b/llvm/test/CodeGen/X86/shift-i128.ll @@ -296,8 +296,7 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: shll %cl, %ebx ; X86-NEXT: movl %ebp, %esi @@ -534,8 +533,7 @@ ; X86-NEXT: .LBB6_9: # %entry ; X86-NEXT: movl %edi, %esi ; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: shrl %cl, %ebp ; X86-NEXT: testb $32, %cl @@ -795,9 +793,8 @@ ; X86-NEXT: # %bb.4: # %entry ; X86-NEXT: movl %edi, %ebx ; X86-NEXT: .LBB7_5: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %ebp -; X86-NEXT: movl %ecx, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl %ebp, %edi ; X86-NEXT: movl %edx, %ecx ; X86-NEXT: sarl %cl, %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi @@ -835,8 +832,7 @@ ; X86-NEXT: movl %esi, %edi ; X86-NEXT: .LBB7_9: # %entry ; 
X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X86-NEXT: sarl %cl, %esi ; X86-NEXT: testb $32, %cl @@ -850,8 +846,7 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movb $64, %cl ; X86-NEXT: subb %dl, %cl -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl %ebx, %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: shldl %cl, %ebx, %ebp ; X86-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill @@ -1059,12 +1054,11 @@ ; X86-NEXT: pushl %esi ; X86-NEXT: subl $72, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx ; X86-NEXT: movl %ebx, %ecx ; X86-NEXT: shll %cl, %ebp -; X86-NEXT: movl %eax, %esi ; X86-NEXT: shll %cl, %esi ; X86-NEXT: movl %edx, %eax ; X86-NEXT: subl $64, %eax @@ -1130,9 +1124,7 @@ ; X86-NEXT: movl %ecx, %ebp ; X86-NEXT: movl %edx, %ecx ; X86-NEXT: shll %cl, %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %esi -; X86-NEXT: movl %edx, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: shll %cl, %esi ; X86-NEXT: testb $32, %dl ; X86-NEXT: movl $0, %edi @@ -1210,8 +1202,7 @@ ; X86-NEXT: movl %edi, %ecx ; X86-NEXT: .LBB8_23: # %entry ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: shll %cl, %edi ; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill diff --git a/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll --- a/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll +++ 
b/llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll @@ -98,8 +98,8 @@ ; X86-NEXT: addl %esi, %ecx ; X86-NEXT: adcl $0, %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: mull %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: mull %edx ; X86-NEXT: movl %edx, %esi ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill