diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1044,6 +1044,16 @@
     return isCopyInstrImpl(MI);
   }
 
+  /// Returns true if MI is a copy recognized by isCopyInstr() in which
+  /// neither the destination nor the source operand uses a subregister index.
+  bool isFullCopyInstr(const MachineInstr &MI) const {
+    auto DestSrc = isCopyInstr(MI);
+    if (!DestSrc)
+      return false;
+    return !DestSrc->Destination->getSubReg() &&
+           !DestSrc->Source->getSubReg();
+  }
+
   /// If the specific machine instruction is an instruction that adds an
   /// immediate value and a physical register, and stores the result in
   /// the given physical register \c Reg, return a pair of the source
@@ -1954,6 +1964,13 @@
     return false;
   }
 
+  /// Allows targets to use an appropriate copy instruction while splitting
+  /// the live range of a register in register allocation.
+  virtual unsigned getLiveRangeSplitOpcode(Register Reg,
+                                           MachineRegisterInfo &MRI) const {
+    return TargetOpcode::COPY;
+  }
+
   /// During PHI eleimination lets target to make necessary checks and
   /// insert the copy to the PHI destination register in a target specific
   /// manner.
diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp
--- a/llvm/lib/CodeGen/CalcSpillWeights.cpp
+++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp
@@ -97,7 +97,7 @@
   // Trace copies introduced by live range splitting. The inline
   // spiller can rematerialize through these copies, so the spill
   // weight must reflect this.
-  while (MI->isFullCopy()) {
+  while (TII.isFullCopyInstr(*MI)) {
     // The copy destination must match the interval register.
     if (MI->getOperand(0).getReg() != Reg)
       return false;
@@ -224,7 +224,11 @@
       continue;
     NumInstr++;
-    if (MI->isIdentityCopy() || MI->isImplicitDef())
+    auto DestSrc = TII.isCopyInstr(*MI);
+    bool IdentityCopy = DestSrc &&
+        DestSrc->Destination->getReg() == DestSrc->Source->getReg() &&
+        DestSrc->Destination->getSubReg() == DestSrc->Source->getSubReg();
+    if (IdentityCopy || MI->isImplicitDef())
       continue;
     if (!Visited.insert(MI).second)
       continue;
 
@@ -258,7 +262,7 @@
     }
 
     // Get allocation hints from copies.
-    if (!MI->isCopy())
+    if (!TII.isCopyInstr(*MI))
      continue;
    Register HintReg = copyHint(MI, LI.reg(), TRI, MRI);
    if (!HintReg)
diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp
--- a/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -257,8 +257,9 @@
 /// isFullCopyOf - If MI is a COPY to or from Reg, return the other register,
 /// otherwise return 0.
-static Register isFullCopyOf(const MachineInstr &MI, Register Reg) {
-  if (!MI.isFullCopy())
+static Register isFullCopyOf(const MachineInstr &MI, Register Reg,
+                             const TargetInstrInfo &TII) {
+  if (!TII.isFullCopyInstr(MI))
     return Register();
   if (MI.getOperand(0).getReg() == Reg)
     return MI.getOperand(1).getReg();
   if (MI.getOperand(1).getReg() == Reg)
@@ -314,7 +315,7 @@
     MachineInstr &MI = *RI++;
 
     // Allow copies to/from Reg.
-    if (isFullCopyOf(MI, Reg))
+    if (isFullCopyOf(MI, Reg, TII))
       continue;
 
     // Allow stack slot loads.
@@ -353,7 +354,7 @@
 
   for (MachineInstr &MI :
        llvm::make_early_inc_range(MRI.reg_instructions(Reg))) {
-    Register SnipReg = isFullCopyOf(MI, Reg);
+    Register SnipReg = isFullCopyOf(MI, Reg, TII);
     if (!isSibling(SnipReg))
       continue;
     LiveInterval &SnipLI = LIS.getInterval(SnipReg);
@@ -476,14 +477,14 @@
   // Find all spills and copies of VNI.
   for (MachineInstr &MI :
        llvm::make_early_inc_range(MRI.use_nodbg_instructions(Reg))) {
-    if (!MI.isCopy() && !MI.mayStore())
+    if (!TII.isCopyInstr(MI) && !MI.mayStore())
       continue;
     SlotIndex Idx = LIS.getInstructionIndex(MI);
     if (LI->getVNInfoAt(Idx) != VNI)
       continue;
 
     // Follow sibling copies down the dominator tree.
-    if (Register DstReg = isFullCopyOf(MI, Reg)) {
+    if (Register DstReg = isFullCopyOf(MI, Reg, TII)) {
       if (isSibling(DstReg)) {
         LiveInterval &DstLI = LIS.getInterval(DstReg);
         VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getRegSlot());
@@ -826,7 +827,7 @@
   if (Ops.back().first != MI || MI->isBundled())
     return false;
 
-  bool WasCopy = MI->isCopy();
+  bool WasCopy = TII.isCopyInstr(*MI).has_value();
   Register ImpReg;
 
   // TII::foldMemoryOperand will do what we need here for statepoint
@@ -1111,7 +1112,7 @@
       Idx = VNI->def;
 
     // Check for a sibling copy.
-    Register SibReg = isFullCopyOf(MI, Reg);
+    Register SibReg = isFullCopyOf(MI, Reg, TII);
     if (SibReg && isSibling(SibReg)) {
       // This may actually be a copy between snippets.
       if (isRegToSpill(SibReg)) {
diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp
--- a/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -348,7 +348,8 @@
     // unlikely to change anything. We typically don't want to shrink the
     // PIC base register that has lots of uses everywhere.
     // Always shrink COPY uses that probably come from live range splitting.
-    if ((MI->readsVirtualRegister(Reg) && (MI->isCopy() || MO.isDef())) ||
+    if ((MI->readsVirtualRegister(Reg) &&
+         (TII.isCopyInstr(*MI) || MO.isDef())) ||
         (MO.readsReg() && (MRI.hasOneNonDBGUse(Reg) || useIsKill(LI, MO))))
       ToShrink.insert(&LI);
     else if (MO.readsReg())
diff --git a/llvm/lib/CodeGen/LiveRangeShrink.cpp b/llvm/lib/CodeGen/LiveRangeShrink.cpp
--- a/llvm/lib/CodeGen/LiveRangeShrink.cpp
+++ b/llvm/lib/CodeGen/LiveRangeShrink.cpp
@@ -23,6 +23,7 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
@@ -197,7 +198,8 @@
         // is because it needs more accurate model to handle register
         // pressure correctly.
         MachineInstr &DefInstr = *MRI.def_instr_begin(Reg);
-        if (!DefInstr.isCopy())
+        const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+        if (!TII.isCopyInstr(DefInstr))
           NumEligibleUse++;
         Insert = FindDominatedInstruction(DefInstr, Insert, IOM);
       } else {
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -1286,9 +1286,11 @@
 /// VirtReg.
 static bool readsLaneSubset(const MachineRegisterInfo &MRI,
                             const MachineInstr *MI, const LiveInterval &VirtReg,
-                            const TargetRegisterInfo *TRI, SlotIndex Use) {
+                            const TargetRegisterInfo *TRI, SlotIndex Use,
+                            const TargetInstrInfo *TII) {
   // Early check the common case.
-  if (MI->isCopy() &&
-      MI->getOperand(0).getSubReg() == MI->getOperand(1).getSubReg())
-    return false;
+  auto DestSrc = TII->isCopyInstr(*MI);
+  if (DestSrc &&
+      DestSrc->Destination->getSubReg() == DestSrc->Source->getSubReg())
+    return false;
 
@@ -1348,14 +1350,14 @@
   // the allocation.
   for (const SlotIndex Use : Uses) {
     if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Use)) {
-      if (MI->isFullCopy() ||
+      if (TII->isFullCopyInstr(*MI) ||
           (SplitSubClass &&
            SuperRCNumAllocatableRegs ==
                getNumAllocatableRegsForConstraints(MI, VirtReg.reg(), SuperRC,
                                                    TII, TRI, RegClassInfo)) ||
           // TODO: Handle split for subranges with subclass constraints?
           (!SplitSubClass && VirtReg.hasSubRanges() &&
-           !readsLaneSubset(*MRI, MI, VirtReg, TRI, Use))) {
+           !readsLaneSubset(*MRI, MI, VirtReg, TRI, Use, TII))) {
        LLVM_DEBUG(dbgs() << "    skip:\t" << Use << '\t' << *MI);
         continue;
       }
@@ -2142,7 +2144,7 @@
 /// \p Out is not cleared before being populated.
 void RAGreedy::collectHintInfo(Register Reg, HintsInfo &Out) {
   for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) {
-    if (!Instr.isFullCopy())
+    if (!TII->isFullCopyInstr(Instr))
       continue;
     // Look for the other end of the copy.
     Register OtherReg = Instr.getOperand(0).getReg();
@@ -2457,7 +2459,7 @@
            MI.getOpcode() == TargetOpcode::STATEPOINT;
   };
   for (MachineInstr &MI : MBB) {
-    if (MI.isCopy()) {
+    if (TII->isCopyInstr(MI)) {
       const MachineOperand &Dest = MI.getOperand(0);
       const MachineOperand &Src = MI.getOperand(1);
       Register SrcReg = Src.getReg();
diff --git a/llvm/lib/CodeGen/SplitKit.cpp b/llvm/lib/CodeGen/SplitKit.cpp
--- a/llvm/lib/CodeGen/SplitKit.cpp
+++ b/llvm/lib/CodeGen/SplitKit.cpp
@@ -536,7 +536,7 @@
 SlotIndex SplitEditor::buildCopy(Register FromReg, Register ToReg,
     LaneBitmask LaneMask, MachineBasicBlock &MBB,
     MachineBasicBlock::iterator InsertBefore, bool Late, unsigned RegIdx) {
-  const MCInstrDesc &Desc = TII.get(TargetOpcode::COPY);
+  const MCInstrDesc &Desc = TII.get(TII.getLiveRangeSplitOpcode(FromReg, MRI));
   SlotIndexes &Indexes = *LIS.getSlotIndexes();
   if (LaneMask.all() || LaneMask == MRI.getMaxLaneMaskForVReg(FromReg)) {
     // The full vreg is copied.
@@ -1584,7 +1584,9 @@
   if (BI.LiveIn && BI.LiveOut)
     return true;
   // No point in isolating a copy. It has no register class constraints.
-  if (LIS.getInstructionFromIndex(BI.FirstInstr)->isCopyLike())
+  MachineInstr *MI = LIS.getInstructionFromIndex(BI.FirstInstr);
+  bool CopyLike = TII.isCopyInstr(*MI) || MI->isSubregToReg();
+  if (CopyLike)
     return false;
   // Finally, don't isolate an end point that was created by earlier splits.
   return isOriginalEndpoint(BI.FirstInstr);
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -440,8 +440,9 @@
 // If the COPY instruction in MI can be folded to a stack operation, return
 // the register class to use.
 static const TargetRegisterClass *canFoldCopy(const MachineInstr &MI,
+                                              const TargetInstrInfo &TII,
                                               unsigned FoldIdx) {
-  assert(MI.isCopy() && "MI must be a COPY instruction");
+  assert(TII.isCopyInstr(MI) && "MI must be a COPY instruction");
   if (MI.getNumOperands() != 2)
     return nullptr;
   assert(FoldIdx<2 && "FoldIdx refers no nonexistent operand");
@@ -630,10 +631,10 @@
   }
 
   // Straight COPY may fold as load/store.
-  if (!MI.isCopy() || Ops.size() != 1)
+  if (!isCopyInstr(MI) || Ops.size() != 1)
     return nullptr;
 
-  const TargetRegisterClass *RC = canFoldCopy(MI, Ops[0]);
+  const TargetRegisterClass *RC = canFoldCopy(MI, *this, Ops[0]);
   if (!RC)
     return nullptr;
 
diff --git a/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
--- a/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -226,18 +226,11 @@
   bool isDSPControlWrite = false;
   // Condition is made to match the creation of WRDSP/RDDSP copy instruction
   // from copyPhysReg function.
-  if (isReadOrWriteToDSPReg(MI, isDSPControlWrite)) {
-    if (!MI.getOperand(1).isImm() || MI.getOperand(1).getImm() != (1 << 4))
-      return std::nullopt;
-    else if (isDSPControlWrite) {
-      return DestSourcePair{MI.getOperand(2), MI.getOperand(0)};
-
-    } else {
-      return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
-    }
-  } else if (MI.isMoveReg() || isORCopyInst(MI)) {
+  if (isReadOrWriteToDSPReg(MI, isDSPControlWrite))
+    return std::nullopt;
+  else if (MI.isMoveReg() || isORCopyInst(MI))
     return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
-  }
+
   return std::nullopt;
 }
 
diff --git a/llvm/test/CodeGen/Mips/madd-msub.ll b/llvm/test/CodeGen/Mips/madd-msub.ll
--- a/llvm/test/CodeGen/Mips/madd-msub.ll
+++ b/llvm/test/CodeGen/Mips/madd-msub.ll
@@ -42,22 +42,22 @@
 ;
 ; 64-LABEL: madd1:
 ; 64: # %bb.0: # %entry
-; 64-NEXT: sll $1, $4, 0
-; 64-NEXT: sll $2, $5, 0
-; 64-NEXT: dmult $2, $1
+; 64-NEXT: sll $4, $4, 0
+; 64-NEXT: sll $5, $5, 0
+; 64-NEXT: dmult $5, $4
 ; 64-NEXT: mflo $1
-; 64-NEXT: sll $2, $6, 0
+; 64-NEXT: sll $6, $6, 0
 ; 64-NEXT: jr $ra
-; 64-NEXT: daddu $2, $1, $2
+; 64-NEXT: daddu $2, $1, $6
 ;
 ; 64R6-LABEL: madd1:
 ; 64R6: # %bb.0: # %entry
-; 64R6-NEXT: sll $1, $4, 0
-; 64R6-NEXT: sll $2, $5, 0
-; 64R6-NEXT: dmul $1, $2, $1
-; 64R6-NEXT: sll $2, $6, 0
+; 64R6-NEXT: sll $4, $4, 0
+; 64R6-NEXT: sll $5, $5, 0
+; 64R6-NEXT: dmul $1, $5, $4
+; 64R6-NEXT: sll $6, $6, 0
 ; 64R6-NEXT: jr $ra
-; 64R6-NEXT: daddu $2, $1, $2
+; 64R6-NEXT: daddu $2, $1, $6
 ;
 ; 16-LABEL: madd1:
 ; 16: # %bb.0: # %entry
@@ -173,18 +173,18 @@
 ;
 ; 64-LABEL: madd3:
 ; 64: # %bb.0: # %entry
-; 64-NEXT: sll $1, $4, 0
-; 64-NEXT: sll $2, $5, 0
-; 64-NEXT: dmult $2, $1
+; 64-NEXT: sll $4, $4, 0
+; 64-NEXT: sll $5, $5, 0
+; 64-NEXT: dmult $5, $4
 ; 64-NEXT: mflo $1
 ; 64-NEXT: jr $ra
 ; 64-NEXT: daddu $2, $1, $6
 ;
 ; 64R6-LABEL: madd3:
 ; 64R6: # %bb.0: # %entry
-; 64R6-NEXT: sll $1, $4, 0
-; 64R6-NEXT: sll $2, $5, 0
-; 64R6-NEXT: dmul $1, $2, $1
+; 64R6-NEXT: sll $4, $4, 0
+; 64R6-NEXT: sll $5, $5, 0
+; 64R6-NEXT: dmul $1, $5, $4
 ; 64R6-NEXT: jr $ra
 ; 64R6-NEXT: daddu $2, $1, $6
 ;
@@ -291,22 +291,22 @@
 ;
 ; 64-LABEL: msub1:
 ; 64: # %bb.0: # %entry
-; 64-NEXT: sll $1, $4, 0
-; 64-NEXT: sll $2, $5, 0
-; 64-NEXT: dmult $2, $1
+; 64-NEXT: sll $4, $4, 0
+; 64-NEXT: sll $5, $5, 0
+; 64-NEXT: dmult $5, $4
 ; 64-NEXT: mflo $1
-; 64-NEXT: sll $2, $6, 0
+; 64-NEXT: sll $6, $6, 0
 ; 64-NEXT: jr $ra
-; 64-NEXT: dsubu $2, $2, $1
+; 64-NEXT: dsubu $2, $6, $1
 ;
 ; 64R6-LABEL: msub1:
 ; 64R6: # %bb.0: # %entry
-; 64R6-NEXT: sll $1, $4, 0
-; 64R6-NEXT: sll $2, $5, 0
-; 64R6-NEXT: dmul $1, $2, $1
-; 64R6-NEXT: sll $2, $6, 0
+; 64R6-NEXT: sll $4, $4, 0
+; 64R6-NEXT: sll $5, $5, 0
+; 64R6-NEXT: dmul $1, $5, $4
+; 64R6-NEXT: sll $6, $6, 0
 ; 64R6-NEXT: jr $ra
-; 64R6-NEXT: dsubu $2, $2, $1
+; 64R6-NEXT: dsubu $2, $6, $1
 ;
 ; 16-LABEL: msub1:
 ; 16: # %bb.0: # %entry
@@ -424,18 +424,18 @@
 ;
 ; 64-LABEL: msub3:
 ; 64: # %bb.0: # %entry
-; 64-NEXT: sll $1, $4, 0
-; 64-NEXT: sll $2, $5, 0
-; 64-NEXT: dmult $2, $1
+; 64-NEXT: sll $4, $4, 0
+; 64-NEXT: sll $5, $5, 0
+; 64-NEXT: dmult $5, $4
 ; 64-NEXT: mflo $1
 ; 64-NEXT: jr $ra
 ; 64-NEXT: dsubu $2, $6, $1
 ;
 ; 64R6-LABEL: msub3:
 ; 64R6: # %bb.0: # %entry
-; 64R6-NEXT: sll $1, $4, 0
-; 64R6-NEXT: sll $2, $5, 0
-; 64R6-NEXT: dmul $1, $2, $1
+; 64R6-NEXT: sll $4, $4, 0
+; 64R6-NEXT: sll $5, $5, 0
+; 64R6-NEXT: dmul $1, $5, $4
 ; 64R6-NEXT: jr $ra
 ; 64R6-NEXT: dsubu $2, $6, $1
 ;
@@ -546,22 +546,22 @@
 ;
 ; 64-LABEL: msub5:
 ; 64: # %bb.0: # %entry
-; 64-NEXT: sll $1, $4, 0
-; 64-NEXT: sll $2, $5, 0
-; 64-NEXT: dmult $2, $1
+; 64-NEXT: sll $4, $4, 0
+; 64-NEXT: sll $5, $5, 0
+; 64-NEXT: dmult $5, $4
 ; 64-NEXT: mflo $1
-; 64-NEXT: sll $2, $6, 0
+; 64-NEXT: sll $6, $6, 0
 ; 64-NEXT: jr $ra
-; 64-NEXT: dsubu $2, $1, $2
+; 64-NEXT: dsubu $2, $1, $6
 ;
 ; 64R6-LABEL: msub5:
 ; 64R6: # %bb.0: # %entry
-; 64R6-NEXT: sll $1, $4, 0
-; 64R6-NEXT: sll $2, $5, 0
-; 64R6-NEXT: dmul $1, $2, $1
-; 64R6-NEXT: sll $2, $6, 0
+; 64R6-NEXT: sll $4, $4, 0
+; 64R6-NEXT: sll $5, $5, 0
+; 64R6-NEXT: dmul $1, $5, $4
+; 64R6-NEXT: sll $6, $6, 0
 ; 64R6-NEXT: jr $ra
-; 64R6-NEXT: dsubu $2, $1, $2
+; 64R6-NEXT: dsubu $2, $1, $6
 ;
 ; 16-LABEL: msub5:
 ; 16: # %bb.0: # %entry
diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
--- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll
@@ -1611,32 +1611,33 @@
 ; CHECK-NEXT: @ Parent Loop BB19_3 Depth=1
 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=2
 ; CHECK-NEXT: vmov r7, s7
-; CHECK-NEXT: vldrw.u32 q2, [r9, #16]
+; CHECK-NEXT: vldr s0, [r1, #12]
 ; CHECK-NEXT: vmov r11, s6
 ; CHECK-NEXT: vldrw.u32 q1, [r9, #112]
-; CHECK-NEXT: vmov r4, s1
-; CHECK-NEXT: vldr s1, [r1, #12]
 ; CHECK-NEXT: vmov r3, s3
 ; CHECK-NEXT: vldr s3, [r1, #8]
 ; CHECK-NEXT: vstrw.32 q1, [sp, #32] @ 16-byte Spill
 ; CHECK-NEXT: vldrw.u32 q1, [r9]
-; CHECK-NEXT: vmov r8, s1
+; CHECK-NEXT: vmov r8, s0
+; CHECK-NEXT: vldrw.u32 q2, [r9, #16]
 ; CHECK-NEXT: ldr r6, [r1, #4]
 ; CHECK-NEXT: vldrw.u32 q7, [r9, #32]
 ; CHECK-NEXT: vmul.f32 q1, q1, r8
 ; CHECK-NEXT: vmov r0, s3
-; CHECK-NEXT: vldrw.u32 q3, [r9, #48]
 ; CHECK-NEXT: vfma.f32 q1, q2, r0
+; CHECK-NEXT: vldrw.u32 q3, [r9, #48]
 ; CHECK-NEXT: ldr r0, [r1], #16
 ; CHECK-NEXT: vfma.f32 q1, q7, r6
+; CHECK-NEXT: vmov r4, s1
 ; CHECK-NEXT: vldrw.u32 q6, [r9, #64]
-; CHECK-NEXT: vmov.f32 s2, s1
+; CHECK-NEXT: vmov.f32 s1, s0
 ; CHECK-NEXT: vfma.f32 q1, q3, r0
+; CHECK-NEXT: vmov.f32 s2, s0
 ; CHECK-NEXT: vldrw.u32 q5, [r9, #80]
 ; CHECK-NEXT: vfma.f32 q1, q6, r4
 ; CHECK-NEXT: vldrw.u32 q4, [r9, #96]
-; CHECK-NEXT: vldrw.u32 q2, [sp, #32] @ 16-byte Reload
 ; CHECK-NEXT: vfma.f32 q1, q5, r3
+; CHECK-NEXT: vldrw.u32 q2, [sp, #32] @ 16-byte Reload
 ; CHECK-NEXT: vfma.f32 q1, q4, r7
 ; CHECK-NEXT: vfma.f32 q1, q2, r11
 ; CHECK-NEXT: vstrb.8 q1, [r5], #16
diff --git a/llvm/test/CodeGen/X86/GlobalISel/add-ext.ll b/llvm/test/CodeGen/X86/GlobalISel/add-ext.ll
--- a/llvm/test/CodeGen/X86/GlobalISel/add-ext.ll
+++ b/llvm/test/CodeGen/X86/GlobalISel/add-ext.ll
@@ -205,8 +205,8 @@
 ; CHECK-NEXT: addq %rdi, %rcx
 ; CHECK-NEXT: movl (%rcx), %ecx
 ; CHECK-NEXT: addl (%rax), %ecx
-; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: imulq $4, %rax, %rax
+; CHECK-NEXT: movl %esi, %esi
+; CHECK-NEXT: imulq $4, %rsi, %rax
 ; CHECK-NEXT: addq %rdi, %rax
 ; CHECK-NEXT: movl %ecx, (%rax)
 ; CHECK-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/dagcombine-cse.ll b/llvm/test/CodeGen/X86/dagcombine-cse.ll
--- a/llvm/test/CodeGen/X86/dagcombine-cse.ll
+++ b/llvm/test/CodeGen/X86/dagcombine-cse.ll
@@ -106,24 +106,24 @@
 ;
 ; X64-LABEL: square_high:
 ; X64: ## %bb.0: ## %entry
-; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: movl %esi, %esi
+; X64-NEXT: movq %rsi, %rax
 ; X64-NEXT: mulq %rdi
-; X64-NEXT: movq %rdx, %rsi
+; X64-NEXT: movq %rdx, %rcx
 ; X64-NEXT: movq %rax, %r8
 ; X64-NEXT: movq %rdi, %rax
 ; X64-NEXT: mulq %rdi
 ; X64-NEXT: addq %r8, %rdx
-; X64-NEXT: movq %rsi, %rax
+; X64-NEXT: movq %rcx, %rax
 ; X64-NEXT: adcq $0, %rax
 ; X64-NEXT: addq %rdx, %r8
-; X64-NEXT: adcq %rsi, %rax
-; X64-NEXT: imulq %rcx, %rcx
-; X64-NEXT: addq %rax, %rcx
-; X64-NEXT: shrdq $32, %rcx, %r8
-; X64-NEXT: shrq $32, %rcx
+; X64-NEXT: adcq %rcx, %rax
+; X64-NEXT: imulq %rsi, %rsi
+; X64-NEXT: addq %rax, %rsi
+; X64-NEXT: shrdq $32, %rsi, %r8
+; X64-NEXT: shrq $32, %rsi
 ; X64-NEXT: movq %r8, %rax
-; X64-NEXT: movq %rcx, %rdx
+; X64-NEXT: movq %rsi, %rdx
 ; X64-NEXT: retq
 entry:
   %conv = zext i96 %x to i192
diff --git a/llvm/test/CodeGen/X86/fold-and-shift-x86_64.ll b/llvm/test/CodeGen/X86/fold-and-shift-x86_64.ll
--- a/llvm/test/CodeGen/X86/fold-and-shift-x86_64.ll
+++ b/llvm/test/CodeGen/X86/fold-and-shift-x86_64.ll
@@ -34,8 +34,8 @@
 
 define i8 @t3(ptr %X, i64 %i) {
 ; CHECK-LABEL: t3:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: movzbl (%rdi,%rax,4), %eax
+; CHECK-NEXT: movl %esi, %esi
+; CHECK-NEXT: movzbl (%rdi,%rsi,4), %eax
 ; CHECK-NEXT: retq
 entry:
diff --git a/llvm/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-lowhigh.ll b/llvm/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-lowhigh.ll
--- a/llvm/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-lowhigh.ll
+++ b/llvm/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-lowhigh.ll
@@ -78,18 +78,18 @@
 define i64 @out64_constmask(i64 %x, i64 %y) {
 ; CHECK-NOBMI-LABEL: out64_constmask:
 ; CHECK-NOBMI: # %bb.0:
-; CHECK-NOBMI-NEXT: movl %edi, %ecx
+; CHECK-NOBMI-NEXT: movl %edi, %edi
 ; CHECK-NOBMI-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
 ; CHECK-NOBMI-NEXT: andq %rsi, %rax
-; CHECK-NOBMI-NEXT: orq %rcx, %rax
+; CHECK-NOBMI-NEXT: orq %rdi, %rax
 ; CHECK-NOBMI-NEXT: retq
 ;
 ; CHECK-BMI-LABEL: out64_constmask:
 ; CHECK-BMI: # %bb.0:
-; CHECK-BMI-NEXT: movl %edi, %ecx
+; CHECK-BMI-NEXT: movl %edi, %edi
 ; CHECK-BMI-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
 ; CHECK-BMI-NEXT: andq %rsi, %rax
-; CHECK-BMI-NEXT: orq %rcx, %rax
+; CHECK-BMI-NEXT: orq %rdi, %rax
 ; CHECK-BMI-NEXT: retq
 %mx = and i64 %x, 4294967295
 %my = and i64 %y, -4294967296
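
---

Reviewer note: the two hooks introduced above work together. isCopyInstr()/isFullCopyInstr() let the register allocator recognize target-specific copy instructions, and getLiveRangeSplitOpcode() lets a target substitute its own opcode when SplitKit inserts copies during live range splitting. A minimal sketch of an override, using a hypothetical target (the MyTarget names, the SPECIAL_COPY pseudo, and the register-class test are illustrative only, not part of this patch):

  unsigned MyTargetInstrInfo::getLiveRangeSplitOpcode(
      Register Reg, MachineRegisterInfo &MRI) const {
    // Use a target pseudo for registers whose copies need extra bookkeeping
    // during register allocation; fall back to a plain COPY otherwise.
    if (Reg.isVirtual() &&
        MRI.getRegClass(Reg) == &MyTarget::SpecialRegClass)
      return MyTarget::SPECIAL_COPY;
    return TargetOpcode::COPY;
  }

With such an override, SplitEditor::buildCopy() emits SPECIAL_COPY rather than TargetOpcode::COPY for registers in SpecialRegClass, and because the passes changed here query TII.isCopyInstr()/isFullCopyInstr() instead of MI.isCopy()/isFullCopy(), the allocator continues to treat the pseudo as a copy.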