Index: include/llvm/Target/TargetSubtargetInfo.h =================================================================== --- include/llvm/Target/TargetSubtargetInfo.h +++ include/llvm/Target/TargetSubtargetInfo.h @@ -205,6 +205,9 @@ /// Enable tracking of subregister liveness in register allocator. virtual bool enableSubRegLiveness() const { return false; } + + /// Enable copy propagation for arbitrary register uses. + virtual bool enableFullCopyPropagation() const { return false; } }; } // End llvm namespace Index: lib/CodeGen/MachineCopyPropagation.cpp =================================================================== --- lib/CodeGen/MachineCopyPropagation.cpp +++ lib/CodeGen/MachineCopyPropagation.cpp @@ -31,12 +31,18 @@ STATISTIC(NumDeletes, "Number of dead copies deleted"); +static cl::opt +EnableFullCopyPropagation("full-copy-propagation", + cl::desc("Replace uses with earlier copy sources where possible"), + cl::init(true)); + namespace { typedef SmallVector RegList; typedef DenseMap SourceMap; typedef DenseMap Reg2MIMap; class MachineCopyPropagation : public MachineFunctionPass { + const MachineFunction *MF; const TargetRegisterInfo *TRI; const TargetInstrInfo *TII; const MachineRegisterInfo *MRI; @@ -62,6 +68,7 @@ private: void ClobberRegister(unsigned Reg); void CopyPropagateBlock(MachineBasicBlock &MBB); + bool rename(MachineInstr &MI, unsigned FromReg, unsigned ToReg) const; bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def); /// Candidates for deletion. @@ -73,6 +80,7 @@ /// Src -> Def map SourceMap SrcMap; bool Changed; + bool FullCopyPropagation; }; } char MachineCopyPropagation::ID = 0; @@ -177,6 +185,38 @@ return true; } +/// Change register use operands reading \p FromReg to \p ToReg where possible. +bool MachineCopyPropagation::rename(MachineInstr &MI, unsigned FromReg, + unsigned ToReg) const { + assert(TargetRegisterInfo::isPhysicalRegister(FromReg)); + assert(TargetRegisterInfo::isPhysicalRegister(ToReg)); + // Check that it is save to rename on all operands. + const MCInstrDesc &Desc = MI.getDesc(); + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || !MO.isUse() || MO.getReg() != FromReg) + continue; + unsigned OpNum = MI.getOperandNo(&MO); + // Operands not mention in the MCInstrDesc are probably due to ABI + // constraints and should not be touched. + if (OpNum > Desc.getNumOperands() || + Desc.getOperandConstraint(OpNum, MCOI::TIED_TO) != -1) + return false; + + const TargetRegisterClass *RC = TII->getRegClass(Desc, OpNum, TRI, *MF); + if (RC == nullptr || !RC->contains(ToReg)) + return false; + } + + // Rename register on use operands. + for (MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || !MO.isUse() || MO.getReg() != FromReg) + continue; + assert(MO.getSubReg() == 0); + MO.setReg(ToReg); + } + return true; +} + void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { DEBUG(dbgs() << "MCP: CopyPropagateBlock " << MBB.getName() << "\n"); @@ -254,7 +294,7 @@ // Not a copy. SmallVector Defs; const MachineOperand *RegMask = nullptr; - for (const MachineOperand &MO : MI->operands()) { + for (MachineOperand &MO : MI->operands()) { if (MO.isRegMask()) RegMask = &MO; if (!MO.isReg()) @@ -271,6 +311,35 @@ continue; } + // Full copy propagation. Translate + // x = COPY y + // use x + // into + // x = COPY y + // use y + // This reduces dependencies possibly helping post-ra schedulers and + // out of order execution engines. + if (FullCopyPropagation && MO.readsReg() && !MRI->isReserved(Reg)) { + Reg2MIMap::iterator CI = AvailCopyMap.find(Reg); + if (CI != AvailCopyMap.end()) { + MachineInstr &Copy = *CI->second; + assert(Copy.isCopy()); + unsigned Def = Copy.getOperand(0).getReg(); + unsigned Src = Copy.getOperand(1).getReg(); + assert(!MRI->isReserved(Def)); + if (Reg == Def && !MRI->isReserved(Src)) { + bool Renamed = rename(*MI, Def, Src); + if (Renamed) { + // Clear potential kill flags on the way. + for (MachineInstr &MI : + make_range(Copy.getIterator(), MI->getIterator())) + MI.clearRegisterKills(Src, TRI); + Reg = Src; + } + } + } + } + // If 'Reg' is defined by a copy, the copy is no longer a candidate // for elimination. for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { @@ -359,9 +428,13 @@ Changed = false; - TRI = MF.getSubtarget().getRegisterInfo(); - TII = MF.getSubtarget().getInstrInfo(); + this->MF = &MF; + const TargetSubtargetInfo &STI = MF.getSubtarget(); + TRI = STI.getRegisterInfo(); + TII = STI.getInstrInfo(); MRI = &MF.getRegInfo(); + FullCopyPropagation = EnableFullCopyPropagation & + STI.enableFullCopyPropagation(); for (MachineBasicBlock &MBB : MF) CopyPropagateBlock(MBB); Index: lib/Target/AArch64/AArch64Subtarget.h =================================================================== --- lib/Target/AArch64/AArch64Subtarget.h +++ lib/Target/AArch64/AArch64Subtarget.h @@ -248,6 +248,8 @@ bool enableEarlyIfConversion() const override; + bool enableFullCopyPropagation() const override { return true; } + std::unique_ptr getCustomPBQPConstraints() const override; }; } // End llvm namespace Index: lib/Target/ARM/ARMSubtarget.h =================================================================== --- lib/Target/ARM/ARMSubtarget.h +++ lib/Target/ARM/ARMSubtarget.h @@ -495,6 +495,8 @@ // enableAtomicExpand- True if we need to expand our atomics. bool enableAtomicExpand() const override; + bool enableFullCopyPropagation() const override { return true; } + /// getInstrItins - Return the instruction itineraries based on subtarget /// selection. const InstrItineraryData *getInstrItineraryData() const override { Index: lib/Target/X86/X86Subtarget.h =================================================================== --- lib/Target/X86/X86Subtarget.h +++ lib/Target/X86/X86Subtarget.h @@ -592,6 +592,8 @@ bool enableEarlyIfConversion() const override; + bool enableFullCopyPropagation() const override { return true; } + /// Return the instruction itineraries based on the subtarget selection. const InstrItineraryData *getInstrItineraryData() const override { return &InstrItins; Index: test/CodeGen/AArch64/f16-instructions.ll =================================================================== --- test/CodeGen/AArch64/f16-instructions.ll +++ test/CodeGen/AArch64/f16-instructions.ll @@ -352,7 +352,7 @@ ; CHECK-LABEL: test_phi: ; CHECK: mov x[[PTR:[0-9]+]], x0 -; CHECK: ldr h[[AB:[0-9]+]], [x[[PTR]]] +; CHECK: ldr h[[AB:[0-9]+]], [x0] ; CHECK: [[LOOP:LBB[0-9_]+]]: ; CHECK: mov.16b v[[R:[0-9]+]], v[[AB]] ; CHECK: ldr h[[AB]], [x[[PTR]]] Index: test/CodeGen/AArch64/flags-multiuse.ll =================================================================== --- test/CodeGen/AArch64/flags-multiuse.ll +++ test/CodeGen/AArch64/flags-multiuse.ll @@ -12,7 +12,9 @@ ; CHECK-LABEL: test_multiflag: %test = icmp ne i32 %n, %m -; CHECK: cmp [[LHS:w[0-9]+]], [[RHS:w[0-9]+]] +; CHECK: mov [[RHSCOPY:w[0-9]+]], [[RHS:w[0-9]+]] +; CHECK: mov [[LHSCOPY:w[0-9]+]], [[LHS:w[0-9]+]] +; CHECK: cmp [[LHS]], [[RHS]] %val = zext i1 %test to i32 ; CHECK: cset {{[xw][0-9]+}}, ne @@ -25,7 +27,7 @@ ; Currently, the comparison is emitted again. An MSR/MRS pair would also be ; acceptable, but assuming the call preserves NZCV is not. br i1 %test, label %iftrue, label %iffalse -; CHECK: cmp [[LHS]], [[RHS]] +; CHECK: cmp [[LHSCOPY]], [[RHSCOPY]] ; CHECK: b.eq iftrue: Index: test/CodeGen/AArch64/merge-store.ll =================================================================== --- test/CodeGen/AArch64/merge-store.ll +++ test/CodeGen/AArch64/merge-store.ll @@ -59,10 +59,9 @@ define void @test(%struct1* %fde, i32 %fd, void (i32, i32, i8*)* %func, i8* %arg) { ;CHECK-LABEL: test entry: -;A53: mov [[DATA:w[0-9]+]], w1 ;A53: str q{{[0-9]+}}, {{.*}} ;A53: str q{{[0-9]+}}, {{.*}} -;A53: str [[DATA]], {{.*}} +;A53: str w1, {{.*}} %0 = bitcast %struct1* %fde to i8* tail call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 40, i32 8, i1 false) Index: test/CodeGen/AArch64/neg-imm.ll =================================================================== --- test/CodeGen/AArch64/neg-imm.ll +++ test/CodeGen/AArch64/neg-imm.ll @@ -7,8 +7,8 @@ define void @test(i32 %px) { ; CHECK_LABEL: test: ; CHECK_LABEL: %entry -; CHECK: subs -; CHECK-NEXT: csel +; CHECK: subs [[REG0:w[0-9]+]], +; CHECK: csel {{w[0-9]+}}, wzr, [[REG0]] entry: %sub = add nsw i32 %px, -1 %cmp = icmp slt i32 %px, 1 Index: test/CodeGen/ARM/atomic-op.ll =================================================================== --- test/CodeGen/ARM/atomic-op.ll +++ test/CodeGen/ARM/atomic-op.ll @@ -249,7 +249,8 @@ %pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst monotonic %oldval = extractvalue { i32, i1 } %pair, 0 -; CHECK-ARMV7: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]] +; CHECK-ARMV7: mov r[[ADDR:[0-9]+]], r0 +; CHECK-ARMV7: ldrex [[OLDVAL:r[0-9]+]], [r0] ; CHECK-ARMV7: cmp [[OLDVAL]], r1 ; CHECK-ARMV7: bne [[FAIL_BB:\.?LBB[0-9]+_[0-9]+]] ; CHECK-ARMV7: dmb ish @@ -267,7 +268,8 @@ ; CHECK-ARMV7: dmb ish ; CHECK-ARMV7: bx lr -; CHECK-T2: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]] +; CHECK-T2: mov r[[ADDR:[0-9]+]], r0 +; CHECK-T2: ldrex [[OLDVAL:r[0-9]+]], [r0] ; CHECK-T2: cmp [[OLDVAL]], r1 ; CHECK-T2: bne [[FAIL_BB:\.?LBB.*]] ; CHECK-T2: dmb ish Index: test/CodeGen/ARM/swifterror.ll =================================================================== --- test/CodeGen/ARM/swifterror.ll +++ test/CodeGen/ARM/swifterror.ll @@ -179,7 +179,7 @@ ; CHECK-APPLE: beq ; CHECK-APPLE: mov r0, #16 ; CHECK-APPLE: malloc -; CHECK-APPLE: strb r{{.*}}, [{{.*}}[[ID]], #8] +; CHECK-APPLE: strb r{{.*}}, [r0, #8] ; CHECK-APPLE: ble ; CHECK-APPLE: mov r6, [[ID]] Index: test/CodeGen/Thumb/thumb-shrink-wrapping.ll =================================================================== --- test/CodeGen/Thumb/thumb-shrink-wrapping.ll +++ test/CodeGen/Thumb/thumb-shrink-wrapping.ll @@ -606,7 +606,7 @@ define i32 @b_to_bx(i32 %value) { ; CHECK-LABEL: b_to_bx: ; DISABLE: push {r7, lr} -; CHECK: cmp r1, #49 +; CHECK: cmp r0, #49 ; CHECK-NEXT: bgt [[ELSE_LABEL:LBB[0-9_]+]] ; ENABLE: push {r7, lr} Index: test/CodeGen/X86/avx512-bugfix-25270.ll =================================================================== --- test/CodeGen/X86/avx512-bugfix-25270.ll +++ test/CodeGen/X86/avx512-bugfix-25270.ll @@ -9,10 +9,10 @@ ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: subq $112, %rsp ; CHECK-NEXT: movq %rdi, %rbx -; CHECK-NEXT: vmovdqu32 (%rbx), %zmm0 +; CHECK-NEXT: vmovdqu32 (%rdi), %zmm0 ; CHECK-NEXT: vmovups %zmm0, (%rsp) ## 64-byte Spill ; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm1 -; CHECK-NEXT: vmovdqa32 %zmm1, (%rbx) +; CHECK-NEXT: vmovdqa32 %zmm1, (%rdi) ; CHECK-NEXT: callq _Print__512 ; CHECK-NEXT: vmovups (%rsp), %zmm0 ## 64-byte Reload ; CHECK-NEXT: callq _Print__512 Index: test/CodeGen/X86/avx512-calling-conv.ll =================================================================== --- test/CodeGen/X86/avx512-calling-conv.ll +++ test/CodeGen/X86/avx512-calling-conv.ll @@ -467,7 +467,7 @@ ; KNL_X32-NEXT: movl %edi, (%esp) ; KNL_X32-NEXT: calll _test11 ; KNL_X32-NEXT: movl %eax, %ebx -; KNL_X32-NEXT: movzbl %bl, %eax +; KNL_X32-NEXT: movzbl %al, %eax ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp) ; KNL_X32-NEXT: movl %esi, {{[0-9]+}}(%esp) ; KNL_X32-NEXT: movl %edi, (%esp) Index: test/CodeGen/X86/fp128-i128.ll =================================================================== --- test/CodeGen/X86/fp128-i128.ll +++ test/CodeGen/X86/fp128-i128.ll @@ -196,7 +196,7 @@ ret fp128 %add ; CHECK-LABEL: TestI128_4: ; CHECK: movaps %xmm0, %xmm1 -; CHECK-NEXT: movaps %xmm1, 16(%rsp) +; CHECK-NEXT: movaps %xmm0, 16(%rsp) ; CHECK-NEXT: movq 24(%rsp), %rax ; CHECK-NEXT: movq %rax, 8(%rsp) ; CHECK-NEXT: movq $0, (%rsp) @@ -240,7 +240,7 @@ ret fp128 %add ; CHECK-LABEL: acosl: ; CHECK: movaps %xmm0, %xmm1 -; CHECK-NEXT: movaps %xmm1, 16(%rsp) +; CHECK-NEXT: movaps %xmm0, 16(%rsp) ; CHECK-NEXT: movq 24(%rsp), %rax ; CHECK-NEXT: movq %rax, 8(%rsp) ; CHECK-NEXT: movq $0, (%rsp) Index: test/CodeGen/X86/localescape.ll =================================================================== --- test/CodeGen/X86/localescape.ll +++ test/CodeGen/X86/localescape.ll @@ -27,7 +27,7 @@ ; X64-LABEL: print_framealloc_from_fp: ; X64: movq %rcx, %[[parent_fp:[a-z]+]] -; X64: movl .Lalloc_func$frame_escape_0(%[[parent_fp]]), %edx +; X64: movl .Lalloc_func$frame_escape_0(%rcx), %edx ; X64: leaq {{.*}}(%rip), %[[str:[a-z]+]] ; X64: movq %[[str]], %rcx ; X64: callq printf Index: test/CodeGen/X86/mul128.ll =================================================================== --- test/CodeGen/X86/mul128.ll +++ test/CodeGen/X86/mul128.ll @@ -7,7 +7,7 @@ ; X64-NEXT: movq %rdx, %r8 ; X64-NEXT: imulq %rdi, %rcx ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %r8 +; X64-NEXT: mulq %rdx ; X64-NEXT: addq %rcx, %rdx ; X64-NEXT: imulq %r8, %rsi ; X64-NEXT: addq %rsi, %rdx Index: test/CodeGen/X86/pmul.ll =================================================================== --- test/CodeGen/X86/pmul.ll +++ test/CodeGen/X86/pmul.ll @@ -830,7 +830,7 @@ ; SSE41: # BB#0: # %entry ; SSE41-NEXT: movdqa %xmm1, %xmm4 ; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: pmovsxbw %xmm1, %xmm0 +; SSE41-NEXT: pmovsxbw %xmm0, %xmm0 ; SSE41-NEXT: pmovsxbw {{.*}}(%rip), %xmm6 ; SSE41-NEXT: pmullw %xmm6, %xmm0 ; SSE41-NEXT: movdqa {{.*#+}} xmm7 = [255,255,255,255,255,255,255,255] @@ -1016,7 +1016,7 @@ ; SSE41-NEXT: movdqa %xmm1, %xmm8 ; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: pmovsxbw %xmm4, %xmm9 -; SSE41-NEXT: pmovsxbw %xmm1, %xmm0 +; SSE41-NEXT: pmovsxbw %xmm0, %xmm0 ; SSE41-NEXT: pmullw %xmm9, %xmm0 ; SSE41-NEXT: movdqa {{.*#+}} xmm9 = [255,255,255,255,255,255,255,255] ; SSE41-NEXT: pand %xmm9, %xmm0 Index: test/CodeGen/X86/pr11415.ll =================================================================== --- test/CodeGen/X86/pr11415.ll +++ test/CodeGen/X86/pr11415.ll @@ -6,8 +6,7 @@ ; CHECK: #APP ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: movq %rcx, %rax -; CHECK-NEXT: movq %rax, -8(%rsp) +; CHECK-NEXT: movq %rcx, -8(%rsp) ; CHECK-NEXT: movq -8(%rsp), %rdx ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP Index: test/CodeGen/X86/shrink-wrap-chkstk.ll =================================================================== --- test/CodeGen/X86/shrink-wrap-chkstk.ll +++ test/CodeGen/X86/shrink-wrap-chkstk.ll @@ -61,7 +61,7 @@ ; CHECK-LABEL: @use_eax_before_prologue@8: # @use_eax_before_prologue ; CHECK: movl %ecx, %eax -; CHECK: cmpl %edx, %eax +; CHECK: cmpl %edx, %ecx ; CHECK: jge LBB1_2 ; CHECK: pushl %eax ; CHECK: movl $4092, %eax Index: test/CodeGen/X86/vec_int_to_fp.ll =================================================================== --- test/CodeGen/X86/vec_int_to_fp.ll +++ test/CodeGen/X86/vec_int_to_fp.ll @@ -1136,7 +1136,7 @@ ; SSE-LABEL: uitofp_2i64_to_4f32: ; SSE: # BB#0: ; SSE-NEXT: movdqa %xmm0, %xmm1 -; SSE-NEXT: movd %xmm1, %rax +; SSE-NEXT: movd %xmm0, %rax ; SSE-NEXT: movl %eax, %ecx ; SSE-NEXT: andl $1, %ecx ; SSE-NEXT: testq %rax, %rax Index: test/CodeGen/X86/vec_minmax_sint.ll =================================================================== --- test/CodeGen/X86/vec_minmax_sint.ll +++ test/CodeGen/X86/vec_minmax_sint.ll @@ -437,7 +437,7 @@ ; SSE42: # BB#0: ; SSE42-NEXT: movdqa %xmm0, %xmm2 ; SSE42-NEXT: movdqa %xmm1, %xmm3 -; SSE42-NEXT: pcmpgtq %xmm2, %xmm3 +; SSE42-NEXT: pcmpgtq %xmm0, %xmm3 ; SSE42-NEXT: pcmpeqd %xmm0, %xmm0 ; SSE42-NEXT: pxor %xmm3, %xmm0 ; SSE42-NEXT: blendvpd %xmm2, %xmm1 Index: test/CodeGen/X86/vector-blend.ll =================================================================== --- test/CodeGen/X86/vector-blend.ll +++ test/CodeGen/X86/vector-blend.ll @@ -1031,7 +1031,7 @@ ; SSE41-NEXT: psrld $31, %xmm1 ; SSE41-NEXT: pslld $31, %xmm1 ; SSE41-NEXT: pxor %xmm3, %xmm3 -; SSE41-NEXT: psubd %xmm2, %xmm3 +; SSE41-NEXT: psubd %xmm0, %xmm3 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: blendvps %xmm2, %xmm3 ; SSE41-NEXT: movaps %xmm3, %xmm0 Index: test/CodeGen/X86/vector-sext.ll =================================================================== --- test/CodeGen/X86/vector-sext.ll +++ test/CodeGen/X86/vector-sext.ll @@ -144,7 +144,7 @@ ; SSSE3-LABEL: sext_16i8_to_8i32: ; SSSE3: # BB#0: # %entry ; SSSE3-NEXT: movdqa %xmm0, %xmm1 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; SSSE3-NEXT: psrad $24, %xmm0 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,4,u,u,u,5,u,u,u,6,u,u,u,7] @@ -301,7 +301,7 @@ ; SSE2-LABEL: sext_16i8_to_8i64: ; SSE2: # BB#0: # %entry ; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: psrad $31, %xmm2 @@ -333,7 +333,7 @@ ; SSSE3-LABEL: sext_16i8_to_8i64: ; SSSE3: # BB#0: # %entry ; SSSE3-NEXT: movdqa %xmm0, %xmm1 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: psrad $31, %xmm2 Index: test/CodeGen/X86/vector-zext.ll =================================================================== --- test/CodeGen/X86/vector-zext.ll +++ test/CodeGen/X86/vector-zext.ll @@ -56,7 +56,7 @@ ; SSE41: # BB#0: # %entry ; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: pxor %xmm2, %xmm2 -; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] ; SSE41-NEXT: retq ; @@ -292,7 +292,7 @@ ; SSE41: # BB#0: # %entry ; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: pxor %xmm2, %xmm2 -; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero +; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] ; SSE41-NEXT: retq ; @@ -449,7 +449,7 @@ ; SSE41: # BB#0: # %entry ; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: pxor %xmm2, %xmm2 -; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero +; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] ; SSE41-NEXT: retq ; @@ -1145,7 +1145,7 @@ ; SSE41: # BB#0: # %entry ; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: pxor %xmm2, %xmm2 -; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero +; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] ; SSE41-NEXT: retq ; @@ -1193,7 +1193,7 @@ ; SSE41: # BB#0: # %entry ; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: pxor %xmm2, %xmm2 -; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero +; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero ; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] ; SSE41-NEXT: retq ; Index: test/CodeGen/X86/vselect-minmax.ll =================================================================== --- test/CodeGen/X86/vselect-minmax.ll +++ test/CodeGen/X86/vselect-minmax.ll @@ -3487,12 +3487,12 @@ ; SSE2-NEXT: movdqa %xmm2, %xmm9 ; SSE2-NEXT: movdqa %xmm0, %xmm10 ; SSE2-NEXT: movdqa %xmm7, %xmm12 -; SSE2-NEXT: pcmpgtb %xmm8, %xmm12 +; SSE2-NEXT: pcmpgtb %xmm3, %xmm12 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 ; SSE2-NEXT: movdqa %xmm12, %xmm3 ; SSE2-NEXT: pxor %xmm0, %xmm3 ; SSE2-NEXT: movdqa %xmm6, %xmm13 -; SSE2-NEXT: pcmpgtb %xmm9, %xmm13 +; SSE2-NEXT: pcmpgtb %xmm2, %xmm13 ; SSE2-NEXT: movdqa %xmm13, %xmm2 ; SSE2-NEXT: pxor %xmm0, %xmm2 ; SSE2-NEXT: movdqa %xmm5, %xmm14 @@ -4368,12 +4368,12 @@ ; SSE2-NEXT: movdqa %xmm2, %xmm9 ; SSE2-NEXT: movdqa %xmm0, %xmm10 ; SSE2-NEXT: movdqa %xmm7, %xmm12 -; SSE2-NEXT: pcmpgtd %xmm8, %xmm12 +; SSE2-NEXT: pcmpgtd %xmm3, %xmm12 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 ; SSE2-NEXT: movdqa %xmm12, %xmm3 ; SSE2-NEXT: pxor %xmm0, %xmm3 ; SSE2-NEXT: movdqa %xmm6, %xmm13 -; SSE2-NEXT: pcmpgtd %xmm9, %xmm13 +; SSE2-NEXT: pcmpgtd %xmm2, %xmm13 ; SSE2-NEXT: movdqa %xmm13, %xmm2 ; SSE2-NEXT: pxor %xmm0, %xmm2 ; SSE2-NEXT: movdqa %xmm5, %xmm14 @@ -4890,7 +4890,7 @@ ; SSE2-LABEL: test122: ; SSE2: # BB#0: # %entry ; SSE2-NEXT: movdqa %xmm7, %xmm8 -; SSE2-NEXT: movdqa %xmm8, -{{[0-9]+}}(%rsp) # 16-byte Spill +; SSE2-NEXT: movdqa %xmm7, -{{[0-9]+}}(%rsp) # 16-byte Spill ; SSE2-NEXT: movdqa %xmm3, %xmm7 ; SSE2-NEXT: movdqa %xmm2, %xmm3 ; SSE2-NEXT: movdqa %xmm1, %xmm2 @@ -5164,7 +5164,7 @@ ; SSE2-LABEL: test124: ; SSE2: # BB#0: # %entry ; SSE2-NEXT: movdqa %xmm7, %xmm11 -; SSE2-NEXT: movdqa %xmm11, -{{[0-9]+}}(%rsp) # 16-byte Spill +; SSE2-NEXT: movdqa %xmm7, -{{[0-9]+}}(%rsp) # 16-byte Spill ; SSE2-NEXT: movdqa %xmm3, %xmm7 ; SSE2-NEXT: movdqa %xmm2, %xmm3 ; SSE2-NEXT: movdqa %xmm1, %xmm2 @@ -5467,7 +5467,7 @@ ; SSE2-LABEL: test126: ; SSE2: # BB#0: # %entry ; SSE2-NEXT: movdqa %xmm7, %xmm8 -; SSE2-NEXT: movdqa %xmm8, -{{[0-9]+}}(%rsp) # 16-byte Spill +; SSE2-NEXT: movdqa %xmm7, -{{[0-9]+}}(%rsp) # 16-byte Spill ; SSE2-NEXT: movdqa %xmm3, %xmm7 ; SSE2-NEXT: movdqa %xmm2, %xmm3 ; SSE2-NEXT: movdqa %xmm1, %xmm2 @@ -5795,7 +5795,7 @@ ; SSE2-LABEL: test128: ; SSE2: # BB#0: # %entry ; SSE2-NEXT: movdqa %xmm7, %xmm11 -; SSE2-NEXT: movdqa %xmm11, -{{[0-9]+}}(%rsp) # 16-byte Spill +; SSE2-NEXT: movdqa %xmm7, -{{[0-9]+}}(%rsp) # 16-byte Spill ; SSE2-NEXT: movdqa %xmm3, %xmm7 ; SSE2-NEXT: movdqa %xmm2, %xmm3 ; SSE2-NEXT: movdqa %xmm1, %xmm2 @@ -6190,7 +6190,7 @@ ; SSE2-NEXT: movdqa %xmm12, %xmm9 ; SSE2-NEXT: pxor %xmm0, %xmm9 ; SSE2-NEXT: movdqa %xmm6, %xmm13 -; SSE2-NEXT: pcmpgtb %xmm8, %xmm13 +; SSE2-NEXT: pcmpgtb %xmm2, %xmm13 ; SSE2-NEXT: movdqa %xmm13, %xmm2 ; SSE2-NEXT: pxor %xmm0, %xmm2 ; SSE2-NEXT: movdqa %xmm5, %xmm14 @@ -7084,7 +7084,7 @@ ; SSE2-NEXT: movdqa %xmm12, %xmm9 ; SSE2-NEXT: pxor %xmm0, %xmm9 ; SSE2-NEXT: movdqa %xmm6, %xmm13 -; SSE2-NEXT: pcmpgtd %xmm8, %xmm13 +; SSE2-NEXT: pcmpgtd %xmm2, %xmm13 ; SSE2-NEXT: movdqa %xmm13, %xmm2 ; SSE2-NEXT: pxor %xmm0, %xmm2 ; SSE2-NEXT: movdqa %xmm5, %xmm14 @@ -7610,7 +7610,7 @@ ; SSE2-LABEL: test154: ; SSE2: # BB#0: # %entry ; SSE2-NEXT: movdqa %xmm7, %xmm8 -; SSE2-NEXT: movdqa %xmm8, -{{[0-9]+}}(%rsp) # 16-byte Spill +; SSE2-NEXT: movdqa %xmm7, -{{[0-9]+}}(%rsp) # 16-byte Spill ; SSE2-NEXT: movdqa %xmm3, %xmm7 ; SSE2-NEXT: movdqa %xmm2, %xmm3 ; SSE2-NEXT: movdqa %xmm1, %xmm2 @@ -7882,7 +7882,7 @@ ; SSE2-LABEL: test156: ; SSE2: # BB#0: # %entry ; SSE2-NEXT: movdqa %xmm7, %xmm11 -; SSE2-NEXT: movdqa %xmm11, -{{[0-9]+}}(%rsp) # 16-byte Spill +; SSE2-NEXT: movdqa %xmm7, -{{[0-9]+}}(%rsp) # 16-byte Spill ; SSE2-NEXT: movdqa %xmm3, %xmm7 ; SSE2-NEXT: movdqa %xmm2, %xmm3 ; SSE2-NEXT: movdqa %xmm1, %xmm2 @@ -8183,7 +8183,7 @@ ; SSE2-LABEL: test158: ; SSE2: # BB#0: # %entry ; SSE2-NEXT: movdqa %xmm7, %xmm8 -; SSE2-NEXT: movdqa %xmm8, -{{[0-9]+}}(%rsp) # 16-byte Spill +; SSE2-NEXT: movdqa %xmm7, -{{[0-9]+}}(%rsp) # 16-byte Spill ; SSE2-NEXT: movdqa %xmm3, %xmm7 ; SSE2-NEXT: movdqa %xmm2, %xmm3 ; SSE2-NEXT: movdqa %xmm1, %xmm2 @@ -8509,7 +8509,7 @@ ; SSE2-LABEL: test160: ; SSE2: # BB#0: # %entry ; SSE2-NEXT: movdqa %xmm7, %xmm11 -; SSE2-NEXT: movdqa %xmm11, -{{[0-9]+}}(%rsp) # 16-byte Spill +; SSE2-NEXT: movdqa %xmm7, -{{[0-9]+}}(%rsp) # 16-byte Spill ; SSE2-NEXT: movdqa %xmm3, %xmm7 ; SSE2-NEXT: movdqa %xmm2, %xmm3 ; SSE2-NEXT: movdqa %xmm1, %xmm2 @@ -10289,7 +10289,7 @@ ; SSE4: # BB#0: # %entry ; SSE4-NEXT: movdqa %xmm0, %xmm2 ; SSE4-NEXT: movdqa %xmm1, %xmm3 -; SSE4-NEXT: pcmpgtq %xmm2, %xmm3 +; SSE4-NEXT: pcmpgtq %xmm0, %xmm3 ; SSE4-NEXT: pcmpeqd %xmm0, %xmm0 ; SSE4-NEXT: pxor %xmm3, %xmm0 ; SSE4-NEXT: blendvpd %xmm2, %xmm1 @@ -10768,7 +10768,7 @@ ; SSE4: # BB#0: # %entry ; SSE4-NEXT: movdqa %xmm0, %xmm2 ; SSE4-NEXT: movdqa %xmm1, %xmm3 -; SSE4-NEXT: pcmpgtq %xmm2, %xmm3 +; SSE4-NEXT: pcmpgtq %xmm0, %xmm3 ; SSE4-NEXT: pcmpeqd %xmm0, %xmm0 ; SSE4-NEXT: pxor %xmm3, %xmm0 ; SSE4-NEXT: blendvpd %xmm1, %xmm2 Index: test/CodeGen/X86/x86-shrink-wrap-unwind.ll =================================================================== --- test/CodeGen/X86/x86-shrink-wrap-unwind.ll +++ test/CodeGen/X86/x86-shrink-wrap-unwind.ll @@ -23,7 +23,7 @@ ; Compare the arguments and jump to exit. ; After the prologue is set. ; CHECK: movl %edi, [[ARG0CPY:%e[a-z]+]] -; CHECK-NEXT: cmpl %esi, [[ARG0CPY]] +; CHECK-NEXT: cmpl %esi, %edi ; CHECK-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]] ; ; Store %a in the alloca. @@ -69,7 +69,7 @@ ; Compare the arguments and jump to exit. ; After the prologue is set. ; CHECK: movl %edi, [[ARG0CPY:%e[a-z]+]] -; CHECK-NEXT: cmpl %esi, [[ARG0CPY]] +; CHECK-NEXT: cmpl %esi, %edi ; CHECK-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]] ; ; Prologue code. @@ -115,7 +115,7 @@ ; Compare the arguments and jump to exit. ; After the prologue is set. ; CHECK: movl %edi, [[ARG0CPY:%e[a-z]+]] -; CHECK-NEXT: cmpl %esi, [[ARG0CPY]] +; CHECK-NEXT: cmpl %esi, %edi ; CHECK-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]] ; ; Prologue code. Index: test/CodeGen/X86/x86-shrink-wrapping.ll =================================================================== --- test/CodeGen/X86/x86-shrink-wrapping.ll +++ test/CodeGen/X86/x86-shrink-wrapping.ll @@ -17,7 +17,7 @@ ; Compare the arguments and jump to exit. ; No prologue needed. ; ENABLE: movl %edi, [[ARG0CPY:%e[a-z]+]] -; ENABLE-NEXT: cmpl %esi, [[ARG0CPY]] +; ENABLE-NEXT: cmpl %esi, %edi ; ENABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]] ; ; Prologue code. @@ -27,7 +27,7 @@ ; Compare the arguments and jump to exit. ; After the prologue is set. ; DISABLE: movl %edi, [[ARG0CPY:%e[a-z]+]] -; DISABLE-NEXT: cmpl %esi, [[ARG0CPY]] +; DISABLE-NEXT: cmpl %esi, %edi ; DISABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]] ; ; Store %a in the alloca.