Index: llvm/lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- llvm/lib/Target/X86/X86InstrInfo.cpp
+++ llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -3648,8 +3648,15 @@
 
 std::optional<DestSourcePair>
 X86InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
-  if (MI.isMoveReg())
+  if (MI.isMoveReg()) {
+    // FIXME: Dirty hack for apparent invariant that doesn't hold when
+    // subreg_to_reg is coalesced with ordinary copies, such that the bits that
+    // were asserted as 0 are now undef.
+    if (MI.getOperand(0).isUndef() && MI.getOperand(0).getSubReg())
+      return std::nullopt;
+
     return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
+  }
   return std::nullopt;
 }
 
Index: llvm/test/CodeGen/X86/GlobalISel/add-ext.ll
===================================================================
--- llvm/test/CodeGen/X86/GlobalISel/add-ext.ll
+++ llvm/test/CodeGen/X86/GlobalISel/add-ext.ll
@@ -205,8 +205,8 @@
 ; CHECK-NEXT:    addq %rdi, %rcx
 ; CHECK-NEXT:    movl (%rcx), %ecx
 ; CHECK-NEXT:    addl (%rax), %ecx
-; CHECK-NEXT:    movl %esi, %esi
-; CHECK-NEXT:    imulq $4, %rsi, %rax
+; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    imulq $4, %rax, %rax
 ; CHECK-NEXT:    addq %rdi, %rax
 ; CHECK-NEXT:    movl %ecx, (%rax)
 ; CHECK-NEXT:    retq
Index: llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll
===================================================================
--- llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll
+++ llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll
@@ -37,12 +37,12 @@
 ; CHECK-NEXT:    movl %r8d, %r14d
 ; CHECK-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; CHECK-NEXT:    movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:    movq %rsi, %r12
+; CHECK-NEXT:    movq %rsi, %r13
 ; CHECK-NEXT:    movq %rdi, %r15
 ; CHECK-NEXT:    incl %r14d
 ; CHECK-NEXT:    xorl %ebx, %ebx
-; CHECK-NEXT:    # implicit-def: $rax
-; CHECK-NEXT:    # kill: killed $rax
+; CHECK-NEXT:    # implicit-def: $r12
+; CHECK-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
 ; CHECK-NEXT:    jmp .LBB0_3
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB0_1: # %bb17
@@ -55,39 +55,42 @@
 ; CHECK-NEXT:    callq _Znwm@PLT
 ; CHECK-NEXT:    shlq $4, %r15
 ; CHECK-NEXT:    addq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
-; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
-; CHECK-NEXT:    movq %rdx, %rcx
+; CHECK-NEXT:    movq %r12, %rcx
 ; CHECK-NEXT:    shrq $32, %rcx
 ; CHECK-NEXT:    movb %cl, 12(%rax)
-; CHECK-NEXT:    movl %edx, 8(%rax)
+; CHECK-NEXT:    movl %r12d, 8(%rax)
 ; CHECK-NEXT:    movq %r15, %rbx
 ; CHECK-NEXT:    movq %r13, %r15
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload
 ; CHECK-NEXT:    decl %r14d
-; CHECK-NEXT:    je .LBB0_7
+; CHECK-NEXT:    je .LBB0_8
 ; CHECK-NEXT:  .LBB0_3: # %bb7
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    callq widget@PLT
-; CHECK-NEXT:    cmpb $-5, (%r12)
-; CHECK-NEXT:    jb .LBB0_5
-; CHECK-NEXT:  # %bb.4: # %bb12
+; CHECK-NEXT:    cmpb $-5, (%r13)
+; CHECK-NEXT:    jae .LBB0_5
+; CHECK-NEXT:  # %bb.4: # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    movl %r12d, %r12d
+; CHECK-NEXT:    cmpq %r15, %rbx
+; CHECK-NEXT:    jbe .LBB0_1
+; CHECK-NEXT:    jmp .LBB0_7
+; CHECK-NEXT:    .p2align 4, 0x90
+; CHECK-NEXT:  .LBB0_5: # %bb12
 ; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=1
 ; CHECK-NEXT:    movq 0, %rax
 ; CHECK-NEXT:    movq 8, %rax
-; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; CHECK-NEXT:    movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT:  .LBB0_5: # %bb14
-; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
 ; CHECK-NEXT:    cmpq %r15, %rbx
 ; CHECK-NEXT:    jbe .LBB0_1
-; CHECK-NEXT:  # %bb.6: # in Loop: Header=BB0_3 Depth=1
+; CHECK-NEXT:  .LBB0_7: # in Loop: Header=BB0_3 Depth=1
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    xorl %ebx, %ebx
 ; CHECK-NEXT:    decl %r14d
 ; CHECK-NEXT:    jne .LBB0_3
-; CHECK-NEXT:  .LBB0_7: # %bb21
+; CHECK-NEXT:  .LBB0_8: # %bb21
 ; CHECK-NEXT:    cmpb $0, 12(%rax)
-; CHECK-NEXT:    jne .LBB0_9
-; CHECK-NEXT:  # %bb.8: # %bb26
+; CHECK-NEXT:    jne .LBB0_10
+; CHECK-NEXT:  # %bb.9: # %bb26
 ; CHECK-NEXT:    addq $24, %rsp
 ; CHECK-NEXT:    popq %rbx
 ; CHECK-NEXT:    popq %r12
@@ -97,7 +100,7 @@
 ; CHECK-NEXT:    popq %rbp
 ; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
 ; CHECK-NEXT:    retq
-; CHECK-NEXT:  .LBB0_9: # %bb25
+; CHECK-NEXT:  .LBB0_10: # %bb25
 ; CHECK-NEXT:    .cfi_def_cfa %rbp, 16
 ; CHECK-NEXT:    movq %r15, %rdi
 ; CHECK-NEXT:    callq pluto@PLT
Index: llvm/test/CodeGen/X86/dagcombine-cse.ll
===================================================================
--- llvm/test/CodeGen/X86/dagcombine-cse.ll
+++ llvm/test/CodeGen/X86/dagcombine-cse.ll
@@ -106,24 +106,24 @@
 ;
 ; X64-LABEL: square_high:
 ; X64:       ## %bb.0: ## %entry
-; X64-NEXT:    movl %esi, %esi
-; X64-NEXT:    movq %rsi, %rax
+; X64-NEXT:    movl %esi, %ecx
+; X64-NEXT:    movq %rcx, %rax
 ; X64-NEXT:    mulq %rdi
-; X64-NEXT:    movq %rdx, %rcx
+; X64-NEXT:    movq %rdx, %rsi
 ; X64-NEXT:    movq %rax, %r8
 ; X64-NEXT:    movq %rdi, %rax
 ; X64-NEXT:    mulq %rdi
 ; X64-NEXT:    addq %r8, %rdx
-; X64-NEXT:    movq %rcx, %rax
+; X64-NEXT:    movq %rsi, %rax
 ; X64-NEXT:    adcq $0, %rax
 ; X64-NEXT:    addq %rdx, %r8
-; X64-NEXT:    adcq %rcx, %rax
-; X64-NEXT:    imulq %rsi, %rsi
-; X64-NEXT:    addq %rax, %rsi
-; X64-NEXT:    shrdq $32, %rsi, %r8
-; X64-NEXT:    shrq $32, %rsi
+; X64-NEXT:    adcq %rsi, %rax
+; X64-NEXT:    imulq %rcx, %rcx
+; X64-NEXT:    addq %rax, %rcx
+; X64-NEXT:    shrdq $32, %rcx, %r8
+; X64-NEXT:    shrq $32, %rcx
 ; X64-NEXT:    movq %r8, %rax
-; X64-NEXT:    movq %rsi, %rdx
+; X64-NEXT:    movq %rcx, %rdx
 ; X64-NEXT:    retq
 entry:
   %conv = zext i96 %x to i192
Index: llvm/test/CodeGen/X86/fold-and-shift-x86_64.ll
===================================================================
--- llvm/test/CodeGen/X86/fold-and-shift-x86_64.ll
+++ llvm/test/CodeGen/X86/fold-and-shift-x86_64.ll
@@ -34,8 +34,8 @@
 
 define i8 @t3(ptr %X, i64 %i) {
 ; CHECK-LABEL: t3:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl %esi, %esi
-; CHECK-NEXT:    movzbl (%rdi,%rsi,4), %eax
+; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    movzbl (%rdi,%rax,4), %eax
 ; CHECK-NEXT:    retq
 entry:
Index: llvm/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-lowhigh.ll
===================================================================
--- llvm/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-lowhigh.ll
+++ llvm/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-lowhigh.ll
@@ -78,18 +78,18 @@
 define i64 @out64_constmask(i64 %x, i64 %y) {
 ; CHECK-NOBMI-LABEL: out64_constmask:
 ; CHECK-NOBMI:       # %bb.0:
-; CHECK-NOBMI-NEXT:    movl %edi, %edi
+; CHECK-NOBMI-NEXT:    movl %edi, %ecx
 ; CHECK-NOBMI-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
 ; CHECK-NOBMI-NEXT:    andq %rsi, %rax
-; CHECK-NOBMI-NEXT:    orq %rdi, %rax
+; CHECK-NOBMI-NEXT:    orq %rcx, %rax
 ; CHECK-NOBMI-NEXT:    retq
 ;
 ; CHECK-BMI-LABEL: out64_constmask:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    movl %edi, %edi
+; CHECK-BMI-NEXT:    movl %edi, %ecx
 ; CHECK-BMI-NEXT:    movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000
 ; CHECK-BMI-NEXT:    andq %rsi, %rax
-; CHECK-BMI-NEXT:    orq %rdi, %rax
+; CHECK-BMI-NEXT:    orq %rcx, %rax
 ; CHECK-BMI-NEXT:    retq
   %mx = and i64 %x, 4294967295
   %my = and i64 %y, -4294967296
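
Note on the X86InstrInfo.cpp change: a minimal MIR sketch of the pattern the
FIXME guards against (the virtual register numbers and the MOV32rr producer
are illustrative, not taken from the tests above). SUBREG_TO_REG asserts that
the bits outside the inserted subregister are zero:

    %0:gr32 = MOV32rr %2:gr32
    %1:gr64 = SUBREG_TO_REG 0, %0:gr32, %subreg.sub_32bit

When the register coalescer merges %0 into %1.sub_32bit, that zero-assertion
degrades into an undef def of the wider register:

    undef %1.sub_32bit:gr64 = MOV32rr %2:gr32

The upper 32 bits of %1 are now only undef, not known-zero, so reporting this
MOV32rr through isCopyInstrImpl as a copy that fully defines the 64-bit
destination would be unsound. The added check bails out on exactly this shape
(a subregister def marked undef), and the test churn above appears to be the
fallout: zero-extend idioms like "movl %esi, %esi" are no longer treated as
forwardable full-width copies (e.g. by MachineCopyPropagation), so the move
now typically targets a fresh register, as in "movl %esi, %eax".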