Index: llvm/include/llvm/CodeGen/TargetInstrInfo.h =================================================================== --- llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -1037,10 +1037,31 @@ /// For COPY-instruction the method naturally returns destination and source /// registers as machine operands, for all other instructions the method calls /// target-dependent implementation. - std::optional<DestSourcePair> isCopyInstr(const MachineInstr &MI) const { + /// If \p ForbidImplicitOperands is true, non-COPY instructions with any + /// implicit operands are conservatively reported as not being a copy. + std::optional<DestSourcePair> + isCopyInstr(const MachineInstr &MI, + bool ForbidImplicitOperands = true) const { if (MI.isCopy()) { + // TODO: Should validate implicit operands here? return DestSourcePair{MI.getOperand(0), MI.getOperand(1)}; } + + // TODO: This is a conservative hack to ensure correctness when extra + // operands are added for special liveness tracking, while also not changing + // debug info. In particular SUBREG_TO_REG may introduce an implicit-def of + // a super register after coalescing. This may manifest as a copy-like + // instruction with an undef subregister def, and a full register + // implicit-def appended to the operand list. + + // Really, implementations of this should be considering extra implicit + // operands. A more sophisticated implementation would recognize an + // implicit-def of the full register, and report that as the + // destination. This should be removed when all targets are validated for + // correct SUBREG_TO_REG liveness handling.
+ if (ForbidImplicitOperands && MI.getNumImplicitOperands() != 0) + return std::nullopt; + return isCopyInstrImpl(MI); } Index: llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp =================================================================== --- llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp +++ llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp @@ -2116,7 +2116,7 @@ } bool InstrRefBasedLDV::transferRegisterCopy(MachineInstr &MI) { - auto DestSrc = TII->isCopyInstr(MI); + auto DestSrc = TII->isCopyInstr(MI, /*ForbidImplicitOperands=*/false); if (!DestSrc) return false; Index: llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp =================================================================== --- llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp +++ llvm/lib/CodeGen/LiveDebugValues/VarLocBasedImpl.cpp @@ -1364,7 +1364,7 @@ // TODO: Try to keep tracking of an entry value if we encounter a propagated // DBG_VALUE describing the copy of the entry value. (Propagated entry value // does not indicate the parameter modification.)
- auto DestSrc = TII->isCopyInstr(*TransferInst); + auto DestSrc = TII->isCopyInstr(*TransferInst, /*ForbidImplicitOperands=*/false); if (DestSrc) { const MachineOperand *SrcRegOp, *DestRegOp; SrcRegOp = DestSrc->Source; @@ -1840,7 +1840,7 @@ OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs, TransferMap &Transfers) { - auto DestSrc = TII->isCopyInstr(MI); + auto DestSrc = TII->isCopyInstr(MI, /*ForbidImplicitOperands=*/false); if (!DestSrc) return; Index: llvm/lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -8330,7 +8330,7 @@ describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) { - auto DestSrc = TII->isCopyInstr(MI); + auto DestSrc = TII->isCopyInstr(MI, /*ForbidImplicitOperands=*/false); if (!DestSrc) return std::nullopt; Index: llvm/test/CodeGen/X86/GlobalISel/add-ext.ll =================================================================== --- llvm/test/CodeGen/X86/GlobalISel/add-ext.ll +++ llvm/test/CodeGen/X86/GlobalISel/add-ext.ll @@ -205,8 +205,8 @@ ; CHECK-NEXT: addq %rdi, %rcx ; CHECK-NEXT: movl (%rcx), %ecx ; CHECK-NEXT: addl (%rax), %ecx -; CHECK-NEXT: movl %esi, %esi -; CHECK-NEXT: imulq $4, %rsi, %rax +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: imulq $4, %rax, %rax ; CHECK-NEXT: addq %rdi, %rax ; CHECK-NEXT: movl %ecx, (%rax) ; CHECK-NEXT: retq Index: llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness-reduced.ll =================================================================== --- llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness-reduced.ll +++ llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness-reduced.ll @@ -27,7 +27,7 @@ ; CHECK-NEXT: pushq %r13 ; CHECK-NEXT: pushq %r12 ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: pushq %rax ; CHECK-NEXT: .cfi_offset %rbx, -56 ; CHECK-NEXT: .cfi_offset %r12, -48 ; CHECK-NEXT: .cfi_offset %r13, -40 @@
-36,33 +36,33 @@ ; CHECK-NEXT: movl %esi, %ebx ; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-NEXT: xorl %r15d, %r15d -; CHECK-NEXT: xorl %r13d, %r13d ; CHECK-NEXT: xorl %r12d, %r12d -; CHECK-NEXT: # implicit-def: $rax -; CHECK-NEXT: # kill: killed $rax +; CHECK-NEXT: # implicit-def: $r13 ; CHECK-NEXT: jmp .LBB0_2 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_1: # %bb5 ; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 ; CHECK-NEXT: orl $1, %r12d -; CHECK-NEXT: movq %r14, %r13 +; CHECK-NEXT: movq %r14, %r15 ; CHECK-NEXT: .LBB0_2: # %bb7 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: callq *%r15 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: callq *%rax +; CHECK-NEXT: movl %r13d, %r13d ; CHECK-NEXT: testb $1, %bl ; CHECK-NEXT: movl $0, %r14d ; CHECK-NEXT: jne .LBB0_1 ; CHECK-NEXT: # %bb.3: # %bb17 ; CHECK-NEXT: # in Loop: Header=BB0_2 Depth=1 ; CHECK-NEXT: xorl %r14d, %r14d -; CHECK-NEXT: testq %r13, %r13 +; CHECK-NEXT: testq %r15, %r15 ; CHECK-NEXT: sete %r14b ; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: callq *%r15 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: callq *%rax ; CHECK-NEXT: shlq $4, %r14 ; CHECK-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; CHECK-NEXT: movl %eax, 0 +; CHECK-NEXT: movl %r13d, 0 ; CHECK-NEXT: movb $0, 4 ; CHECK-NEXT: jmp .LBB0_1 bb: Index: llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll =================================================================== --- llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll +++ llvm/test/CodeGen/X86/coalescer-breaks-subreg-to-reg-liveness.ll @@ -37,12 +37,12 @@ ; CHECK-NEXT: movl %r8d, %r14d ; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: movq %rsi, %r12 +; CHECK-NEXT: movq %rsi, %r13 ; CHECK-NEXT: movq %rdi, %r15 ; CHECK-NEXT: incl %r14d ; 
CHECK-NEXT: xorl %ebx, %ebx -; CHECK-NEXT: # implicit-def: $rax -; CHECK-NEXT: # kill: killed $rax +; CHECK-NEXT: # implicit-def: $r12 +; CHECK-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-NEXT: jmp .LBB0_3 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_1: # %bb17 @@ -55,39 +55,42 @@ ; CHECK-NEXT: callq _Znwm@PLT ; CHECK-NEXT: shlq $4, %r15 ; CHECK-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; CHECK-NEXT: movq %rdx, %rcx +; CHECK-NEXT: movq %r12, %rcx ; CHECK-NEXT: shrq $32, %rcx ; CHECK-NEXT: movb %cl, 12(%rax) -; CHECK-NEXT: movl %edx, 8(%rax) +; CHECK-NEXT: movl %r12d, 8(%rax) ; CHECK-NEXT: movq %r15, %rbx ; CHECK-NEXT: movq %r13, %r15 +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload ; CHECK-NEXT: decl %r14d -; CHECK-NEXT: je .LBB0_7 +; CHECK-NEXT: je .LBB0_8 ; CHECK-NEXT: .LBB0_3: # %bb7 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: callq widget@PLT -; CHECK-NEXT: cmpb $-5, (%r12) -; CHECK-NEXT: jb .LBB0_5 -; CHECK-NEXT: # %bb.4: # %bb12 +; CHECK-NEXT: cmpb $-5, (%r13) +; CHECK-NEXT: jae .LBB0_5 +; CHECK-NEXT: # %bb.4: # in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: movl %r12d, %r12d +; CHECK-NEXT: cmpq %r15, %rbx +; CHECK-NEXT: jbe .LBB0_1 +; CHECK-NEXT: jmp .LBB0_7 +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_5: # %bb12 ; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 ; CHECK-NEXT: movq 0, %rax ; CHECK-NEXT: movq 8, %rax -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: .LBB0_5: # %bb14 -; CHECK-NEXT: # in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload ; CHECK-NEXT: cmpq %r15, %rbx ; CHECK-NEXT: jbe .LBB0_1 -; CHECK-NEXT: # %bb.6: # in Loop: Header=BB0_3 Depth=1 +; CHECK-NEXT: .LBB0_7: # in Loop: Header=BB0_3 Depth=1 ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: xorl 
%ebx, %ebx ; CHECK-NEXT: decl %r14d ; CHECK-NEXT: jne .LBB0_3 -; CHECK-NEXT: .LBB0_7: # %bb21 +; CHECK-NEXT: .LBB0_8: # %bb21 ; CHECK-NEXT: cmpb $0, 12(%rax) -; CHECK-NEXT: jne .LBB0_9 -; CHECK-NEXT: # %bb.8: # %bb26 +; CHECK-NEXT: jne .LBB0_10 +; CHECK-NEXT: # %bb.9: # %bb26 ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r12 @@ -97,7 +100,7 @@ ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: .cfi_def_cfa %rsp, 8 ; CHECK-NEXT: retq -; CHECK-NEXT: .LBB0_9: # %bb25 +; CHECK-NEXT: .LBB0_10: # %bb25 ; CHECK-NEXT: .cfi_def_cfa %rbp, 16 ; CHECK-NEXT: movq %r15, %rdi ; CHECK-NEXT: callq pluto@PLT Index: llvm/test/CodeGen/X86/dagcombine-cse.ll =================================================================== --- llvm/test/CodeGen/X86/dagcombine-cse.ll +++ llvm/test/CodeGen/X86/dagcombine-cse.ll @@ -106,24 +106,24 @@ ; ; X64-LABEL: square_high: ; X64: ## %bb.0: ## %entry -; X64-NEXT: movl %esi, %esi -; X64-NEXT: movq %rsi, %rax +; X64-NEXT: movl %esi, %ecx +; X64-NEXT: movq %rcx, %rax ; X64-NEXT: mulq %rdi -; X64-NEXT: movq %rdx, %rcx +; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: mulq %rdi ; X64-NEXT: addq %r8, %rdx -; X64-NEXT: movq %rcx, %rax +; X64-NEXT: movq %rsi, %rax ; X64-NEXT: adcq $0, %rax ; X64-NEXT: addq %rdx, %r8 -; X64-NEXT: adcq %rcx, %rax -; X64-NEXT: imulq %rsi, %rsi -; X64-NEXT: addq %rax, %rsi -; X64-NEXT: shrdq $32, %rsi, %r8 -; X64-NEXT: shrq $32, %rsi +; X64-NEXT: adcq %rsi, %rax +; X64-NEXT: imulq %rcx, %rcx +; X64-NEXT: addq %rax, %rcx +; X64-NEXT: shrdq $32, %rcx, %r8 +; X64-NEXT: shrq $32, %rcx ; X64-NEXT: movq %r8, %rax -; X64-NEXT: movq %rsi, %rdx +; X64-NEXT: movq %rcx, %rdx ; X64-NEXT: retq entry: %conv = zext i96 %x to i192 Index: llvm/test/CodeGen/X86/fold-and-shift-x86_64.ll =================================================================== --- llvm/test/CodeGen/X86/fold-and-shift-x86_64.ll +++ llvm/test/CodeGen/X86/fold-and-shift-x86_64.ll @@ -34,8 +34,8 @@ define i8 @t3(ptr 
%X, i64 %i) { ; CHECK-LABEL: t3: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl %esi, %esi -; CHECK-NEXT: movzbl (%rdi,%rsi,4), %eax +; CHECK-NEXT: movl %esi, %eax +; CHECK-NEXT: movzbl (%rdi,%rax,4), %eax ; CHECK-NEXT: retq entry: Index: llvm/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-lowhigh.ll =================================================================== --- llvm/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-lowhigh.ll +++ llvm/test/CodeGen/X86/unfold-masked-merge-scalar-constmask-lowhigh.ll @@ -78,18 +78,18 @@ define i64 @out64_constmask(i64 %x, i64 %y) { ; CHECK-NOBMI-LABEL: out64_constmask: ; CHECK-NOBMI: # %bb.0: -; CHECK-NOBMI-NEXT: movl %edi, %edi +; CHECK-NOBMI-NEXT: movl %edi, %ecx ; CHECK-NOBMI-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000 ; CHECK-NOBMI-NEXT: andq %rsi, %rax -; CHECK-NOBMI-NEXT: orq %rdi, %rax +; CHECK-NOBMI-NEXT: orq %rcx, %rax ; CHECK-NOBMI-NEXT: retq ; ; CHECK-BMI-LABEL: out64_constmask: ; CHECK-BMI: # %bb.0: -; CHECK-BMI-NEXT: movl %edi, %edi +; CHECK-BMI-NEXT: movl %edi, %ecx ; CHECK-BMI-NEXT: movabsq $-4294967296, %rax # imm = 0xFFFFFFFF00000000 ; CHECK-BMI-NEXT: andq %rsi, %rax -; CHECK-BMI-NEXT: orq %rdi, %rax +; CHECK-BMI-NEXT: orq %rcx, %rax ; CHECK-BMI-NEXT: retq %mx = and i64 %x, 4294967295 %my = and i64 %y, -4294967296