diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp --- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -134,7 +134,7 @@ bool convertInstTo3Addr(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, Register RegA, - Register RegB, unsigned Dist); + Register RegB, unsigned &Dist); bool isDefTooClose(Register Reg, unsigned Dist, MachineInstr *MI); @@ -146,7 +146,7 @@ bool tryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, unsigned SrcIdx, unsigned DstIdx, - unsigned Dist, bool shouldOnlyCommute); + unsigned &Dist, bool shouldOnlyCommute); bool tryInstructionCommute(MachineInstr *MI, unsigned DstOpIdx, @@ -674,7 +674,7 @@ /// Return true if this transformation was successful. bool TwoAddressInstructionPass::convertInstTo3Addr( MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, - Register RegA, Register RegB, unsigned Dist) { + Register RegA, Register RegB, unsigned &Dist) { MachineInstrSpan MIS(mi, MBB); MachineInstr *NewMI = TII->convertToThreeAddress(*mi, LV); if (!NewMI) @@ -723,7 +723,9 @@ if (LIS && !SingleInst) LIS->repairIntervalsInRange(MBB, MIS.begin(), MIS.end(), OrigRegs); - DistanceMap.insert(std::make_pair(NewMI, Dist)); + for (MachineInstr &MI : MIS) + DistanceMap.insert(std::make_pair(&MI, Dist++)); + Dist--; mi = NewMI; nmi = std::next(mi); @@ -1226,7 +1228,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, unsigned SrcIdx, unsigned DstIdx, - unsigned Dist, bool shouldOnlyCommute) { + unsigned &Dist, bool shouldOnlyCommute) { if (OptLevel == CodeGenOpt::None) return false; @@ -1334,6 +1336,8 @@ // look "normal" to the transformation logic. MBB->insert(mi, NewMIs[0]); MBB->insert(mi, NewMIs[1]); + DistanceMap.insert(std::make_pair(NewMIs[0], Dist++)); + DistanceMap.insert(std::make_pair(NewMIs[1], Dist)); LLVM_DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0] << "2addr: NEW INST: " << *NewMIs[1]); @@ -1389,6 +1393,7 @@ } MI.eraseFromParent(); + DistanceMap.erase(&MI); // Update LiveIntervals. if (LIS) { @@ -1405,6 +1410,9 @@ LLVM_DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); NewMIs[0]->eraseFromParent(); NewMIs[1]->eraseFromParent(); + DistanceMap.erase(NewMIs[0]); + DistanceMap.erase(NewMIs[1]); + Dist--; } } } diff --git a/llvm/test/CodeGen/X86/addcarry.ll b/llvm/test/CodeGen/X86/addcarry.ll --- a/llvm/test/CodeGen/X86/addcarry.ll +++ b/llvm/test/CodeGen/X86/addcarry.ll @@ -179,7 +179,7 @@ ; CHECK-NEXT: leal (%rsi,%rcx), %edx ; CHECK-NEXT: addl %esi, %edx ; CHECK-NEXT: setb %al -; CHECK-NEXT: addl %esi, %ecx +; CHECK-NEXT: addl %ecx, %esi ; CHECK-NEXT: movl %edx, (%rdi) ; CHECK-NEXT: adcb $0, %al ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/distancemap.mir b/llvm/test/CodeGen/X86/distancemap.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/distancemap.mir @@ -0,0 +1,95 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc %s -o - -mtriple=x86_64-unknown-linux -run-pass=twoaddressinstruction -verify-machineinstrs | FileCheck %s + +# In TwoAddressInstructionPass, new instructions should be added to DistanceMap. +# In this case, function convertInstTo3Addr is called on the first ADD +# instruction, extra COPY instructions are generated. If they are not added to +# DistanceMap, function noUseAfterLastDef computes a wrong value for the later +# ADD instruction, and a different commute decision is made. + +--- | + ; ModuleID = 't.ll' + source_filename = "t.ll" + target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + target triple = "x86_64-unknown-linux" + + declare i16 @llvm.sadd.sat.i16(i16, i16) + + define signext i16 @func16(i16 signext %x, i16 signext %y, i16 signext %z) nounwind { + %a = mul i16 %y, %z + %tmp = call i16 @llvm.sadd.sat.i16(i16 %x, i16 %a) + ret i16 %tmp + } + +... +--- +name: func16 +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: gr32 } + - { id: 1, class: gr32 } + - { id: 2, class: gr32 } + - { id: 3, class: gr16 } + - { id: 4, class: gr32 } + - { id: 5, class: gr16 } + - { id: 6, class: gr16 } + - { id: 7, class: gr32 } + - { id: 8, class: gr32 } + - { id: 9, class: gr32 } + - { id: 10, class: gr16 } + - { id: 11, class: gr32 } + - { id: 13, class: gr32 } + - { id: 14, class: gr16 } +frameInfo: + maxAlignment: 1 +body: | + bb.0 : + liveins: $edi, $esi, $edx + ; CHECK-LABEL: name: func16 + ; CHECK: liveins: $edi, $esi, $edx + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY killed $edx + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY killed $esi + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr32 = COPY killed $edi + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr16 = COPY killed [[COPY2]].sub_16bit + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr32 = COPY [[COPY1]] + ; CHECK-NEXT: [[IMUL32rr:%[0-9]+]]:gr32 = IMUL32rr [[IMUL32rr]], killed [[COPY]], implicit-def dead $eflags + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr16 = COPY killed [[IMUL32rr]].sub_16bit + ; CHECK-NEXT: [[DEF:%[0-9]+]]:gr64_nosp = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF]].sub_16bit:gr64_nosp = COPY [[COPY3]] + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:gr64_nosp = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1]].sub_16bit:gr64_nosp = COPY [[COPY4]] + ; CHECK-NEXT: [[LEA64_32r:%[0-9]+]]:gr32 = LEA64_32r killed [[DEF]], 1, killed [[DEF1]], 0, $noreg + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr16 = COPY killed [[LEA64_32r]].sub_16bit + ; CHECK-NEXT: [[MOVSX32rr16_:%[0-9]+]]:gr32 = MOVSX32rr16 killed [[COPY5]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr32 = COPY [[MOVSX32rr16_]] + ; CHECK-NEXT: [[SAR32ri:%[0-9]+]]:gr32 = SAR32ri [[SAR32ri]], 15, implicit-def dead $eflags + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr32 = COPY [[SAR32ri]] + ; CHECK-NEXT: [[XOR32ri:%[0-9]+]]:gr32 = XOR32ri [[XOR32ri]], -32768, implicit-def dead $eflags + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr16 = COPY [[COPY3]] + ; CHECK-NEXT: [[ADD16rr:%[0-9]+]]:gr16 = ADD16rr [[ADD16rr]], killed [[COPY4]], implicit-def $eflags + ; CHECK-NEXT: undef %11.sub_16bit:gr32 = COPY killed [[ADD16rr]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr32 = COPY [[XOR32ri]] + ; CHECK-NEXT: [[CMOV32rr:%[0-9]+]]:gr32 = CMOV32rr [[CMOV32rr]], killed %11, 1, implicit killed $eflags + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr16 = COPY killed [[CMOV32rr]].sub_16bit + ; CHECK-NEXT: $ax = COPY killed [[COPY6]] + ; CHECK-NEXT: RET 0, killed $ax + %2:gr32 = COPY killed $edx + %1:gr32 = COPY killed $esi + %0:gr32 = COPY killed $edi + %3:gr16 = COPY killed %0.sub_16bit:gr32 + %4:gr32 = IMUL32rr killed %1:gr32, killed %2:gr32, implicit-def dead $eflags + %5:gr16 = COPY killed %4.sub_16bit:gr32 + %6:gr16 = ADD16rr %3:gr16, %5:gr16, implicit-def dead $eflags + %7:gr32 = MOVSX32rr16 killed %6:gr16 + %8:gr32 = SAR32ri killed %7:gr32, 15, implicit-def dead $eflags + %9:gr32 = XOR32ri killed %8:gr32, -32768, implicit-def dead $eflags + %10:gr16 = ADD16rr killed %3:gr16, killed %5:gr16, implicit-def $eflags + %11:gr32 = INSERT_SUBREG undef %12:gr32, killed %10:gr16, %subreg.sub_16bit + %13:gr32 = CMOV32rr killed %11:gr32, killed %9:gr32, 0, implicit killed $eflags + %14:gr16 = COPY killed %13.sub_16bit:gr32 + $ax = COPY killed %14:gr16 + RET 0, killed $ax + +... diff --git a/llvm/test/CodeGen/X86/sadd_sat_plus.ll b/llvm/test/CodeGen/X86/sadd_sat_plus.ll --- a/llvm/test/CodeGen/X86/sadd_sat_plus.ll +++ b/llvm/test/CodeGen/X86/sadd_sat_plus.ll @@ -95,8 +95,8 @@ ; X64-NEXT: cwtl ; X64-NEXT: sarl $15, %eax ; X64-NEXT: xorl $-32768, %eax # imm = 0x8000 -; X64-NEXT: addw %di, %si -; X64-NEXT: cmovnol %esi, %eax +; X64-NEXT: addw %si, %di +; X64-NEXT: cmovnol %edi, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %a = mul i16 %y, %z @@ -131,8 +131,8 @@ ; X64-NEXT: leal (%rdi,%rax), %ecx ; X64-NEXT: sarb $7, %cl ; X64-NEXT: xorb $-128, %cl -; X64-NEXT: addb %dil, %al -; X64-NEXT: movzbl %al, %edx +; X64-NEXT: addb %al, %dil +; X64-NEXT: movzbl %dil, %edx ; X64-NEXT: movzbl %cl, %eax ; X64-NEXT: cmovnol %edx, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax diff --git a/llvm/test/CodeGen/X86/sadd_sat_vec.ll b/llvm/test/CodeGen/X86/sadd_sat_vec.ll --- a/llvm/test/CodeGen/X86/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/X86/sadd_sat_vec.ll @@ -434,8 +434,8 @@ ; SSE-NEXT: leal (%rax,%rcx), %esi ; SSE-NEXT: sarb $7, %sil ; SSE-NEXT: xorb $-128, %sil -; SSE-NEXT: addb %al, %cl -; SSE-NEXT: movzbl %cl, %eax +; SSE-NEXT: addb %cl, %al +; SSE-NEXT: movzbl %al, %eax ; SSE-NEXT: movzbl %sil, %ecx ; SSE-NEXT: cmovnol %eax, %ecx ; SSE-NEXT: movb %cl, (%rdx) @@ -448,8 +448,8 @@ ; AVX-NEXT: leal (%rax,%rcx), %esi ; AVX-NEXT: sarb $7, %sil ; AVX-NEXT: xorb $-128, %sil -; AVX-NEXT: addb %al, %cl -; AVX-NEXT: movzbl %cl, %eax +; AVX-NEXT: addb %cl, %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: movzbl %sil, %ecx ; AVX-NEXT: cmovnol %eax, %ecx ; AVX-NEXT: movb %cl, (%rdx) @@ -470,9 +470,9 @@ ; SSE-NEXT: movswl %si, %esi ; SSE-NEXT: sarl $15, %esi ; SSE-NEXT: xorl $-32768, %esi # imm = 0x8000 -; SSE-NEXT: addw %ax, %cx -; SSE-NEXT: cmovol %esi, %ecx -; SSE-NEXT: movw %cx, (%rdx) +; SSE-NEXT: addw %cx, %ax +; SSE-NEXT: cmovol %esi, %eax +; SSE-NEXT: movw %ax, (%rdx) ; SSE-NEXT: retq ; ; AVX-LABEL: v1i16: @@ -483,9 +483,9 @@ ; AVX-NEXT: movswl %si, %esi ; AVX-NEXT: sarl $15, %esi ; AVX-NEXT: xorl $-32768, %esi # imm = 0x8000 -; AVX-NEXT: addw %ax, %cx -; AVX-NEXT: cmovol %esi, %ecx -; AVX-NEXT: movw %cx, (%rdx) +; AVX-NEXT: addw %cx, %ax +; AVX-NEXT: cmovol %esi, %eax +; AVX-NEXT: movw %ax, (%rdx) ; AVX-NEXT: retq %x = load <1 x i16>, <1 x i16>* %px %y = load <1 x i16>, <1 x i16>* %py