diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -1078,7 +1078,63 @@
   UsedInInstr.clear();
   BundleVirtRegsMap.clear();
 
-  // Scan for special cases; Apply pre-assigned register defs to state.
+  // Scan for special cases;
+  //
+  // Check for copies from a VReg into a physical register where every register
+  // in the VReg's allocation order has a pre-assigned register unit. In that
+  // case, hoist the copy up towards the definition of the VReg in the hope
+  // that some pre-assigned registers have been freed up there.
+  if (MI.isCopy()) {
+    const MachineOperand &Dst = MI.getOperand(0);
+    const MachineOperand &Src = MI.getOperand(1);
+    if (Dst.getReg().isPhysical() && Src.getReg().isVirtual()) {
+      Register DstReg = Dst.getReg();
+      Register Reg = Src.getReg();
+      const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
+      ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC);
+      bool AllPreAssigned = all_of(AllocationOrder, [&](MCPhysReg PhysReg) {
+        for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI)
+          if (RegUnitStates[*UI] == regPreAssigned)
+            return true;
+        return false;
+      });
+
+      if (AllPreAssigned) {
+        // Walk backwards from MI to the closest preceding instruction that MI
+        // cannot be hoisted across.
+        for (MachineInstr &MI2 : reverse(make_range(
+                 MI.getParent()->begin()->getIterator(), MI.getIterator()))) {
+          // Scan all operands instead of peeking at operand 0: MI2 may have no
+          // operands at all, and a def of Reg is matched by this check as well.
+          bool MentionsReg =
+              any_of(MI2.operands(), [Reg](const MachineOperand &MO) {
+                return MO.isReg() && MO.getReg() == Reg;
+              });
+          // Hoisting above an instruction that reads or modifies the
+          // destination register would change the value that instruction, or
+          // the users of MI, observe.
+          bool TouchesDst = MI2.readsRegister(DstReg, TRI) ||
+                            MI2.modifiesRegister(DstReg, TRI);
+          if (!MentionsReg && !TouchesDst)
+            continue;
+
+          // If MI2 immediately precedes MI there is nothing to hoist. Fall
+          // through to the regular allocation below rather than returning,
+          // otherwise MI would never be allocated.
+          auto InsertPt = std::next(MI2.getIterator());
+          if (InsertPt == MI.getIterator())
+            break;
+
+          // MI is expected to be visited again at its new position, since the
+          // block walk advances its iterator before processing MI.
+          MI.moveBefore(&*InsertPt);
+          return;
+        }
+      }
+    }
+  }
+
+  // Apply pre-assigned register defs to state.
   bool HasPhysRegUse = false;
   bool HasRegMask = false;
   bool HasVRegDef = false;
@@ -1433,7 +1489,7 @@
   Coalesced.clear();
 
   // Traverse block in reverse order allocating instructions one by one.
-  for (MachineInstr &MI : reverse(MBB)) {
+  for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) {
     LLVM_DEBUG(
       dbgs() << "\n>> " << MI << "Regs:";
       dumpState()
diff --git a/llvm/test/CodeGen/X86/regallocfast-need-to-move-copy.mir b/llvm/test/CodeGen/X86/regallocfast-need-to-move-copy.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/regallocfast-need-to-move-copy.mir
@@ -0,0 +1,194 @@
+# RUN: llc -o - -run-pass=regallocfast -verify-machineinstrs %s | FileCheck %s
+
+--- |
+  target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+  target triple = "x86_64-unknown-linux"
+
+  define void @test1() {
+    %tmp = alloca i8, i64 6144, align 32
+    ret void
+  }
+
+  define void @test2() {
+    %tmp = alloca i8, i64 6144, align 32
+    ret void
+  }
+
+  declare void @baz(i8*, i32, i8*, ...)
+
+...
+---
+# Test to make sure we do not run out of registers when the VReg operand of a
+# copy cannot be assigned a register because every register in its class is
+# pre-assigned. In this case, the copy ($al = COPY %8) is moved up to just
+# after the def of %8.
+
+# CHECK-LABEL: name:{{.*}}test1
+# CHECK: bb.0:
+# CHECK: renamable $ecx = MOV32rm killed renamable $rdi, 1, $noreg, 0, $noreg :: (load 4)
+# CHECK-NEXT: renamable $al = COPY renamable $ch, implicit killed $ecx
+# CHECK-NEXT: renamable $ecx = MOVZX32rr8_NOREX killed renamable $al
+# CHECK-NEXT: $r8d = COPY killed renamable $ecx
+# CHECK-NEXT: renamable $esi = MOV32ri 3427
+# CHECK-NEXT: renamable $al = MOV8ri 2
+# CHECK-NEXT: renamable $rdi = IMPLICIT_DEF
+# CHECK-NEXT: renamable $rdx = IMPLICIT_DEF
+# CHECK-NEXT: renamable $xmm0 = IMPLICIT_DEF
+# CHECK-NEXT: renamable $xmm1 = IMPLICIT_DEF
+# CHECK-NEXT: renamable $ecx = IMPLICIT_DEF
+# CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @baz, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit killed $esi, implicit $rdx, implicit killed $xmm0, implicit killed $xmm1, implicit killed $ecx, implicit killed $r8d, implicit $al, implicit-def $rsp, implicit-def $ssp
+#
+name: test1
+alignment: 16
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gr32 }
+  - { id: 1, class: gr64 }
+  - { id: 2, class: gr32 }
+  - { id: 3, class: gr8 }
+  - { id: 4, class: gr32_abcd }
+  - { id: 5, class: gr8_norex }
+  - { id: 6, class: gr32_norex }
+  - { id: 7, class: gr32 }
+  - { id: 8, class: gr8 }
+  - { id: 9, class: gr64 }
+  - { id: 10, class: gr64 }
+  - { id: 11, class: fr64 }
+  - { id: 12, class: fr64 }
+  - { id: 13, class: gr32 }
+  - { id: 14, class: gr64 }
+  - { id: 15, class: gr64 }
+liveins:
+  - { reg: '$rdi', virtual-reg: '%1' }
+  - { reg: '$esi', virtual-reg: '%2' }
+frameInfo:
+  maxAlignment: 32
+  hasCalls: true
+stack:
+  - { id: 0, name: tmp, type: variable-sized, alignment: 32 }
+machineFunctionInfo: {}
+body: |
+  bb.0:
+    liveins: $rdi, $esi
+    successors: %bb.4, %bb.5
+
+    %1:gr64 = COPY $rdi
+    %4:gr32_abcd = MOV32rm %1, 1, $noreg, 0, $noreg :: (load 4)
+    %5:gr8_norex = COPY %4.sub_8bit_hi
+    %6:gr32_norex = MOVZX32rr8_NOREX killed %5
+    %7:gr32 = MOV32ri 3427
+    %8:gr8 = MOV8ri 2
+    %9:gr64 = IMPLICIT_DEF
+    $rdi = COPY %9
+    $esi = COPY %7
+    %10:gr64 = IMPLICIT_DEF
+    $rdx = COPY %10
+    %11:fr64 = IMPLICIT_DEF
+    $xmm0 = COPY %11
+    %12:fr64 = IMPLICIT_DEF
+    $xmm1 = COPY %12
+    %13:gr32 = IMPLICIT_DEF
+    $ecx = COPY %13
+    $r8d = COPY %6
+    $al = COPY %8
+    CALL64pcrel32 target-flags(x86-plt) @baz, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $esi, implicit $rdx, implicit $xmm0, implicit $xmm1, implicit $ecx, implicit $r8d, implicit $al, implicit-def $rsp, implicit-def $ssp
+    JMP_1 %bb.4
+
+  bb.4:
+    RETQ
+
+  bb.5 (landing-pad):
+    liveins: $rax, $rdx
+
+    %15:gr64 = COPY killed $rdx
+    %14:gr64 = COPY killed $rax
+    RETQ
+
+...
+---
+# Variant of test1 with an extra use of %8 ($ah = COPY %8) between def and call.
+# CHECK-LABEL: name:{{.*}}test2
+# CHECK: bb.0:
+# CHECK: renamable $ecx = MOV32rm killed renamable $rdi, 1, $noreg, 0, $noreg :: (load 4)
+# CHECK-NEXT: renamable $al = COPY renamable $ch, implicit killed $ecx
+# CHECK-NEXT: renamable $ecx = MOVZX32rr8_NOREX killed renamable $al
+# CHECK-NEXT: $r8d = COPY killed renamable $ecx
+# CHECK-NEXT: renamable $esi = MOV32ri 3427
+# CHECK-NEXT: renamable $al = MOV8ri 2
+# CHECK-NEXT: renamable $rdi = IMPLICIT_DEF
+# CHECK-NEXT: $ah = COPY renamable $al
+# CHECK-NEXT: renamable $rdx = IMPLICIT_DEF
+# CHECK-NEXT: renamable $xmm0 = IMPLICIT_DEF
+# CHECK-NEXT: renamable $xmm1 = IMPLICIT_DEF
+# CHECK-NEXT: renamable $ecx = IMPLICIT_DEF
+# CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @baz, csr_64, implicit $rsp, implicit $ssp, implicit killed $rdi, implicit killed $esi, implicit $rdx, implicit killed $xmm0, implicit killed $xmm1, implicit killed $ecx, implicit killed $r8d, implicit $al, implicit-def $rsp, implicit-def $ssp
+#
+name: test2
+alignment: 16
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gr32 }
+  - { id: 1, class: gr64 }
+  - { id: 2, class: gr32 }
+  - { id: 3, class: gr8 }
+  - { id: 4, class: gr32_abcd }
+  - { id: 5, class: gr8_norex }
+  - { id: 6, class: gr32_norex }
+  - { id: 7, class: gr32 }
+  - { id: 8, class: gr8 }
+  - { id: 9, class: gr64 }
+  - { id: 10, class: gr64 }
+  - { id: 11, class: fr64 }
+  - { id: 12, class: fr64 }
+  - { id: 13, class: gr32 }
+  - { id: 14, class: gr64 }
+  - { id: 15, class: gr64 }
+liveins:
+  - { reg: '$rdi', virtual-reg: '%1' }
+  - { reg: '$esi', virtual-reg: '%2' }
+frameInfo:
+  maxAlignment: 32
+  hasCalls: true
+stack:
+  - { id: 0, name: tmp, type: variable-sized, alignment: 32 }
+machineFunctionInfo: {}
+body: |
+
+  bb.0:
+    liveins: $rdi, $esi
+    successors: %bb.4, %bb.5
+
+    %1:gr64 = COPY $rdi
+    %4:gr32_abcd = MOV32rm %1, 1, $noreg, 0, $noreg :: (load 4)
+    %5:gr8_norex = COPY %4.sub_8bit_hi
+    %6:gr32_norex = MOVZX32rr8_NOREX killed %5
+    %7:gr32 = MOV32ri 3427
+    %8:gr8 = MOV8ri 2
+    %9:gr64 = IMPLICIT_DEF
+    $rdi = COPY %9
+    $esi = COPY %7
+    $ah = COPY %8
+    %10:gr64 = IMPLICIT_DEF
+    $rdx = COPY %10
+    %11:fr64 = IMPLICIT_DEF
+    $xmm0 = COPY %11
+    %12:fr64 = IMPLICIT_DEF
+    $xmm1 = COPY %12
+    %13:gr32 = IMPLICIT_DEF
+    $ecx = COPY %13
+    $r8d = COPY %6
+    $al = COPY %8
+    CALL64pcrel32 target-flags(x86-plt) @baz, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit $esi, implicit $rdx, implicit $xmm0, implicit $xmm1, implicit $ecx, implicit $r8d, implicit $al, implicit-def $rsp, implicit-def $ssp
+    JMP_1 %bb.4
+
+  bb.4:
+    RETQ
+
+  bb.5 (landing-pad):
+    liveins: $rax, $rdx
+
+    %15:gr64 = COPY killed $rdx
+    %14:gr64 = COPY killed $rax
+    RETQ
+
+...