diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -1078,7 +1078,52 @@
   UsedInInstr.clear();
   BundleVirtRegsMap.clear();
 
-  // Scan for special cases; Apply pre-assigned register defs to state.
+  // Scan for special cases;
+  //
+  // Check for copies with a VReg use in a reg-class with all registers
+  // pre-assigned. In that case, hoist the instruction up to the definition of
+  // the VReg in the hope that some pre-assigned registers have been freed up.
+  if (MI.isCopy()) {
+    MachineOperand &Op0 = MI.getOperand(0);
+    MachineOperand &Op1 = MI.getOperand(1);
+    if (Op0.isReg() && Op0.getReg().isPhysical() && Op1.isReg() &&
+        Op1.getReg().isVirtual()) {
+      Register Reg = Op1.getReg();
+      const TargetRegisterClass &RC = *MRI->getRegClass(Reg);
+      ArrayRef AllocationOrder = RegClassInfo.getOrder(&RC);
+      if (all_of(AllocationOrder, [&](MCPhysReg PhysReg) {
+            for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) {
+              if (RegUnitStates[*UI] == regPreAssigned) {
+                return true;
+              }
+            }
+            return false;
+          })) {
+
+        // If we find the definition of Reg, move MI just after it and exit
+        // the function.
+        for (auto &MI2 : reverse(make_range(
+                 MI.getParent()->begin()->getIterator(), MI.getIterator()))) {
+          bool DefinesReg = MI2.getOperand(0).isReg() &&
+                            MI2.getOperand(0).isDef() &&
+                            MI2.getOperand(0).getReg() == Reg;
+
+          bool UsesReg = any_of(MI2.operands(), [Reg](MachineOperand &MO) {
+            return MO.isReg() && MO.getReg() == Reg;
+          });
+
+          // If MI2 defines or uses Reg, move MI just after MI2.
+          // we can only move just after this instruction.
+          if (DefinesReg || UsesReg) {
+            MI.moveBefore(&*std::next(MI2.getIterator()));
+            return;
+          }
+        }
+      }
+    }
+  }
+
+  // Apply pre-assigned register defs to state.
   bool HasPhysRegUse = false;
   bool HasRegMask = false;
   bool HasVRegDef = false;
@@ -1433,7 +1478,7 @@
   Coalesced.clear();
 
   // Traverse block in reverse order allocating instructions one by one.
-  for (MachineInstr &MI : reverse(MBB)) {
+  for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) {
     LLVM_DEBUG(
         dbgs() << "\n>> " << MI << "Regs:";
         dumpState()
diff --git a/llvm/test/CodeGen/X86/regallocfast-need-to-move-copy.ll b/llvm/test/CodeGen/X86/regallocfast-need-to-move-copy.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/regallocfast-need-to-move-copy.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -o - -O0 -verify-machineinstrs %s | FileCheck %s
+
+target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux"
+
+define void @widget(i32* %arg, i1 %arg1) align 2 personality i8* undef {
+; CHECK-LABEL: widget:
+; CHECK:       # %bb.0: # %bb
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset %rbp, -16
+; CHECK-NEXT:    movq %rsp, %rbp
+; CHECK-NEXT:    .cfi_def_cfa_register %rbp
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    andq $-32, %rsp
+; CHECK-NEXT:    subq $64, %rsp
+; CHECK-NEXT:    movq %rsp, %rbx
+; CHECK-NEXT:    .cfi_offset %rbx, -24
+; CHECK-NEXT:    movq %rdi, 24(%rbx) # 8-byte Spill
+; CHECK-NEXT:    movb %sil, %al
+; CHECK-NEXT:    testb $1, %al
+; CHECK-NEXT:    jne .LBB0_1
+; CHECK-NEXT:    jmp .LBB0_2
+; CHECK-NEXT:  .LBB0_1: # %bb2
+; CHECK-NEXT:  .LBB0_2: # %bb3
+; CHECK-NEXT:    movq 24(%rbx), %rax # 8-byte Reload
+; CHECK-NEXT:    movl (%rax), %eax
+; CHECK-NEXT:    movl %eax, 20(%rbx) # 4-byte Spill
+; CHECK-NEXT:  # %bb.3: # %bb5
+; CHECK-NEXT:    movl 20(%rbx), %ecx # 4-byte Reload
+; CHECK-NEXT:    movb %ch, %al
+; CHECK-NEXT:    movzbl %al, %ecx
+; CHECK-NEXT:    movl %ecx, %r8d
+; CHECK-NEXT:  .Ltmp0:
+; CHECK-NEXT:    movl $3427, %esi # imm = 0xD63
+; CHECK-NEXT:    movb $2, %al
+; CHECK-NEXT:    # implicit-def: $rdi
+; CHECK-NEXT:    # implicit-def: $rdx
+; CHECK-NEXT:    # implicit-def: $xmm0
+; CHECK-NEXT:    # implicit-def: $xmm1
+; CHECK-NEXT:    # implicit-def: $ecx
+; CHECK-NEXT:    callq baz@PLT
+; CHECK-NEXT:  .Ltmp1:
+; CHECK-NEXT:    jmp .LBB0_4
+; CHECK-NEXT:  .LBB0_4: # %bb8
+; CHECK-NEXT:    leaq -8(%rbp), %rsp
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
+; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB0_5: # %bb9
+; CHECK-NEXT:    .cfi_def_cfa %rbp, 16
+; CHECK-NEXT:  .Ltmp2:
+; CHECK-NEXT:    leaq -8(%rbp), %rsp
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
+; CHECK-NEXT:    retq
+bb:
+  br i1 %arg1, label %bb2, label %bb3
+
+bb2:                                              ; preds = %bb
+  %tmp = alloca i8, i64 6144, align 32
+  br label %bb3
+
+bb3:                                              ; preds = %bb2, %bb
+  %tmp4 = load i32, i32* %arg, align 4
+  br label %bb5
+
+bb5:                                              ; preds = %bb3
+  %tmp6 = lshr i32 %tmp4, 8
+  %tmp7 = and i32 %tmp6, 255
+  invoke void (i8*, i32, i8*, ...) @baz(i8* undef, i32 3427, i8* undef, double undef, double undef, i32 undef, i32 %tmp7)
+          to label %bb8 unwind label %bb9
+
+bb8:                                              ; preds = %bb5
+  ret void
+
+bb9:                                              ; preds = %bb5
+  %tmp10 = landingpad { i8*, i32 }
+          cleanup
+  ret void
+}
+
+
+declare void @baz(i8*, i32, i8*, ...)