Index: lib/CodeGen/InlineSpiller.cpp
===================================================================
--- lib/CodeGen/InlineSpiller.cpp
+++ lib/CodeGen/InlineSpiller.cpp
@@ -28,6 +28,7 @@
 #include "llvm/CodeGen/LiveInterval.h"
 #include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/LiveRangeEdit.h"
+#include "llvm/CodeGen/LiveRegUnits.h"
 #include "llvm/CodeGen/LiveStacks.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -214,6 +215,7 @@
   void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI);
 
   void markValueUsed(LiveInterval*, VNInfo*);
+  bool canGuaranteeAssignmentAfterRemat(unsigned VReg, MachineInstr &MI);
   bool reMaterializeFor(LiveInterval &, MachineInstr &MI);
   void reMaterializeAll();
 
@@ -513,6 +515,67 @@
   } while (!WorkList.empty());
 }
 
+bool InlineSpiller::canGuaranteeAssignmentAfterRemat(unsigned VReg,
+                                                     MachineInstr &MI) {
+  // The basic scheme is to figure out how many physical registers we could
+  // assign immediately before the given instruction and prevent remat if
+  // there are more virtual registers which might need to be assigned to
+  // the largest containing register class.  Note that this makes an
+  // assumption about what the "assign" logic in the register allocator
+  // does; in particular, it assumes the previous assignments which span
+  // the potential remat and use sites can be evicted and spilled to free
+  // up every physical register.
+  // TODO: This code is very conservative.  We can be more aggressive here,
+  // but if we find we need to, we should seriously consider separating
+  // remat into its own phase in RegAllocGreedy.
+
+  // Returns the largest legal super register class which contains the
+  // register class of VReg.  Working with this register class ensures
+  // we're conservative about sub-registers.
+  auto getLLRCForVReg = [&](unsigned VReg) {
+    assert(TargetRegisterInfo::isVirtualRegister(VReg));
+    const auto *RC = MRI.getRegClass(VReg);
+    return TRI.getLargestLegalSuperClass(RC, MF);
+  };
+
+  const auto *RC = getLLRCForVReg(VReg);
+  const ArrayRef<MCPhysReg> AllocationOrder = RC->getRawAllocationOrder(MF);
+  if (AllocationOrder.size() > MI.getNumOperands())
+    // Fast path for the common case.
+    return true;
+
+  int NumPhysRegAvailable = 0;
+  // See if one of the registers in RC wasn't used so far.  LiveRegUnits
+  // handles register aliases of already assigned physregs for us.
+  LiveRegUnits Used(TRI);
+  Used.accumulate(MI);
+  for (MCPhysReg Reg : AllocationOrder)
+    if (!MRI.isReserved(Reg) && Used.available(Reg))
+      NumPhysRegAvailable++;
+
+  if (!NumPhysRegAvailable)
+    return false;
+
+  // Each virtual register might require a unique physreg in the worst case.
+  // Check to see if we can assign one free physreg to every virtual
+  // register not yet assigned.  This is necessary as we may decide to
+  // remat *all* operands before actually assigning any of them.  This is
+  // actually the common case.
+  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = MI.getOperand(i);
+    if (!MO.isReg() || !MO.isUse())
+      continue;
+    const auto Reg = MO.getReg();
+    if (!TargetRegisterInfo::isVirtualRegister(Reg) ||
+        RC != getLLRCForVReg(Reg))
+      continue;
+    NumPhysRegAvailable--;
+    if (!NumPhysRegAvailable)
+      return false;
+  }
+  return true;
+}
+
 /// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
 bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
   // Analyze instruction
@@ -568,6 +631,11 @@
     return true;
   }
 
+  // If we can't guarantee that we'll be able to actually assign the new vreg,
+  // we can't remat.
+  if (!canGuaranteeAssignmentAfterRemat(VirtReg.reg, MI))
+    return false;
+
   // Allocate a new register for the remat.
   unsigned NewVReg = Edit->createFrom(Original);
 
Index: test/CodeGen/X86/statepoint-live-in.ll
===================================================================
--- test/CodeGen/X86/statepoint-live-in.ll
+++ test/CodeGen/X86/statepoint-live-in.ll
@@ -128,6 +128,77 @@
   ret void
 }
 
+; A variant of test7 where values are not directly foldable from stack slots.
+define void @test7(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) gc "statepoint-example" {
+; The code for this is terrible; for now, simply check for correctness.
+; CHECK-LABEL: test7:
+; CHECK: callq _bar
+entry:
+  %a64 = zext i32 %a to i64
+  %b64 = zext i32 %b to i64
+  %c64 = zext i32 %c to i64
+  %d64 = zext i32 %d to i64
+  %e64 = zext i32 %e to i64
+  %f64 = zext i32 %f to i64
+  %g64 = zext i32 %g to i64
+  %h64 = zext i32 %h to i64
+  %i64 = zext i32 %i to i64
+  %j64 = zext i32 %j to i64
+  %k64 = zext i32 %k to i64
+  %l64 = zext i32 %l to i64
+  %m64 = zext i32 %m to i64
+  %n64 = zext i32 %n to i64
+  %o64 = zext i32 %o to i64
+  %p64 = zext i32 %p to i64
+  %q64 = zext i32 %q to i64
+  %r64 = zext i32 %r to i64
+  %s64 = zext i32 %s to i64
+  %t64 = zext i32 %t to i64
+  %u64 = zext i32 %u to i64
+  %v64 = zext i32 %v to i64
+  %w64 = zext i32 %w to i64
+  %x64 = zext i32 %x to i64
+  %y64 = zext i32 %y to i64
+  %z64 = zext i32 %z to i64
+  %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 26, i64 %a64, i64 %b64, i64 %c64, i64 %d64, i64 %e64, i64 %f64, i64 %g64, i64 %h64, i64 %i64, i64 %j64, i64 %k64, i64 %l64, i64 %m64, i64 %n64, i64 %o64, i64 %p64, i64 %q64, i64 %r64, i64 %s64, i64 %t64, i64 %u64, i64 %v64, i64 %w64, i64 %x64, i64 %y64, i64 %z64)
+  ret void
+}
+
+; A variant of test7 with mixed types chosen to exercise register aliases.
+define void @test8(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) gc "statepoint-example" {
+; The code for this is terrible; for now, simply check for correctness.
+; CHECK-LABEL: test8:
+; CHECK: callq _bar
+entry:
+  %a8 = trunc i32 %a to i8
+  %b8 = trunc i32 %b to i8
+  %c8 = trunc i32 %c to i8
+  %d8 = trunc i32 %d to i8
+  %e16 = trunc i32 %e to i16
+  %f16 = trunc i32 %f to i16
+  %g16 = trunc i32 %g to i16
+  %h16 = trunc i32 %h to i16
+  %i64 = zext i32 %i to i64
+  %j64 = zext i32 %j to i64
+  %k64 = zext i32 %k to i64
+  %l64 = zext i32 %l to i64
+  %m64 = zext i32 %m to i64
+  %n64 = zext i32 %n to i64
+  %o64 = zext i32 %o to i64
+  %p64 = zext i32 %p to i64
+  %q64 = zext i32 %q to i64
+  %r64 = zext i32 %r to i64
+  %s64 = zext i32 %s to i64
+  %t64 = zext i32 %t to i64
+  %u64 = zext i32 %u to i64
+  %v64 = zext i32 %v to i64
+  %w64 = zext i32 %w to i64
+  %x64 = zext i32 %x to i64
+  %y64 = zext i32 %y to i64
+  %z64 = zext i32 %z to i64
+  %statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 26, i8 %a8, i8 %b8, i8 %c8, i8 %d8, i16 %e16, i16 %f16, i16 %g16, i16 %h16, i64 %i64, i64 %j64, i64 %k64, i64 %l64, i64 %m64, i64 %n64, i64 %o64, i64 %p64, i64 %q64, i64 %r64, i64 %s64, i64 %t64, i64 %u64, i64 %v64, i64 %w64, i64 %x64, i64 %y64, i64 %z64)
+  ret void
+}
 
 ; CHECK: Ltmp0-_test1
 ; CHECK: .byte 1
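
For reference, here is a minimal, self-contained sketch of the worst-case counting argument that canGuaranteeAssignmentAfterRemat makes. It is plain C++, independent of the LLVM types; the names PhysRegState, VirtRegUse, and wouldRematBeSafe are invented for illustration and are not part of this patch. Free physical registers at the use site are counted once, and every not-yet-assigned virtual-register use sharing the largest legal super class is assumed to need its own free register, so remat is only treated as safe while the free-register budget stays positive.

// Simplified, hypothetical model of the counting heuristic above.
#include <vector>

struct PhysRegState {
  bool Reserved;   // reserved by the target, never allocatable
  bool LiveAtUse;  // already occupied at the use site (LiveRegUnits-style)
};

struct VirtRegUse {
  unsigned LargestClassId;  // id of the largest legal super register class
  bool HasAssignment;       // already mapped to a physical register
};

// Require strictly more free physical registers than virtual-register uses
// of class ClassId, since in the worst case every such use is rematerialized
// and each fresh vreg needs its own assignment.
static bool wouldRematBeSafe(const std::vector<PhysRegState> &AllocationOrder,
                             const std::vector<VirtRegUse> &UsesAtInstr,
                             unsigned ClassId) {
  // Fast path: more allocatable candidates than operands at the use site.
  if (AllocationOrder.size() > UsesAtInstr.size())
    return true;

  int NumPhysRegAvailable = 0;
  for (const PhysRegState &P : AllocationOrder)
    if (!P.Reserved && !P.LiveAtUse)
      ++NumPhysRegAvailable;
  if (NumPhysRegAvailable == 0)
    return false;

  // Worst case: each unassigned use of the same class needs a unique physreg.
  for (const VirtRegUse &U : UsesAtInstr) {
    if (U.HasAssignment || U.LargestClassId != ClassId)
      continue;
    if (--NumPhysRegAvailable == 0)
      return false;
  }
  return true;
}

A caller would populate AllocationOrder from the register class and UsesAtInstr from the instruction's register operands, which is what the patch does with getRawAllocationOrder and the MI.getOperand loop.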