diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -449,6 +449,38 @@
   return true;
 }
 
+static bool inlineAsmWouldIncreasePressure(const MachineInstr &MI,
+                                           const MachineFunction &MF,
+                                           const TargetInstrInfo *TII) {
+  if (!MI.isInlineAsm())
+    return false;
+
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  DenseMap<const TargetRegisterClass *, unsigned> RCCount;
+
+  // For each operand, keep a count of the encountered TargetRegisterClass.
+  for (unsigned OpNo = 0, e = MI.getNumOperands(); OpNo != e; ++OpNo)
+    if (MI.getOperand(OpNo).isReg())
+      if (const auto *RC = MI.getRegClassConstraint(OpNo, TII, TRI))
+        ++RCCount[RC];
+
+  // Is the number of TargetRegisterClass operands of the INLINEASM above the
+  // pressure limit?
+  for (const auto &It : RCCount) {
+    unsigned PressureLimit = TRI->getRegPressureLimit(It.first, MF);
+    if (It.second > PressureLimit) {
+      LLVM_DEBUG(
+          dbgs() << "Not rescheduling around: " << MI << "\nINLINEASM has "
+                 << It.second << " " << TRI->getRegClassName(It.first)
+                 << " operands, which is above the register pressure limit of "
+                 << PressureLimit << "\n";);
+      return true;
+    }
+  }
+
+  return false;
+}
+
 /// Return true of the given instruction should not be included in a scheduling
 /// region.
 ///
@@ -459,11 +491,12 @@
 /// scheduling across calls. In PostRA scheduling, we need the isCall to enforce
 /// the boundary, but there would be no benefit to postRA scheduling across
 /// calls this late anyway.
-static bool isSchedBoundary(MachineBasicBlock::iterator MI,
-                            MachineBasicBlock *MBB,
-                            MachineFunction *MF,
+static bool isSchedBoundary(const MachineInstr &MI,
+                            const MachineBasicBlock &MBB,
+                            const MachineFunction &MF,
                             const TargetInstrInfo *TII) {
-  return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
+  return MI.isCall() || TII->isSchedulingBoundary(MI, &MBB, MF) ||
+         inlineAsmWouldIncreasePressure(MI, MF, TII);
 }
 
 /// A region of an MBB for scheduling.
@@ -486,12 +519,10 @@
 
 using MBBRegionsVector = SmallVector<SchedRegion, 16>;
 
-static void
-getSchedRegions(MachineBasicBlock *MBB,
-                MBBRegionsVector &Regions,
-                bool RegionsTopDown) {
-  MachineFunction *MF = MBB->getParent();
-  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+static void getSchedRegions(MachineBasicBlock *MBB, MBBRegionsVector &Regions,
+                            bool RegionsTopDown) {
+  const MachineFunction &MF = *MBB->getParent();
+  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
 
   MachineBasicBlock::iterator I = nullptr;
   for(MachineBasicBlock::iterator RegionEnd = MBB->end();
@@ -499,7 +530,7 @@
 
     // Avoid decrementing RegionEnd for blocks with no terminator.
     if (RegionEnd != MBB->end() ||
-        isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
+        isSchedBoundary(*std::prev(RegionEnd), *MBB, MF, TII)) {
       --RegionEnd;
     }
 
@@ -508,8 +539,8 @@
     unsigned NumRegionInstrs = 0;
     I = RegionEnd;
     for (;I != MBB->begin(); --I) {
-      MachineInstr &MI = *std::prev(I);
-      if (isSchedBoundary(&MI, &*MBB, MF, TII))
+      const MachineInstr &MI = *std::prev(I);
+      if (isSchedBoundary(MI, *MBB, MF, TII))
         break;
       if (!MI.isDebugOrPseudoInstr()) {
         // MBB::size() uses instr_iterator to count. Here we need a bundle to
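For intuition about when the new boundary fires: the helper counts the register
operands of an INLINEASM per register class and compares each count against the
target's pressure limit, which for x86's GR32 is the 4 - FPDiff seen in the
X86RegisterInfo hunk below. A minimal reproducer sketch, not taken from the
patch (the function name and constraint mix are hypothetical):

// Hypothetical reproducer, not part of the patch: six "+r" constraints
// demand six GR32 registers simultaneously on i686, more than the
// 4 - FPDiff limit, so inlineAsmWouldIncreasePressure() should treat the
// resulting INLINEASM as a scheduling boundary rather than let the
// scheduler move surrounding instructions across it.
void pressure_heavy(unsigned &a, unsigned &b, unsigned &c,
                    unsigned &d, unsigned &e, unsigned &f) {
  asm volatile("" : "+r"(a), "+r"(b), "+r"(c), "+r"(d), "+r"(e), "+r"(f));
}
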
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -263,6 +263,8 @@
   switch (RC->getID()) {
   default:
     return 0;
+  case X86::GR8RegClassID:
+  case X86::GR16RegClassID:
   case X86::GR32RegClassID:
     return 4 - FPDiff;
   case X86::GR64RegClassID:
diff --git a/llvm/test/CodeGen/X86/scheduler-asm-moves.mir b/llvm/test/CodeGen/X86/scheduler-asm-moves.mir
--- a/llvm/test/CodeGen/X86/scheduler-asm-moves.mir
+++ b/llvm/test/CodeGen/X86/scheduler-asm-moves.mir
@@ -123,6 +123,8 @@
   ; CHECK-LABEL: name: synproxy_send_tcp_ipv6
   ; CHECK: liveins: $eax, $edx
   ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edx
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32_abcd = COPY $eax
   ; CHECK-NEXT: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm $noreg, 1, $noreg, @csum_ipv6_magic_saddr, $noreg :: (dereferenceable load (s8) from `i8* getelementptr inbounds (%struct.in6_addr, %struct.in6_addr* @csum_ipv6_magic_saddr, i32 0, i32 0, i32 0)`)
   ; CHECK-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm $noreg, 1, $noreg, @csum_ipv6_magic_daddr, $noreg :: (dereferenceable load (s32) from @csum_ipv6_magic_daddr, !tbaa !4)
   ; CHECK-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm $noreg, 1, $noreg, @csum_ipv6_magic_proto, $noreg :: (dereferenceable load (s32) from @csum_ipv6_magic_proto, !tbaa !4)
@@ -131,11 +133,9 @@
   ; CHECK-NEXT: MOV32mr $noreg, 1, $noreg, @csum_ipv6_magic_sum, $noreg, %2 :: (store (s32) into @csum_ipv6_magic_sum, !tbaa !4)
   ; CHECK-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm $noreg, 1, $noreg, @synproxy_send_tcp_ipv6_nskb, $noreg :: (dereferenceable load (s32) from `i8** bitcast (%struct.sk_buff** @synproxy_send_tcp_ipv6_nskb to i8**)`, !tbaa !9)
   ; CHECK-NEXT: OR8mi [[MOV32rm2]], 1, $noreg, 0, $noreg, 3, implicit-def dead $eflags :: (store (s8) into %ir.4), (load (s8) from %ir.4)
-  ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32_abcd = COPY $eax
-  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $edx
-  ; CHECK-NEXT: [[MOV8rm1:%[0-9]+]]:gr8 = MOV8rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s8) from %ir.5, !tbaa !11)
+  ; CHECK-NEXT: [[MOV8rm1:%[0-9]+]]:gr8 = MOV8rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s8) from %ir.5, !tbaa !11)
   ; CHECK-NEXT: MOV8mr $noreg, 1, $noreg, @synproxy_send_tcp_ipv6_fl6, $noreg, [[MOV8rm1]] :: (store (s8) into `i8* getelementptr inbounds (%struct.in6_addr, %struct.in6_addr* @synproxy_send_tcp_ipv6_fl6, i32 0, i32 0, i32 0)`, !tbaa !11)
-  ; CHECK-NEXT: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 [[COPY]].sub_8bit
+  ; CHECK-NEXT: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 [[COPY1]].sub_8bit
   ; CHECK-NEXT: $eax = COPY [[MOVZX32rr8_]]
   ; CHECK-NEXT: TCRETURNdi @fl6nthsecurity_skb_classify_flow, 0, csr_32, implicit $esp, implicit $ssp, implicit $eax
   %1:gr32 = COPY $edx
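A note on the X86RegisterInfo hunk, which is my reading rather than something
the patch states: the helper compares each per-class operand count against
getRegPressureLimit(), and GR8/GR16 previously fell through to the
"default: return 0" case, so any inline asm with a byte or word register
operand would trivially exceed the limit and become a boundary. Routing them
to the GR32 limit of 4 - FPDiff keeps the heuristic selective, which the
updated scheduler-asm-moves.mir checks depend on. The counting logic itself is
easy to model in isolation; here is a standalone sketch, with made-up class
names and limits standing in for TargetRegisterClass and
getRegPressureLimit():

#include <iostream>
#include <map>
#include <string>
#include <vector>

// Standalone model of the heuristic: count register operands per class,
// then flag the asm if any class's count exceeds its pressure limit.
// Class names and limits are illustrative stand-ins, not LLVM values.
static bool wouldIncreasePressure(const std::vector<std::string> &Operands,
                                  const std::map<std::string, unsigned> &Limit) {
  std::map<std::string, unsigned> RCCount;
  for (const std::string &RC : Operands)
    ++RCCount[RC];
  for (const auto &[RC, Count] : RCCount)
    if (Count > Limit.at(RC))
      return true; // one class demands more registers than are to spare
  return false;
}

int main() {
  const std::map<std::string, unsigned> Limit = {{"GR32", 4}, {"GR8", 4}};
  // Six GR32 operands against a limit of four: boundary (prints 1).
  std::cout << wouldIncreasePressure(
                   {"GR32", "GR32", "GR32", "GR32", "GR32", "GR32"}, Limit)
            << '\n';
  // Two GR8 operands against a limit of four: no boundary (prints 0).
  std::cout << wouldIncreasePressure({"GR8", "GR8"}, Limit) << '\n';
}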