diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -218,6 +218,10 @@ finalizeInsInstrs(MachineInstr &Root, MachineCombinerPattern &P, SmallVectorImpl &InsInstrs) const override; + bool shouldReduceRegisterPressure( + const MachineBasicBlock *MBB, + const RegisterClassInfo *RegClassInfo) const override; + void genAlternativeCodeSequence( MachineInstr &Root, MachineCombinerPattern Pattern, SmallVectorImpl &InsInstrs, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -26,6 +26,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineTraceMetrics.h" +#include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/MC/MCInstBuilder.h" @@ -34,6 +35,8 @@ using namespace llvm; +#define DEBUG_TYPE "riscv-instr-info" + #define GEN_CHECK_COMPRESS_INSTR #include "RISCVGenCompressInstEmitter.inc" @@ -1379,6 +1382,55 @@ } } +/// Walks \p MBB from its last instruction to its first with a +/// RegPressureTracker and returns the resulting MaxSetPressure vector, which +/// callers index by register pressure set ID. +static std::vector<unsigned> +getMBBPressure(const MachineBasicBlock *MBB, + const RegisterClassInfo *RegClassInfo, + const TargetRegisterInfo *TRI, const MachineRegisterInfo *MRI) { + RegionPressure Pressure; + RegPressureTracker RPTracker(Pressure); + + // Initialize the register pressure tracker. 
+ RPTracker.init(MBB->getParent(), RegClassInfo, nullptr, MBB, MBB->end(), + /*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true); + + for (const auto &MI : reverse(*MBB)) { + if (MI.isDebugOrPseudoInstr()) + continue; + RegisterOperands RegOpers; + RegOpers.collect(MI, *TRI, *MRI, false, false); + RPTracker.recedeSkipDebugValues(); + assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!"); + RPTracker.recede(RegOpers); + } + + // Close the RPTracker to finalize live ins. + RPTracker.closeRegion(); + + return RPTracker.getPressure().MaxSetPressure; +} + +/// Reports whether the GPR pressure measured over \p MBB exceeds the GPR +/// pressure set limit returned by the target register info. No other +/// register pressure set is inspected. +bool RISCVInstrInfo::shouldReduceRegisterPressure( + const MachineBasicBlock *MBB, const RegisterClassInfo *RegClassInfo) const { + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + const MachineFunction *MF = MBB->getParent(); + const MachineRegisterInfo *MRI = &MF->getRegInfo(); + + unsigned GPRLimit = TRI->getRegPressureSetLimit( + *MF, RISCV::RegisterPressureSets::GPR); + unsigned Pressure = getMBBPressure(MBB, RegClassInfo, TRI, + MRI)[RISCV::RegisterPressureSets::GPR]; + + LLVM_DEBUG(dbgs() << "GPR Register Pressure: " << Pressure << "::" << GPRLimit + << "\n"); + return Pressure > GPRLimit; +} + static bool isFADD(unsigned Opc) { switch (Opc) { default: @@ -1572,9 +1624,15 @@ MachineInstr &Root, SmallVectorImpl &Patterns, bool DoRegPressureReduce) const { + // We only track register pressure on the GPR register class: see + // shouldReduceRegisterPressure() to see how DoRegPressureReduce is set. 
+ if (getFPPatterns(Root, Patterns, DoRegPressureReduce)) return true; + if (DoRegPressureReduce) + return false; + return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns, DoRegPressureReduce); } diff --git a/llvm/test/CodeGen/RISCV/machine-combiner.ll b/llvm/test/CodeGen/RISCV/machine-combiner.ll --- a/llvm/test/CodeGen/RISCV/machine-combiner.ll +++ b/llvm/test/CodeGen/RISCV/machine-combiner.ll @@ -1142,164 +1142,140 @@ define i32 @test_reg_pressure(ptr %a) { ; CHECK-LABEL: test_reg_pressure: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi sp, sp, -64 -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: sd s0, 56(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s1, 48(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s2, 40(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s3, 32(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s4, 24(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s5, 16(sp) # 8-byte Folded Spill -; CHECK-NEXT: sd s6, 8(sp) # 8-byte Folded Spill -; CHECK-NEXT: .cfi_offset s0, -8 -; CHECK-NEXT: .cfi_offset s1, -16 -; CHECK-NEXT: .cfi_offset s2, -24 -; CHECK-NEXT: .cfi_offset s3, -32 -; CHECK-NEXT: .cfi_offset s4, -40 -; CHECK-NEXT: .cfi_offset s5, -48 -; CHECK-NEXT: .cfi_offset s6, -56 ; CHECK-NEXT: fence rw, rw -; CHECK-NEXT: lw a6, 0(a0) +; CHECK-NEXT: lw a1, 0(a0) ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw -; CHECK-NEXT: lw a7, 4(a0) +; CHECK-NEXT: lw a2, 4(a0) ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw -; CHECK-NEXT: lw t0, 8(a0) +; CHECK-NEXT: lw a3, 8(a0) +; CHECK-NEXT: or a1, a1, a2 ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw -; CHECK-NEXT: lw t1, 12(a0) +; CHECK-NEXT: lw a2, 12(a0) +; CHECK-NEXT: or a1, a1, a3 ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw -; CHECK-NEXT: lw t2, 16(a0) +; CHECK-NEXT: lw a3, 16(a0) +; CHECK-NEXT: or a1, a1, a2 ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw -; CHECK-NEXT: lw t6, 20(a0) +; CHECK-NEXT: lw a2, 20(a0) +; CHECK-NEXT: or a1, a1, a3 ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: 
fence rw, rw -; CHECK-NEXT: lw t3, 24(a0) +; CHECK-NEXT: lw a3, 24(a0) +; CHECK-NEXT: or a1, a1, a2 ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw -; CHECK-NEXT: lw t4, 28(a0) +; CHECK-NEXT: lw a2, 28(a0) +; CHECK-NEXT: or a1, a1, a3 ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw -; CHECK-NEXT: lw s3, 32(a0) +; CHECK-NEXT: lw a3, 32(a0) +; CHECK-NEXT: or a1, a1, a2 ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw -; CHECK-NEXT: lw s2, 36(a0) +; CHECK-NEXT: lw a2, 36(a0) +; CHECK-NEXT: or a1, a1, a3 ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw -; CHECK-NEXT: lw t5, 40(a0) +; CHECK-NEXT: lw a3, 40(a0) +; CHECK-NEXT: or a1, a1, a2 ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw -; CHECK-NEXT: lw s4, 44(a0) +; CHECK-NEXT: lw a2, 44(a0) +; CHECK-NEXT: or a1, a1, a3 ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw -; CHECK-NEXT: lw s5, 48(a0) +; CHECK-NEXT: lw a3, 48(a0) +; CHECK-NEXT: or a1, a1, a2 ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw -; CHECK-NEXT: or a4, a6, a7 -; CHECK-NEXT: lw s6, 52(a0) +; CHECK-NEXT: lw a2, 52(a0) +; CHECK-NEXT: or a1, a1, a3 ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw -; CHECK-NEXT: or s0, t0, t1 -; CHECK-NEXT: lw a7, 56(a0) +; CHECK-NEXT: lw a3, 56(a0) +; CHECK-NEXT: or a1, a1, a2 ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw -; CHECK-NEXT: or a1, t2, t6 -; CHECK-NEXT: lw a6, 60(a0) +; CHECK-NEXT: lw a2, 60(a0) +; CHECK-NEXT: or a1, a1, a3 ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw -; CHECK-NEXT: or a4, a4, s0 -; CHECK-NEXT: lw s0, 64(a0) +; CHECK-NEXT: lw a3, 64(a0) +; CHECK-NEXT: or a1, a1, a2 ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw -; CHECK-NEXT: or a1, a1, t3 -; CHECK-NEXT: lw a3, 68(a0) +; CHECK-NEXT: lw a2, 68(a0) +; CHECK-NEXT: or a1, a1, a3 ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw -; CHECK-NEXT: or a2, t4, s3 -; CHECK-NEXT: lw t2, 72(a0) +; CHECK-NEXT: lw a3, 72(a0) ; CHECK-NEXT: fence r, rw +; CHECK-NEXT: or a1, a1, a2 ; CHECK-NEXT: fence 
rw, rw -; CHECK-NEXT: or a1, a1, a4 -; CHECK-NEXT: lw a4, 76(a0) +; CHECK-NEXT: lw a2, 76(a0) ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw -; CHECK-NEXT: or a2, a2, s2 -; CHECK-NEXT: lw a4, 80(a0) +; CHECK-NEXT: or a1, a1, a3 +; CHECK-NEXT: lw a2, 80(a0) ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw -; CHECK-NEXT: or a5, s4, s5 -; CHECK-NEXT: lw t0, 84(a0) +; CHECK-NEXT: or a1, a1, a2 +; CHECK-NEXT: lw a2, 84(a0) ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw -; CHECK-NEXT: or a2, a2, t5 -; CHECK-NEXT: lw t1, 88(a0) +; CHECK-NEXT: or a1, a1, a2 +; CHECK-NEXT: lw a2, 88(a0) ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw -; CHECK-NEXT: or a5, a5, s6 -; CHECK-NEXT: lw s1, 92(a0) +; CHECK-NEXT: or a1, a1, a2 +; CHECK-NEXT: lw a2, 92(a0) ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw ; CHECK-NEXT: or a1, a1, a2 ; CHECK-NEXT: lw a2, 96(a0) -; CHECK-NEXT: or a5, a5, a7 -; CHECK-NEXT: or a3, a3, s0 ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw -; CHECK-NEXT: or a2, a2, s1 -; CHECK-NEXT: lw s1, 100(a0) -; CHECK-NEXT: or a5, a5, a6 -; CHECK-NEXT: or a3, a3, t2 +; CHECK-NEXT: or a1, a1, a2 +; CHECK-NEXT: lw a2, 100(a0) ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw -; CHECK-NEXT: or a2, a2, s1 -; CHECK-NEXT: lw s1, 104(a0) +; CHECK-NEXT: or a1, a1, a2 +; CHECK-NEXT: lw a2, 104(a0) ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw -; CHECK-NEXT: or a3, a3, a4 -; CHECK-NEXT: lw a4, 108(a0) +; CHECK-NEXT: or a1, a1, a2 +; CHECK-NEXT: lw a2, 108(a0) ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw -; CHECK-NEXT: or a2, a2, s1 -; CHECK-NEXT: lw s1, 112(a0) +; CHECK-NEXT: or a1, a1, a2 +; CHECK-NEXT: lw a2, 112(a0) ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw -; CHECK-NEXT: or a3, a3, t0 -; CHECK-NEXT: lw s0, 116(a0) +; CHECK-NEXT: or a1, a1, a2 +; CHECK-NEXT: lw a2, 116(a0) ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw -; CHECK-NEXT: or a2, a2, a4 -; CHECK-NEXT: lw a4, 120(a0) +; CHECK-NEXT: or a1, a1, a2 +; 
CHECK-NEXT: lw a2, 120(a0) ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw -; CHECK-NEXT: or a1, a1, a5 -; CHECK-NEXT: lw a5, 124(a0) -; CHECK-NEXT: or a3, a3, t1 -; CHECK-NEXT: or a2, a2, s1 +; CHECK-NEXT: or a1, a1, a2 +; CHECK-NEXT: lw a2, 124(a0) ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw -; CHECK-NEXT: or a4, a4, a5 -; CHECK-NEXT: lw a5, 124(a0) -; CHECK-NEXT: or a1, a1, a3 -; CHECK-NEXT: or a2, a2, s0 -; CHECK-NEXT: or a4, a4, a5 +; CHECK-NEXT: or a1, a1, a2 +; CHECK-NEXT: lw a2, 124(a0) +; CHECK-NEXT: or a1, a1, a2 ; CHECK-NEXT: fence r, rw ; CHECK-NEXT: fence rw, rw ; CHECK-NEXT: lw a0, 124(a0) -; CHECK-NEXT: or a1, a1, a2 -; CHECK-NEXT: or a0, a0, a4 ; CHECK-NEXT: or a0, a0, a1 ; CHECK-NEXT: fence r, rw -; CHECK-NEXT: ld s0, 56(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s1, 48(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s2, 40(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s3, 32(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s4, 24(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s5, 16(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s6, 8(sp) # 8-byte Folded Reload -; CHECK-NEXT: addi sp, sp, 64 ; CHECK-NEXT: ret entry: %0 = load atomic i32, ptr %a seq_cst, align 4