diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -49,10 +49,16 @@
   MCInst getNop() const override;
   const MCInstrDesc &getBrCond(RISCVCC::CondCode CC) const;
 
+  // The MemBytes overloads additionally report the width of the access in
+  // bytes; the two-operand forms remain for callers that do not need it.
   unsigned isLoadFromStackSlot(const MachineInstr &MI,
                                int &FrameIndex) const override;
+  unsigned isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex,
+                               unsigned &MemBytes) const override;
   unsigned isStoreToStackSlot(const MachineInstr &MI,
                               int &FrameIndex) const override;
+  unsigned isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex,
+                              unsigned &MemBytes) const override;
 
   void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -78,20 +78,36 @@
 }
 
 unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                              int &FrameIndex) const {
+  // Width-agnostic query: forward to the MemBytes overload and discard the
+  // reported size.
+  unsigned IgnoredMemBytes = 0;
+  return isLoadFromStackSlot(MI, FrameIndex, IgnoredMemBytes);
+}
+
+unsigned RISCVInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
+                                             int &FrameIndex,
+                                             unsigned &MemBytes) const {
   switch (MI.getOpcode()) {
   default:
     return 0;
   case RISCV::LB:
   case RISCV::LBU:
+    MemBytes = 1;
+    break;
   case RISCV::LH:
   case RISCV::LHU:
   case RISCV::FLH:
+    MemBytes = 2;
+    break;
   case RISCV::LW:
   case RISCV::FLW:
   case RISCV::LWU:
+    MemBytes = 4;
+    break;
   case RISCV::LD:
   case RISCV::FLD:
+    MemBytes = 8;
     break;
   }
 
@@ -105,17 +121,33 @@
 }
 
 unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                             int &FrameIndex) const {
+  // Width-agnostic query: forward to the MemBytes overload and discard the
+  // reported size.
+  unsigned IgnoredMemBytes = 0;
+  return isStoreToStackSlot(MI, FrameIndex, IgnoredMemBytes);
+}
+
+unsigned RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
+                                            int &FrameIndex,
+                                            unsigned &MemBytes) const {
   switch (MI.getOpcode()) {
   default:
     return 0;
   case RISCV::SB:
+    MemBytes = 1;
+    break;
   case RISCV::SH:
-  case RISCV::SW:
   case RISCV::FSH:
+    MemBytes = 2;
+    break;
+  case RISCV::SW:
   case RISCV::FSW:
+    MemBytes = 4;
+    break;
   case RISCV::SD:
   case RISCV::FSD:
+    MemBytes = 8;
     break;
   }
 
diff --git 
a/llvm/test/CodeGen/RISCV/stack-slot-coloring.mir b/llvm/test/CodeGen/RISCV/stack-slot-coloring.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/stack-slot-coloring.mir
@@ -0,0 +1,147 @@
+# RUN: llc -march=riscv32 -run-pass=greedy,virtregrewriter,stack-slot-coloring %s -o - 2>&1 | FileCheck %s
+
+--- |
+  define dso_local i32 @main() local_unnamed_addr {
+  entry:
+    %a = alloca i32, align 4
+    ret i32 0
+  }
+
+...
+---
+name: main
+alignment: 4
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+callsEHReturn: false
+callsUnwindInit: false
+hasEHCatchret: false
+hasEHScopes: false
+hasEHFunclets: false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+  - { id: 0, class: gpr, preferred-register: '' }
+  - { id: 1, class: gpr, preferred-register: '' }
+liveins: []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap: false
+  hasPatchPoint: false
+  stackSize: 0
+  offsetAdjustment: 0
+  maxAlignment: 4
+  adjustsStack: false
+  hasCalls: false
+  stackProtector: ''
+  functionContext: ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart: false
+  hasMustTailInVarArgFunc: false
+  hasTailCall: false
+  localFrameSize: 0
+  savePoint: ''
+  restorePoint: ''
+fixedStack: []
+stack:
+  - { id: 0, name: a, type: default, offset: 0, size: 4, alignment: 4,
+      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
+      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo:
+  varArgsFrameIndex: 0
+  varArgsSaveSize: 0
+body: |
+  bb.0.entry:
+    $x10 = LW %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a)
+    $x11 = LW %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a)
+    $x12 = LW %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a)
+    $x13 = LW %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a)
+    $x14 = LW %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a)
+    $x15 = LW %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a)
+    $x16 = LW %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a)
+    $x17 = LW %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a)
+    $x5 = LW %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a)
+    $x6 = LW %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a)
+    $x7 = LW %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a)
+    $x28 = LW %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a)
+    $x29 = LW %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a)
+    $x30 = LW %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a)
+    $x31 = LW %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a)
+    $x8 = LW %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a)
+    $x9 = LW %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a)
+    $x18 = LW %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a)
+    $x19 = LW %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a)
+    $x20 = LW %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a)
+    $x21 = LW %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a)
+    $x22 = LW %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a)
+    $x23 = LW %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a)
+    $x24 = LW %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a)
+    $x25 = LW %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a)
+    $x26 = LW %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a)
+    $x27 = LW %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a)
+
+    ; First vreg load; x5-x31 are already defined above and are stored again at the end of the block.
+    %1:gpr = LW %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a)
+
+    ; First faulty sequence (byte-wide LB followed by a word-wide SW on the same frame index); %1 spilt
+    %12:gpr = LB %stack.0.a, 0 :: (volatile dereferenceable load (s8) from %ir.a)
+    SW %12, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+    ; CHECK: renamable $x1 = LB %stack.0.a, 0 :: (volatile dereferenceable load (s8) from %ir.a)
+    ; CHECK-NEXT: SW killed renamable $x1, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+
+    ; Store %1 so that it is not optimised out; this will result in a load-from-spill.
+    SW %1, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+
+    ; The same code sequence a second time, to generate a second spill slot that
+    ; will get coloured and merged.
+    %2:gpr = LW %stack.0.a, 0 :: (volatile dereferenceable load (s32) from %ir.a)
+
+    %22:gpr = LB %stack.0.a, 0 :: (volatile dereferenceable load (s8) from %ir.a)
+    SW %22, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+    ; CHECK: renamable $x1 = LB %stack.0.a, 0 :: (volatile dereferenceable load (s8) from %ir.a)
+    ; CHECK-NEXT: SW killed renamable $x1, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+
+    SW %2, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+
+    SW $x10, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+    SW $x11, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+    SW $x12, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+    SW $x13, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+    SW $x14, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+    SW $x15, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+    SW $x16, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+    SW $x17, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+    SW $x5, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+    SW $x6, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+    SW $x7, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+    SW $x28, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+    SW $x29, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+    SW $x30, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+    SW $x31, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+    SW $x8, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+    SW $x9, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+    SW $x18, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+    SW $x19, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+    SW $x20, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+    SW $x21, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+    SW $x22, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+    SW $x23, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+    SW $x24, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+    SW $x25, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+    SW $x26, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+    SW $x27, %stack.0.a, 0 :: (volatile store (s32) into %ir.a)
+    $x10 = COPY $x0
+    PseudoRET implicit killed $x10
+
+...