diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -69,6 +69,14 @@
                             int FrameIndex, const TargetRegisterClass *RC,
                             const TargetRegisterInfo *TRI) const override;
 
+  using TargetInstrInfo::foldMemoryOperandImpl;
+  MachineInstr *foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
+                                      ArrayRef<unsigned> Ops,
+                                      MachineBasicBlock::iterator InsertPt,
+                                      int FrameIndex,
+                                      LiveIntervals *LIS = nullptr,
+                                      VirtRegMap *VRM = nullptr) const override;
+
   // Materializes the given integer Val into DstReg.
   void movImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
               const DebugLoc &DL, Register DstReg, uint64_t Val,
@@ -183,6 +191,9 @@
 
 namespace RISCV {
 
+// Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
+bool isSEXT_W(const MachineInstr &MI);
+
 // Returns true if the given MI is an RVV instruction opcode for which we may
 // expect to see a FrameIndex operand.
 bool isRVVSpill(const MachineInstr &MI);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -637,6 +637,37 @@
   }
 }
 
+MachineInstr *RISCVInstrInfo::foldMemoryOperandImpl(
+    MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
+    MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS,
+    VirtRegMap *VRM) const {
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+  // The below optimizations narrow the load so they are only valid for little
+  // endian.
+  // TODO: Support big endian by adding an offset into the frame object?
+  if (MF.getDataLayout().isBigEndian())
+    return nullptr;
+
+  // Fold load from stack followed by sext.w into lw.
+  // TODO: Fold with sext.b, sext.h, zext.b, zext.h, zext.w?
+  if (Ops.size() == 1 && Ops[0] == 1 && RISCV::isSEXT_W(MI)) {
+    MachineMemOperand *MMO = MF.getMachineMemOperand(
+        MachinePointerInfo::getFixedStack(MF, FrameIndex),
+        MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),
+        MFI.getObjectAlign(FrameIndex));
+
+    Register DstReg = MI.getOperand(0).getReg();
+    return BuildMI(*MI.getParent(), InsertPt, MI.getDebugLoc(), get(RISCV::LW),
+                   DstReg)
+        .addFrameIndex(FrameIndex)
+        .addImm(0)
+        .addMemOperand(MMO);
+  }
+
+  return nullptr;
+}
+
 void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI,
                             const DebugLoc &DL, Register DstReg, uint64_t Val,
@@ -1865,6 +1896,12 @@
   return VL;
 }
 
+// Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
+bool RISCV::isSEXT_W(const MachineInstr &MI) {
+  return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() &&
+         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0;
+}
+
 static bool isRVVWholeLoadStore(unsigned Opcode) {
   switch (Opcode) {
   default:
diff --git a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp
--- a/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp
@@ -443,8 +443,7 @@
     MachineInstr *MI = &*I++;
 
     // We're looking for the sext.w pattern ADDIW rd, rs1, 0.
-    if (MI->getOpcode() != RISCV::ADDIW || !MI->getOperand(2).isImm() ||
-        MI->getOperand(2).getImm() != 0 || !MI->getOperand(1).isReg())
+    if (!RISCV::isSEXT_W(*MI))
      continue;
 
     // Input should be a virtual register.
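For illustration, the new foldMemoryOperandImpl hook replaces a 64-bit stack reload followed by a separate sext.w (ADDIW rd, rs1, 0) with a single sign-extending 32-bit load. A rough sketch in RISC-V assembly; the register and stack offset are illustrative, not taken from a particular function:

    # Before folding (illustrative):
    ld     a0, 8(sp)       # reload the whole 8-byte spill slot
    addiw  a0, a0, 0       # sext.w a0, a0
    # After folding (illustrative):
    lw     a0, 8(sp)       # on RV64, lw sign-extends the loaded 32 bits

Because lw reads only the low 32 bits of the slot, the fold relies on little-endian layout placing those bits at offset 0 of the frame object; supporting big endian would need an adjusted offset, as noted in the TODO above.
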
diff --git a/llvm/test/CodeGen/RISCV/stack-folding.ll b/llvm/test/CodeGen/RISCV/stack-folding.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/stack-folding.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=riscv64 | FileCheck %s
+
+; Make sure we emit an lw for the stack reload in 'truebb'.
+define i1 @foo(i64 %x, i32 %y) nounwind {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -144
+; CHECK-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd gp, 128(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd tp, 120(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s1, 104(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s2, 96(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s3, 88(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s4, 80(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s5, 72(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s6, 64(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s7, 56(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s8, 48(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s9, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s10, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s11, 24(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd a1, 8(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    #APP
+; CHECK-NEXT:    #NO_APP
+; CHECK-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    beqz a0, .LBB0_2
+; CHECK-NEXT:  # %bb.1: # %falsebb
+; CHECK-NEXT:    li a0, 0
+; CHECK-NEXT:    j .LBB0_3
+; CHECK-NEXT:  .LBB0_2: # %truebb
+; CHECK-NEXT:    lw a0, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    slti a0, a0, 0
+; CHECK-NEXT:  .LBB0_3: # %falsebb
+; CHECK-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld gp, 128(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld tp, 120(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s1, 104(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s2, 96(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s3, 88(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s4, 80(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s5, 72(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s6, 64(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s7, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s8, 48(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s9, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s10, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s11, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 144
+; CHECK-NEXT:    ret
+  tail call void asm sideeffect "", "~{x1},~{x3},~{x4},~{x5},~{x6},~{x7},~{x8},~{x9},~{x10},~{x11},~{x12},~{x13},~{x14},~{x15},~{x16},~{x17},~{x18},~{x19},~{x20},~{x21},~{x22},~{x23},~{x24},~{x25},~{x26},~{x27},~{x28},~{x29},~{x30},~{x31}"()
+  %a = icmp eq i64 %x, 0
+  br i1 %a, label %truebb, label %falsebb
+truebb:
+  %b = icmp slt i32 %y, 0
+  ret i1 %b
+falsebb:
+  ret i1 0
+}
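If codegen changes later invalidate these CHECK lines, they can be regenerated with the script named in the NOTE line. A minimal sketch of the invocation, assuming an existing llc build; <build> is a placeholder for the build directory:

    llvm/utils/update_llc_test_checks.py --llc-binary=<build>/bin/llc \
        llvm/test/CodeGen/RISCV/stack-folding.ll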