diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -2053,6 +2053,10 @@ // Merge an ADDI into the offset of a load/store instruction where possible. // (load (addi base, off1), off2) -> (load base, off1+off2) // (store val, (addi base, off1), off2) -> (store val, base, off1+off2) +// (load (add base, (addi src, off1)), off2) +// -> (load (add base, src), off1+off2) +// (store val, (add base, (addi src, off1)), off2) +// -> (store val, (add base, src), off1+off2) // This is possible when off1+off2 fits a 12-bit immediate. bool RISCVDAGToDAGISel::doPeepholeLoadStoreADDI(SDNode *N) { int OffsetOpIdx; @@ -2092,8 +2096,34 @@ SDValue Base = N->getOperand(BaseOpIdx); + if (!Base.isMachineOpcode()) + return false; + + // There is an ADD between the ADDI and the load/store. + SDValue Add; + int AddBaseIdx; + if (Base.getMachineOpcode() == RISCV::ADD) { + if (!Base.hasOneUse()) + return false; + Add = Base; + SDValue Op0 = Base.getOperand(0); + SDValue Op1 = Base.getOperand(1); + if (Op0.isMachineOpcode() && Op0.getMachineOpcode() == RISCV::ADDI && + isa(Op0.getOperand(1)) && + cast(Op0.getOperand(1))->getSExtValue() != 0) { + AddBaseIdx = 1; + Base = Op0; + } else if (Op1.isMachineOpcode() && Op1.getMachineOpcode() == RISCV::ADDI && + isa(Op1.getOperand(1)) && + cast(Op1.getOperand(1))->getSExtValue() != 0) { + AddBaseIdx = 0; + Base = Op1; + } else + return false; + } + // If the base is an ADDI, we can merge it in to the load/store. 
- if (!Base.isMachineOpcode() || Base.getMachineOpcode() != RISCV::ADDI) + if (Base.getMachineOpcode() != RISCV::ADDI) return false; SDValue ImmOperand = Base.getOperand(1); @@ -2140,13 +2170,27 @@ LLVM_DEBUG(N->dump(CurDAG)); LLVM_DEBUG(dbgs() << "\n"); + if (Add) + Add = SDValue(CurDAG->UpdateNodeOperands(Add.getNode(), + Add.getOperand(AddBaseIdx), + Base.getOperand(0)), + 0); + // Modify the offset operand of the load/store. - if (BaseOpIdx == 0) // Load - CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand, - N->getOperand(2)); - else // Store - CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0), - ImmOperand, N->getOperand(3)); + if (BaseOpIdx == 0) { // Load + if (Add) + N = CurDAG->UpdateNodeOperands(N, Add, ImmOperand, N->getOperand(2)); + else + N = CurDAG->UpdateNodeOperands(N, Base.getOperand(0), ImmOperand, + N->getOperand(2)); + } else { // Store + if (Add) + N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), Add, ImmOperand, + N->getOperand(3)); + else + N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), Base.getOperand(0), + ImmOperand, N->getOperand(3)); + } return true; } diff --git a/llvm/test/CodeGen/RISCV/large-stack.ll b/llvm/test/CodeGen/RISCV/large-stack.ll --- a/llvm/test/CodeGen/RISCV/large-stack.ll +++ b/llvm/test/CodeGen/RISCV/large-stack.ll @@ -59,13 +59,12 @@ ; RV32I-FPELIM-NEXT: sub sp, sp, a1 ; RV32I-FPELIM-NEXT: .cfi_def_cfa_offset 400016 ; RV32I-FPELIM-NEXT: lui a1, 78 -; RV32I-FPELIM-NEXT: addi a1, a1, 512 ; RV32I-FPELIM-NEXT: addi a2, sp, 8 ; RV32I-FPELIM-NEXT: add a1, a2, a1 ; RV32I-FPELIM-NEXT: #APP ; RV32I-FPELIM-NEXT: nop ; RV32I-FPELIM-NEXT: #NO_APP -; RV32I-FPELIM-NEXT: sw a0, 0(a1) +; RV32I-FPELIM-NEXT: sw a0, 512(a1) ; RV32I-FPELIM-NEXT: #APP ; RV32I-FPELIM-NEXT: nop ; RV32I-FPELIM-NEXT: #NO_APP @@ -95,7 +94,6 @@ ; RV32I-WITHFP-NEXT: addi a1, a1, 688 ; RV32I-WITHFP-NEXT: sub sp, sp, a1 ; RV32I-WITHFP-NEXT: lui a1, 78 -; RV32I-WITHFP-NEXT: addi a1, a1, 512 ; RV32I-WITHFP-NEXT: lui a2, 1048478 ; 
RV32I-WITHFP-NEXT: addi a2, a2, 1388 ; RV32I-WITHFP-NEXT: add a2, s0, a2 @@ -103,7 +101,7 @@ ; RV32I-WITHFP-NEXT: #APP ; RV32I-WITHFP-NEXT: nop ; RV32I-WITHFP-NEXT: #NO_APP -; RV32I-WITHFP-NEXT: sw a0, 0(a1) +; RV32I-WITHFP-NEXT: sw a0, 512(a1) ; RV32I-WITHFP-NEXT: #APP ; RV32I-WITHFP-NEXT: nop ; RV32I-WITHFP-NEXT: #NO_APP diff --git a/llvm/test/CodeGen/RISCV/mem.ll b/llvm/test/CodeGen/RISCV/mem.ll --- a/llvm/test/CodeGen/RISCV/mem.ll +++ b/llvm/test/CodeGen/RISCV/mem.ll @@ -198,3 +198,42 @@ store i32 %a, i32* %1 ret i32 %2 } + +define i32 @lw_far_local(i32* %a) { +; RV32I-LABEL: lw_far_local: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a1, 4 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: lw a0, -4(a0) +; RV32I-NEXT: ret + %1 = getelementptr inbounds i32, i32* %a, i64 4095 + %2 = load volatile i32, i32* %1 + ret i32 %2 +} + +define void @st_far_local(i32* %a, i32 %b) { +; RV32I-LABEL: st_far_local: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a2, 4 +; RV32I-NEXT: add a0, a0, a2 +; RV32I-NEXT: sw a1, -4(a0) +; RV32I-NEXT: ret + %1 = getelementptr inbounds i32, i32* %a, i64 4095 + store i32 %b, i32* %1 + ret void +} + +define i32 @lw_sw_far_local(i32* %a, i32 %b) { +; RV32I-LABEL: lw_sw_far_local: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a2, 4 +; RV32I-NEXT: addi a2, a2, -4 +; RV32I-NEXT: add a2, a0, a2 +; RV32I-NEXT: lw a0, 0(a2) +; RV32I-NEXT: sw a1, 0(a2) +; RV32I-NEXT: ret + %1 = getelementptr inbounds i32, i32* %a, i64 4095 + %2 = load volatile i32, i32* %1 + store i32 %b, i32* %1 + ret i32 %2 +} diff --git a/llvm/test/CodeGen/RISCV/mem64.ll b/llvm/test/CodeGen/RISCV/mem64.ll --- a/llvm/test/CodeGen/RISCV/mem64.ll +++ b/llvm/test/CodeGen/RISCV/mem64.ll @@ -228,3 +228,44 @@ store i64 %a, i64* %2 ret i64 %1 } + +define i64 @lw_far_local(i64* %a) { +; RV64I-LABEL: lw_far_local: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a1, 8 +; RV64I-NEXT: addiw a1, a1, -8 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ld a0, 0(a0) +; RV64I-NEXT: ret + %1 = getelementptr inbounds i64, i64* %a, i64 4095 + 
%2 = load volatile i64, i64* %1 + ret i64 %2 +} + +define void @st_far_local(i64* %a, i64 %b) { +; RV64I-LABEL: st_far_local: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a2, 8 +; RV64I-NEXT: addiw a2, a2, -8 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: sd a1, 0(a0) +; RV64I-NEXT: ret + %1 = getelementptr inbounds i64, i64* %a, i64 4095 + store i64 %b, i64* %1 + ret void +} + +define i64 @lw_sw_far_local(i64* %a, i64 %b) { +; RV64I-LABEL: lw_sw_far_local: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a2, 8 +; RV64I-NEXT: addiw a2, a2, -8 +; RV64I-NEXT: add a2, a0, a2 +; RV64I-NEXT: ld a0, 0(a2) +; RV64I-NEXT: sd a1, 0(a2) +; RV64I-NEXT: ret + %1 = getelementptr inbounds i64, i64* %a, i64 4095 + %2 = load volatile i64, i64* %1 + store i64 %b, i64* %1 + ret i64 %2 +}