diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -14,6 +14,7 @@
 #include "MCTargetDesc/RISCVMCTargetDesc.h"
 #include "Utils/RISCVMatInt.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/Support/Alignment.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
@@ -160,8 +161,9 @@
 }
 
 // Merge an ADDI into the offset of a load/store instruction where possible.
-// (load (add base, off), 0) -> (load base, off)
-// (store val, (add base, off)) -> (store val, base, off)
+// (load (addi base, off1), off2) -> (load base, off1+off2)
+// (store val, (addi base, off1), off2) -> (store val, base, off1+off2)
+// This is possible when off1+off2 fits a 12-bit immediate.
 void RISCVDAGToDAGISel::doPeepholeLoadStoreADDI() {
   SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode());
   ++Position;
@@ -202,10 +204,7 @@
       break;
     }
 
-    // Currently, the load/store offset must be 0 to be considered for this
-    // peephole optimisation.
-    if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)) ||
-        N->getConstantOperandVal(OffsetOpIdx) != 0)
+    if (!isa<ConstantSDNode>(N->getOperand(OffsetOpIdx)))
       continue;
 
     SDValue Base = N->getOperand(BaseOpIdx);
@@ -215,18 +214,41 @@
       continue;
 
     SDValue ImmOperand = Base.getOperand(1);
+    uint64_t Offset2 = N->getConstantOperandVal(OffsetOpIdx);
 
     if (auto Const = dyn_cast<ConstantSDNode>(ImmOperand)) {
-      ImmOperand = CurDAG->getTargetConstant(
-          Const->getSExtValue(), SDLoc(ImmOperand), ImmOperand.getValueType());
+      int64_t Offset1 = Const->getSExtValue();
+      int64_t CombinedOffset = Offset1 + Offset2;
+      if (!isInt<12>(CombinedOffset))
+        continue;
+      ImmOperand = CurDAG->getTargetConstant(CombinedOffset, SDLoc(ImmOperand),
+                                             ImmOperand.getValueType());
     } else if (auto GA = dyn_cast<GlobalAddressSDNode>(ImmOperand)) {
+      // If the off1 in (addi base, off1) is a global variable's address (its
+      // low part, really), then we can rely on the alignment of that variable
+      // to provide a margin of safety before off1 can overflow the 12 bits.
+      // Check if off2 falls within that margin; if so off1+off2 can't overflow.
+      auto GO = dyn_cast<GlobalObject>(GA->getGlobal());
+      if (!GO)
+        continue;
+      unsigned Alignment = GO->getAlignment();
+      if (Offset2 != 0 && Offset2 >= Alignment)
+        continue;
+      int64_t Offset1 = GA->getOffset();
+      int64_t CombinedOffset = Offset1 + Offset2;
       ImmOperand = CurDAG->getTargetGlobalAddress(
           GA->getGlobal(), SDLoc(ImmOperand), ImmOperand.getValueType(),
-          GA->getOffset(), GA->getTargetFlags());
+          CombinedOffset, GA->getTargetFlags());
     } else if (auto CP = dyn_cast<ConstantPoolSDNode>(ImmOperand)) {
+      // Ditto.
+      Align Alignment = CP->getAlign();
+      if (Offset2 != 0 && Alignment <= Offset2)
+        continue;
+      int64_t Offset1 = CP->getOffset();
+      int64_t CombinedOffset = Offset1 + Offset2;
       ImmOperand = CurDAG->getTargetConstantPool(
           CP->getConstVal(), ImmOperand.getValueType(), CP->getAlign(),
-          CP->getOffset(), CP->getTargetFlags());
+          CombinedOffset, CP->getTargetFlags());
     } else {
       continue;
     }
diff --git a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
--- a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
+++ b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
@@ -99,8 +99,7 @@
 ; RV32I:       # %bb.0: # %entry
 ; RV32I-NEXT:    lui a1, %hi(g_8)
 ; RV32I-NEXT:    lw a0, %lo(g_8)(a1)
-; RV32I-NEXT:    addi a1, a1, %lo(g_8)
-; RV32I-NEXT:    lw a1, 4(a1)
+; RV32I-NEXT:    lw a1, %lo(g_8+4)(a1)
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: load_g_8:
@@ -118,8 +117,7 @@
 ; RV32I:       # %bb.0: # %entry
 ; RV32I-NEXT:    lui a1, %hi(g_16)
 ; RV32I-NEXT:    lw a0, %lo(g_16)(a1)
-; RV32I-NEXT:    addi a1, a1, %lo(g_16)
-; RV32I-NEXT:    lw a1, 4(a1)
+; RV32I-NEXT:    lw a1, %lo(g_16+4)(a1)
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: load_g_16:
@@ -155,9 +153,8 @@
 ; RV32I-LABEL: store_g_8:
 ; RV32I:       # %bb.0: # %entry
 ; RV32I-NEXT:    lui a0, %hi(g_8)
+; RV32I-NEXT:    sw zero, %lo(g_8+4)(a0)
 ; RV32I-NEXT:    sw zero, %lo(g_8)(a0)
-; RV32I-NEXT:    addi a0, a0, %lo(g_8)
-; RV32I-NEXT:    sw zero, 4(a0)
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: store_g_8:
@@ -197,15 +194,14 @@
 define i64 @load_ga_16() nounwind {
 ; RV32I-LABEL: load_ga_16:
 ; RV32I:       # %bb.0: # %entry
-; RV32I-NEXT:    lui a0, %hi(ga_16)
-; RV32I-NEXT:    addi a1, a0, %lo(ga_16)
-; RV32I-NEXT:    lw a0, 8(a1)
-; RV32I-NEXT:    lw a1, 12(a1)
+; RV32I-NEXT:    lui a1, %hi(ga_16)
+; RV32I-NEXT:    lw a0, %lo(ga_16+8)(a1)
+; RV32I-NEXT:    lw a1, %lo(ga_16+12)(a1)
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: load_ga_16:
 ; RV64I:       # %bb.0: # %entry
-; RV64I-NEXT:    lui a0, %hi(ga_16+8)
+; RV64I-NEXT:    lui a0, %hi(ga_16)
 ; RV64I-NEXT:    ld a0, %lo(ga_16+8)(a0)
 ; RV64I-NEXT:    ret
 entry:
@@ -245,8 +241,7 @@
 ; RV32I-NEXT:    lui a0, %tprel_hi(tl_8)
 ; RV32I-NEXT:    add a1, a0, tp, %tprel_add(tl_8)
 ; RV32I-NEXT:    lw a0, %tprel_lo(tl_8)(a1)
-; RV32I-NEXT:    addi a1, a1, %tprel_lo(tl_8)
-; RV32I-NEXT:    lw a1, 4(a1)
+; RV32I-NEXT:    lw a1, %tprel_lo(tl_8+4)(a1)
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: load_tl_8:
diff --git a/llvm/test/CodeGen/RISCV/fp128.ll b/llvm/test/CodeGen/RISCV/fp128.ll
--- a/llvm/test/CodeGen/RISCV/fp128.ll
+++ b/llvm/test/CodeGen/RISCV/fp128.ll
@@ -14,27 +14,25 @@
 ; RV32I-NEXT:    addi sp, sp, -48
 ; RV32I-NEXT:    sw ra, 44(sp)
 ; RV32I-NEXT:    lui a0, %hi(x)
-; RV32I-NEXT:    addi a1, a0, %lo(x)
-; RV32I-NEXT:    lw a6, 4(a1)
-; RV32I-NEXT:    lw a7, 8(a1)
-; RV32I-NEXT:    lw a1, 12(a1)
-; RV32I-NEXT:    lw a0, %lo(x)(a0)
+; RV32I-NEXT:    lw a6, %lo(x)(a0)
+; RV32I-NEXT:    lw a7, %lo(x+4)(a0)
+; RV32I-NEXT:    lw a3, %lo(x+8)(a0)
+; RV32I-NEXT:    lw a0, %lo(x+12)(a0)
 ; RV32I-NEXT:    lui a4, %hi(y)
-; RV32I-NEXT:    addi a5, a4, %lo(y)
-; RV32I-NEXT:    lw a2, 4(a5)
-; RV32I-NEXT:    lw a3, 8(a5)
-; RV32I-NEXT:    lw a5, 12(a5)
-; RV32I-NEXT:    lw a4, %lo(y)(a4)
-; RV32I-NEXT:    sw a4, 8(sp)
-; RV32I-NEXT:    sw a0, 24(sp)
-; RV32I-NEXT:    sw a5, 20(sp)
-; RV32I-NEXT:    sw a3, 16(sp)
+; RV32I-NEXT:    lw a5, %lo(y)(a4)
+; RV32I-NEXT:    lw a2, %lo(y+4)(a4)
+; RV32I-NEXT:    lw a1, %lo(y+8)(a4)
+; RV32I-NEXT:    lw a4, %lo(y+12)(a4)
+; RV32I-NEXT:    sw a4, 20(sp)
+; RV32I-NEXT:    sw a1, 16(sp)
 ; RV32I-NEXT:    sw a2, 12(sp)
-; RV32I-NEXT:    sw a1, 36(sp)
-; RV32I-NEXT:    sw a7, 32(sp)
+; RV32I-NEXT:    sw a5, 8(sp)
+; RV32I-NEXT:    sw a0, 36(sp)
+; RV32I-NEXT:    sw a3, 32(sp)
+; RV32I-NEXT:    sw a7, 28(sp)
 ; RV32I-NEXT:    addi a0, sp, 24
 ; RV32I-NEXT:    addi a1, sp, 8
-; RV32I-NEXT:    sw a6, 28(sp)
+; RV32I-NEXT:    sw a6, 24(sp)
 ; RV32I-NEXT:    call __netf2
 ; RV32I-NEXT:    snez a0, a0
 ; RV32I-NEXT:    lw ra, 44(sp)
@@ -53,28 +51,26 @@
 ; RV32I-NEXT:    addi sp, sp, -80
 ; RV32I-NEXT:    sw ra, 76(sp)
 ; RV32I-NEXT:    lui a0, %hi(x)
-; RV32I-NEXT:    addi a1, a0, %lo(x)
-; RV32I-NEXT:    lw a6, 4(a1)
-; RV32I-NEXT:    lw a7, 8(a1)
-; RV32I-NEXT:    lw a1, 12(a1)
-; RV32I-NEXT:    lw a0, %lo(x)(a0)
+; RV32I-NEXT:    lw a6, %lo(x)(a0)
+; RV32I-NEXT:    lw a7, %lo(x+4)(a0)
+; RV32I-NEXT:    lw a2, %lo(x+8)(a0)
+; RV32I-NEXT:    lw a0, %lo(x+12)(a0)
 ; RV32I-NEXT:    lui a4, %hi(y)
-; RV32I-NEXT:    addi a5, a4, %lo(y)
-; RV32I-NEXT:    lw a3, 4(a5)
-; RV32I-NEXT:    lw a2, 8(a5)
-; RV32I-NEXT:    lw a5, 12(a5)
-; RV32I-NEXT:    lw a4, %lo(y)(a4)
-; RV32I-NEXT:    sw a4, 24(sp)
-; RV32I-NEXT:    sw a0, 40(sp)
-; RV32I-NEXT:    sw a5, 36(sp)
-; RV32I-NEXT:    sw a2, 32(sp)
+; RV32I-NEXT:    lw a5, %lo(y)(a4)
+; RV32I-NEXT:    lw a3, %lo(y+4)(a4)
+; RV32I-NEXT:    lw a1, %lo(y+8)(a4)
+; RV32I-NEXT:    lw a4, %lo(y+12)(a4)
+; RV32I-NEXT:    sw a4, 36(sp)
+; RV32I-NEXT:    sw a1, 32(sp)
 ; RV32I-NEXT:    sw a3, 28(sp)
-; RV32I-NEXT:    sw a1, 52(sp)
-; RV32I-NEXT:    sw a7, 48(sp)
+; RV32I-NEXT:    sw a5, 24(sp)
+; RV32I-NEXT:    sw a0, 52(sp)
+; RV32I-NEXT:    sw a2, 48(sp)
+; RV32I-NEXT:    sw a7, 44(sp)
 ; RV32I-NEXT:    addi a0, sp, 56
 ; RV32I-NEXT:    addi a1, sp, 40
 ; RV32I-NEXT:    addi a2, sp, 24
-; RV32I-NEXT:    sw a6, 44(sp)
+; RV32I-NEXT:    sw a6, 40(sp)
 ; RV32I-NEXT:    call __addtf3
 ; RV32I-NEXT:    lw a1, 56(sp)
 ; RV32I-NEXT:    lw a0, 60(sp)
diff --git a/llvm/test/CodeGen/RISCV/wide-mem.ll b/llvm/test/CodeGen/RISCV/wide-mem.ll
--- a/llvm/test/CodeGen/RISCV/wide-mem.ll
+++ b/llvm/test/CodeGen/RISCV/wide-mem.ll
@@ -22,8 +22,7 @@
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    lui a1, %hi(val64)
 ; RV32I-NEXT:    lw a0, %lo(val64)(a1)
-; RV32I-NEXT:    addi a1, a1, %lo(val64)
-; RV32I-NEXT:    lw a1, 4(a1)
+; RV32I-NEXT:    lw a1, %lo(val64+4)(a1)
 ; RV32I-NEXT:    ret
   %1 = load i64, i64* @val64
   ret i64 %1
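
Note on the margin-of-safety argument (commentary, not part of the patch): the
GlobalAddressSDNode and ConstantPoolSDNode branches rely on the fact that an
Alignment-aligned symbol address has a %lo part that is a multiple of
Alignment, so adding any off2 < Alignment can neither overflow the signed
12-bit immediate field nor change the %hi part. The sketch below checks this
exhaustively for small addresses and power-of-two alignments up to 2048;
loPart/hiPart are illustrative stand-ins for the RISC-V %lo/%hi relocation
arithmetic, not LLVM APIs.

#include <cassert>
#include <cstdint>

// Illustrative stand-ins for the RISC-V relocation split (not LLVM APIs):
// %lo(addr) is the low 12 bits of addr, sign-extended; %hi(addr) is chosen
// so that (hiPart(addr) << 12) + loPart(addr) == addr.
static int64_t loPart(uint64_t Addr) {
  return (int64_t)(Addr << 52) >> 52; // sign-extend bits [11:0]
}
static int64_t hiPart(uint64_t Addr) {
  return (int64_t)((Addr + 0x800) >> 12); // +0x800 compensates for signed %lo
}

int main() {
  // For an Alignment-aligned Addr, the low 12 bits of Addr are a multiple of
  // Alignment, so adding any Off2 < Alignment cannot carry out of the 12-bit
  // field: %hi is unchanged and %lo grows by exactly Off2. This is the margin
  // of safety behind the peephole's "bail out when Offset2 >= Alignment" test.
  for (uint64_t Alignment = 1; Alignment <= 2048; Alignment <<= 1)
    for (uint64_t Addr = 0; Addr < (1u << 14); Addr += Alignment)
      for (uint64_t Off2 = 0; Off2 < Alignment; ++Off2) {
        assert(hiPart(Addr + Off2) == hiPart(Addr));
        assert(loPart(Addr + Off2) == loPart(Addr) + (int64_t)Off2);
      }
  return 0;
}

The check compares off2 against the declared alignment rather than the
symbol's actual address, which is unknown until link time; when getAlignment()
returns 0 (no alignment specified), any nonzero off2 makes the peephole bail
out, so the fold stays conservative.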