diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -2102,10 +2102,8 @@
   // There is a ADD between ADDI and load/store. We can only fold ADDI that
   // do not have a FrameIndex operand.
   SDValue Add;
-  int AddBaseIdx;
-  if (Base.getMachineOpcode() == RISCV::ADD) {
-    if (!Base.hasOneUse())
-      return false;
+  unsigned AddBaseIdx;
+  if (Base.getMachineOpcode() == RISCV::ADD && Base.hasOneUse()) {
     Add = Base;
     SDValue Op0 = Base.getOperand(0);
     SDValue Op1 = Base.getOperand(1);
@@ -2119,12 +2117,36 @@
                isa<ConstantSDNode>(Op1.getOperand(1))) {
       AddBaseIdx = 0;
       Base = Op1;
+    } else if (Op1.isMachineOpcode() &&
+               Op1.getMachineOpcode() == RISCV::ADDIW &&
+               isa<ConstantSDNode>(Op1.getOperand(1)) &&
+               Op1.getOperand(0).isMachineOpcode() &&
+               Op1.getOperand(0).getMachineOpcode() == RISCV::LUI) {
+      // We found an LUI+ADDIW constant materialization. We might be able to
+      // fold the ADDIW offset if it could be treated as ADDI.
+      // Emulate the constant materialization to see if the result would be
+      // a simm32 if ADDI was used instead of ADDIW.
+
+      // First the LUI.
+      uint64_t Imm = Op1.getOperand(0).getConstantOperandVal(0);
+      Imm <<= 12;
+      Imm = SignExtend64(Imm, 32);
+
+      // Then the ADDI.
+      uint64_t LoImm = cast<ConstantSDNode>(Op1.getOperand(1))->getSExtValue();
+      Imm += LoImm;
+
+      // If the result isn't a simm32, we can't do the optimization.
+      if (!isInt<32>(Imm))
+        return false;
+
+      AddBaseIdx = 0;
+      Base = Op1;
     } else
       return false;
-  }
-
-  // If the base is an ADDI, we can merge it in to the load/store.
-  if (Base.getMachineOpcode() != RISCV::ADDI)
+  } else if (Base.getMachineOpcode() == RISCV::ADDI) {
+    // If the base is an ADDI, we can merge it in to the load/store.
+  } else
     return false;
 
   SDValue ImmOperand = Base.getOperand(1);
diff --git a/llvm/test/CodeGen/RISCV/mem64.ll b/llvm/test/CodeGen/RISCV/mem64.ll
--- a/llvm/test/CodeGen/RISCV/mem64.ll
+++ b/llvm/test/CodeGen/RISCV/mem64.ll
@@ -233,9 +233,8 @@
 ; RV64I-LABEL: lw_far_local:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    lui a1, 8
-; RV64I-NEXT:    addiw a1, a1, -8
 ; RV64I-NEXT:    add a0, a0, a1
-; RV64I-NEXT:    ld a0, 0(a0)
+; RV64I-NEXT:    ld a0, -8(a0)
 ; RV64I-NEXT:    ret
   %1 = getelementptr inbounds i64, i64* %a, i64 4095
   %2 = load volatile i64, i64* %1
@@ -246,9 +245,8 @@
 ; RV64I-LABEL: st_far_local:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    lui a2, 8
-; RV64I-NEXT:    addiw a2, a2, -8
 ; RV64I-NEXT:    add a0, a0, a2
-; RV64I-NEXT:    sd a1, 0(a0)
+; RV64I-NEXT:    sd a1, -8(a0)
 ; RV64I-NEXT:    ret
   %1 = getelementptr inbounds i64, i64* %a, i64 4095
   store i64 %b, i64* %1
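
Note on the new simm32 check (not part of the patch): the fold is only legal when materializing the constant with LUI+ADDI would give the same value as LUI+ADDIW, i.e. when the emulated sum still fits in a signed 32-bit immediate. The standalone C++ sketch below re-derives that check with plain integer arithmetic; canFoldLuiAddiw is a hypothetical helper written for illustration and does not exist in LLVM.

#include <cstdint>
#include <cstdio>

// Sketch of the patch's emulation: LUI materializes Hi20 << 12, sign-extended
// from 32 bits; a plain ADDI would then add Lo12 without re-extending. The
// ADDIW offset can be folded into the load/store only if that sum is still a
// simm32 (the isInt<32> check in the patch).
static bool canFoldLuiAddiw(int64_t Hi20, int64_t Lo12) {
  int64_t Imm = (Hi20 << 12) & 0xFFFFFFFF; // keep the low 32 bits (LUI field)
  if (Imm & 0x80000000)                    // sign-extend from bit 31
    Imm -= 0x100000000;
  Imm += Lo12;                             // the ADDI step
  return Imm >= INT32_MIN && Imm <= INT32_MAX;
}

int main() {
  // Values from the lw_far_local test: lui a1, 8 ; addiw a1, a1, -8.
  // 8 << 12 = 32768 and 32768 - 8 = 32760, which is a simm32, so the ADDIW
  // disappears and the -8 becomes the memory offset: ld a0, -8(a0).
  std::printf("foldable: %s\n", canFoldLuiAddiw(8, -8) ? "yes" : "no");
  return 0;
}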