diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -354,6 +354,7 @@
                          SelectionDAG &DAG) const override;
   bool shouldSinkOperands(Instruction *I,
                           SmallVectorImpl<Use *> &Ops) const override;
+  bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
   bool isFPImmLegal(const APFloat &Imm, EVT VT,
                     bool ForCodeSize) const override;
 
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1231,6 +1231,15 @@
   return true;
 }
 
+bool RISCVTargetLowering::isOffsetFoldingLegal(
+    const GlobalAddressSDNode *GA) const {
+  // In order to maximise the opportunity for common subexpression elimination,
+  // keep a separate ADD node for the global address offset instead of folding
+  // it in the global address node. Later peephole optimisations may choose to
+  // fold it back in when profitable.
+  return false;
+}
+
 bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                        bool ForCodeSize) const {
   // FIXME: Change to Zfhmin once f16 becomes a legal type with Zfhmin.
@@ -3578,21 +3587,12 @@
   SDLoc DL(Op);
   EVT Ty = Op.getValueType();
   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
-  int64_t Offset = N->getOffset();
+  assert(N->getOffset() == 0 && "unexpected offset in global node");
   MVT XLenVT = Subtarget.getXLenVT();
 
   const GlobalValue *GV = N->getGlobal();
   bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
-  SDValue Addr = getAddr(N, DAG, IsLocal);
-
-  // In order to maximise the opportunity for common subexpression elimination,
-  // emit a separate ADD node for the global address offset instead of folding
-  // it in the global address node. Later peephole optimisations may choose to
-  // fold it back in when profitable.
-  if (Offset != 0)
-    return DAG.getNode(ISD::ADD, DL, Ty, Addr,
-                       DAG.getConstant(Offset, DL, XLenVT));
-
-  return Addr;
+  return getAddr(N, DAG, IsLocal);
 }
 
 SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
@@ -3701,7 +3701,7 @@
   SDLoc DL(Op);
   EVT Ty = Op.getValueType();
   GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
-  int64_t Offset = N->getOffset();
+  assert(N->getOffset() == 0 && "unexpected offset in global node");
   MVT XLenVT = Subtarget.getXLenVT();
 
   TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
@@ -3724,13 +3724,6 @@
     break;
   }
 
-  // In order to maximise the opportunity for common subexpression elimination,
-  // emit a separate ADD node for the global address offset instead of folding
-  // it in the global address node. Later peephole optimisations may choose to
-  // fold it back in when profitable.
-  if (Offset != 0)
-    return DAG.getNode(ISD::ADD, DL, Ty, Addr,
-                       DAG.getConstant(Offset, DL, XLenVT));
   return Addr;
 }
 
diff --git a/llvm/test/CodeGen/RISCV/double-mem.ll b/llvm/test/CodeGen/RISCV/double-mem.ll
--- a/llvm/test/CodeGen/RISCV/double-mem.ll
+++ b/llvm/test/CodeGen/RISCV/double-mem.ll
@@ -59,10 +59,10 @@
 ; RV32IFD-NEXT:    fadd.d fa0, fa0, fa1
 ; RV32IFD-NEXT:    lui a0, %hi(G)
 ; RV32IFD-NEXT:    fld ft0, %lo(G)(a0)
+; RV32IFD-NEXT:    addi a1, a0, %lo(G)
 ; RV32IFD-NEXT:    fsd fa0, %lo(G)(a0)
-; RV32IFD-NEXT:    addi a0, a0, %lo(G)
-; RV32IFD-NEXT:    fld ft0, 72(a0)
-; RV32IFD-NEXT:    fsd fa0, 72(a0)
+; RV32IFD-NEXT:    fld ft0, 72(a1)
+; RV32IFD-NEXT:    fsd fa0, 72(a1)
 ; RV32IFD-NEXT:    ret
 ;
 ; RV64IFD-LABEL: fld_fsd_global:
@@ -70,10 +70,10 @@
 ; RV64IFD-NEXT:    fadd.d fa0, fa0, fa1
 ; RV64IFD-NEXT:    lui a0, %hi(G)
 ; RV64IFD-NEXT:    fld ft0, %lo(G)(a0)
+; RV64IFD-NEXT:    addi a1, a0, %lo(G)
 ; RV64IFD-NEXT:    fsd fa0, %lo(G)(a0)
-; RV64IFD-NEXT:    addi a0, a0, %lo(G)
-; RV64IFD-NEXT:    fld ft0, 72(a0)
-; RV64IFD-NEXT:    fsd fa0, 72(a0)
+; RV64IFD-NEXT:    fld ft0, 72(a1)
+; RV64IFD-NEXT:    fsd fa0, 72(a1)
 ; RV64IFD-NEXT:    ret
 ; Use %a and %b in an FP op to ensure floating point registers are used, even
 ; for the soft float ABI
diff --git a/llvm/test/CodeGen/RISCV/float-mem.ll b/llvm/test/CodeGen/RISCV/float-mem.ll
--- a/llvm/test/CodeGen/RISCV/float-mem.ll
+++ b/llvm/test/CodeGen/RISCV/float-mem.ll
@@ -61,10 +61,10 @@
 ; RV32IF-NEXT:    fadd.s fa0, fa0, fa1
 ; RV32IF-NEXT:    lui a0, %hi(G)
 ; RV32IF-NEXT:    flw ft0, %lo(G)(a0)
+; RV32IF-NEXT:    addi a1, a0, %lo(G)
 ; RV32IF-NEXT:    fsw fa0, %lo(G)(a0)
-; RV32IF-NEXT:    addi a0, a0, %lo(G)
-; RV32IF-NEXT:    flw ft0, 36(a0)
-; RV32IF-NEXT:    fsw fa0, 36(a0)
+; RV32IF-NEXT:    flw ft0, 36(a1)
+; RV32IF-NEXT:    fsw fa0, 36(a1)
 ; RV32IF-NEXT:    ret
 ;
 ; RV64IF-LABEL: flw_fsw_global:
@@ -72,10 +72,10 @@
 ; RV64IF-NEXT:    fadd.s fa0, fa0, fa1
 ; RV64IF-NEXT:    lui a0, %hi(G)
 ; RV64IF-NEXT:    flw ft0, %lo(G)(a0)
+; RV64IF-NEXT:    addi a1, a0, %lo(G)
 ; RV64IF-NEXT:    fsw fa0, %lo(G)(a0)
-; RV64IF-NEXT:    addi a0, a0, %lo(G)
-; RV64IF-NEXT:    flw ft0, 36(a0)
-; RV64IF-NEXT:    fsw fa0, 36(a0)
+; RV64IF-NEXT:    flw ft0, 36(a1)
+; RV64IF-NEXT:    fsw fa0, 36(a1)
 ; RV64IF-NEXT:    ret
   %1 = fadd float %a, %b
   %2 = load volatile float, float* @G
diff --git a/llvm/test/CodeGen/RISCV/half-mem.ll b/llvm/test/CodeGen/RISCV/half-mem.ll
--- a/llvm/test/CodeGen/RISCV/half-mem.ll
+++ b/llvm/test/CodeGen/RISCV/half-mem.ll
@@ -61,10 +61,10 @@
 ; RV32IZFH-NEXT:    fadd.h fa0, fa0, fa1
 ; RV32IZFH-NEXT:    lui a0, %hi(G)
 ; RV32IZFH-NEXT:    flh ft0, %lo(G)(a0)
+; RV32IZFH-NEXT:    addi a1, a0, %lo(G)
 ; RV32IZFH-NEXT:    fsh fa0, %lo(G)(a0)
-; RV32IZFH-NEXT:    addi a0, a0, %lo(G)
-; RV32IZFH-NEXT:    flh ft0, 18(a0)
-; RV32IZFH-NEXT:    fsh fa0, 18(a0)
+; RV32IZFH-NEXT:    flh ft0, 18(a1)
+; RV32IZFH-NEXT:    fsh fa0, 18(a1)
 ; RV32IZFH-NEXT:    ret
 ;
 ; RV64IZFH-LABEL: flh_fsh_global:
@@ -72,10 +72,10 @@
 ; RV64IZFH-NEXT:    fadd.h fa0, fa0, fa1
 ; RV64IZFH-NEXT:    lui a0, %hi(G)
 ; RV64IZFH-NEXT:    flh ft0, %lo(G)(a0)
+; RV64IZFH-NEXT:    addi a1, a0, %lo(G)
 ; RV64IZFH-NEXT:    fsh fa0, %lo(G)(a0)
-; RV64IZFH-NEXT:    addi a0, a0, %lo(G)
-; RV64IZFH-NEXT:    flh ft0, 18(a0)
-; RV64IZFH-NEXT:    fsh fa0, 18(a0)
+; RV64IZFH-NEXT:    flh ft0, 18(a1)
+; RV64IZFH-NEXT:    fsh fa0, 18(a1)
 ; RV64IZFH-NEXT:    ret
   %1 = fadd half %a, %b
   %2 = load volatile half, half* @G
diff --git a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
--- a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
+++ b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
@@ -99,11 +99,11 @@
 ;
 ; RV64-LABEL: big_offset_one_use:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    lui a0, 4
-; RV64-NEXT:    addiw a0, a0, 188
-; RV64-NEXT:    lui a1, %hi(s)
-; RV64-NEXT:    addi a1, a1, %lo(s)
-; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    lui a0, %hi(s)
+; RV64-NEXT:    addi a0, a0, %lo(s)
+; RV64-NEXT:    lui a1, 4
+; RV64-NEXT:    addiw a1, a1, 188
+; RV64-NEXT:    add a0, a0, a1
 ; RV64-NEXT:    ret
 entry:
   ret i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 5)
diff --git a/llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll b/llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll
--- a/llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll
+++ b/llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll
@@ -42,7 +42,7 @@
 ; RV32-NEXT:    addi a3, a3, 1
 ; RV32-NEXT:  .LBB0_2: # %bb
 ; RV32-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32-NEXT:    add a4, a1, a2
+; RV32-NEXT:    add a4, a2, a1
 ; RV32-NEXT:    add a1, a1, a0
 ; RV32-NEXT:    sb zero, 0(a4)
 ; RV32-NEXT:    blt a1, a3, .LBB0_2
@@ -65,7 +65,7 @@
 ; RV64-NEXT:    addw a5, a5, a1
 ; RV64-NEXT:    slli a6, a5, 32
 ; RV64-NEXT:    srli a6, a6, 32
-; RV64-NEXT:    add a6, a6, a3
+; RV64-NEXT:    add a6, a3, a6
 ; RV64-NEXT:    sb zero, 0(a6)
 ; RV64-NEXT:    addw a5, a5, a0
 ; RV64-NEXT:    addiw a2, a2, 1
diff --git a/llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll b/llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll
--- a/llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll
+++ b/llvm/test/CodeGen/RISCV/loop-strength-reduce-loop-invar.ll
@@ -54,10 +54,10 @@
 ; RV64-NEXT:    blez a1, .LBB0_3
 ; RV64-NEXT:  # %bb.1: # %cond_true.preheader
 ; RV64-NEXT:    li a4, 0
+; RV64-NEXT:    slli a0, a0, 6
 ; RV64-NEXT:    lui a2, %hi(A)
 ; RV64-NEXT:    addi a2, a2, %lo(A)
-; RV64-NEXT:    slli a0, a0, 6
-; RV64-NEXT:    add a0, a0, a2
+; RV64-NEXT:    add a0, a2, a0
 ; RV64-NEXT:    li a2, 4
 ; RV64-NEXT:    li a3, 5
 ; RV64-NEXT:  .LBB0_2: # %cond_true
diff --git a/llvm/test/CodeGen/RISCV/mem.ll b/llvm/test/CodeGen/RISCV/mem.ll
--- a/llvm/test/CodeGen/RISCV/mem.ll
+++ b/llvm/test/CodeGen/RISCV/mem.ll
@@ -170,10 +170,10 @@
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    lui a2, %hi(G)
 ; RV32I-NEXT:    lw a1, %lo(G)(a2)
+; RV32I-NEXT:    addi a3, a2, %lo(G)
 ; RV32I-NEXT:    sw a0, %lo(G)(a2)
-; RV32I-NEXT:    addi a2, a2, %lo(G)
-; RV32I-NEXT:    lw a3, 36(a2)
-; RV32I-NEXT:    sw a0, 36(a2)
+; RV32I-NEXT:    lw a2, 36(a3)
+; RV32I-NEXT:    sw a0, 36(a3)
 ; RV32I-NEXT:    mv a0, a1
 ; RV32I-NEXT:    ret
   %1 = load volatile i32, i32* @G
diff --git a/llvm/test/CodeGen/RISCV/mem64.ll b/llvm/test/CodeGen/RISCV/mem64.ll
--- a/llvm/test/CodeGen/RISCV/mem64.ll
+++ b/llvm/test/CodeGen/RISCV/mem64.ll
@@ -215,10 +215,10 @@
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    lui a2, %hi(G)
 ; RV64I-NEXT:    ld a1, %lo(G)(a2)
+; RV64I-NEXT:    addi a3, a2, %lo(G)
 ; RV64I-NEXT:    sd a0, %lo(G)(a2)
-; RV64I-NEXT:    addi a2, a2, %lo(G)
-; RV64I-NEXT:    ld a3, 72(a2)
-; RV64I-NEXT:    sd a0, 72(a2)
+; RV64I-NEXT:    ld a2, 72(a3)
+; RV64I-NEXT:    sd a0, 72(a3)
 ; RV64I-NEXT:    mv a0, a1
 ; RV64I-NEXT:    ret
   %1 = load volatile i64, i64* @G
diff --git a/llvm/test/CodeGen/RISCV/zext-with-load-is-free.ll b/llvm/test/CodeGen/RISCV/zext-with-load-is-free.ll
--- a/llvm/test/CodeGen/RISCV/zext-with-load-is-free.ll
+++ b/llvm/test/CodeGen/RISCV/zext-with-load-is-free.ll
@@ -10,12 +10,12 @@
 ; RV32I-LABEL: test_zext_i8:
 ; RV32I:       # %bb.0: # %entry
 ; RV32I-NEXT:    lui a0, %hi(bytes)
-; RV32I-NEXT:    lbu a1, %lo(bytes)(a0)
-; RV32I-NEXT:    addi a0, a0, %lo(bytes)
-; RV32I-NEXT:    lbu a0, 1(a0)
-; RV32I-NEXT:    xori a1, a1, 136
-; RV32I-NEXT:    xori a0, a0, 7
-; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    addi a1, a0, %lo(bytes)
+; RV32I-NEXT:    lbu a0, %lo(bytes)(a0)
+; RV32I-NEXT:    lbu a1, 1(a1)
+; RV32I-NEXT:    xori a0, a0, 136
+; RV32I-NEXT:    xori a1, a1, 7
+; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    beqz a0, .LBB0_2
 ; RV32I-NEXT:  # %bb.1: # %if.then
 ; RV32I-NEXT:    li a0, 1
@@ -42,14 +42,14 @@
 ; RV32I-LABEL: test_zext_i16:
 ; RV32I:       # %bb.0: # %entry
 ; RV32I-NEXT:    lui a0, %hi(shorts)
-; RV32I-NEXT:    lhu a1, %lo(shorts)(a0)
-; RV32I-NEXT:    addi a0, a0, %lo(shorts)
-; RV32I-NEXT:    lhu a0, 2(a0)
+; RV32I-NEXT:    addi a1, a0, %lo(shorts)
+; RV32I-NEXT:    lhu a0, %lo(shorts)(a0)
+; RV32I-NEXT:    lhu a1, 2(a1)
 ; RV32I-NEXT:    lui a2, 16
 ; RV32I-NEXT:    addi a2, a2, -120
-; RV32I-NEXT:    xor a1, a1, a2
-; RV32I-NEXT:    xori a0, a0, 7
-; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    xor a0, a0, a2
+; RV32I-NEXT:    xori a1, a1, 7
+; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    beqz a0, .LBB1_2
 ; RV32I-NEXT:  # %bb.1: # %if.then
 ; RV32I-NEXT:    li a0, 1
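
Note on the motivation: returning false from isOffsetFoldingLegal keeps a global's constant
offset as a separate ISD::ADD rather than folding it into the GlobalAddressSDNode, so the
lui/addi materialisation of the %hi/%lo base can be CSE'd across multiple accesses, which is
what the updated test expectations above check. A minimal illustrative sketch of source that
benefits (the global name and function are hypothetical, not taken from the patch):

    // C/C++ source, compiled for RISC-V (rv32 or rv64).
    extern int G[32];
    int sum_two(void) {
      // Two accesses to the same global at different constant offsets: with
      // the offset kept out of the global-address node, both loads can share
      // one lui/addi computation of the address of G.
      return G[0] + G[9];
    }

With this change the expected lowering keeps a single %hi(G)/%lo(G) base and addresses the
second element as a constant displacement from that shared base (compare the mem.ll and
mem64.ll diffs), rather than materialising the offset global address separately per access.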