Index: llvm/trunk/lib/Target/RISCV/RISCVISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/RISCV/RISCVISelLowering.cpp +++ llvm/trunk/lib/Target/RISCV/RISCVISelLowering.cpp @@ -293,17 +293,22 @@ GlobalAddressSDNode *N = cast(Op); const GlobalValue *GV = N->getGlobal(); int64_t Offset = N->getOffset(); + MVT XLenVT = Subtarget.getXLenVT(); if (isPositionIndependent() || Subtarget.is64Bit()) report_fatal_error("Unable to lowerGlobalAddress"); - - SDValue GAHi = - DAG.getTargetGlobalAddress(GV, DL, Ty, Offset, RISCVII::MO_HI); - SDValue GALo = - DAG.getTargetGlobalAddress(GV, DL, Ty, Offset, RISCVII::MO_LO); + // In order to maximise the opportunity for common subexpression elimination, + // emit a separate ADD node for the global address offset instead of folding + // it in the global address node. Later peephole optimisations may choose to + // fold it back in when profitable. + SDValue GAHi = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_HI); + SDValue GALo = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_LO); SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, GAHi), 0); SDValue MNLo = SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, GALo), 0); + if (Offset != 0) + return DAG.getNode(ISD::ADD, DL, Ty, MNLo, + DAG.getConstant(Offset, DL, XLenVT)); return MNLo; } Index: llvm/trunk/test/CodeGen/RISCV/byval.ll =================================================================== --- llvm/trunk/test/CodeGen/RISCV/byval.ll +++ llvm/trunk/test/CodeGen/RISCV/byval.ll @@ -22,18 +22,16 @@ ; RV32I: # %bb.0: # %entry ; RV32I-NEXT: addi sp, sp, -32 ; RV32I-NEXT: sw ra, 28(sp) -; RV32I-NEXT: lui a0, %hi(foo+12) -; RV32I-NEXT: lw a0, %lo(foo+12)(a0) -; RV32I-NEXT: sw a0, 24(sp) -; RV32I-NEXT: lui a0, %hi(foo+8) -; RV32I-NEXT: lw a0, %lo(foo+8)(a0) -; RV32I-NEXT: sw a0, 20(sp) -; RV32I-NEXT: lui a0, %hi(foo+4) -; RV32I-NEXT: lw a0, %lo(foo+4)(a0) -; RV32I-NEXT: sw a0, 16(sp) ; RV32I-NEXT: lui 
a0, %hi(foo) -; RV32I-NEXT: lw a0, %lo(foo)(a0) -; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: lw a1, %lo(foo)(a0) +; RV32I-NEXT: sw a1, 12(sp) +; RV32I-NEXT: addi a0, a0, %lo(foo) +; RV32I-NEXT: lw a1, 12(a0) +; RV32I-NEXT: sw a1, 24(sp) +; RV32I-NEXT: lw a1, 8(a0) +; RV32I-NEXT: sw a1, 20(sp) +; RV32I-NEXT: lw a0, 4(a0) +; RV32I-NEXT: sw a0, 16(sp) ; RV32I-NEXT: addi a0, sp, 12 ; RV32I-NEXT: call callee ; RV32I-NEXT: lw ra, 28(sp) Index: llvm/trunk/test/CodeGen/RISCV/double-mem.ll =================================================================== --- llvm/trunk/test/CodeGen/RISCV/double-mem.ll +++ llvm/trunk/test/CodeGen/RISCV/double-mem.ll @@ -64,9 +64,9 @@ ; RV32IFD-NEXT: lui a0, %hi(G) ; RV32IFD-NEXT: fld ft1, %lo(G)(a0) ; RV32IFD-NEXT: fsd ft0, %lo(G)(a0) -; RV32IFD-NEXT: lui a0, %hi(G+72) -; RV32IFD-NEXT: fld ft1, %lo(G+72)(a0) -; RV32IFD-NEXT: fsd ft0, %lo(G+72)(a0) +; RV32IFD-NEXT: addi a0, a0, %lo(G) +; RV32IFD-NEXT: fld ft1, 72(a0) +; RV32IFD-NEXT: fsd ft0, 72(a0) ; RV32IFD-NEXT: fsd ft0, 8(sp) ; RV32IFD-NEXT: lw a0, 8(sp) ; RV32IFD-NEXT: lw a1, 12(sp) Index: llvm/trunk/test/CodeGen/RISCV/float-mem.ll =================================================================== --- llvm/trunk/test/CodeGen/RISCV/float-mem.ll +++ llvm/trunk/test/CodeGen/RISCV/float-mem.ll @@ -51,9 +51,9 @@ ; RV32IF-NEXT: lui a0, %hi(G) ; RV32IF-NEXT: flw ft1, %lo(G)(a0) ; RV32IF-NEXT: fsw ft0, %lo(G)(a0) -; RV32IF-NEXT: lui a0, %hi(G+36) -; RV32IF-NEXT: flw ft1, %lo(G+36)(a0) -; RV32IF-NEXT: fsw ft0, %lo(G+36)(a0) +; RV32IF-NEXT: addi a0, a0, %lo(G) +; RV32IF-NEXT: flw ft1, 36(a0) +; RV32IF-NEXT: fsw ft0, 36(a0) ; RV32IF-NEXT: fmv.x.w a0, ft0 ; RV32IF-NEXT: ret %1 = fadd float %a, %b Index: llvm/trunk/test/CodeGen/RISCV/fp128.ll =================================================================== --- llvm/trunk/test/CodeGen/RISCV/fp128.ll +++ llvm/trunk/test/CodeGen/RISCV/fp128.ll @@ -13,30 +13,26 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -48 ; RV32I-NEXT: sw ra, 44(sp) -; 
RV32I-NEXT: lui a0, %hi(y+12) -; RV32I-NEXT: lw a0, %lo(y+12)(a0) -; RV32I-NEXT: sw a0, 20(sp) -; RV32I-NEXT: lui a0, %hi(y+8) -; RV32I-NEXT: lw a0, %lo(y+8)(a0) -; RV32I-NEXT: sw a0, 16(sp) -; RV32I-NEXT: lui a0, %hi(y+4) -; RV32I-NEXT: lw a0, %lo(y+4)(a0) -; RV32I-NEXT: sw a0, 12(sp) ; RV32I-NEXT: lui a0, %hi(y) -; RV32I-NEXT: lw a0, %lo(y)(a0) -; RV32I-NEXT: sw a0, 8(sp) -; RV32I-NEXT: lui a0, %hi(x+12) -; RV32I-NEXT: lw a0, %lo(x+12)(a0) -; RV32I-NEXT: sw a0, 36(sp) -; RV32I-NEXT: lui a0, %hi(x+8) -; RV32I-NEXT: lw a0, %lo(x+8)(a0) -; RV32I-NEXT: sw a0, 32(sp) -; RV32I-NEXT: lui a0, %hi(x+4) -; RV32I-NEXT: lw a0, %lo(x+4)(a0) +; RV32I-NEXT: lw a1, %lo(y)(a0) +; RV32I-NEXT: sw a1, 8(sp) +; RV32I-NEXT: lui a1, %hi(x) +; RV32I-NEXT: lw a2, %lo(x)(a1) +; RV32I-NEXT: sw a2, 24(sp) +; RV32I-NEXT: addi a0, a0, %lo(y) +; RV32I-NEXT: lw a2, 12(a0) +; RV32I-NEXT: sw a2, 20(sp) +; RV32I-NEXT: lw a2, 8(a0) +; RV32I-NEXT: sw a2, 16(sp) +; RV32I-NEXT: lw a0, 4(a0) +; RV32I-NEXT: sw a0, 12(sp) +; RV32I-NEXT: addi a0, a1, %lo(x) +; RV32I-NEXT: lw a1, 12(a0) +; RV32I-NEXT: sw a1, 36(sp) +; RV32I-NEXT: lw a1, 8(a0) +; RV32I-NEXT: sw a1, 32(sp) +; RV32I-NEXT: lw a0, 4(a0) ; RV32I-NEXT: sw a0, 28(sp) -; RV32I-NEXT: lui a0, %hi(x) -; RV32I-NEXT: lw a0, %lo(x)(a0) -; RV32I-NEXT: sw a0, 24(sp) ; RV32I-NEXT: addi a0, sp, 24 ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: call __netf2 @@ -57,30 +53,26 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -80 ; RV32I-NEXT: sw ra, 76(sp) -; RV32I-NEXT: lui a0, %hi(y+12) -; RV32I-NEXT: lw a0, %lo(y+12)(a0) -; RV32I-NEXT: sw a0, 36(sp) -; RV32I-NEXT: lui a0, %hi(y+8) -; RV32I-NEXT: lw a0, %lo(y+8)(a0) -; RV32I-NEXT: sw a0, 32(sp) -; RV32I-NEXT: lui a0, %hi(y+4) -; RV32I-NEXT: lw a0, %lo(y+4)(a0) -; RV32I-NEXT: sw a0, 28(sp) ; RV32I-NEXT: lui a0, %hi(y) -; RV32I-NEXT: lw a0, %lo(y)(a0) -; RV32I-NEXT: sw a0, 24(sp) -; RV32I-NEXT: lui a0, %hi(x+12) -; RV32I-NEXT: lw a0, %lo(x+12)(a0) -; RV32I-NEXT: sw a0, 52(sp) -; RV32I-NEXT: lui a0, %hi(x+8) -; 
RV32I-NEXT: lw a0, %lo(x+8)(a0) -; RV32I-NEXT: sw a0, 48(sp) -; RV32I-NEXT: lui a0, %hi(x+4) -; RV32I-NEXT: lw a0, %lo(x+4)(a0) +; RV32I-NEXT: lw a1, %lo(y)(a0) +; RV32I-NEXT: sw a1, 24(sp) +; RV32I-NEXT: lui a1, %hi(x) +; RV32I-NEXT: lw a2, %lo(x)(a1) +; RV32I-NEXT: sw a2, 40(sp) +; RV32I-NEXT: addi a0, a0, %lo(y) +; RV32I-NEXT: lw a2, 12(a0) +; RV32I-NEXT: sw a2, 36(sp) +; RV32I-NEXT: lw a2, 8(a0) +; RV32I-NEXT: sw a2, 32(sp) +; RV32I-NEXT: lw a0, 4(a0) +; RV32I-NEXT: sw a0, 28(sp) +; RV32I-NEXT: addi a0, a1, %lo(x) +; RV32I-NEXT: lw a1, 12(a0) +; RV32I-NEXT: sw a1, 52(sp) +; RV32I-NEXT: lw a1, 8(a0) +; RV32I-NEXT: sw a1, 48(sp) +; RV32I-NEXT: lw a0, 4(a0) ; RV32I-NEXT: sw a0, 44(sp) -; RV32I-NEXT: lui a0, %hi(x) -; RV32I-NEXT: lw a0, %lo(x)(a0) -; RV32I-NEXT: sw a0, 40(sp) ; RV32I-NEXT: addi a0, sp, 56 ; RV32I-NEXT: addi a1, sp, 40 ; RV32I-NEXT: addi a2, sp, 24 Index: llvm/trunk/test/CodeGen/RISCV/hoist-global-addr-base.ll =================================================================== --- llvm/trunk/test/CodeGen/RISCV/hoist-global-addr-base.ll +++ llvm/trunk/test/CodeGen/RISCV/hoist-global-addr-base.ll @@ -0,0 +1,111 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 < %s | FileCheck %s + +%struct.S = type { [40 x i32], i32, i32, i32, [4100 x i32], i32, i32, i32 } +@s = common dso_local global %struct.S zeroinitializer, align 4 +@foo = global [6 x i16] [i16 1, i16 2, i16 3, i16 4, i16 5, i16 0], align 2 + +define dso_local void @multiple_stores() local_unnamed_addr { +; CHECK-LABEL: multiple_stores: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a0, %hi(s) +; CHECK-NEXT: addi a0, a0, %lo(s) +; CHECK-NEXT: addi a1, zero, 20 +; CHECK-NEXT: sw a1, 164(a0) +; CHECK-NEXT: addi a1, zero, 10 +; CHECK-NEXT: sw a1, 160(a0) +; CHECK-NEXT: ret +entry: + store i32 10, i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 1), align 4 + store i32 20, i32* getelementptr inbounds (%struct.S, %struct.S* 
@s, i32 0, i32 2), align 4 + ret void +} + +define dso_local void @control_flow() local_unnamed_addr #0 { +; CHECK-LABEL: control_flow: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a0, %hi(s) +; CHECK-NEXT: addi a0, a0, %lo(s) +; CHECK-NEXT: lw a1, 164(a0) +; CHECK-NEXT: addi a2, zero, 1 +; CHECK-NEXT: blt a1, a2, .LBB1_2 +; CHECK-NEXT: # %bb.1: # %if.then +; CHECK-NEXT: addi a1, zero, 10 +; CHECK-NEXT: sw a1, 160(a0) +; CHECK-NEXT: .LBB1_2: # %if.end +; CHECK-NEXT: ret +entry: + %0 = load i32, i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 2), align 4 + %cmp = icmp sgt i32 %0, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + store i32 10, i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 1), align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +;TODO: Offset shouldn't be separated in this case. We get shorter sequence if it +; is merged in the LUI %hi and the ADDI %lo. +define dso_local i32* @big_offset_one_use() local_unnamed_addr { +; CHECK-LABEL: big_offset_one_use: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a0, 4 +; CHECK-NEXT: addi a0, a0, 188 +; CHECK-NEXT: lui a1, %hi(s) +; CHECK-NEXT: addi a1, a1, %lo(s) +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: ret +entry: + ret i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 5) +} + +;TODO: Offset shouldn't be separated in this case. We get shorter sequence if it +; is merged in the LUI %hi and the ADDI %lo. +define dso_local i32* @small_offset_one_use() local_unnamed_addr { +; CHECK-LABEL: small_offset_one_use: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a0, %hi(s) +; CHECK-NEXT: addi a0, a0, %lo(s) +; CHECK-NEXT: addi a0, a0, 160 +; CHECK-NEXT: ret +entry: + ret i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 1) +} + + +;TODO: Offset shouldn't be separated in this case. We get shorter sequence if it +; is merged in the LUI %hi and the ADDI %lo. 
+define dso_local i32 @load_half() nounwind { +; CHECK-LABEL: load_half: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sw ra, 12(sp) +; CHECK-NEXT: lui a0, %hi(foo) +; CHECK-NEXT: addi a0, a0, %lo(foo) +; CHECK-NEXT: lhu a0, 8(a0) +; CHECK-NEXT: addi a1, zero, 140 +; CHECK-NEXT: bne a0, a1, .LBB4_2 +; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT: mv a0, zero +; CHECK-NEXT: lw ra, 12(sp) +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB4_2: # %if.then +; CHECK-NEXT: call abort +entry: + %0 = load i16, i16* getelementptr inbounds ([6 x i16], [6 x i16]* @foo, i32 0, i32 4), align 2 + %cmp = icmp eq i16 %0, 140 + br i1 %cmp, label %if.end, label %if.then + +if.then: + tail call void @abort() + unreachable + +if.end: + ret i32 0 +} + +declare void @abort() Index: llvm/trunk/test/CodeGen/RISCV/mem.ll =================================================================== --- llvm/trunk/test/CodeGen/RISCV/mem.ll +++ llvm/trunk/test/CodeGen/RISCV/mem.ll @@ -168,9 +168,9 @@ ; RV32I-NEXT: lui a2, %hi(G) ; RV32I-NEXT: lw a1, %lo(G)(a2) ; RV32I-NEXT: sw a0, %lo(G)(a2) -; RV32I-NEXT: lui a2, %hi(G+36) -; RV32I-NEXT: lw a3, %lo(G+36)(a2) -; RV32I-NEXT: sw a0, %lo(G+36)(a2) +; RV32I-NEXT: addi a2, a2, %lo(G) +; RV32I-NEXT: lw a3, 36(a2) +; RV32I-NEXT: sw a0, 36(a2) ; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: ret %1 = load volatile i32, i32* @G Index: llvm/trunk/test/CodeGen/RISCV/wide-mem.ll =================================================================== --- llvm/trunk/test/CodeGen/RISCV/wide-mem.ll +++ llvm/trunk/test/CodeGen/RISCV/wide-mem.ll @@ -20,10 +20,10 @@ define i64 @load_i64_global() nounwind { ; RV32I-LABEL: load_i64_global: ; RV32I: # %bb.0: -; RV32I-NEXT: lui a0, %hi(val64) -; RV32I-NEXT: lw a0, %lo(val64)(a0) -; RV32I-NEXT: lui a1, %hi(val64+4) -; RV32I-NEXT: lw a1, %lo(val64+4)(a1) +; RV32I-NEXT: lui a1, %hi(val64) +; RV32I-NEXT: lw a0, %lo(val64)(a1) +; RV32I-NEXT: addi a1, a1, %lo(val64) +; RV32I-NEXT: lw a1, 4(a1) ; 
RV32I-NEXT: ret %1 = load i64, i64* @val64 ret i64 %1 Index: llvm/trunk/test/CodeGen/RISCV/zext-with-load-is-free.ll =================================================================== --- llvm/trunk/test/CodeGen/RISCV/zext-with-load-is-free.ll +++ llvm/trunk/test/CodeGen/RISCV/zext-with-load-is-free.ll @@ -10,12 +10,12 @@ ; RV32I-LABEL: test_zext_i8: ; RV32I: # %bb.0: # %entry ; RV32I-NEXT: lui a0, %hi(bytes) -; RV32I-NEXT: lbu a0, %lo(bytes)(a0) -; RV32I-NEXT: addi a1, zero, 136 -; RV32I-NEXT: bne a0, a1, .LBB0_3 +; RV32I-NEXT: lbu a1, %lo(bytes)(a0) +; RV32I-NEXT: addi a2, zero, 136 +; RV32I-NEXT: bne a1, a2, .LBB0_3 ; RV32I-NEXT: # %bb.1: # %entry -; RV32I-NEXT: lui a0, %hi(bytes+1) -; RV32I-NEXT: lbu a0, %lo(bytes+1)(a0) +; RV32I-NEXT: addi a0, a0, %lo(bytes) +; RV32I-NEXT: lbu a0, 1(a0) ; RV32I-NEXT: addi a1, zero, 7 ; RV32I-NEXT: bne a0, a1, .LBB0_3 ; RV32I-NEXT: # %bb.2: # %if.end @@ -44,14 +44,14 @@ define i32 @test_zext_i16() { ; RV32I-LABEL: test_zext_i16: ; RV32I: # %bb.0: # %entry -; RV32I-NEXT: lui a0, 16 -; RV32I-NEXT: addi a0, a0, -120 -; RV32I-NEXT: lui a1, %hi(shorts) -; RV32I-NEXT: lhu a1, %lo(shorts)(a1) -; RV32I-NEXT: bne a1, a0, .LBB1_3 +; RV32I-NEXT: lui a0, %hi(shorts) +; RV32I-NEXT: lui a1, 16 +; RV32I-NEXT: addi a1, a1, -120 +; RV32I-NEXT: lhu a2, %lo(shorts)(a0) +; RV32I-NEXT: bne a2, a1, .LBB1_3 ; RV32I-NEXT: # %bb.1: # %entry -; RV32I-NEXT: lui a0, %hi(shorts+2) -; RV32I-NEXT: lhu a0, %lo(shorts+2)(a0) +; RV32I-NEXT: addi a0, a0, %lo(shorts) +; RV32I-NEXT: lhu a0, 2(a0) ; RV32I-NEXT: addi a1, zero, 7 ; RV32I-NEXT: bne a0, a1, .LBB1_3 ; RV32I-NEXT: # %bb.2: # %if.end