diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp --- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp +++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp @@ -42,6 +42,8 @@ bool foldShiftedOffset(MachineInstr &Hi, MachineInstr &Lo, MachineInstr &TailShXAdd, Register GSReg); + bool foldIntoMemoryOps(MachineInstr &Hi, MachineInstr &Lo); + RISCVMergeBaseOffsetOpt() : MachineFunctionPass(ID) {} MachineFunctionProperties getRequiredProperties() const override { @@ -267,60 +269,66 @@ MachineInstr &Lo) { Register DestReg = Lo.getOperand(0).getReg(); - // First, look for arithmetic instructions we can get an offset from. + // Look for arithmetic instructions we can get an offset from. // We might be able to remove the arithmetic instructions by folding the // offset into the LUI+ADDI. - if (MRI->hasOneUse(DestReg)) { - // Lo has only one use. - MachineInstr &Tail = *MRI->use_instr_begin(DestReg); - switch (Tail.getOpcode()) { - default: - LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:" - << Tail); - break; - case RISCV::ADDI: { - // Offset is simply an immediate operand. - int64_t Offset = Tail.getOperand(2).getImm(); - - // We might have two ADDIs in a row. - Register TailDestReg = Tail.getOperand(0).getReg(); - if (MRI->hasOneUse(TailDestReg)) { - MachineInstr &TailTail = *MRI->use_instr_begin(TailDestReg); - if (TailTail.getOpcode() == RISCV::ADDI) { - Offset += TailTail.getOperand(2).getImm(); - LLVM_DEBUG(dbgs() << " Offset Instrs: " << Tail << TailTail); - foldOffset(Hi, Lo, TailTail, Offset); - Tail.eraseFromParent(); - return true; - } - } + if (!MRI->hasOneUse(DestReg)) + return false; - LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail); - foldOffset(Hi, Lo, Tail, Offset); - return true; - } - case RISCV::ADD: { - // The offset is too large to fit in the immediate field of ADDI. - // This can be in two forms: - // 1) LUI hi_Offset followed by: - // ADDI lo_offset - // This happens in case the offset has non zero bits in - // both hi 20 and lo 12 bits. - // 2) LUI (offset20) - // This happens in case the lower 12 bits of the offset are zeros. - return foldLargeOffset(Hi, Lo, Tail, DestReg); - } - case RISCV::SH1ADD: - case RISCV::SH2ADD: - case RISCV::SH3ADD: { - // The offset is too large to fit in the immediate field of ADDI. - // It may be encoded as (SH2ADD (ADDI X0, C), DestReg) or - // (SH3ADD (ADDI X0, C), DestReg). - return foldShiftedOffset(Hi, Lo, Tail, DestReg); - } + // Lo has only one use. + MachineInstr &Tail = *MRI->use_instr_begin(DestReg); + switch (Tail.getOpcode()) { + default: + LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:" + << Tail); + break; + case RISCV::ADDI: { + // Offset is simply an immediate operand. + int64_t Offset = Tail.getOperand(2).getImm(); + + // We might have two ADDIs in a row. + Register TailDestReg = Tail.getOperand(0).getReg(); + if (MRI->hasOneUse(TailDestReg)) { + MachineInstr &TailTail = *MRI->use_instr_begin(TailDestReg); + if (TailTail.getOpcode() == RISCV::ADDI) { + Offset += TailTail.getOperand(2).getImm(); + LLVM_DEBUG(dbgs() << " Offset Instrs: " << Tail << TailTail); + foldOffset(Hi, Lo, TailTail, Offset); + Tail.eraseFromParent(); + return true; + } } + + LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail); + foldOffset(Hi, Lo, Tail, Offset); + return true; + } + case RISCV::ADD: + // The offset is too large to fit in the immediate field of ADDI. + // This can be in two forms: + // 1) LUI hi_Offset followed by: + // ADDI lo_offset + // This happens in case the offset has non zero bits in + // both hi 20 and lo 12 bits. + // 2) LUI (offset20) + // This happens in case the lower 12 bits of the offset are zeros. + return foldLargeOffset(Hi, Lo, Tail, DestReg); + case RISCV::SH1ADD: + case RISCV::SH2ADD: + case RISCV::SH3ADD: + // The offset is too large to fit in the immediate field of ADDI. + // It may be encoded as (SH2ADD (ADDI X0, C), DestReg) or + // (SH3ADD (ADDI X0, C), DestReg). + return foldShiftedOffset(Hi, Lo, Tail, DestReg); } + return false; +} + +bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi, + MachineInstr &Lo) { + Register DestReg = Lo.getOperand(0).getReg(); + // We didn't find an arithmetic instruction. If all the uses are memory ops // with the same offset, we can transform: // @@ -375,10 +383,20 @@ // We found a common offset. // Update the offsets in global address lowering. - Hi.getOperand(1).setOffset(*CommonOffset); + // We may have already folded some arithmetic so we need to add to any + // existing offset. + int64_t NewOffset = Hi.getOperand(1).getOffset() + *CommonOffset; + // RV32 ignores the upper 32 bits. + if (!ST->is64Bit()) + NewOffset = SignExtend64<32>(NewOffset); + // We can only fold simm32 offsets. + if (!isInt<32>(NewOffset)) + return false; + + Hi.getOperand(1).setOffset(NewOffset); MachineOperand &ImmOp = Lo.getOperand(2); if (Hi.getOpcode() != RISCV::AUIPC) - ImmOp.setOffset(*CommonOffset); + ImmOp.setOffset(NewOffset); // Update the immediate in the load/store instructions to add the offset. for (MachineInstr &UseMI : @@ -411,6 +429,7 @@ LLVM_DEBUG(dbgs() << " Found lowered global address: " << *Hi.getOperand(2).getGlobal() << "\n"); MadeChange |= detectAndFoldOffset(Hi, *Lo); + MadeChange |= foldIntoMemoryOps(Hi, *Lo); } } diff --git a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll --- a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll +++ b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll @@ -335,10 +335,9 @@ define void @store_addi_addi() { ; CHECK-LABEL: store_addi_addi: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(bar+2047) -; CHECK-NEXT: addi a0, a0, %lo(bar+2047) +; CHECK-NEXT: lui a0, %hi(bar+3211) ; CHECK-NEXT: li a1, 10 -; CHECK-NEXT: sb a1, 1164(a0) +; CHECK-NEXT: sb a1, %lo(bar+3211)(a0) ; CHECK-NEXT: ret store i8 10, i8* getelementptr inbounds ([0 x i8], [0 x i8]* @bar, i32 0, i64 3211) ret void @@ -347,10 +346,9 @@ define void @store_addi_addi_neg() { ; CHECK-LABEL: store_addi_addi_neg: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(bar-2048) -; CHECK-NEXT: addi a0, a0, %lo(bar-2048) +; CHECK-NEXT: lui a0, %hi(bar-4000) ; CHECK-NEXT: li a1, 10 -; CHECK-NEXT: sb a1, -1952(a0) +; CHECK-NEXT: sb a1, %lo(bar-4000)(a0) ; CHECK-NEXT: ret store i8 10, i8* getelementptr inbounds ([0 x i8], [0 x i8]* @bar, i32 0, i64 -4000) ret void @@ -360,10 +358,9 @@ define void @store_sh2add() { ; CHECK-LABEL: store_sh2add: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(bar+8192) -; CHECK-NEXT: addi a0, a0, %lo(bar+8192) +; CHECK-NEXT: lui a0, %hi(bar+6424) ; CHECK-NEXT: li a1, 10 -; CHECK-NEXT: sb a1, -1768(a0) +; CHECK-NEXT: sb a1, %lo(bar+6424)(a0) ; CHECK-NEXT: ret store i8 10, i8* getelementptr inbounds ([0 x i8], [0 x i8]* @bar, i32 0, i64 6424) ret void @@ -373,10 +370,9 @@ define void @store_sh3add() { ; CHECK-LABEL: store_sh3add: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(bar+12288) -; CHECK-NEXT: addi a0, a0, %lo(bar+12288) +; CHECK-NEXT: lui a0, %hi(bar+12848) ; CHECK-NEXT: li a1, 10 -; CHECK-NEXT: sb a1, 560(a0) +; CHECK-NEXT: sb a1, %lo(bar+12848)(a0) ; CHECK-NEXT: ret store i8 10, i8* getelementptr inbounds ([0 x i8], [0 x i8]* @bar, i32 0, i64 12848) ret void