diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp --- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp +++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp @@ -50,6 +50,9 @@ void foldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI, MachineInstr &Tail, int64_t Offset); bool matchLargeOffset(MachineInstr &TailAdd, Register GSReg, int64_t &Offset); + bool matchShiftedOffset(MachineInstr &TailShXAdd, Register GSReg, + int64_t &Offset); + RISCVMergeBaseOffsetOpt() : MachineFunctionPass(ID) {} MachineFunctionProperties getRequiredProperties() const override { @@ -193,6 +196,60 @@ return false; } +// Detect patterns for offsets that are passed into a SHXADD instruction. +// The offset has 1, 2, or 3 trailing zeros and fits in simm13, simm14, simm15. +// The constant is created with addi voff, x0, C, and shXadd is used to +// insert the trailing zeros and do the addition. +// +// HiLUI: lui vreg1, %hi(s) +// LoADDI: addi vreg2, vreg1, %lo(s) +// OffsetTail: addi voff, x0, C +// TailShXAdd: shXadd vreg4, voff, vreg2 +// +// Returns true and sets Offset to C scaled by the SHXADD shift amount when +// the pattern matches; OffsetTail is then recorded in DeadInstrs. Returns +// false, leaving Offset and DeadInstrs untouched, otherwise. +bool RISCVMergeBaseOffsetOpt::matchShiftedOffset(MachineInstr &TailShXAdd, + Register GAReg, + int64_t &Offset) { + assert((TailShXAdd.getOpcode() == RISCV::SH1ADD || + TailShXAdd.getOpcode() == RISCV::SH2ADD || + TailShXAdd.getOpcode() == RISCV::SH3ADD) && + "Expected SHXADD instruction!"); + // The first source is the shifted operand. + Register Rs1 = TailShXAdd.getOperand(1).getReg(); + + if (GAReg != TailShXAdd.getOperand(2).getReg()) + return false; + + // Can't fold if the register has more than one use. Rs1 must also be + // virtual, because getVRegDef below expects a virtual register. + if (!Rs1.isVirtual() || !MRI->hasOneUse(Rs1)) + return false; + // This can point to an ADDI X0, C. 
+ MachineInstr &OffsetTail = *MRI->getVRegDef(Rs1); + if (OffsetTail.getOpcode() != RISCV::ADDI) + return false; + if (!OffsetTail.getOperand(1).isReg() || + OffsetTail.getOperand(1).getReg() != RISCV::X0 || + !OffsetTail.getOperand(2).isImm()) + return false; + + Offset = OffsetTail.getOperand(2).getImm(); + // Scale by multiplying: C may be negative, and left-shifting a negative + // value is undefined behavior before C++20. + switch (TailShXAdd.getOpcode()) { + default: llvm_unreachable("Unexpected opcode"); + case RISCV::SH1ADD: Offset *= 2; break; + case RISCV::SH2ADD: Offset *= 4; break; + case RISCV::SH3ADD: Offset *= 8; break; + } + + LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail); + DeadInstrs.insert(&OffsetTail); + return true; +} + bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI) { Register DestReg = LoADDI.getOperand(0).getReg(); @@ -240,6 +297,18 @@ foldOffset(HiLUI, LoADDI, Tail, Offset); return true; } + case RISCV::SH1ADD: + case RISCV::SH2ADD: + case RISCV::SH3ADD: { + // The offset is too large to fit in the immediate field of ADDI. + // It may be encoded as (SH2ADD (ADDI X0, C), DestReg) or + // (SH3ADD (ADDI X0, C), DestReg). 
+ int64_t Offset; + if (!matchShiftedOffset(Tail, DestReg, Offset)) + return false; + foldOffset(HiLUI, LoADDI, Tail, Offset); + return true; + } case RISCV::LB: case RISCV::LH: case RISCV::LW: diff --git a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll --- a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll +++ b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32I +; RUN: llc -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64I +; RUN: llc -mtriple=riscv32 -mattr=+zba < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZBA +; RUN: llc -mtriple=riscv64 -mattr=+zba < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZBA %struct.S = type { [40 x i32], i32, i32, i32, [4100 x i32], i32, i32, i32 } @s = common dso_local global %struct.S zeroinitializer, align 4 @@ -239,3 +241,28 @@ ; CHECK-NEXT: ret ret i8* getelementptr inbounds ([0 x i8], [0 x i8]* @bar, i32 0, i64 -4000) } + +; With Zba the constant 6424 (1606 << 2) is created with LI+SH2ADD; the offset is expected to fold into the %hi/%lo pair. +define i8* @offset_sh2add() { +; CHECK-LABEL: offset_sh2add: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(bar+6424) +; CHECK-NEXT: addi a0, a0, %lo(bar+6424) +; CHECK-NEXT: ret + ret i8* getelementptr inbounds ([0 x i8], [0 x i8]* @bar, i32 0, i64 6424) +} + +; With Zba the constant 12848 (1606 << 3) is created with LI+SH3ADD; the offset is expected to fold into the %hi/%lo pair. +define i8* @offset_sh3add() { +; CHECK-LABEL: offset_sh3add: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(bar+12848) +; CHECK-NEXT: addi a0, a0, %lo(bar+12848) +; CHECK-NEXT: ret + ret i8* getelementptr inbounds ([0 x i8], [0 x i8]* @bar, i32 0, i64 12848) +} +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; RV32I: {{.*}} +; RV32ZBA: {{.*}} +; RV64I: {{.*}} +; RV64ZBA: {{.*}}