Index: llvm/trunk/lib/Target/RISCV/CMakeLists.txt =================================================================== --- llvm/trunk/lib/Target/RISCV/CMakeLists.txt +++ llvm/trunk/lib/Target/RISCV/CMakeLists.txt @@ -20,6 +20,7 @@ RISCVISelDAGToDAG.cpp RISCVISelLowering.cpp RISCVMCInstLower.cpp + RISCVMergeBaseOffset.cpp RISCVRegisterInfo.cpp RISCVSubtarget.cpp RISCVTargetMachine.cpp Index: llvm/trunk/lib/Target/RISCV/RISCV.h =================================================================== --- llvm/trunk/lib/Target/RISCV/RISCV.h +++ llvm/trunk/lib/Target/RISCV/RISCV.h @@ -25,6 +25,7 @@ class MCOperand; class MachineInstr; class MachineOperand; +class PassRegistry; void LowerRISCVMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, const AsmPrinter &AP); @@ -32,6 +33,9 @@ MCOperand &MCOp, const AsmPrinter &AP); FunctionPass *createRISCVISelDag(RISCVTargetMachine &TM); + +FunctionPass *createRISCVMergeBaseOffsetOptPass(); +void initializeRISCVMergeBaseOffsetOptPass(PassRegistry &); } #endif Index: llvm/trunk/lib/Target/RISCV/RISCVISelDAGToDAG.cpp =================================================================== --- llvm/trunk/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ llvm/trunk/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -56,14 +56,12 @@ private: void doPeepholeLoadStoreADDI(); - void doPeepholeGlobalAddiLuiOffset(); void doPeepholeBuildPairF64SplitF64(); }; } void RISCVDAGToDAGISel::PostprocessISelDAG() { doPeepholeLoadStoreADDI(); - doPeepholeGlobalAddiLuiOffset(); doPeepholeBuildPairF64SplitF64(); } @@ -130,212 +128,6 @@ return false; } -// Detect the pattern lui %hi(global) --> ADDI %lo(global) -// HiLUI LoADDI -static bool detectLuiAddiGlobal(SDNode *Tail, unsigned &Idx, SDValue &LoADDI, - SDValue &HiLUI, GlobalAddressSDNode *&GAlo, - GlobalAddressSDNode *&GAhi) { - // Try to detect the pattern on every operand of the tail instruction. 
- for (Idx = 0; Idx < Tail->getNumOperands(); Idx++) { - LoADDI = Tail->getOperand(Idx); - // LoADDI should only be used by one instruction (Tail). - if (!LoADDI->isMachineOpcode() || - !(LoADDI->getMachineOpcode() == RISCV::ADDI) || - !isa<GlobalAddressSDNode>(LoADDI->getOperand(1)) || - !LoADDI->hasOneUse()) - continue; - // Check for existence of %lo target flag. - GAlo = cast<GlobalAddressSDNode>(LoADDI->getOperand(1)); - if (!(GAlo->getTargetFlags() == RISCVII::MO_LO) || - !(GAlo->getOffset() == 0)) - return false; - // Check for existence of %hi target flag. - HiLUI = LoADDI->getOperand(0); - if (!HiLUI->isMachineOpcode() || - !(HiLUI->getMachineOpcode() == RISCV::LUI) || - !isa<GlobalAddressSDNode>(HiLUI->getOperand(0)) || !HiLUI->hasOneUse()) - return false; - GAhi = cast<GlobalAddressSDNode>(HiLUI->getOperand(0)); - if (!(GAhi->getTargetFlags() == RISCVII::MO_HI) || - !(GAhi->getOffset() == 0)) - return false; - return true; - } - return false; -} - -static bool matchLuiOffset(SDValue &OffsetLUI, int64_t &Offset) { - if (!OffsetLUI->isMachineOpcode() || - !(OffsetLUI->getMachineOpcode() == RISCV::LUI) || - !isa<ConstantSDNode>(OffsetLUI->getOperand(0))) - return false; - Offset = cast<ConstantSDNode>(OffsetLUI->getOperand(0))->getSExtValue(); - Offset = Offset << 12; - LLVM_DEBUG(dbgs() << " Detected \" LUI Offset_hi\"\n"); - return true; -} - -static bool matchAddiLuiOffset(SDValue &OffsetLoADDI, int64_t &Offset) { - // LoADDI should only be used by the tail instruction only. - if (!OffsetLoADDI->isMachineOpcode() || - !(OffsetLoADDI->getMachineOpcode() == RISCV::ADDI) || - !isa<ConstantSDNode>(OffsetLoADDI->getOperand(1)) || - !OffsetLoADDI->hasOneUse()) - return false; - int64_t OffLo = - cast<ConstantSDNode>(OffsetLoADDI->getOperand(1))->getZExtValue(); - // HiLUI should only be used by the loADDI. 
- SDValue OffsetHiLUI = (OffsetLoADDI->getOperand(0)); - if (!OffsetHiLUI->isMachineOpcode() || - !(OffsetHiLUI->getMachineOpcode() == RISCV::LUI) || - !isa<ConstantSDNode>(OffsetHiLUI->getOperand(0)) || - !OffsetHiLUI->hasOneUse()) - return false; - int64_t OffHi = - cast<ConstantSDNode>(OffsetHiLUI->getOperand(0))->getSExtValue(); - Offset = (OffHi << 12) + OffLo; - LLVM_DEBUG(dbgs() << " Detected \" ADDI (LUI Offset_hi), Offset_lo\"\n"); - return true; -} - -static void updateTailInstrUsers(SDNode *Tail, SelectionDAG *CurDAG, - GlobalAddressSDNode *GAhi, - GlobalAddressSDNode *GAlo, - SDValue &GlobalHiLUI, SDValue &GlobalLoADDI, - int64_t Offset) { - // Update the offset in GAhi and GAlo. - SDLoc DL(Tail->getOperand(1)); - SDValue GAHiNew = CurDAG->getTargetGlobalAddress(GAhi->getGlobal(), DL, - GlobalHiLUI.getValueType(), - Offset, RISCVII::MO_HI); - SDValue GALoNew = CurDAG->getTargetGlobalAddress(GAlo->getGlobal(), DL, - GlobalLoADDI.getValueType(), - Offset, RISCVII::MO_LO); - CurDAG->UpdateNodeOperands(GlobalHiLUI.getNode(), GAHiNew); - CurDAG->UpdateNodeOperands(GlobalLoADDI.getNode(), GlobalHiLUI, GALoNew); - // Update all uses of the Tail with the GlobalLoADDI. After - // this Tail will be a dead node. - SDValue From = SDValue(Tail, 0); - CurDAG->ReplaceAllUsesOfValuesWith(&From, &GlobalLoADDI, 1); -} - -// TODO: This transformation might be better implemeted in a Machine Funtion -// Pass as discussed here: https://reviews.llvm.org/D45748. -// -// Merge the offset of address calculation into the offset field -// of a global address node in a global address lowering sequence ("LUI -// %hi(global) --> add %lo(global)") under the following conditions: 1) The -// offset field in the global address lowering sequence is zero. 2) The lowered -// global address is only used in one node, referred to as "Tail". 
- -// This peephole does the following transformations to merge the offset: - -// 1) ADDI (ADDI (LUI %hi(global)) %lo(global)), offset -// ---> -// ADDI (LUI %hi(global + offset)) %lo(global + offset). -// -// This generates: -// lui a0, hi (global + offset) -// add a0, a0, lo (global + offset) -// Instead of -// lui a0, hi (global) -// addi a0, hi (global) -// addi a0, offset -// This pattern is for cases when the offset is small enough to fit in the -// immediate filed of ADDI (less than 12 bits). - -// 2) ADD ((ADDI (LUI %hi(global)) %lo(global)), (LUI hi_offset)) -// ---> -// offset = hi_offset << 12 -// ADDI (LUI %hi(global + offset)) %lo(global + offset) - -// Which generates the ASM: -// lui a0, hi(global + offset) -// addi a0, lo(global + offset) -// Instead of: -// lui a0, hi(global) -// addi a0, lo(global) -// lui a1, (offset) -// add a0, a0, a1 - -// This pattern is for cases when the offset doesn't fit in an immediate field -// of ADDI but the lower 12 bits are all zeros. - -// 3) ADD ((ADDI (LUI %hi(global)) %lo(global)), (ADDI lo_offset, (LUI -// hi_offset))) -// ---> -// ADDI (LUI %hi(global + offset)) %lo(global + offset) -// Which generates the ASM: -// lui a1, %hi(global + offhi20<<12 + offlo12) -// addi a1, %lo(global + offhi20<<12 + offlo12) -// Instead of: -// lui a0, hi(global) -// addi a0, lo(global) -// lui a1, (offhi20) -// addi a1, (offlo12) -// add a0, a0, a1 -// This pattern is for cases when the offset doesn't fit in an immediate field -// of ADDI and both the lower 1 bits and high 20 bits are non zero. -void RISCVDAGToDAGISel::doPeepholeGlobalAddiLuiOffset() { - SelectionDAG::allnodes_iterator Position(CurDAG->getRoot().getNode()); - ++Position; - SelectionDAG::allnodes_iterator Begin(CurDAG->allnodes_begin()); - while (Position != Begin) { - SDNode *Tail = &*--Position; - // Skip dead nodes and any non-machine opcodes. - if (Tail->use_empty() || !Tail->isMachineOpcode()) - continue; - // The tail instruction can be an ADD or an ADDI. 
- if (!Tail->isMachineOpcode() || !(Tail->getMachineOpcode() == RISCV::ADD || - Tail->getMachineOpcode() == RISCV::ADDI)) - continue; - // First detect the global address part of pattern: - // (lui %hi(global) --> Addi %lo(global)) - unsigned GlobalLoADDiIdx; - SDValue GlobalLoADDI; - SDValue GlobalHiLUI; - GlobalAddressSDNode *GAhi; - GlobalAddressSDNode *GAlo; - if (!detectLuiAddiGlobal(Tail, GlobalLoADDiIdx, GlobalLoADDI, GlobalHiLUI, - GAlo, GAhi)) - continue; - LLVM_DEBUG(dbgs() << " Detected \"ADDI LUI %hi(global), %lo(global)\n"); - // Detect the offset part for the address calculation by looking at the - // other operand of the tail instruction: - int64_t Offset; - if (Tail->getMachineOpcode() == RISCV::ADD) { - // If the Tail is an ADD instruction, the offset can be in two forms: - // 1) LUI hi_Offset followed by: - // ADDI lo_offset - // This happens in case the offset has non zero bits in - // both hi 20 and lo 12 bits. - // 2) LUI (offset20) - // This happens in case the lower 12 bits of the offset are zeros. - SDValue OffsetVal = Tail->getOperand(1 - GlobalLoADDiIdx); - if (!matchAddiLuiOffset(OffsetVal, Offset) && - !matchLuiOffset(OffsetVal, Offset)) - continue; - } else - // The Tail is an ADDI instruction: - Offset = cast<ConstantSDNode>(Tail->getOperand(1 - GlobalLoADDiIdx)) - ->getSExtValue(); - - LLVM_DEBUG( - dbgs() - << " Fold offset value into global offset of LUI %hi and ADDI %lo\n"); - LLVM_DEBUG(dbgs() << "\tTail:"); - LLVM_DEBUG(Tail->dump(CurDAG)); - LLVM_DEBUG(dbgs() << "\tGlobalHiLUI:"); - LLVM_DEBUG(GlobalHiLUI->dump(CurDAG)); - LLVM_DEBUG(dbgs() << "\tGlobalLoADDI:"); - LLVM_DEBUG(GlobalLoADDI->dump(CurDAG)); - LLVM_DEBUG(dbgs() << "\n"); - updateTailInstrUsers(Tail, CurDAG, GAhi, GAlo, GlobalHiLUI, GlobalLoADDI, - Offset); - } - CurDAG->RemoveDeadNodes(); -} - // Merge an ADDI into the offset of a load/store instruction where possible. 
// (load (add base, off), 0) -> (load base, off) // (store val, (add base, off)) -> (store val, base, off) Index: llvm/trunk/lib/Target/RISCV/RISCVMergeBaseOffset.cpp =================================================================== --- llvm/trunk/lib/Target/RISCV/RISCVMergeBaseOffset.cpp +++ llvm/trunk/lib/Target/RISCV/RISCVMergeBaseOffset.cpp @@ -0,0 +1,286 @@ +//===----- RISCVMergeBaseOffset.cpp - Optimise address calculations ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Merge the offset of address calculation into the offset field +// of instructions in a global address lowering sequence. This pass transforms: +// lui vreg1, %hi(s) +// addi vreg2, vreg1, %lo(s) +// addi vreg3, vreg2, Offset +// +// Into: +// lui vreg1, %hi(s+Offset) +// addi vreg2, vreg1, %lo(s+Offset) +// +// The transformation is carried out under certain conditions: +// 1) The offset field in the base of global address lowering sequence is zero. +// 2) The lowered global address has only one use. +// +// The offset field can be in a different form. This pass handles all of them. 
+//===----------------------------------------------------------------------===// + +#include "RISCV.h" +#include "RISCVTargetMachine.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetOptions.h" +#include <set> +using namespace llvm; + +#define DEBUG_TYPE "riscv-merge-base-offset" +#define RISCV_MERGE_BASE_OFFSET_NAME "RISCV Merge Base Offset" +namespace { + +struct RISCVMergeBaseOffsetOpt : public MachineFunctionPass { + static char ID; + const MachineFunction *MF; + bool runOnMachineFunction(MachineFunction &Fn) override; + bool detectLuiAddiGlobal(MachineInstr &LUI, MachineInstr *&ADDI); + + bool detectAndFoldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI); + void foldOffset(MachineInstr &HiLUI, MachineInstr &LoADDI, MachineInstr &Tail, + int64_t Offset); + bool matchLargeOffset(MachineInstr &TailAdd, unsigned GSReg, int64_t &Offset); + RISCVMergeBaseOffsetOpt() : MachineFunctionPass(ID) {} + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::IsSSA); + } + + StringRef getPassName() const override { + return RISCV_MERGE_BASE_OFFSET_NAME; + } + +private: + MachineRegisterInfo *MRI; + std::set<MachineInstr *> DeadInstrs; +}; +}; // end anonymous namespace + +char RISCVMergeBaseOffsetOpt::ID = 0; +INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, "riscv-merge-base-offset", + RISCV_MERGE_BASE_OFFSET_NAME, false, false) + +// Detect the pattern: +// lui vreg1, %hi(s) +// addi vreg2, vreg1, %lo(s) +// +// Pattern only accepted if: +// 1) ADDI has only one use. +// 2) LUI has only one use; which is the ADDI. +// 3) Both ADDI and LUI have GlobalAddress type which indicates that these +// are generated from global address lowering. +// 4) Offset value in the Global Address is 0. 
+bool RISCVMergeBaseOffsetOpt::detectLuiAddiGlobal(MachineInstr &HiLUI, + MachineInstr *&LoADDI) { + if (HiLUI.getOpcode() != RISCV::LUI || + HiLUI.getOperand(1).getTargetFlags() != RISCVII::MO_HI || + HiLUI.getOperand(1).getType() != MachineOperand::MO_GlobalAddress || + HiLUI.getOperand(1).getOffset() != 0 || + !MRI->hasOneUse(HiLUI.getOperand(0).getReg())) + return false; + unsigned HiLuiDestReg = HiLUI.getOperand(0).getReg(); + LoADDI = MRI->use_begin(HiLuiDestReg)->getParent(); + if (LoADDI->getOpcode() != RISCV::ADDI || + LoADDI->getOperand(2).getTargetFlags() != RISCVII::MO_LO || + LoADDI->getOperand(2).getType() != MachineOperand::MO_GlobalAddress || + LoADDI->getOperand(2).getOffset() != 0 || + !MRI->hasOneUse(LoADDI->getOperand(0).getReg())) + return false; + return true; +} + +// Update the offset in HiLUI and LoADDI instructions. +// Delete the tail instruction and update all the uses to use the +// output from LoADDI. +void RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &HiLUI, + MachineInstr &LoADDI, + MachineInstr &Tail, int64_t Offset) { + // Put the offset back in HiLUI and the LoADDI + HiLUI.getOperand(1).setOffset(Offset); + LoADDI.getOperand(2).setOffset(Offset); + // Delete the tail instruction. + DeadInstrs.insert(&Tail); + MRI->replaceRegWith(Tail.getOperand(0).getReg(), + LoADDI.getOperand(0).getReg()); + LLVM_DEBUG(dbgs() << " Merged offset " << Offset << " into base.\n" + << " " << HiLUI << " " << LoADDI;); +} + +// Detect patterns for large offsets that are passed into an ADD instruction. 
+// +// Base address lowering is of the form: +// HiLUI: lui vreg1, %hi(s) +// LoADDI: addi vreg2, vreg1, %lo(s) +// / \ +// / \ +// / \ +// / The large offset can be of two forms: \ +// 1) Offset that has non zero bits in lower 2) Offset that has non zero +// 12 bits and upper 20 bits bits in upper 20 bits only +// OffsetLUI: lui vreg3, 4 +// OffsetTail: addi voff, vreg3, 188 OffsetTail: lui voff, 128 +// \ / +// \ / +// \ / +// \ / +// TailAdd: add vreg4, vreg2, voff +bool RISCVMergeBaseOffsetOpt::matchLargeOffset(MachineInstr &TailAdd, + unsigned GAReg, + int64_t &Offset) { + assert((TailAdd.getOpcode() == RISCV::ADD) && "Expected ADD instruction!"); + unsigned Rs = TailAdd.getOperand(1).getReg(); + unsigned Rt = TailAdd.getOperand(2).getReg(); + unsigned Reg = Rs == GAReg ? Rt : Rs; + + // Can't fold if the register has more than one use. + if (!MRI->hasOneUse(Reg)) + return false; + // This can point to an ADDI or a LUI: + MachineInstr &OffsetTail = *MRI->getVRegDef(Reg); + if (OffsetTail.getOpcode() == RISCV::ADDI) { + // The offset value has non zero bits in both %hi and %lo parts. + // Detect an ADDI that feeds from a LUI instruction. + MachineOperand &AddiImmOp = OffsetTail.getOperand(2); + if (AddiImmOp.getTargetFlags() != RISCVII::MO_None) + return false; + int64_t OffLo = AddiImmOp.getImm(); + MachineInstr &OffsetLui = + *MRI->getVRegDef(OffsetTail.getOperand(1).getReg()); + MachineOperand &LuiImmOp = OffsetLui.getOperand(1); + if (OffsetLui.getOpcode() != RISCV::LUI || + LuiImmOp.getTargetFlags() != RISCVII::MO_None || + !MRI->hasOneUse(OffsetLui.getOperand(0).getReg())) + return false; + int64_t OffHi = OffsetLui.getOperand(1).getImm(); + Offset = (OffHi << 12) + OffLo; + LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail + << " " << OffsetLui); + DeadInstrs.insert(&OffsetTail); + DeadInstrs.insert(&OffsetLui); + return true; + } else if (OffsetTail.getOpcode() == RISCV::LUI) { + // The offset value has all zero bits in the lower 12 bits. 
Only LUI + // exists. + LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail); + Offset = OffsetTail.getOperand(1).getImm() << 12; + DeadInstrs.insert(&OffsetTail); + return true; + } + return false; +} + +bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &HiLUI, + MachineInstr &LoADDI) { + unsigned DestReg = LoADDI.getOperand(0).getReg(); + assert(MRI->hasOneUse(DestReg) && "expected one use for LoADDI"); + // LoADDI has only one use. + MachineInstr &Tail = *MRI->use_begin(DestReg)->getParent(); + switch (Tail.getOpcode()) { + default: + LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:" + << Tail); + return false; + case RISCV::ADDI: { + // Offset is simply an immediate operand. + int64_t Offset = Tail.getOperand(2).getImm(); + LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail); + foldOffset(HiLUI, LoADDI, Tail, Offset); + return true; + } break; + case RISCV::ADD: { + // The offset is too large to fit in the immediate field of ADDI. + // This can be in two forms: + // 1) LUI hi_Offset followed by: + // ADDI lo_offset + // This happens in case the offset has non zero bits in + // both hi 20 and lo 12 bits. + // 2) LUI (offset20) + // This happens in case the lower 12 bits of the offset are zeros. 
+ int64_t Offset; + if (!matchLargeOffset(Tail, DestReg, Offset)) + return false; + foldOffset(HiLUI, LoADDI, Tail, Offset); + return true; + } break; + case RISCV::LB: + case RISCV::LH: + case RISCV::LW: + case RISCV::LBU: + case RISCV::LHU: + case RISCV::LWU: + case RISCV::LD: + case RISCV::FLW: + case RISCV::FLD: + case RISCV::SB: + case RISCV::SH: + case RISCV::SW: + case RISCV::SD: + case RISCV::FSW: + case RISCV::FSD: { + // Transforms the sequence: Into: + // HiLUI: lui vreg1, %hi(foo) ---> lui vreg1, %hi(foo+8) + // LoADDI: addi vreg2, vreg1, %lo(foo) ---> lw vreg3, lo(foo+8)(vreg1) + // Tail: lw vreg3, 8(vreg2) + if (Tail.getOperand(1).isFI()) + return false; + // Register defined by LoADDI should be used in the base part of the + // load\store instruction. Otherwise, no folding possible. + unsigned BaseAddrReg = Tail.getOperand(1).getReg(); + if (DestReg != BaseAddrReg) + return false; + MachineOperand &TailImmOp = Tail.getOperand(2); + int64_t Offset = TailImmOp.getImm(); + // Update the offsets in global address lowering. + HiLUI.getOperand(1).setOffset(Offset); + // Update the immediate in the Tail instruction to add the offset. + Tail.RemoveOperand(2); + MachineOperand &ImmOp = LoADDI.getOperand(2); + ImmOp.setOffset(Offset); + Tail.addOperand(ImmOp); + // Update the base reg in the Tail instruction to feed from LUI. + // Output of HiLUI is only used in LoADDI, no need to use + // MRI->replaceRegWith(). 
+ Tail.getOperand(1).setReg(HiLUI.getOperand(0).getReg()); + DeadInstrs.insert(&LoADDI); + return true; + } break; + } + return false; +} + +bool RISCVMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) { + if (skipFunction(Fn.getFunction())) + return false; + + DeadInstrs.clear(); + MRI = &Fn.getRegInfo(); + for (MachineBasicBlock &MBB : Fn) { + LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n"); + for (MachineInstr &HiLUI : MBB) { + MachineInstr *LoADDI = nullptr; + if (!detectLuiAddiGlobal(HiLUI, LoADDI)) + continue; + LLVM_DEBUG(dbgs() << " Found lowered global address with one use: " + << *LoADDI->getOperand(2).getGlobal() << "\n"); + // If the use count is only one, merge the offset + detectAndFoldOffset(HiLUI, *LoADDI); + } + } + // Delete dead instructions. + for (auto *MI : DeadInstrs) + MI->eraseFromParent(); + return true; +} + +/// Returns an instance of the Merge Base Offset Optimization pass. +FunctionPass *llvm::createRISCVMergeBaseOffsetOptPass() { + return new RISCVMergeBaseOffsetOpt(); +} Index: llvm/trunk/lib/Target/RISCV/RISCVTargetMachine.cpp =================================================================== --- llvm/trunk/lib/Target/RISCV/RISCVTargetMachine.cpp +++ llvm/trunk/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -78,6 +78,7 @@ void addIRPasses() override; bool addInstSelector() override; void addPreEmitPass() override; + void addPreRegAlloc() override; }; } @@ -97,3 +98,7 @@ } void RISCVPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); } + +void RISCVPassConfig::addPreRegAlloc() { + addPass(createRISCVMergeBaseOffsetOptPass()); +} Index: llvm/trunk/test/CodeGen/RISCV/hoist-global-addr-base.ll =================================================================== --- llvm/trunk/test/CodeGen/RISCV/hoist-global-addr-base.ll +++ llvm/trunk/test/CodeGen/RISCV/hoist-global-addr-base.ll @@ -49,6 +49,23 @@ ret void } +; This test checks that the offset is reconstructed correctly when +; "addi" of the big offset has a 
negative immediate. +; without peephole this generates: +; lui a1, %hi(g) +; addi a1, a0, %lo(g) +; lui a0, 18 ---> offset +; addi a0, a0, -160 +; add a0, a0, a1 ---> base + offset. +define i8* @big_offset_neg_addi() { +; CHECK-LABEL: big_offset_neg_addi: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(g+73568) +; CHECK-NEXT: addi a0, a0, %lo(g+73568) +; CHECK-NEXT: ret + ret i8* getelementptr inbounds ([1048576 x i8], [1048576 x i8]* @g, i32 0, i32 73568) +} + ; This test checks for the case where the offset is only an LUI. ; without peephole this generates: ; lui a0, %hi(g) @@ -84,36 +101,19 @@ ret i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 1) } -; TODO: In this case we get a better sequence if the offset didn't get didn't -; get merged back in %if.end and %if.then. The current peephole is not able to -; detect the shared global address node across blocks. -; Without the peephole we can generate: -;# %bb.0: # %entry -; lui a0, %hi(s) -; addi a0, a0, %lo(s) -; lw a1, 164(a0) -; beqz a1, .LBB0_2 -;# %bb.1: # %if.end -; addi a0, a0, 168 -; ret -;.LBB0_2: # %if.then -; addi a0, a0, 160 -; ret ; Function Attrs: norecurse nounwind optsize readonly define dso_local i32* @control_flow_no_mem(i32 %n) local_unnamed_addr #1 { ; CHECK-LABEL: control_flow_no_mem: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lui a0, %hi(s) ; CHECK-NEXT: addi a0, a0, %lo(s) -; CHECK-NEXT: lw a0, 164(a0) -; CHECK-NEXT: beqz a0, .LBB5_2 +; CHECK-NEXT: lw a1, 164(a0) +; CHECK-NEXT: beqz a1, .LBB6_2 ; CHECK-NEXT: # %bb.1: # %if.end -; CHECK-NEXT: lui a0, %hi(s+168) -; CHECK-NEXT: addi a0, a0, %lo(s+168) +; CHECK-NEXT: addi a0, a0, 168 ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB5_2: # %if.then -; CHECK-NEXT: lui a0, %hi(s+160) -; CHECK-NEXT: addi a0, a0, %lo(s+160) +; CHECK-NEXT: .LBB6_2: # %if.then +; CHECK-NEXT: addi a0, a0, 160 ; CHECK-NEXT: ret entry: %0 = load i32, i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 2), align 4 @@ -125,32 +125,21 @@ ret i32* 
getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 3) } -;TODO: Offset shouln't be separated in this case. We get shorter sequence if it -; is merged in the LUI %hi and the ADDI %lo, the "ADDI" could be folded in the -; immediate part of "lhu" genertating the sequence: -; lui a0, %hi(foo +8) -; lhu a0, %lo(foo+8)(a0) -; instead of: -; lui a0, %hi(foo) -; addi a0, a0, %lo(foo) -; lhu a0, 8(a0) - define dso_local i32 @load_half() nounwind { ; CHECK-LABEL: load_half: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: sw ra, 12(sp) -; CHECK-NEXT: lui a0, %hi(foo) -; CHECK-NEXT: addi a0, a0, %lo(foo) -; CHECK-NEXT: lhu a0, 8(a0) +; CHECK-NEXT: lui a0, %hi(foo+8) +; CHECK-NEXT: lhu a0, %lo(foo+8)(a0) ; CHECK-NEXT: addi a1, zero, 140 -; CHECK-NEXT: bne a0, a1, .LBB6_2 +; CHECK-NEXT: bne a0, a1, .LBB7_2 ; CHECK-NEXT: # %bb.1: # %if.end ; CHECK-NEXT: mv a0, zero ; CHECK-NEXT: lw ra, 12(sp) ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB6_2: # %if.then +; CHECK-NEXT: .LBB7_2: # %if.then ; CHECK-NEXT: call abort entry: %0 = load i16, i16* getelementptr inbounds ([6 x i16], [6 x i16]* @foo, i32 0, i32 4), align 2 @@ -166,3 +155,15 @@ } declare void @abort() + +define dso_local void @one_store() local_unnamed_addr { +; CHECK-LABEL: one_store: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a0, %hi(s+160) +; CHECK-NEXT: addi a1, zero, 10 +; CHECK-NEXT: sw a1, %lo(s+160)(a0) +; CHECK-NEXT: ret +entry: + store i32 10, i32* getelementptr inbounds (%struct.S, %struct.S* @s, i32 0, i32 1), align 4 + ret void +}