diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -28,6 +28,7 @@
 class MachineInstr;
 class MachineOperand;
 class GISelKnownBits;
+class MachineDominatorTree;
 
 struct PreferredTuple {
   LLT Ty;                // The result type of the extend.
@@ -41,10 +42,12 @@
   MachineRegisterInfo &MRI;
   GISelChangeObserver &Observer;
   GISelKnownBits *KB;
+  MachineDominatorTree *MDT;
 
 public:
   CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B,
-                 GISelKnownBits *KB = nullptr);
+                 GISelKnownBits *KB = nullptr,
+                 MachineDominatorTree *MDT = nullptr);
 
   /// MachineRegisterInfo::replaceRegWith() and inform the observer of the changes
   void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const;
@@ -60,17 +63,61 @@
   bool matchCombineCopy(MachineInstr &MI);
   void applyCombineCopy(MachineInstr &MI);
 
+  /// \returns true if \p DefMI precedes \p UseMI or they are the same
+  /// instruction. Both must be in the same basic block.
+  bool isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI);
+
+  /// \returns true if \p DefMI dominates \p UseMI. By definition an
+  /// instruction dominates itself.
+  ///
+  /// If we haven't been provided with a MachineDominatorTree during
+  /// construction, this function returns a conservative result that tracks
+  /// just a single basic block.
+  bool dominates(MachineInstr &DefMI, MachineInstr &UseMI);
+
   /// If \p MI is extend that consumes the result of a load, try to combine it.
   /// Returns true if MI changed.
   bool tryCombineExtendingLoads(MachineInstr &MI);
   bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo);
   void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo);
 
+  /// Combine \p MI into a pre-indexed or post-indexed load/store operation if
+  /// legal and the surrounding code makes it useful.
+  ///
+  /// For example (pre-indexed):
+  ///
+  /// $addr = G_GEP $base, $offset
+  /// [...]
+  /// $val = G_LOAD $addr
+  /// [...]
+  /// $whatever = COPY $addr
+  ///
+  /// -->
+  ///
+  /// $val, $addr = G_INDEXED_LOAD $base, $offset, 1 (IsPre)
+  /// [...]
+  /// $whatever = COPY $addr
+  ///
+  /// or (post-indexed):
+  ///
+  /// G_STORE $val, $base
+  /// [...]
+  /// $addr = G_GEP $base, $offset
+  /// [...]
+  /// $whatever = COPY $addr
+  ///
+  /// -->
+  ///
+  /// $addr = G_INDEXED_STORE $val, $base, $offset
+  /// [...]
+  /// $whatever = COPY $addr
+  bool tryCombineIndexedLoadStore(MachineInstr &MI);
+
   bool matchCombineBr(MachineInstr &MI);
   bool tryCombineBr(MachineInstr &MI);
 
   /// Optimize memcpy intrinsics et al, e.g. constant len calls.
   /// /p MaxLen if non-zero specifies the max length of a mem libcall to inline.
   bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0);
 
   /// Try to transform \p MI by using all of the above
@@ -87,6 +134,20 @@
                       bool IsVolatile);
   bool optimizeMemset(MachineInstr &MI, Register Dst, Register Val,
                       unsigned KnownLen, unsigned DstAlign, bool IsVolatile);
+
+  /// Given a non-indexed load or store instruction \p MI, find an offset that
+  /// can be usefully and legally folded into it as a post-indexing operation.
+  ///
+  /// \returns true if a candidate is found.
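+  /// If a candidate is found, \p Addr is set to the register defined by the
+  /// matched G_GEP (the incremented address), and \p Base and \p Offset to
+  /// the base pointer and offset registers it adds together.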
+  bool findPostIndexCandidate(MachineInstr &MI, Register &Addr, Register &Base,
+                              Register &Offset);
+
+  /// Given a non-indexed load or store instruction \p MI, find an offset that
+  /// can be usefully and legally folded into it as a pre-indexing operation.
+  ///
+  /// \returns true if a candidate is found.
+  bool findPreIndexCandidate(MachineInstr &MI, Register &Addr, Register &Base,
+                             Register &Offset);
 };
 } // namespace llvm
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2983,6 +2983,14 @@
     return false;
   }
 
+  /// Returns true if the specified base+offset is a legal indexed addressing
+  /// mode for this target. \p MI is the load or store instruction that is being
+  /// considered for transformation.
+  virtual bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
+                               bool IsPre, MachineRegisterInfo &MRI) const {
+    return false;
+  }
+
   /// Return the entry encoding for a jump table in the current function. The
   /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
   virtual unsigned getJumpTableEncoding() const;
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -294,9 +294,21 @@
 /// Generic zeroext load
 HANDLE_TARGET_OPCODE(G_ZEXTLOAD)
 
+/// Generic indexed load (including anyext load)
+HANDLE_TARGET_OPCODE(G_INDEXED_LOAD)
+
+/// Generic indexed signext load
+HANDLE_TARGET_OPCODE(G_INDEXED_SEXTLOAD)
+
+/// Generic indexed zeroext load
+HANDLE_TARGET_OPCODE(G_INDEXED_ZEXTLOAD)
+
 /// Generic store.
 HANDLE_TARGET_OPCODE(G_STORE)
 
+/// Generic indexed store.
+HANDLE_TARGET_OPCODE(G_INDEXED_STORE)
+
 /// Generic atomic cmpxchg with internal success check.
 HANDLE_TARGET_OPCODE(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -767,6 +767,32 @@
   let mayLoad = 1;
 }
 
+// Generic indexed load. Combines a GEP with a load. $newaddr is set to $base + $offset.
+// If $am is 0 (post-indexed), then the value is loaded from $base; if $am is 1 (pre-indexed)
+// then the value is loaded from $newaddr.
+def G_INDEXED_LOAD : GenericInstruction {
+  let OutOperandList = (outs type0:$dst, ptype1:$newaddr);
+  let InOperandList = (ins ptype1:$base, type2:$offset, unknown:$am);
+  let hasSideEffects = 0;
+  let mayLoad = 1;
+}
+
+// Same as G_INDEXED_LOAD except that the load performed is sign-extending, as with G_SEXTLOAD.
+def G_INDEXED_SEXTLOAD : GenericInstruction {
+  let OutOperandList = (outs type0:$dst, ptype1:$newaddr);
+  let InOperandList = (ins ptype1:$base, type2:$offset, unknown:$am);
+  let hasSideEffects = 0;
+  let mayLoad = 1;
+}
+
+// Same as G_INDEXED_LOAD except that the load performed is zero-extending, as with G_ZEXTLOAD.
+def G_INDEXED_ZEXTLOAD : GenericInstruction {
+  let OutOperandList = (outs type0:$dst, ptype1:$newaddr);
+  let InOperandList = (ins ptype1:$base, type2:$offset, unknown:$am);
+  let hasSideEffects = 0;
+  let mayLoad = 1;
+}
+
 // Generic store. Expects a MachineMemOperand in addition to explicit operands.
 def G_STORE : GenericInstruction {
   let OutOperandList = (outs);
@@ -775,6 +801,15 @@
   let mayStore = 1;
 }
 
+// Combines a store with a GEP. See description of G_INDEXED_LOAD for indexing behaviour.
+def G_INDEXED_STORE : GenericInstruction {
+  let OutOperandList = (outs ptype0:$newaddr);
+  let InOperandList = (ins type1:$src, ptype0:$base, ptype2:$offset,
+                       unknown:$am);
+  let hasSideEffects = 0;
+  let mayStore = 1;
+}
+
 // Generic atomic cmpxchg with internal success check. Expects a
 // MachineMemOperand in addition to explicit operands.
 def G_ATOMIC_CMPXCHG_WITH_SUCCESS : GenericInstruction {
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -11,6 +11,7 @@
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -22,10 +23,19 @@
 using namespace llvm;
 
+// Option to allow testing of the combiner while no targets know about indexed
+// addressing.
+static cl::opt<bool>
+    ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
+                       cl::desc("Force all indexed operations to be "
+                                "legal for the GlobalISel combiner"));
+
 CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
-                               MachineIRBuilder &B, GISelKnownBits *KB)
+                               MachineIRBuilder &B, GISelKnownBits *KB,
+                               MachineDominatorTree *MDT)
     : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
-      KB(KB) {
+      KB(KB), MDT(MDT) {
   (void)this->KB;
 }
@@ -349,6 +359,204 @@
   Observer.changedInstr(MI);
 }
 
+bool CombinerHelper::isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI) {
+  assert(DefMI.getParent() == UseMI.getParent());
+  if (&DefMI == &UseMI)
+    return true;
+
+  // Walk the basic block from the beginning; whichever of the two instructions
+  // we meet first decides the answer. Both are known to be in this block.
+  MachineBasicBlock::const_iterator I = DefMI.getParent()->begin();
+  while (&*I != &DefMI && &*I != &UseMI)
+    ++I;
+  return &*I == &DefMI;
+}
+
+bool CombinerHelper::dominates(MachineInstr &DefMI, MachineInstr &UseMI) {
+  if (MDT)
+    return MDT->dominates(&DefMI, &UseMI);
+  else if (DefMI.getParent() != UseMI.getParent())
+    return false;
+
+  return isPredecessor(DefMI, UseMI);
+}
+
+bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
+                                            Register &Base, Register &Offset) {
+  auto &MF = *MI.getParent()->getParent();
+  const auto &TLI = *MF.getSubtarget().getTargetLowering();
+
+  unsigned Opcode = MI.getOpcode();
+  assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
+         Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
+
+  Base = MI.getOperand(1).getReg();
+  MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
+  if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
+    return false;
+
+  LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
+
+  for (auto &Use : MRI.use_instructions(Base)) {
+    if (Use.getOpcode() != TargetOpcode::G_GEP)
+      continue;
+
+    Offset = Use.getOperand(2).getReg();
+    if (!ForceLegalIndexing &&
+        !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
+      LLVM_DEBUG(dbgs() << "    Ignoring candidate with illegal addrmode: "
+                        << Use);
+      continue;
+    }
+
+    // Make sure the offset calculation is before the potentially indexed op.
+    // FIXME: we really care about dependency here. The offset calculation might
+    // be movable.
+    MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
+    if (!OffsetDef || !dominates(*OffsetDef, MI)) {
+      LLVM_DEBUG(dbgs() << "    Ignoring candidate with offset after mem-op: "
+                        << Use);
+      continue;
+    }
+
+    // FIXME: check whether all uses of Base are load/store with foldable
+    // addressing modes. If so, using the normal addr-modes is better than
+    // forming an indexed one.
+
+    bool MemOpDominatesAddrUses = true;
+    for (auto &GEPUse : MRI.use_instructions(Use.getOperand(0).getReg())) {
+      if (!dominates(MI, GEPUse)) {
+        MemOpDominatesAddrUses = false;
+        break;
+      }
+    }
+
+    if (!MemOpDominatesAddrUses) {
+      LLVM_DEBUG(
+          dbgs() << "    Ignoring candidate as memop does not dominate uses: "
+                 << Use);
+      continue;
+    }
+
+    LLVM_DEBUG(dbgs() << "    Found match: " << Use);
+    Addr = Use.getOperand(0).getReg();
+    return true;
+  }
+
+  return false;
+}
+
+bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
+                                           Register &Base, Register &Offset) {
+  auto &MF = *MI.getParent()->getParent();
+  const auto &TLI = *MF.getSubtarget().getTargetLowering();
+
+  unsigned Opcode = MI.getOpcode();
+  assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
+         Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
+
+  Addr = MI.getOperand(1).getReg();
+  MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_GEP, Addr, MRI);
+  if (!AddrDef || MRI.hasOneUse(Addr))
+    return false;
+
+  Base = AddrDef->getOperand(1).getReg();
+  Offset = AddrDef->getOperand(2).getReg();
+
+  LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);
+
+  if (!ForceLegalIndexing &&
+      !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
+    LLVM_DEBUG(dbgs() << "    Skipping, not legal for target");
+    return false;
+  }
+
+  MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
+  if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
+    LLVM_DEBUG(dbgs() << "    Skipping, frame index would need copy anyway.");
+    return false;
+  }
+
+  if (MI.getOpcode() == TargetOpcode::G_STORE) {
+    // Would require a copy.
+    if (Base == MI.getOperand(0).getReg()) {
+      LLVM_DEBUG(dbgs() << "    Skipping, storing base so need copy anyway.");
+      return false;
+    }
+
+    // We're expecting one use of Addr in MI, but it could also be the
+    // value stored, which isn't actually dominated by the instruction.
+    if (MI.getOperand(0).getReg() == Addr) {
+      LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses");
+      return false;
+    }
+  }
+
+  // FIXME: check whether all uses of the base pointer are constant GEPs. That
+  // might allow us to end base's liveness here by adjusting the constant.
+
+  for (auto &UseMI : MRI.use_instructions(Addr)) {
+    if (!dominates(MI, UseMI)) {
+      LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses.");
+      return false;
+    }
+  }
+
+  return true;
+}
+
+bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
+  unsigned Opcode = MI.getOpcode();
+  if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
+      Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
+    return false;
+
+  bool IsStore = Opcode == TargetOpcode::G_STORE;
+  Register Addr, Base, Offset;
+  bool IsPre = findPreIndexCandidate(MI, Addr, Base, Offset);
+  if (!IsPre && !findPostIndexCandidate(MI, Addr, Base, Offset))
+    return false;
+
+  unsigned NewOpcode;
+  switch (Opcode) {
+  case TargetOpcode::G_LOAD:
+    NewOpcode = TargetOpcode::G_INDEXED_LOAD;
+    break;
+  case TargetOpcode::G_SEXTLOAD:
+    NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
+    break;
+  case TargetOpcode::G_ZEXTLOAD:
+    NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
+    break;
+  case TargetOpcode::G_STORE:
+    NewOpcode = TargetOpcode::G_INDEXED_STORE;
+    break;
+  default:
+    llvm_unreachable("Unknown load/store opcode");
+  }
+
+  MachineInstr &AddrDef = *MRI.getUniqueVRegDef(Addr);
+  MachineIRBuilder MIRBuilder(MI);
+  auto MIB = MIRBuilder.buildInstr(NewOpcode);
+  if (IsStore) {
+    MIB.addDef(Addr);
+    MIB.addUse(MI.getOperand(0).getReg());
+  } else {
+    MIB.addDef(MI.getOperand(0).getReg());
+    MIB.addDef(Addr);
+  }
+
+  MIB.addUse(Base);
+  MIB.addUse(Offset);
+  MIB.addImm(IsPre);
+  MI.eraseFromParent();
+  AddrDef.eraseFromParent();
+
+  LLVM_DEBUG(dbgs() << "    Combined to indexed operation");
+  return true;
+}
+
 bool CombinerHelper::matchCombineBr(MachineInstr &MI) {
   assert(MI.getOpcode() == TargetOpcode::G_BR && "Expected a G_BR");
   // Try to match the following:
@@ -919,5 +1127,9 @@
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
   if (tryCombineCopy(MI))
     return true;
-  return tryCombineExtendingLoads(MI);
+  if (tryCombineExtendingLoads(MI))
+    return true;
+  if (tryCombineIndexedLoadStore(MI))
+    return true;
+  return false;
 }
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -36,7 +36,7 @@
 /// NOTE: The TargetMachine owns TLOF.
 TargetLowering::TargetLowering(const TargetMachine &tm)
-    : TargetLoweringBase(tm) {}
+    : TargetLoweringBase(tm) {}
 
 const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
   return nullptr;
diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h
--- a/llvm/lib/Target/AArch64/AArch64.h
+++ b/llvm/lib/Target/AArch64/AArch64.h
@@ -55,7 +55,7 @@
 InstructionSelector *
 createAArch64InstructionSelector(const AArch64TargetMachine &,
                                  AArch64Subtarget &, AArch64RegisterBankInfo &);
-FunctionPass *createAArch64PreLegalizeCombiner();
+FunctionPass *createAArch64PreLegalizeCombiner(bool IsOptNone);
 FunctionPass *createAArch64StackTaggingPass();
 
 void initializeAArch64A53Fix835769Pass(PassRegistry&);
diff --git a/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
--- a/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
@@ -17,6 +17,7 @@
 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/Support/Debug.h"
@@ -29,13 +30,14 @@
 namespace {
 class AArch64PreLegalizerCombinerInfo : public CombinerInfo {
   GISelKnownBits *KB;
+  MachineDominatorTree *MDT;
 
 public:
   AArch64PreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
-                                  GISelKnownBits *KB)
+                                  GISelKnownBits *KB, MachineDominatorTree *MDT)
       : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
                      /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
-        KB(KB) {}
+        KB(KB), MDT(MDT) {}
   virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
                        MachineIRBuilder &B) const override;
 };
@@ -43,7 +45,7 @@
 bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
                                               MachineInstr &MI,
                                               MachineIRBuilder &B) const {
-  CombinerHelper Helper(Observer, B, KB);
+  CombinerHelper Helper(Observer, B, KB, MDT);
 
   switch (MI.getOpcode()) {
   default:
@@ -54,8 +56,14 @@
     return Helper.tryCombineBr(MI);
   case TargetOpcode::G_LOAD:
   case TargetOpcode::G_SEXTLOAD:
-  case TargetOpcode::G_ZEXTLOAD:
-    return Helper.tryCombineExtendingLoads(MI);
+  case TargetOpcode::G_ZEXTLOAD: {
+    bool Changed = false;
+    Changed |= Helper.tryCombineExtendingLoads(MI);
+    Changed |= Helper.tryCombineIndexedLoadStore(MI);
+    return Changed;
+  }
+  case TargetOpcode::G_STORE:
+    return Helper.tryCombineIndexedLoadStore(MI);
   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
     switch (MI.getIntrinsicID()) {
     case Intrinsic::memcpy:
@@ -83,13 +91,15 @@
 public:
   static char ID;
 
-  AArch64PreLegalizerCombiner();
+  AArch64PreLegalizerCombiner(bool IsOptNone = false);
 
   StringRef getPassName() const override {
     return "AArch64PreLegalizerCombiner";
   }
 
   bool runOnMachineFunction(MachineFunction &MF) override;
   void getAnalysisUsage(AnalysisUsage &AU) const override;
+private:
+  bool IsOptNone;
 };
 }
@@ -99,10 +109,15 @@
   getSelectionDAGFallbackAnalysisUsage(AU);
   AU.addRequired<GISelKnownBitsAnalysis>();
   AU.addPreserved<GISelKnownBitsAnalysis>();
+  if (!IsOptNone) {
+    AU.addRequired<MachineDominatorTree>();
+    AU.addPreserved<MachineDominatorTree>();
+  }
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
-AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner() : MachineFunctionPass(ID) {
+AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner(bool IsOptNone)
+    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
   initializeAArch64PreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
 }
@@ -115,8 +130,10 @@
   bool EnableOpt =
       MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
   GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
+  MachineDominatorTree *MDT =
+      IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
   AArch64PreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
-                                         F.hasMinSize(), KB);
+                                         F.hasMinSize(), KB, MDT);
   Combiner C(PCInfo, TPC);
   return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
 }
@@ -133,7 +150,7 @@
 
 namespace llvm {
-FunctionPass *createAArch64PreLegalizeCombiner() {
-  return new AArch64PreLegalizerCombiner();
+FunctionPass *createAArch64PreLegalizeCombiner(bool IsOptNone) {
+  return new AArch64PreLegalizerCombiner(IsOptNone);
 }
 } // end namespace llvm
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -502,7 +502,8 @@
 }
 
 void AArch64PassConfig::addPreLegalizeMachineIR() {
-  addPass(createAArch64PreLegalizeCombiner());
+  bool IsOptNone = getOptLevel() == CodeGenOpt::None;
+  addPass(createAArch64PreLegalizeCombiner(IsOptNone));
 }
 
 bool AArch64PassConfig::addLegalizeMachineIR() {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combiner-load-store-indexing.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combiner-load-store-indexing.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combiner-load-store-indexing.ll
@@ -0,0 +1,182 @@
+; RUN: llc -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -verify-machineinstrs -stop-after=aarch64-prelegalizer-combiner -force-legal-indexing %s -o - | FileCheck %s
+
+define i8* @test_simple_load_pre(i8* %ptr) {
+; CHECK-LABEL: name: test_simple_load_pre
+; CHECK: [[BASE:%.*]]:_(p0) = COPY $x0
+; CHECK: [[OFFSET:%.*]]:_(s64) = G_CONSTANT i64 42
+; CHECK-NOT: G_GEP
+; CHECK: {{%.*}}:_(s8), [[NEXT:%.*]]:_(p0) = G_INDEXED_LOAD [[BASE]], [[OFFSET]](s64), 1
+; CHECK: $x0 = COPY [[NEXT]](p0)
+
+  %next = getelementptr i8, i8* %ptr, i32 42
+  load volatile i8, i8* %next
+  ret i8* %next
+}
+
+define void @test_load_multiple_dominated(i8* %ptr, i1 %tst, i1 %tst2) {
+; CHECK-LABEL: name: test_load_multiple_dominated
+; CHECK: [[BASE:%.*]]:_(p0) = COPY $x0
+; CHECK: [[OFFSET:%.*]]:_(s64) = G_CONSTANT i64 42
+; CHECK-NOT: G_GEP
+; CHECK: {{%.*}}:_(s8), [[NEXT:%.*]]:_(p0) = G_INDEXED_LOAD [[BASE]], [[OFFSET]](s64), 1
+; CHECK: $x0 = COPY [[NEXT]](p0)
+  %next = getelementptr i8, i8* %ptr, i32 42
+  br i1 %tst, label %do_load, label %end
+
+do_load:
+  load volatile i8, i8* %next
+  br i1 %tst2, label %bb1, label %bb2
+
+bb1:
+  store volatile i8* %next, i8** undef
+  ret void
+
+bb2:
+  call void @bar(i8* %next)
+  ret void
+
+end:
+  ret void
+}
+
+define i8* @test_simple_store_pre(i8* %ptr) {
+; CHECK-LABEL: name: test_simple_store_pre
+; CHECK: [[BASE:%.*]]:_(p0) = COPY $x0
+; CHECK: [[VAL:%.*]]:_(s8) = G_CONSTANT i8 0
+; CHECK: [[OFFSET:%.*]]:_(s64) = G_CONSTANT i64 42
+; CHECK-NOT: G_GEP
+; CHECK: [[NEXT:%.*]]:_(p0) = G_INDEXED_STORE [[VAL]](s8), [[BASE]], [[OFFSET]](s64), 1
+; CHECK: $x0 = COPY [[NEXT]](p0)
+
+  %next = getelementptr i8, i8* %ptr, i32 42
+  store volatile i8 0, i8* %next
+  ret i8* %next
+}
+
+; The potentially pre-indexed address is used as the value stored. Converting
+; would produce the value too late but only by one instruction.
+define i64** @test_store_pre_val_loop(i64** %ptr) {
+; CHECK-LABEL: name: test_store_pre_val_loop
+; CHECK: G_GEP
+; CHECK: G_STORE %
+
+  %next = getelementptr i64*, i64** %ptr, i32 42
+  %next.p0 = bitcast i64** %next to i64*
+  store volatile i64* %next.p0, i64** %next
+  ret i64** %next
+}
+
+; Potentially pre-indexed address is used between GEP computing it and load.
+define i8* @test_load_pre_before(i8* %ptr) {
+; CHECK-LABEL: name: test_load_pre_before
+; CHECK: G_GEP
+; CHECK: BL @bar
+; CHECK: G_LOAD %
+
+  %next = getelementptr i8, i8* %ptr, i32 42
+  call void @bar(i8* %next)
+  load volatile i8, i8* %next
+  ret i8* %next
+}
+
+; Materializing the base into a writable register (from sp/fp) would be just as
+; bad as the original GEP.
+define i8* @test_alloca_load_pre() {
+; CHECK-LABEL: name: test_alloca_load_pre
+; CHECK: G_GEP
+; CHECK: G_LOAD %
+
+  %ptr = alloca i8, i32 128
+  %next = getelementptr i8, i8* %ptr, i32 42
+  load volatile i8, i8* %next
+  ret i8* %next
+}
+
+; Load does not dominate use of its address. No indexing.
+define i8* @test_pre_nodom(i8* %in, i1 %tst) {
+; CHECK-LABEL: name: test_pre_nodom
+; CHECK: G_GEP
+; CHECK: G_LOAD %
+
+  %next = getelementptr i8, i8* %in, i32 16
+  br i1 %tst, label %do_indexed, label %use_addr
+
+do_indexed:
+  %val = load i8, i8* %next
+  store i8 %val, i8* @var
+  store i8* %next, i8** @varp8
+  br label %use_addr
+
+use_addr:
+  ret i8* %next
+}
+
+define i8* @test_simple_load_post(i8* %ptr) {
+; CHECK-LABEL: name: test_simple_load_post
+; CHECK: [[BASE:%.*]]:_(p0) = COPY $x0
+; CHECK: [[OFFSET:%.*]]:_(s64) = G_CONSTANT i64 42
+; CHECK-NOT: G_GEP
+; CHECK: {{%.*}}:_(s8), [[NEXT:%.*]]:_(p0) = G_INDEXED_LOAD [[BASE]], [[OFFSET]](s64), 0
+; CHECK: $x0 = COPY [[NEXT]](p0)
+
+  %next = getelementptr i8, i8* %ptr, i32 42
+  load volatile i8, i8* %ptr
+  ret i8* %next
+}
+
+define i8* @test_simple_load_post_gep_after(i8* %ptr) {
+; CHECK-LABEL: name: test_simple_load_post_gep_after
+; CHECK: [[BASE:%.*]]:_(p0) = COPY $x0
+; CHECK: BL @get_offset
+; CHECK: [[OFFSET:%.*]]:_(s64) = COPY $x0
+; CHECK: {{%.*}}:_(s8), [[ADDR:%.*]]:_(p0) = G_INDEXED_LOAD [[BASE]], [[OFFSET]](s64), 0
+; CHECK: $x0 = COPY [[ADDR]](p0)
+
+  %offset = call i64 @get_offset()
+  load volatile i8, i8* %ptr
+  %next = getelementptr i8, i8* %ptr, i64 %offset
+  ret i8* %next
+}
+
+define i8* @test_load_post_keep_looking(i8* %ptr) {
+; CHECK: name: test_load_post_keep_looking
+; CHECK: G_INDEXED_LOAD
+
+  %offset = call i64 @get_offset()
+  load volatile i8, i8* %ptr
+  %intval = ptrtoint i8* %ptr to i8
+  store i8 %intval, i8* @var
+
+  %next = getelementptr i8, i8* %ptr, i64 %offset
+  ret i8* %next
+}
+
+; Base is frame index. Using indexing would need copy anyway.
+define i8* @test_load_post_alloca() {
+; CHECK-LABEL: name: test_load_post_alloca
+; CHECK: G_GEP
+; CHECK: G_LOAD %
+
+  %ptr = alloca i8, i32 128
+  %next = getelementptr i8, i8* %ptr, i32 42
+  load volatile i8, i8* %ptr
+  ret i8* %next
+}
+
+; Offset computation does not dominate the load we might be indexing.
+define i8* @test_load_post_gep_offset_after(i8* %ptr) {
+; CHECK-LABEL: name: test_load_post_gep_offset_after
+; CHECK: G_LOAD %
+; CHECK: BL @get_offset
+; CHECK: G_GEP
+
+  load volatile i8, i8* %ptr
+  %offset = call i64 @get_offset()
+  %next = getelementptr i8, i8* %ptr, i64 %offset
+  ret i8* %next
+}
+
+declare void @bar(i8*)
+declare i64 @get_offset()
+@var = global i8 0
+@varp8 = global i8* null
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll b/llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll
@@ -16,15 +16,15 @@
 
 ; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \
 ; RUN:   -verify-machineinstrs=0 -global-isel \
-; RUN:   | FileCheck %s --check-prefix ENABLED --check-prefix NOFALLBACK
+; RUN:   | FileCheck %s --check-prefix ENABLED --check-prefix NOFALLBACK --check-prefix ENABLED-O1
 
 ; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \
 ; RUN:   -verify-machineinstrs=0 -global-isel -global-isel-abort=2 \
-; RUN:   | FileCheck %s --check-prefix ENABLED --check-prefix FALLBACK
+; RUN:   | FileCheck %s --check-prefix ENABLED --check-prefix FALLBACK --check-prefix ENABLED-O1
 
 ; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \
 ; RUN:   -verify-machineinstrs=0 -O1 -aarch64-enable-global-isel-at-O=3 \
-; RUN:   | FileCheck %s --check-prefix ENABLED
+; RUN:   | FileCheck %s --check-prefix ENABLED --check-prefix ENABLED-O1
 
 ; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \
 ; RUN:   -verify-machineinstrs=0 -O1 -aarch64-enable-global-isel-at-O=0 \
@@ -44,6 +44,7 @@
 ; ENABLED:       IRTranslator
 ; VERIFY-NEXT:   Verify generated machine code
 ; ENABLED-NEXT:  Analysis for ComputingKnownBits
+; ENABLED-O1-NEXT:  MachineDominator Tree Construction
 ; ENABLED-NEXT:  PreLegalizerCombiner
 ; VERIFY-NEXT:   Verify generated machine code
 ; ENABLED-NEXT:  Analysis containing CSE Info
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
@@ -134,9 +134,21 @@
 # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: G_INDEXED_LOAD (opcode 64): 3 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: G_INDEXED_SEXTLOAD (opcode 65): 3 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: G_INDEXED_ZEXTLOAD (opcode 66): 3 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
 # DEBUG-NEXT: G_STORE (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: G_INDEXED_STORE (opcode 68): 3 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
 # DEBUG-NEXT: G_ATOMIC_CMPXCHG_WITH_SUCCESS (opcode {{[0-9]+}}): 3 type indices, 0 imm indices
 # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
 # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
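
Note: the patch leaves TargetLowering::isIndexingLegal returning false for every target, which is why the new test relies on -force-legal-indexing. As a rough illustration only, a target could later opt in by overriding the hook; the sketch below is not part of this patch, the AArch64 override and the 9-bit signed immediate range are assumptions based on AArch64's pre/post-indexed LDR/STR forms.

// Hypothetical sketch, e.g. in AArch64ISelLowering.cpp (with a matching
// declaration in the header). Accepts only small constant offsets, since
// anything else would not select to an indexed load/store anyway.
bool AArch64TargetLowering::isIndexingLegal(MachineInstr &MI, Register Base,
                                            Register Offset, bool IsPre,
                                            MachineRegisterInfo &MRI) const {
  // Look at the instruction defining the offset; only G_CONSTANT offsets in
  // the assumed [-256, 255] range are accepted here.
  MachineInstr *OffsetDef = MRI.getVRegDef(Offset);
  if (!OffsetDef || OffsetDef->getOpcode() != TargetOpcode::G_CONSTANT)
    return false;
  int64_t Imm = OffsetDef->getOperand(1).getCImm()->getSExtValue();
  return Imm >= -256 && Imm < 256;
}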