Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -231,6 +231,8 @@
     }
   }

+  bool shouldFavorBackedgeIndex(const Loop *L) const;
+
   unsigned getGISelRematGlobalCost() const {
     return 2;
   }
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -16,6 +16,7 @@
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/IntrinsicsAArch64.h"
+#include "llvm/IR/Use.h"
 #include "llvm/Support/Debug.h"
 #include <algorithm>
 using namespace llvm;
@@ -972,6 +973,36 @@
   return Considerable;
 }

+bool AArch64TTIImpl::shouldFavorBackedgeIndex(const Loop *L) const {
+  // This optimisation will generally introduce base address modifying
+  // instruction(s) into the preheader and is only really useful for
+  // unrolled loops, and we don't generally do this when optimising for size.
+  if (L->getHeader()->getParent()->hasOptSize() ||
+      L->getNumBlocks() != 1)
+    return false;
+
+  // Find pointers with multiple uses within the loop.
+  DenseMap<Value *, unsigned> NumPointerUses;
+  for (auto &I : *L->getHeader()) {
+    if (I.getType()->isPointerTy())
+      NumPointerUses[&I] = 0;
+
+    for (auto &Use : I.operands()) {
+      if (!Use->getType()->isPointerTy())
+        continue;
+      if (NumPointerUses.count(Use))
+        NumPointerUses[Use]++;
+      else
+        NumPointerUses[Use] = 0;
+    }
+  }
+
+  return std::any_of(NumPointerUses.begin(), NumPointerUses.end(),
+                     [](detail::DenseMapPair<Value *, unsigned> Pair) {
+                       return Pair.second > 1;
+                     });
+}
+
 bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
                                            TTI::ReductionFlags Flags) const {
   auto *VTy = cast<VectorType>(Ty);
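
For illustration only (not part of the patch, names made up): a hypothetical C++ loop of the kind the new heuristic is aimed at. The body is a single block that forms several addresses from the same base pointer, so the pointer-use count computed in shouldFavorBackedgeIndex would plausibly exceed one and the hook would return true for this loop.

// Hypothetical example: an unrolled-by-four single-block loop. Each iteration
// derives four addresses from the same base pointer 'dst', which is the
// multiple-pointer-use pattern counted by shouldFavorBackedgeIndex above.
void scale4(float *dst, int n) {
  for (int i = 0; i < n; i += 4) {
    dst[i + 0] *= 2.0f;
    dst[i + 1] *= 2.0f;
    dst[i + 2] *= 2.0f;
    dst[i + 3] *= 2.0f;
  }
}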