Index: include/llvm/Analysis/IVUsers.h =================================================================== --- include/llvm/Analysis/IVUsers.h +++ include/llvm/Analysis/IVUsers.h @@ -133,6 +133,10 @@ // Ephemeral values used by @llvm.assume in this function. SmallPtrSet EphValues; + // Truncs with a single use whose user was just added as an IV user that + // replaces the trunc itself; see FilterOutTruncIVUsers(). + SmallPtrSet<Instruction *, 4> TruncsToSingleIVUser; + void getAnalysisUsage(AnalysisUsage &AU) const override; bool runOnLoop(Loop *L, LPPassManager &LPM) override; @@ -152,6 +156,10 @@ IVStrideUse &AddUser(Instruction *User, Value *Operand); + /// Erase the trunc IV users recorded in TruncsToSingleIVUser unless another + /// IV user's replaced operand has the same SCEV. + void FilterOutTruncIVUsers(); + /// getReplacementExpr - Return a SCEV expression which computes the /// value of the OperandValToReplace of the given IVStrideUse. const SCEV *getReplacementExpr(const IVStrideUse &IU) const; Index: lib/Analysis/IVUsers.cpp =================================================================== --- lib/Analysis/IVUsers.cpp +++ lib/Analysis/IVUsers.cpp @@ -195,6 +195,21 @@ AddUserToIVUsers = true; } + // See if User is a trunc which is used only by a single IV user just added + // directly through the trunc. By adding such trunc, we expect that LSR + // chooses a better solution in case there are other IV users which have the + // same SCEV recurrence as the trunc. Later in the pass, truncs added here + // will be removed again if no other IV user has the same SCEV recurrence.
+ if (isa<TruncInst>(User) && !AddUserToIVUsers && User->hasOneUse()) { + Instruction *OneTruncUse = User->user_back(); + if (LI->getLoopFor(User->getParent()) == L && + LI->getLoopFor(OneTruncUse->getParent()) == L && !IVUses.empty() && + IVUses.back().getOperandValToReplace() == User) { + AddUserToIVUsers = true; + TruncsToSingleIVUser.insert(User); + } + } + if (AddUserToIVUsers) { // Okay, we found a user that we cannot reduce. IVStrideUse &NewUse = AddUser(User, I); @@ -251,6 +266,38 @@ initializeIVUsersPass(*PassRegistry::getPassRegistry()); } +/// Erase every IV use whose user is in TruncsToSingleIVUser unless another IV +/// use's replaced operand has the same SCEV. +void IVUsers::FilterOutTruncIVUsers() { + if (TruncsToSingleIVUser.empty()) + return; + for (IVUsers::iterator U = begin(), E = end(); U != E;) { + // Only users recorded in TruncsToSingleIVUser are candidates for removal. + Instruction *TruncI = U->getUser(); + if (!TruncsToSingleIVUser.count(TruncI)) { + ++U; + continue; + } + assert(isa<TruncInst>(TruncI) && TruncI->hasOneUse() && + "Unexpected TruncInst to remove"); + const SCEV *TruncS = SE->getSCEV(U->getOperandValToReplace()); + // Keep the trunc only if another IV user's operand has the same SCEV. + bool HasSameRec = false; + for (IVStrideUse &UU : IVUses) { + if (&UU == &*U) + continue; + if (SE->getSCEV(UU.getOperandValToReplace()) == TruncS) { + HasSameRec = true; + break; + } + } + if (!HasSameRec) + U = IVUses.erase(U); + else + ++U; + } +} + void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addRequired(); @@ -278,6 +325,7 @@ for (BasicBlock::iterator I = L->getHeader()->begin(); isa(I); ++I) (void)AddUsersIfInteresting(&*I); + FilterOutTruncIVUsers(); return false; } @@ -320,6 +368,7 @@ void IVUsers::releaseMemory() { Processed.clear(); IVUses.clear(); + TruncsToSingleIVUser.clear(); } /// getReplacementExpr - Return a SCEV expression which computes the Index: test/Transforms/LoopStrengthReduce/AArch64/lsr-trunc.ll =================================================================== --- /dev/null +++ test/Transforms/LoopStrengthReduce/AArch64/lsr-trunc.ll @@ -0,0 +1,30 @@ +; RUN: llc < %s |
FileCheck %s + +@gvarray = common global [12 x i32] zeroinitializer, align 4 + +target triple = "arm64-unknown-unknown" + +; Check if the same register is used in the stored value, indexing, and compare. + +define void @test(i32 %n) { +entry: + %cmp7 = icmp sgt i32 %n, 1 + br i1 %cmp7, label %for.body.preheader, label %for.end + +for.body.preheader: + %n_sext = sext i32 %n to i64 + br label %for.body + +for.body: +; CHECK: cmp x[[REG:[0-9]+]] +; CHECK: str w[[REG]], [x{{[0-9]+}}, x[[REG]], lsl #2] + %K.in = phi i64 [ %n_sext, %for.body.preheader ], [ %K, %for.body ] + %K = add i64 %K.in, 1 + %StoredAddr = getelementptr inbounds [12 x i32], [12 x i32]* @gvarray, i64 0, i64 %K + %StoredValue = trunc i64 %K to i32 + store volatile i32 %StoredValue, i32* %StoredAddr + %cmp = icmp sgt i64 %K, 1 + br i1 %cmp, label %for.body, label %for.end +for.end: + ret void +}