Index: llvm/lib/CodeGen/CodeGenPrepare.cpp
===================================================================
--- llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -268,6 +268,10 @@
     MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100),
                           cl::Hidden,
                           cl::desc("Max number of address users to look at"));
+
+static cl::opt<unsigned> MaxLoopInvUsersToScan(
+    "cgp-max-loop-inv-users-to-scan", cl::init(20), cl::Hidden,
+    cl::desc("Max number of loop invariant users to look at"));
 namespace {
 
 enum ExtType {
@@ -5081,6 +5085,22 @@
                            PSI, BFI, SeenInsts);
 }
 
+/// Return true if one of the first MaxLoopInvUsersToScan uses of \p V is by an
+/// instruction located in loop \p L. Uses beyond that cap are not inspected,
+/// so a later in-loop use is not detected and false is returned.
+static bool isUsedInLoop(const Value *V, const Loop *L) {
+  unsigned N = 0;
+
+  for (const Use &U : V->uses()) {
+    if (++N > MaxLoopInvUsersToScan)
+      break;
+    const Instruction *UserI = cast<Instruction>(U.getUser());
+    if (L->contains(UserI->getParent()))
+      return true;
+  }
+
+  return false;
+}
 
 /// Return true if Val is already known to be live at the use site that we're
 /// folding it into.  If so, there is no cost to include it in the addressing
@@ -5104,10 +5124,17 @@
     if (AI->isStaticAlloca())
       return true;
 
+  // If the value is loop invariant and is used in the loop which contains the
+  // memory instruction, it's live.
+  BasicBlock *BB = MemoryInst->getParent();
+  if (Loop *L = LI.getLoopFor(BB);
+      L && L->isLoopInvariant(Val) && isUsedInLoop(Val, L))
+    return true;
+
   // Check to see if this value is already used in the memory instruction's
   // block.  If so, it's already live into the block at the very least, so we
   // can reasonably fold it.
-  return Val->isUsedInBasicBlock(MemoryInst->getParent());
+  return Val->isUsedInBasicBlock(BB);
 }
 
 /// It is possible for the addressing mode of the machine to fold the specified
Index: llvm/test/CodeGen/AArch64/gep-sink-loop-inv-live.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/gep-sink-loop-inv-live.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+target triple = "aarch64-linux"
+
+declare void @use(...)
+declare i64 @next(i64)
+
+define void @f(ptr %a, i64 %k, i64 %n, ptr %q) {
+; CHECK-LABEL: @f(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[COND:%.*]]
+; CHECK:       cond:
+; CHECK-NEXT:    [[I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[J:%.*]], [[IF_THEN:%.*]] ], [ [[I]], [[IF_ELSE:%.*]] ]
+; CHECK-NEXT:    [[P:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[I]]
+; CHECK-NEXT:    [[C0:%.*]] = icmp ult i64 [[I]], [[N:%.*]]
+; CHECK-NEXT:    br i1 [[C0]], label [[LOOP:%.*]], label [[EXIT:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[J]] = call i64 @next(i64 [[I]])
+; CHECK-NEXT:    [[SUNKADDR:%.*]] = mul i64 [[I]], 4
+; CHECK-NEXT:    [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[A]], i64 [[SUNKADDR]]
+; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[SUNKADDR1]], align 4
+; CHECK-NEXT:    [[C1:%.*]] = icmp slt i32 [[V]], 0
+; CHECK-NEXT:    br i1 [[C1]], label [[IF_THEN]], label [[IF_ELSE]]
+; CHECK:       if.then:
+; CHECK-NEXT:    store ptr [[P]], ptr [[Q:%.*]], align 8
+; CHECK-NEXT:    br label [[COND]]
+; CHECK:       if.else:
+; CHECK-NEXT:    call void @use(ptr [[A]])
+; CHECK-NEXT:    br label [[COND]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %cond
+
+cond:
+  %i = phi i64 [0, %entry], [%i.next, %next]
+  %p = getelementptr i32, ptr %a, i64 %i
+  %c0 = icmp ult i64 %i, %n
+  br i1 %c0, label %loop, label %exit
+
+loop:
+  %j = call i64 @next(i64 %i)
+  %v = load i32, ptr %p
+  %c1 = icmp slt i32 %v, 0
+  br i1 %c1, label %if.then, label %if.else
+
+if.then:
+  store ptr %p, ptr %q
+  br label %next
+
+if.else:
+  call void @use(ptr %a)
+  br label %next
+
+next:
+  %i.next = phi i64 [%j, %if.then], [%i, %if.else]
+  br label %cond
+
+exit:
+  ret void
+}