Index: docs/LangRef.rst =================================================================== --- docs/LangRef.rst +++ docs/LangRef.rst @@ -5154,6 +5154,50 @@ This metadata should be used in conjunction with ``llvm.loop`` loop identification metadata. +'``llvm.cond``' +^^^^^^^^^^^^^^^ + +Metadata types used to annotate conditional instructions with information +helpful for optimizations are prefixed with ``llvm.cond``. + +'``llvm.cond.no_loop_predication``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Loop predication tries to convert loop-variant range checks into loop-invariant +ones by widening the checks across loop iterations. This widening is done based on +the latch check of the loop. In some cases, if the latch check is too coarse, +it will cause the range check to fail unconditionally when widened to an +invariant one. Such coarse checks, which cause the widened range check to fail +spuriously, may be introduced by the front end, and LLVM optimizations can +transform the loop such that one of them becomes the latch check. +The ``llvm.cond.no_loop_predication`` metadata annotated on a conditional +instruction (for example, ``icmp``) implies that a loop containing this condition +as the latch condition should not be predicated (see the LoopPredication pass). + +.. code-block:: llvm + + for.header: + %res = call i32 @foo() + %origcond = icmp eq i32 %iv, %res + br i1 %origcond, label %for.body, label %for.end + + for.body: + call void @llvm.experimental.guard(i1 %cond) + ... + %exitcond = icmp ult i32 %iv, %chunksize, !llvm.cond.no_loop_predication !{} + ... + br i1 %exitcond, label %for.end, label %for.header + + for.end: + ... + +During loop predication, we try to predicate the loop above so that the guard +condition becomes loop invariant. However, exitcond is too coarse and the real +loop termination condition is origcond. Passes (such as loop rotation) have +transformed the loop so that exitcond is now the latch condition. 
+So, predicating the loop based on exitcond will cause the guard to fail +spuriously, whereas it would never fail if left in the loop. + '``llvm.mem``' ^^^^^^^^^^^^^^^ Index: include/llvm/IR/LLVMContext.h =================================================================== --- include/llvm/IR/LLVMContext.h +++ include/llvm/IR/LLVMContext.h @@ -102,6 +102,7 @@ MD_associated = 22, // "associated" MD_callees = 23, // "callees" MD_irr_loop = 24, // "irr_loop" + MD_cond_no_loop_predication = 25, // "llvm.cond.no_loop_predication" }; /// Known operand bundle tag IDs, which always have the same value. All Index: lib/IR/LLVMContext.cpp =================================================================== --- lib/IR/LLVMContext.cpp +++ lib/IR/LLVMContext.cpp @@ -61,6 +61,7 @@ {MD_associated, "associated"}, {MD_callees, "callees"}, {MD_irr_loop, "irr_loop"}, + {MD_cond_no_loop_predication, "llvm.cond.no_loop_predication"}, }; for (auto &MDKind : MDKinds) { Index: lib/Transforms/Scalar/LoopPredication.cpp =================================================================== --- lib/Transforms/Scalar/LoopPredication.cpp +++ lib/Transforms/Scalar/LoopPredication.cpp @@ -622,6 +622,17 @@ if (TrueDest != L->getHeader()) Pred = ICmpInst::getInversePredicate(Pred); + { + // Loop with latch condition marked with loop predication disable metadata + // should not be predicated. This can happen if we rotated the loop + // such that this LatchCmp is now the latch condition. 
+ auto *LatchCmp = cast<ICmpInst>( + cast<BranchInst>(LoopLatch->getTerminator())->getCondition()); + if (LatchCmp->getMetadata(LLVMContext::MD_cond_no_loop_predication)) { + DEBUG(dbgs() << "Marked with loop predication disable metadata!\n"); + return None; + } + } auto Result = parseLoopICmp(Pred, LHS, RHS); if (!Result) { DEBUG(dbgs() << "Failed to parse the loop latch condition!\n"); Index: test/ThinLTO/X86/lazyload_metadata.ll =================================================================== --- test/ThinLTO/X86/lazyload_metadata.ll +++ test/ThinLTO/X86/lazyload_metadata.ll @@ -10,13 +10,13 @@ ; RUN: llvm-lto -thinlto-action=import %t2.bc -thinlto-index=%t3.bc \ ; RUN: -o /dev/null -stats \ ; RUN: 2>&1 | FileCheck %s -check-prefix=LAZY -; LAZY: 55 bitcode-reader - Number of Metadata records loaded +; LAZY: 57 bitcode-reader - Number of Metadata records loaded ; LAZY: 2 bitcode-reader - Number of MDStrings loaded ; RUN: llvm-lto -thinlto-action=import %t2.bc -thinlto-index=%t3.bc \ ; RUN: -o /dev/null -disable-ondemand-mds-loading -stats \ ; RUN: 2>&1 | FileCheck %s -check-prefix=NOTLAZY -; NOTLAZY: 64 bitcode-reader - Number of Metadata records loaded +; NOTLAZY: 66 bitcode-reader - Number of Metadata records loaded ; NOTLAZY: 7 bitcode-reader - Number of MDStrings loaded Index: test/Transforms/LoopPredication/basic.ll =================================================================== --- test/Transforms/LoopPredication/basic.ll +++ test/Transforms/LoopPredication/basic.ll @@ -1026,3 +1026,38 @@ %result = phi i32 [ 0, %entry ], [ %loop.acc.next, %loop ] ret i32 %result } + +; Avoid predicating the loop if we have a llvm.cond.no_loop_predication +define i64 @donot_predicate(i64* nocapture readonly %arg, i32 %length, i64* nocapture readonly %arg2, i64* nocapture readonly %n_addr, i64 %i) #0 gc "azul" { +; CHECK-LABEL: donot_predicate( +entry: + %length.ext = zext i32 %length to i64 + %n.pre = load i64, i64* %n_addr, align 4 + br label %Header + +; CHECK-LABEL: Header: +; CHECK: 
%within.bounds = icmp ult i64 %j2, %length.ext +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) +Header: ; preds = %entry, %Latch + %result.in3 = phi i64* [ %arg2, %entry ], [ %arg, %Latch ] + %j2 = phi i64 [ 0, %entry ], [ %j.next, %Latch ] + %within.bounds = icmp ult i64 %j2, %length.ext + call void (i1, ...) @llvm.experimental.guard(i1 %within.bounds, i32 9) [ "deopt"() ] + %innercmp = icmp eq i64 %j2, %n.pre + %j.next = add nuw nsw i64 %j2, 1 + br i1 %innercmp, label %exit, label %Latch + +Latch: ; preds = %Header + %speculate_trip_count = icmp ult i64 %j.next, 1048576, !llvm.cond.no_loop_predication !{} + br i1 %speculate_trip_count, label %deopt, label %Header + +deopt: ; preds = %Latch + %counted_speculation_failed = call i64 (...) @llvm.experimental.deoptimize.i64(i64 30) [ "deopt"(i32 0) ] + ret i64 %counted_speculation_failed + +exit: ; preds = %Header + %result.in3.lcssa = phi i64* [ %result.in3, %Header ] + %result.le = load i64, i64* %result.in3.lcssa, align 8 + ret i64 %result.le +} +declare i64 @llvm.experimental.deoptimize.i64(...)