Index: lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp =================================================================== --- lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -354,6 +354,32 @@ return true; } + +static bool GetBranchWeights(BranchInst *BI, uint64_t &LeftWeight, + uint64_t &RightWeight) { + MDNode *MD = BI->getMetadata(LLVMContext::MD_prof); + if (!MD || MD->getNumOperands() != 3) + return false; + + MDString* MDS = dyn_cast_or_null(MD->getOperand(0)); + if (!MDS || !MDS->getString().equals("branch_weights")) + return false; + + ConstantInt *LeftCI = mdconst::dyn_extract(MD->getOperand(1)); + ConstantInt *RightCI = mdconst::dyn_extract(MD->getOperand(2)); + + if (!LeftCI || LeftCI->getValue().getBitWidth() > 64) + return false; + + if (!RightCI || RightCI->getValue().getBitWidth() > 64) + return false; + + LeftWeight = LeftCI->getZExtValue(); + RightWeight = RightCI->getZExtValue(); + + return true; +} + InductiveRangeCheck * InductiveRangeCheck::create(InductiveRangeCheck::AllocatorTy &A, BranchInst *BI, Loop *L, ScalarEvolution &SE) { @@ -361,6 +387,21 @@ if (BI->isUnconditional() || BI->getParent() == L->getLoopLatch()) return nullptr; + uint64_t LeftWeight = 0, RightWeight = 0; + + if (!GetBranchWeights(BI, LeftWeight, RightWeight)) + return nullptr; + + const int LikelyRatio = 16; + // LikelyRatio = LikelyBranchWeight / UnlikelyBranchWeight, from + // LowerExpectIntrinsic.cpp + + bool TrueVeryLikely = (RightWeight == 0 && LeftWeight != 0) || + (LeftWeight / RightWeight >= LikelyRatio); + + if (!TrueVeryLikely) + return nullptr; + Value *Length = nullptr; const SCEV *IndexSCEV = nullptr; Index: test/Transforms/IRCE/multiple-access-no-preloop.ll =================================================================== --- test/Transforms/IRCE/multiple-access-no-preloop.ll +++ test/Transforms/IRCE/multiple-access-no-preloop.ll @@ -13,13 +13,13 @@ %idx = phi i32 [ 0, %entry ] , [ %idx.next, %in.bounds.b ] %idx.next = add i32 %idx, 1 %abc.a = icmp slt i32 %idx, %len.a - br i1 %abc.a, label %in.bounds.a, label %out.of.bounds + br i1 %abc.a, label %in.bounds.a, label %out.of.bounds, !prof !1 in.bounds.a: %addr.a = getelementptr i32* %arr_a, i32 %idx store i32 0, i32* %addr.a %abc.b = icmp slt i32 %idx, %len.b - br i1 %abc.b, label %in.bounds.b, label %out.of.bounds + br i1 %abc.b, label %in.bounds.b, label %out.of.bounds, !prof !1 in.bounds.b: %addr.b = getelementptr i32* %arr_b, i32 %idx @@ -57,3 +57,4 @@ ; CHECK: br i1 %next.postloop, label %loop.postloop, label %exit.loopexit !0 = !{i32 0, i32 2147483647} +!1 = !{!"branch_weights", i32 64, i32 4} Index: test/Transforms/IRCE/no-branch-weight.ll =================================================================== --- /dev/null +++ test/Transforms/IRCE/no-branch-weight.ll @@ -0,0 +1,40 @@ +; RUN: opt -verify-loop-info -irce-print-changed-loops -irce < %s 2>&1 | FileCheck %s + +; CHECK-NOT: constrained Loop + +define void @multiple_access_no_preloop( + i32* %arr_a, i32* %a_len_ptr, i32* %arr_b, i32* %b_len_ptr, i32 %n) { + + entry: + %len.a = load i32* %a_len_ptr, !range !0 + %len.b = load i32* %b_len_ptr, !range !0 + %first.itr.check = icmp sgt i32 %n, 0 + br i1 %first.itr.check, label %loop, label %exit + + loop: + %idx = phi i32 [ 0, %entry ] , [ %idx.next, %in.bounds.b ] + %idx.next = add i32 %idx, 1 + %abc.a = icmp slt i32 %idx, %len.a + br i1 %abc.a, label %in.bounds.a, label %out.of.bounds + + in.bounds.a: + %addr.a = getelementptr i32* %arr_a, i32 %idx + store i32 0, i32* %addr.a + %abc.b = icmp slt i32 %idx, %len.b + br i1 %abc.b, label %in.bounds.b, label %out.of.bounds + + in.bounds.b: + %addr.b = getelementptr i32* %arr_b, i32 %idx + store i32 -1, i32* %addr.b + %next = icmp slt i32 %idx.next, %n + br i1 %next, label %loop, label %exit + + out.of.bounds: + ret void + + exit: + ret void +} + +!0 = !{i32 0, i32 2147483647} + Index: test/Transforms/IRCE/single-access-no-preloop.ll =================================================================== --- test/Transforms/IRCE/single-access-no-preloop.ll +++ test/Transforms/IRCE/single-access-no-preloop.ll @@ -10,7 +10,7 @@ %idx = phi i32 [ 0, %entry ] , [ %idx.next, %in.bounds ] %idx.next = add i32 %idx, 1 %abc = icmp slt i32 %idx, %len - br i1 %abc, label %in.bounds, label %out.of.bounds + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 in.bounds: %addr = getelementptr i32* %arr, i32 %idx @@ -65,7 +65,7 @@ %idx.next = add i32 %idx, 1 %idx.for.abc = add i32 %idx, 4 %abc = icmp slt i32 %idx.for.abc, %len - br i1 %abc, label %in.bounds, label %out.of.bounds + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 in.bounds: %addr = getelementptr i32* %arr, i32 %idx.for.abc @@ -108,3 +108,4 @@ ; CHECK: br i1 %next.postloop, label %loop.postloop, label %exit.loopexit !0 = !{i32 0, i32 2147483647} +!1 = !{!"branch_weights", i32 64, i32 4} Index: test/Transforms/IRCE/single-access-with-preloop.ll =================================================================== --- test/Transforms/IRCE/single-access-with-preloop.ll +++ test/Transforms/IRCE/single-access-with-preloop.ll @@ -13,7 +13,7 @@ %abc.high = icmp slt i32 %array.idx, %len %abc.low = icmp sge i32 %array.idx, 0 %abc = and i1 %abc.low, %abc.high - br i1 %abc, label %in.bounds, label %out.of.bounds + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 in.bounds: %addr = getelementptr i32* %arr, i32 %array.idx @@ -57,3 +57,4 @@ ; CHECK: br i1 %next.postloop, label %loop.postloop, label %exit.loopexit !0 = !{i32 0, i32 2147483647} +!1 = !{!"branch_weights", i32 64, i32 4} Index: test/Transforms/IRCE/with-parent-loops.ll =================================================================== --- test/Transforms/IRCE/with-parent-loops.ll +++ test/Transforms/IRCE/with-parent-loops.ll @@ -16,7 +16,7 @@ %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ] %idx.next = add i32 %idx, 1 %abc = icmp slt i32 %idx, %len - br i1 %abc, label %in.bounds, label %out.of.bounds + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 in.bounds: ; preds = %loop %addr = getelementptr i32* %arr, i32 %idx @@ -50,7 +50,7 @@ %idx.i = phi i32 [ 0, %loop ], [ %idx.next.i, %in.bounds.i ] %idx.next.i = add i32 %idx.i, 1 %abc.i = icmp slt i32 %idx.i, %len.i - br i1 %abc.i, label %in.bounds.i, label %out.of.bounds.i + br i1 %abc.i, label %in.bounds.i, label %out.of.bounds.i, !prof !1 in.bounds.i: ; preds = %loop.i %addr.i = getelementptr i32* %arr, i32 %idx.i @@ -96,7 +96,7 @@ %idx.i.i = phi i32 [ 0, %loop.i ], [ %idx.next.i.i, %in.bounds.i.i ] %idx.next.i.i = add i32 %idx.i.i, 1 %abc.i.i = icmp slt i32 %idx.i.i, %len.i.i - br i1 %abc.i.i, label %in.bounds.i.i, label %out.of.bounds.i.i + br i1 %abc.i.i, label %in.bounds.i.i, label %out.of.bounds.i.i, !prof !1 in.bounds.i.i: ; preds = %loop.i.i %addr.i.i = getelementptr i32* %arr, i32 %idx.i.i @@ -140,7 +140,7 @@ %idx.i = phi i32 [ 0, %loop ], [ %idx.next.i, %in.bounds.i ] %idx.next.i = add i32 %idx.i, 1 %abc.i = icmp slt i32 %idx.i, %len.i - br i1 %abc.i, label %in.bounds.i, label %out.of.bounds.i + br i1 %abc.i, label %in.bounds.i, label %out.of.bounds.i, !prof !1 in.bounds.i: ; preds = %loop.i %addr.i = getelementptr i32* %arr, i32 %idx.i @@ -163,7 +163,7 @@ %idx.i3 = phi i32 [ 0, %inner_loop.exit ], [ %idx.next.i4, %in.bounds.i9 ] %idx.next.i4 = add i32 %idx.i3, 1 %abc.i5 = icmp slt i32 %idx.i3, %len.i1 - br i1 %abc.i5, label %in.bounds.i9, label %out.of.bounds.i10 + br i1 %abc.i5, label %in.bounds.i9, label %out.of.bounds.i10, !prof !1 in.bounds.i9: ; preds = %loop.i6 %addr.i7 = getelementptr i32* %arr, i32 %idx.i3 @@ -210,7 +210,7 @@ %idx.i.i = phi i32 [ 0, %loop.i ], [ %idx.next.i.i, %in.bounds.i.i ] %idx.next.i.i = add i32 %idx.i.i, 1 %abc.i.i = icmp slt i32 %idx.i.i, %len.i.i - br i1 %abc.i.i, label %in.bounds.i.i, label %out.of.bounds.i.i + br i1 %abc.i.i, label %in.bounds.i.i, label %out.of.bounds.i.i, !prof !1 in.bounds.i.i: ; preds = %loop.i.i %addr.i.i = getelementptr i32* %arr, i32 %idx.i.i @@ -242,7 +242,7 @@ %idx.i.i7 = phi i32 [ 0, %loop.i6 ], [ %idx.next.i.i8, %in.bounds.i.i13 ] %idx.next.i.i8 = add i32 %idx.i.i7, 1 %abc.i.i9 = icmp slt i32 %idx.i.i7, %len.i.i4 - br i1 %abc.i.i9, label %in.bounds.i.i13, label %out.of.bounds.i.i14 + br i1 %abc.i.i9, label %in.bounds.i.i13, label %out.of.bounds.i.i14, !prof !1 in.bounds.i.i13: ; preds = %loop.i.i10 %addr.i.i11 = getelementptr i32* %arr, i32 %idx.i.i7 @@ -286,7 +286,7 @@ %idx.i = phi i32 [ 0, %loop ], [ %idx.next.i, %in.bounds.i ] %idx.next.i = add i32 %idx.i, 1 %abc.i = icmp slt i32 %idx.i, %len.i - br i1 %abc.i, label %in.bounds.i, label %out.of.bounds.i + br i1 %abc.i, label %in.bounds.i, label %out.of.bounds.i, !prof !1 in.bounds.i: ; preds = %loop.i %addr.i = getelementptr i32* %arr, i32 %idx.i @@ -315,7 +315,7 @@ %idx.i.i = phi i32 [ 0, %loop.i4 ], [ %idx.next.i.i, %in.bounds.i.i ] %idx.next.i.i = add i32 %idx.i.i, 1 %abc.i.i = icmp slt i32 %idx.i.i, %len.i.i - br i1 %abc.i.i, label %in.bounds.i.i, label %out.of.bounds.i.i + br i1 %abc.i.i, label %in.bounds.i.i, label %out.of.bounds.i.i, !prof !1 in.bounds.i.i: ; preds = %loop.i.i %addr.i.i = getelementptr i32* %arr, i32 %idx.i.i @@ -342,3 +342,4 @@ attributes #0 = { alwaysinline } !0 = !{i32 0, i32 2147483647} +!1 = !{!"branch_weights", i32 64, i32 4}