diff --git a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp --- a/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ b/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -1724,9 +1724,10 @@ /// - Max latch taken count of the loop is limited. /// It guarantees that induction variable will not overflow iterating in the /// "main loop". - if (auto BO = dyn_cast<BinaryOperator>(MainLoopStructure.IndVarBase)) + if (isa<OverflowingBinaryOperator>(MainLoopStructure.IndVarBase)) if (IsSignedPredicate) - BO->setHasNoSignedWrap(true); + cast<BinaryOperator>(MainLoopStructure.IndVarBase) + ->setHasNoSignedWrap(true); /// TODO: support unsigned predicate. /// To add NUW flag we need to prove that both operands of BO are /// non-negative. E.g: diff --git a/llvm/test/Transforms/IRCE/stride_more_than_1.ll b/llvm/test/Transforms/IRCE/stride_more_than_1.ll --- a/llvm/test/Transforms/IRCE/stride_more_than_1.ll +++ b/llvm/test/Transforms/IRCE/stride_more_than_1.ll @@ -61,7 +61,7 @@ ; CHECK-NEXT: [[ADDR_POSTLOOP:%.*]] = getelementptr i32, ptr [[ARR]], i32 [[IDX_POSTLOOP]] ; CHECK-NEXT: store i32 0, ptr [[ADDR_POSTLOOP]], align 4 ; CHECK-NEXT: [[NEXT_POSTLOOP:%.*]] = icmp slt i32 [[IDX_NEXT_POSTLOOP]], 100 -; CHECK-NEXT: br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP1:![0-9]+]], !irce.loop.clone [[META6:![0-9]+]] +; CHECK-NEXT: br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP1:![0-9]+]], !irce.loop.clone !6 ; entry: @@ -137,7 +137,7 @@ ; CHECK-NEXT: [[ADDR_POSTLOOP:%.*]] = getelementptr i32, ptr [[ARR]], i32 [[IDX_POSTLOOP]] ; CHECK-NEXT: store i32 0, ptr [[ADDR_POSTLOOP]], align 4 ; CHECK-NEXT: [[NEXT_POSTLOOP:%.*]] = icmp slt i32 [[IDX_NEXT_POSTLOOP]], 2147483640 -; CHECK-NEXT: br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP7:![0-9]+]], !irce.loop.clone [[META6]] +; CHECK-NEXT: br i1 
[[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP7:![0-9]+]], !irce.loop.clone !6 ; entry: @@ -214,7 +214,7 @@ ; CHECK-NEXT: [[ADDR_POSTLOOP:%.*]] = getelementptr i32, ptr [[ARR]], i32 [[IDX_POSTLOOP]] ; CHECK-NEXT: store i32 0, ptr [[ADDR_POSTLOOP]], align 4 ; CHECK-NEXT: [[NEXT_POSTLOOP:%.*]] = icmp slt i32 [[IDX_NEXT_POSTLOOP]], 2147483647 -; CHECK-NEXT: br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP9:![0-9]+]], !irce.loop.clone [[META6]] +; CHECK-NEXT: br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP9:![0-9]+]], !irce.loop.clone !6 ; entry: @@ -292,7 +292,7 @@ ; CHECK-NEXT: [[ADDR_POSTLOOP:%.*]] = getelementptr i32, ptr [[ARR]], i32 [[IDX_POSTLOOP]] ; CHECK-NEXT: store i32 0, ptr [[ADDR_POSTLOOP]], align 4 ; CHECK-NEXT: [[NEXT_POSTLOOP:%.*]] = icmp slt i32 [[IDX_NEXT_POSTLOOP]], 2147483647 -; CHECK-NEXT: br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP11:![0-9]+]], !irce.loop.clone [[META6]] +; CHECK-NEXT: br i1 [[NEXT_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP11:![0-9]+]], !irce.loop.clone !6 ; @@ -363,7 +363,7 @@ ; CHECK-NEXT: store i32 0, ptr [[ADDR_PRELOOP]], align 4 ; CHECK-NEXT: [[NEXT_PRELOOP:%.*]] = icmp sgt i32 [[IDX_NEXT_PRELOOP]], -1 ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[IDX_NEXT_PRELOOP]], [[EXIT_PRELOOP_AT]] -; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP_PRELOOP]], label [[PRELOOP_EXIT_SELECTOR:%.*]], !llvm.loop [[LOOP12:![0-9]+]], !irce.loop.clone [[META6]] +; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP_PRELOOP]], label [[PRELOOP_EXIT_SELECTOR:%.*]], !llvm.loop [[LOOP12:![0-9]+]], !irce.loop.clone !6 ; CHECK: preloop.exit.selector: ; CHECK-NEXT: [[IDX_NEXT_PRELOOP_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT_PRELOOP]], [[IN_BOUNDS_PRELOOP]] ] ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[IDX_NEXT_PRELOOP_LCSSA]], -1 @@ -440,7 +440,7 @@ 
; CHECK-NEXT: store i32 0, ptr [[ADDR_PRELOOP]], align 4 ; CHECK-NEXT: [[NEXT_PRELOOP:%.*]] = icmp ugt i32 [[IDX_NEXT_PRELOOP]], 6 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[IDX_NEXT_PRELOOP]], [[EXIT_PRELOOP_AT]] -; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP_PRELOOP]], label [[PRELOOP_EXIT_SELECTOR:%.*]], !llvm.loop [[LOOP14:![0-9]+]], !irce.loop.clone [[META6]] +; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP_PRELOOP]], label [[PRELOOP_EXIT_SELECTOR:%.*]], !llvm.loop [[LOOP14:![0-9]+]], !irce.loop.clone !6 ; CHECK: preloop.exit.selector: ; CHECK-NEXT: [[IDX_NEXT_PRELOOP_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT_PRELOOP]], [[IN_BOUNDS_PRELOOP]] ] ; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[IDX_NEXT_PRELOOP_LCSSA]], 6 @@ -564,7 +564,7 @@ ; CHECK-NEXT: store i32 0, ptr [[ADDR_PRELOOP]], align 4 ; CHECK-NEXT: [[NEXT_PRELOOP:%.*]] = icmp ugt i32 [[IDX_NEXT_PRELOOP]], 6 ; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[IDX_NEXT_PRELOOP]], [[EXIT_PRELOOP_AT]] -; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP_PRELOOP]], label [[PRELOOP_EXIT_SELECTOR:%.*]], !llvm.loop [[LOOP15:![0-9]+]], !irce.loop.clone [[META6]] +; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP_PRELOOP]], label [[PRELOOP_EXIT_SELECTOR:%.*]], !llvm.loop [[LOOP15:![0-9]+]], !irce.loop.clone !6 ; CHECK: preloop.exit.selector: ; CHECK-NEXT: [[IDX_NEXT_PRELOOP_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT_PRELOOP]], [[IN_BOUNDS_PRELOOP]] ] ; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i32 [[IDX_NEXT_PRELOOP_LCSSA]], 6 @@ -673,7 +673,7 @@ ; CHECK-NEXT: store i32 1, ptr [[EL_PTR_POSTLOOP]], align 4 ; CHECK-NEXT: [[IV_NEXT_POSTLOOP]] = add nuw nsw i32 [[IV_POSTLOOP]], 4 ; CHECK-NEXT: [[LOOP_COND_POSTLOOP:%.*]] = icmp slt i32 [[IV_NEXT_POSTLOOP]], [[NUM_ELEMENTS]] -; CHECK-NEXT: br i1 [[LOOP_COND_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP18:![0-9]+]], !irce.loop.clone [[META6]] +; CHECK-NEXT: br i1 [[LOOP_COND_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP18:![0-9]+]], !irce.loop.clone !6 ; 
entry: %capacity = load i32, ptr %capacity_p, !range !4 @@ -769,7 +769,7 @@ ; CHECK-NEXT: store i32 1, ptr [[EL_PTR_POSTLOOP]], align 4 ; CHECK-NEXT: [[IV_NEXT_POSTLOOP]] = add nuw nsw i32 [[IV_POSTLOOP]], 4 ; CHECK-NEXT: [[LOOP_COND_POSTLOOP:%.*]] = icmp slt i32 [[IV_NEXT_POSTLOOP]], [[NUM_ELEMENTS]] -; CHECK-NEXT: br i1 [[LOOP_COND_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP19:![0-9]+]], !irce.loop.clone [[META6]] +; CHECK-NEXT: br i1 [[LOOP_COND_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP19:![0-9]+]], !irce.loop.clone !6 ; entry: %capacity = load i32, ptr %capacity_p, !range !4 @@ -866,7 +866,7 @@ ; CHECK-NEXT: store i32 1, ptr [[EL_PTR_POSTLOOP]], align 4 ; CHECK-NEXT: [[IV_NEXT_POSTLOOP]] = add nuw nsw i32 [[IV_POSTLOOP]], 4 ; CHECK-NEXT: [[LOOP_COND_POSTLOOP:%.*]] = icmp slt i32 [[IV_NEXT_POSTLOOP]], [[NUM_ELEMENTS]] -; CHECK-NEXT: br i1 [[LOOP_COND_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP20:![0-9]+]], !irce.loop.clone [[META6]] +; CHECK-NEXT: br i1 [[LOOP_COND_POSTLOOP]], label [[LOOP_POSTLOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP20:![0-9]+]], !irce.loop.clone !6 ; entry: %capacity = load i32, ptr %capacity_p, !range !4 @@ -894,6 +894,93 @@ ret i32 -1 } +; Indvar base is non-overflowing binary 'or': +; check that IRCE isn't trying to add NSW flag on it. 
+define i32 @binop_or_is_iv_base(ptr %p, i32 %end) { +; CHECK-LABEL: define i32 @binop_or_is_iv_base +; CHECK-SAME: (ptr [[P:%.*]], i32 [[END:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[N:%.*]] = load atomic i32, ptr [[P]] unordered, align 8, !range [[RNG8]] +; CHECK-NEXT: [[CMP0:%.*]] = icmp sgt i32 [[END]], 7 +; CHECK-NEXT: br i1 [[CMP0]], label [[LOOP_HEADER_PREHEADER:%.*]], label [[COMMON_RET:%.*]] +; CHECK: loop.header.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[N]], 7 +; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[END]], i32 [[TMP0]]) +; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = call i32 @llvm.smax.i32(i32 [[SMIN]], i32 7) +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 7, [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP1]], label [[LOOP_HEADER_PREHEADER1:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +; CHECK: loop.header.preheader1: +; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_ADD:%.*]], [[GUARDED:%.*]] ], [ 0, [[LOOP_HEADER_PREHEADER1]] ] +; CHECK-NEXT: [[CHECK:%.*]] = icmp ult i32 [[IV]], [[N]] +; CHECK-NEXT: br i1 true, label [[GUARDED]], label [[DEOPT_LOOPEXIT2:%.*]] +; CHECK: guarded: +; CHECK-NEXT: [[IV_ADD]] = add i32 [[IV]], 8 +; CHECK-NEXT: [[IV_OR:%.*]] = or i32 [[IV_ADD]], 7 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV_OR]], [[END]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[IV_OR]], [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT: br i1 [[TMP2]], label [[LOOP_HEADER]], label [[MAIN_EXIT_SELECTOR:%.*]] +; CHECK: main.exit.selector: +; CHECK-NEXT: [[IV_ADD_LCSSA:%.*]] = phi i32 [ [[IV_ADD]], [[GUARDED]] ] +; CHECK-NEXT: [[IV_OR_LCSSA:%.*]] = phi i32 [ [[IV_OR]], [[GUARDED]] ] +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[IV_OR_LCSSA]], [[END]] +; CHECK-NEXT: br i1 [[TMP3]], label [[MAIN_PSEUDO_EXIT]], label [[COMMON_RET_LOOPEXIT:%.*]] +; CHECK: main.pseudo.exit: +; CHECK-NEXT: [[IV_COPY:%.*]] = phi i32 [ 0, [[LOOP_HEADER_PREHEADER]] ], [ [[IV_ADD_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] 
+; CHECK-NEXT: [[INDVAR_END:%.*]] = phi i32 [ 7, [[LOOP_HEADER_PREHEADER]] ], [ [[IV_OR_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: br label [[POSTLOOP:%.*]] +; CHECK: deopt.loopexit: +; CHECK-NEXT: br label [[DEOPT:%.*]] +; CHECK: deopt.loopexit2: +; CHECK-NEXT: br label [[DEOPT]] +; CHECK: deopt: +; CHECK-NEXT: [[RV:%.*]] = call i32 (...) @llvm.experimental.deoptimize.i32(i32 13) [ "deopt"() ] +; CHECK-NEXT: ret i32 [[RV]] +; CHECK: common.ret.loopexit.loopexit: +; CHECK-NEXT: br label [[COMMON_RET_LOOPEXIT]] +; CHECK: common.ret.loopexit: +; CHECK-NEXT: br label [[COMMON_RET]] +; CHECK: common.ret: +; CHECK-NEXT: ret i32 [[END]] +; CHECK: postloop: +; CHECK-NEXT: br label [[LOOP_HEADER_POSTLOOP:%.*]] +; CHECK: loop.header.postloop: +; CHECK-NEXT: [[IV_POSTLOOP:%.*]] = phi i32 [ [[IV_ADD_POSTLOOP:%.*]], [[GUARDED_POSTLOOP:%.*]] ], [ [[IV_COPY]], [[POSTLOOP]] ] +; CHECK-NEXT: [[CHECK_POSTLOOP:%.*]] = icmp ult i32 [[IV_POSTLOOP]], [[N]] +; CHECK-NEXT: br i1 [[CHECK_POSTLOOP]], label [[GUARDED_POSTLOOP]], label [[DEOPT_LOOPEXIT:%.*]] +; CHECK: guarded.postloop: +; CHECK-NEXT: [[IV_ADD_POSTLOOP]] = add i32 [[IV_POSTLOOP]], 8 +; CHECK-NEXT: [[IV_OR_POSTLOOP:%.*]] = or i32 [[IV_ADD_POSTLOOP]], 7 +; CHECK-NEXT: [[CMP_POSTLOOP:%.*]] = icmp slt i32 [[IV_OR_POSTLOOP]], [[END]] +; CHECK-NEXT: br i1 [[CMP_POSTLOOP]], label [[LOOP_HEADER_POSTLOOP]], label [[COMMON_RET_LOOPEXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP21:![0-9]+]], !irce.loop.clone !6 +; +entry: + %n = load atomic i32, ptr %p unordered, align 8, !range !1 + %cmp0 = icmp sgt i32 %end, 7 + br i1 %cmp0, label %loop.header, label %common.ret + +loop.header: + %iv = phi i32 [ %iv.add, %guarded ], [ 0, %entry ] + %check = icmp ult i32 %iv, %n + br i1 %check, label %guarded, label %deopt + +guarded: + %iv.add = add i32 %iv, 8 + %iv.or = or i32 %iv.add, 7 + %cmp = icmp slt i32 %iv.or, %end + br i1 %cmp, label %loop.header, label %common.ret + +deopt: + %rv = call i32 (...) 
@llvm.experimental.deoptimize.i32(i32 13) [ "deopt"() ] + ret i32 %rv + +common.ret: + ret i32 %end +} + +declare i32 @llvm.experimental.deoptimize.i32(...) + !0 = !{i32 0, i32 50} !1 = !{i32 0, i32 2147483640} !2 = !{i32 0, i32 2147483641}