diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" @@ -1833,22 +1834,6 @@ return V; } -/// Check whether value has nuw/nsw/exact set but SCEV does not. -/// TODO: In reality it is better to check the poison recursively -/// but this is better than nothing. -static bool SCEVLostPoisonFlags(const SCEV *S, const Instruction *I) { - if (isa(I)) { - if (auto *NS = dyn_cast(S)) { - if (I->hasNoSignedWrap() && !NS->hasNoSignedWrap()) - return true; - if (I->hasNoUnsignedWrap() && !NS->hasNoUnsignedWrap()) - return true; - } - } else if (isa(I) && I->isExact()) - return true; - return false; -} - ScalarEvolution::ValueOffsetPair SCEVExpander::FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt) { @@ -1872,8 +1857,7 @@ if (S->getType() == V->getType() && SE.DT.dominates(EntInst, InsertPt) && (SE.LI.getLoopFor(EntInst->getParent()) == nullptr || - SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)) && - !SCEVLostPoisonFlags(S, EntInst)) + SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt))) return {V, Offset}; } } @@ -1952,26 +1936,36 @@ if (!V) V = visit(S); - else if (VO.second) { - if (PointerType *Vty = dyn_cast(V->getType())) { - Type *Ety = Vty->getPointerElementType(); - int64_t Offset = VO.second->getSExtValue(); - int64_t ESize = SE.getTypeSizeInBits(Ety); - if ((Offset * 8) % ESize == 0) { - ConstantInt *Idx = + else { + // If we're reusing an existing instruction, we are effectively CSEing two + // copies of the instruction (with potentially different flags). As such, + // we need to drop any poison generating flags unless we can prove that + // said flags must be valid for all new users. + if (auto *I = dyn_cast(V)) + if (I->hasPoisonGeneratingFlags() && !programUndefinedIfPoison(I)) + I->dropPoisonGeneratingFlags(); + + if (VO.second) { + if (PointerType *Vty = dyn_cast(V->getType())) { + Type *Ety = Vty->getPointerElementType(); + int64_t Offset = VO.second->getSExtValue(); + int64_t ESize = SE.getTypeSizeInBits(Ety); + if ((Offset * 8) % ESize == 0) { + ConstantInt *Idx = ConstantInt::getSigned(VO.second->getType(), -(Offset * 8) / ESize); - V = Builder.CreateGEP(Ety, V, Idx, "scevgep"); - } else { - ConstantInt *Idx = + V = Builder.CreateGEP(Ety, V, Idx, "scevgep"); + } else { + ConstantInt *Idx = ConstantInt::getSigned(VO.second->getType(), -Offset); - unsigned AS = Vty->getAddressSpace(); - V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS)); - V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx, - "uglygep"); - V = Builder.CreateBitCast(V, Vty); + unsigned AS = Vty->getAddressSpace(); + V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS)); + V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx, + "uglygep"); + V = Builder.CreateBitCast(V, Vty); + } + } else { + V = Builder.CreateSub(V, VO.second); } - } else { - V = Builder.CreateSub(V, VO.second); } } // Remember the expanded value for this SCEV at this location. @@ -2180,7 +2174,9 @@ } // Use expand's logic which is used for reusing a previous Value in - // ExprValueMap. + // ExprValueMap. Note that we don't currently model the cost of + // needing to drop poison generating flags on the instruction if we + // want to reuse it. We effectively assume that has zero cost. ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, At); if (VO.first) return VO; diff --git a/llvm/test/CodeGen/PowerPC/common-chain.ll b/llvm/test/CodeGen/PowerPC/common-chain.ll --- a/llvm/test/CodeGen/PowerPC/common-chain.ll +++ b/llvm/test/CodeGen/PowerPC/common-chain.ll @@ -144,8 +144,8 @@ ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: blt cr0, .LBB1_4 ; CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: sldi r7, r4, 2 -; CHECK-NEXT: sldi r9, r4, 1 +; CHECK-NEXT: sldi r7, r4, 1 +; CHECK-NEXT: sldi r9, r4, 2 ; CHECK-NEXT: add r5, r3, r5 ; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: add r8, r4, r7 @@ -155,11 +155,11 @@ ; CHECK-NEXT: .LBB1_2: # %for.body ; CHECK-NEXT: # ; CHECK-NEXT: ldx r6, r5, r4 -; CHECK-NEXT: ldx r11, r5, r9 -; CHECK-NEXT: ldx r12, r5, r10 -; CHECK-NEXT: ldx r0, r5, r7 +; CHECK-NEXT: ldx r11, r5, r7 +; CHECK-NEXT: ldx r12, r5, r8 +; CHECK-NEXT: ldx r0, r5, r9 ; CHECK-NEXT: mulld r6, r11, r6 -; CHECK-NEXT: ldx r30, r5, r8 +; CHECK-NEXT: ldx r30, r5, r10 ; CHECK-NEXT: addi r5, r5, 1 ; CHECK-NEXT: mulld r6, r6, r12 ; CHECK-NEXT: mulld r6, r6, r0 @@ -336,20 +336,20 @@ ; CHECK-NEXT: cmpdi r6, 1 ; CHECK-NEXT: blt cr0, .LBB3_4 ; CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: sldi r7, r4, 3 +; CHECK-NEXT: sldi r9, r4, 3 ; CHECK-NEXT: mtctr r6 ; CHECK-NEXT: add r5, r3, r5 ; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: sldi r7, r4, 1 ; CHECK-NEXT: sldi r8, r4, 2 -; CHECK-NEXT: sub r7, r7, r4 -; CHECK-NEXT: sldi r4, r4, 1 +; CHECK-NEXT: sub r4, r9, r4 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB3_2: # %for.body ; CHECK-NEXT: # ; CHECK-NEXT: ld r6, 0(r5) -; CHECK-NEXT: ldx r9, r5, r4 +; CHECK-NEXT: ldx r9, r5, r7 ; CHECK-NEXT: ldx r10, r5, r8 -; CHECK-NEXT: ldx r11, r5, r7 +; CHECK-NEXT: ldx r11, r5, r4 ; CHECK-NEXT: addi r5, r5, 1 ; CHECK-NEXT: mulld r6, r9, r6 ; CHECK-NEXT: mulld r6, r6, r10 @@ -446,25 +446,25 @@ ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: blt cr0, .LBB4_3 ; CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: mulli r7, r4, 10 +; CHECK-NEXT: mulli r11, r4, 10 +; CHECK-NEXT: sldi r8, r4, 2 ; CHECK-NEXT: add r5, r3, r5 -; CHECK-NEXT: sldi r8, r4, 3 -; CHECK-NEXT: sub r9, r8, r4 -; CHECK-NEXT: sldi r3, r4, 2 -; CHECK-NEXT: mtctr r6 -; CHECK-NEXT: add r10, r4, r3 -; CHECK-NEXT: sldi r11, r4, 1 ; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: add r8, r4, r8 +; CHECK-NEXT: sldi r9, r4, 3 +; CHECK-NEXT: mtctr r6 +; CHECK-NEXT: sldi r7, r4, 1 +; CHECK-NEXT: sub r10, r9, r4 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB4_2: # %for.body ; CHECK-NEXT: # ; CHECK-NEXT: ldx r6, r5, r4 -; CHECK-NEXT: ldx r12, r5, r11 -; CHECK-NEXT: ldx r0, r5, r10 -; CHECK-NEXT: ldx r30, r5, r9 +; CHECK-NEXT: ldx r12, r5, r7 +; CHECK-NEXT: ldx r0, r5, r8 +; CHECK-NEXT: ldx r30, r5, r10 ; CHECK-NEXT: mulld r6, r12, r6 -; CHECK-NEXT: ldx r29, r5, r8 -; CHECK-NEXT: ldx r28, r5, r7 +; CHECK-NEXT: ldx r29, r5, r9 +; CHECK-NEXT: ldx r28, r5, r11 ; CHECK-NEXT: addi r5, r5, 1 ; CHECK-NEXT: mulld r6, r6, r0 ; CHECK-NEXT: mulld r6, r6, r30 diff --git a/llvm/test/Transforms/IRCE/non-loop-invariant-rhs-instr.ll b/llvm/test/Transforms/IRCE/non-loop-invariant-rhs-instr.ll --- a/llvm/test/Transforms/IRCE/non-loop-invariant-rhs-instr.ll +++ b/llvm/test/Transforms/IRCE/non-loop-invariant-rhs-instr.ll @@ -9,7 +9,7 @@ ; CHECK-NEXT: br i1 [[TRIPCHECK]], label [[LOOP_PREHEADER:%.*]], label [[ZERO:%.*]] ; CHECK: loop.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[A:%.*]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 1 ; CHECK-NEXT: [[SMIN:%.*]] = call i64 @llvm.smin.i64(i64 [[LEN]], i64 0) ; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[LEN]], [[SMIN]] ; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP2]], i64 [[TMP1]]) diff --git a/llvm/test/Transforms/IndVarSimplify/ashr-expansion.ll b/llvm/test/Transforms/IndVarSimplify/ashr-expansion.ll --- a/llvm/test/Transforms/IndVarSimplify/ashr-expansion.ll +++ b/llvm/test/Transforms/IndVarSimplify/ashr-expansion.ll @@ -6,7 +6,7 @@ define float @ashr_expansion_valid(i64 %x, float* %ptr) { ; CHECK-LABEL: @ashr_expansion_valid( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[BOUND:%.*]] = ashr exact i64 [[X:%.*]], 4 +; CHECK-NEXT: [[BOUND:%.*]] = ashr i64 [[X:%.*]], 4 ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[BOUND]], i64 1) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: @@ -46,11 +46,11 @@ ; CHECK-LABEL: @ashr_equivalent_expansion( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ABS_X:%.*]] = call i64 @llvm.abs.i64(i64 [[X:%.*]], i1 false) +; CHECK-NEXT: [[DIV:%.*]] = udiv exact i64 [[ABS_X]], 16 ; CHECK-NEXT: [[T0:%.*]] = call i64 @llvm.smax.i64(i64 [[X]], i64 -1) ; CHECK-NEXT: [[T1:%.*]] = call i64 @llvm.smin.i64(i64 [[T0]], i64 1) -; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 [[ABS_X]], 4 -; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[T1]], [[TMP0]] -; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 1) +; CHECK-NEXT: [[BOUND:%.*]] = mul i64 [[DIV]], [[T1]] +; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[BOUND]], i64 1) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -97,8 +97,8 @@ ; CHECK-NEXT: [[DIV:%.*]] = udiv i64 [[ABS_X]], 16 ; CHECK-NEXT: [[T0:%.*]] = call i64 @llvm.smax.i64(i64 [[X]], i64 -1) ; CHECK-NEXT: [[T1:%.*]] = call i64 @llvm.smin.i64(i64 [[T0]], i64 1) -; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[T1]], [[DIV]] -; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 1) +; CHECK-NEXT: [[BOUND:%.*]] = mul i64 [[DIV]], [[T1]] +; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[BOUND]], i64 1) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] @@ -145,8 +145,8 @@ ; CHECK-NEXT: [[DIV:%.*]] = udiv i64 [[ABS_X]], 16 ; CHECK-NEXT: [[T0:%.*]] = call i64 @llvm.smax.i64(i64 [[Y:%.*]], i64 -1) ; CHECK-NEXT: [[T1:%.*]] = call i64 @llvm.smin.i64(i64 [[T0]], i64 1) -; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[T1]], [[DIV]] -; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP0]], i64 1) +; CHECK-NEXT: [[BOUND:%.*]] = mul i64 [[DIV]], [[T1]] +; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[BOUND]], i64 1) ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] diff --git a/llvm/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll b/llvm/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll --- a/llvm/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll +++ b/llvm/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll @@ -7,7 +7,7 @@ ; CHECK-LABEL: @ptriv_as2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IDX_TRUNC:%.*]] = trunc i32 [[N:%.*]] to i8 -; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8 addrspace(2)* [[BASE:%.*]], i8 [[IDX_TRUNC]] +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr i8, i8 addrspace(2)* [[BASE:%.*]], i8 [[IDX_TRUNC]] ; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i8 addrspace(2)* [[BASE]], [[ADD_PTR]] ; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body.preheader: @@ -53,7 +53,7 @@ ; CHECK-LABEL: @ptriv_as3( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IDX_TRUNC:%.*]] = trunc i32 [[N:%.*]] to i16 -; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8 addrspace(3)* [[BASE:%.*]], i16 [[IDX_TRUNC]] +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr i8, i8 addrspace(3)* [[BASE:%.*]], i16 [[IDX_TRUNC]] ; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i8 addrspace(3)* [[BASE]], [[ADD_PTR]] ; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body.preheader: diff --git a/llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll b/llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll --- a/llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll +++ b/llvm/test/Transforms/IndVarSimplify/lftr-reuse.ll @@ -16,7 +16,7 @@ ; CHECK-LABEL: @ptriv( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[IDX_EXT:%.*]] = sext i32 [[N:%.*]] to i64 -; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, i8* [[BASE:%.*]], i64 [[IDX_EXT]] +; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr i8, i8* [[BASE:%.*]], i64 [[IDX_EXT]] ; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i8* [[BASE]], [[ADD_PTR]] ; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body.preheader: @@ -63,14 +63,13 @@ define void @expandOuterRecurrence(i32 %arg) nounwind { ; CHECK-LABEL: @expandOuterRecurrence( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SUB1:%.*]] = sub nsw i32 [[ARG:%.*]], 1 +; CHECK-NEXT: [[SUB1:%.*]] = sub i32 [[ARG:%.*]], 1 ; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i32 0, [[SUB1]] ; CHECK-NEXT: br i1 [[CMP1]], label [[OUTER_PREHEADER:%.*]], label [[EXIT:%.*]] ; CHECK: outer.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[ARG]], -1 ; CHECK-NEXT: br label [[OUTER:%.*]] ; CHECK: outer: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[TMP0]], [[OUTER_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[OUTER_INC:%.*]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i32 [ [[SUB1]], [[OUTER_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[OUTER_INC:%.*]] ] ; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_INC:%.*]], [[OUTER_INC]] ], [ 0, [[OUTER_PREHEADER]] ] ; CHECK-NEXT: [[SUB2:%.*]] = sub nsw i32 [[ARG]], [[I]] ; CHECK-NEXT: [[SUB3:%.*]] = sub nsw i32 [[SUB2]], 1 @@ -88,7 +87,7 @@ ; CHECK: outer.inc: ; CHECK-NEXT: [[I_INC]] = add nuw nsw i32 [[I]], 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i32 [[INDVARS_IV]], -1 -; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[TMP0]] +; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp ne i32 [[I_INC]], [[SUB1]] ; CHECK-NEXT: br i1 [[EXITCOND1]], label [[OUTER]], label [[EXIT_LOOPEXIT:%.*]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]] diff --git a/llvm/test/Transforms/IndVarSimplify/pr24783.ll b/llvm/test/Transforms/IndVarSimplify/pr24783.ll --- a/llvm/test/Transforms/IndVarSimplify/pr24783.ll +++ b/llvm/test/Transforms/IndVarSimplify/pr24783.ll @@ -7,7 +7,7 @@ define void @f(i32* %end.s, i8** %loc, i32 %p) { ; CHECK-LABEL: @f( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[END:%.*]] = getelementptr inbounds i32, i32* [[END_S:%.*]], i32 [[P:%.*]] +; CHECK-NEXT: [[END:%.*]] = getelementptr i32, i32* [[END_S:%.*]], i32 [[P:%.*]] ; CHECK-NEXT: br label [[WHILE_BODY_I:%.*]] ; CHECK: while.body.i: ; CHECK-NEXT: br i1 true, label [[LOOP_EXIT:%.*]], label [[WHILE_BODY_I]] diff --git a/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll b/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll --- a/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll +++ b/llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll @@ -182,7 +182,7 @@ ; CHECK-LABEL: @promote_latch_condition_decrementing_loop_01( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LEN:%.*]] = load i32, i32* [[P:%.*]], align 4, !range [[RNG0:![0-9]+]] -; CHECK-NEXT: [[LEN_MINUS_1:%.*]] = add nsw i32 [[LEN]], -1 +; CHECK-NEXT: [[LEN_MINUS_1:%.*]] = add i32 [[LEN]], -1 ; CHECK-NEXT: [[ZERO_CHECK:%.*]] = icmp eq i32 [[LEN]], 0 ; CHECK-NEXT: br i1 [[ZERO_CHECK]], label [[LOOPEXIT:%.*]], label [[PREHEADER:%.*]] ; CHECK: preheader: diff --git a/llvm/test/Transforms/LoopPredication/basic.ll b/llvm/test/Transforms/LoopPredication/basic.ll --- a/llvm/test/Transforms/LoopPredication/basic.ll +++ b/llvm/test/Transforms/LoopPredication/basic.ll @@ -1602,7 +1602,7 @@ ; CHECK-LABEL: @ne_latch_zext( ; CHECK-NEXT: loop.preheader: ; CHECK-NEXT: [[N:%.*]] = zext i16 [[N16:%.*]] to i32 -; CHECK-NEXT: [[NPLUS1:%.*]] = add nuw nsw i32 [[N]], 1 +; CHECK-NEXT: [[NPLUS1:%.*]] = add i32 [[N]], 1 ; CHECK-NEXT: [[TMP0:%.*]] = icmp ule i32 [[NPLUS1]], [[LENGTH:%.*]] ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 0, [[LENGTH]] ; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], [[TMP0]] diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll --- a/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll @@ -3067,13 +3067,12 @@ define void @unique_exit(i32 %N, i32 %M) { ; EPILOG-LABEL: @unique_exit( ; EPILOG-NEXT: preheader: -; EPILOG-NEXT: %M.shifted = shl nuw i32 %M, 3 -; EPILOG-NEXT: %0 = shl i32 %M, 3 -; EPILOG-NEXT: %umax = call i32 @llvm.umax.i32(i32 %0, i32 1) -; EPILOG-NEXT: %1 = add i32 %umax, -1 +; EPILOG-NEXT: %M.shifted = shl i32 %M, 3 +; EPILOG-NEXT: %umax = call i32 @llvm.umax.i32(i32 %M.shifted, i32 1) +; EPILOG-NEXT: %0 = add i32 %umax, -1 ; EPILOG-NEXT: %xtraiter = and i32 %umax, 7 -; EPILOG-NEXT: %2 = icmp ult i32 %1, 7 -; EPILOG-NEXT: br i1 %2, label %latchExit.unr-lcssa, label %preheader.new +; EPILOG-NEXT: %1 = icmp ult i32 %0, 7 +; EPILOG-NEXT: br i1 %1, label %latchExit.unr-lcssa, label %preheader.new ; EPILOG: preheader.new: ; EPILOG-NEXT: %unroll_iter = sub i32 %umax, %xtraiter ; EPILOG-NEXT: br label %header @@ -3159,12 +3158,12 @@ ; ; EPILOG-BLOCK-LABEL: @unique_exit( ; EPILOG-BLOCK-NEXT: preheader: -; EPILOG-BLOCK-NEXT: %0 = shl i32 %M, 3 -; EPILOG-BLOCK-NEXT: %umax = call i32 @llvm.umax.i32(i32 %0, i32 1) -; EPILOG-BLOCK-NEXT: %1 = add i32 %umax, -1 +; EPILOG-BLOCK-NEXT: %M.shifted = shl i32 %M, 3 +; EPILOG-BLOCK-NEXT: %umax = call i32 @llvm.umax.i32(i32 %M.shifted, i32 1) +; EPILOG-BLOCK-NEXT: %0 = add i32 %umax, -1 ; EPILOG-BLOCK-NEXT: %xtraiter = and i32 %umax, 1 -; EPILOG-BLOCK-NEXT: %2 = icmp ult i32 %1, 1 -; EPILOG-BLOCK-NEXT: br i1 %2, label %latchExit.unr-lcssa, label %preheader.new +; EPILOG-BLOCK-NEXT: %1 = icmp ult i32 %0, 1 +; EPILOG-BLOCK-NEXT: br i1 %1, label %latchExit.unr-lcssa, label %preheader.new ; EPILOG-BLOCK: preheader.new: ; EPILOG-BLOCK-NEXT: %unroll_iter = sub i32 %umax, %xtraiter ; EPILOG-BLOCK-NEXT: br label %header @@ -3212,10 +3211,9 @@ ; ; PROLOG-LABEL: @unique_exit( ; PROLOG-NEXT: preheader: -; PROLOG-NEXT: %M.shifted = shl nuw i32 %M, 3 -; PROLOG-NEXT: %0 = shl i32 %M, 3 -; PROLOG-NEXT: %umax = call i32 @llvm.umax.i32(i32 %0, i32 1) -; PROLOG-NEXT: %1 = add i32 %umax, -1 +; PROLOG-NEXT: %M.shifted = shl i32 %M, 3 +; PROLOG-NEXT: %umax = call i32 @llvm.umax.i32(i32 %M.shifted, i32 1) +; PROLOG-NEXT: %0 = add i32 %umax, -1 ; PROLOG-NEXT: %xtraiter = and i32 %umax, 7 ; PROLOG-NEXT: %lcmp.mod = icmp ne i32 %xtraiter, 0 ; PROLOG-NEXT: br i1 %lcmp.mod, label %header.prol.preheader, label %header.prol.loopexit @@ -3239,8 +3237,8 @@ ; PROLOG: header.prol.loopexit: ; PROLOG-NEXT: %i4.unr = phi i32 [ 0, %preheader ], [ %i4.unr.ph, %header.prol.loopexit.unr-lcssa ] ; PROLOG-NEXT: %i2.ph.unr = phi i32 [ undef, %preheader ], [ %i2.ph.unr.ph, %header.prol.loopexit.unr-lcssa ] -; PROLOG-NEXT: %2 = icmp ult i32 %1, 7 -; PROLOG-NEXT: br i1 %2, label %latchExit, label %preheader.new +; PROLOG-NEXT: %1 = icmp ult i32 %0, 7 +; PROLOG-NEXT: br i1 %1, label %latchExit, label %preheader.new ; PROLOG: preheader.new: ; PROLOG-NEXT: br label %header ; PROLOG: header: @@ -3294,10 +3292,9 @@ ; ; PROLOG-BLOCK-LABEL: @unique_exit( ; PROLOG-BLOCK-NEXT: preheader: -; PROLOG-BLOCK-NEXT: %M.shifted = shl nuw i32 %M, 3 -; PROLOG-BLOCK-NEXT: %0 = shl i32 %M, 3 -; PROLOG-BLOCK-NEXT: %umax = call i32 @llvm.umax.i32(i32 %0, i32 1) -; PROLOG-BLOCK-NEXT: %1 = add i32 %umax, -1 +; PROLOG-BLOCK-NEXT: %M.shifted = shl i32 %M, 3 +; PROLOG-BLOCK-NEXT: %umax = call i32 @llvm.umax.i32(i32 %M.shifted, i32 1) +; PROLOG-BLOCK-NEXT: %0 = add i32 %umax, -1 ; PROLOG-BLOCK-NEXT: %xtraiter = and i32 %umax, 1 ; PROLOG-BLOCK-NEXT: %lcmp.mod = icmp ne i32 %xtraiter, 0 ; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod, label %header.prol.preheader, label %header.prol.loopexit @@ -3311,8 +3308,8 @@ ; PROLOG-BLOCK: header.prol.loopexit: ; PROLOG-BLOCK-NEXT: %i4.unr = phi i32 [ 0, %preheader ], [ 1, %latch.prol ] ; PROLOG-BLOCK-NEXT: %i2.ph.unr = phi i32 [ undef, %preheader ], [ -1, %latch.prol ] -; PROLOG-BLOCK-NEXT: %2 = icmp ult i32 %1, 1 -; PROLOG-BLOCK-NEXT: br i1 %2, label %latchExit, label %preheader.new +; PROLOG-BLOCK-NEXT: %1 = icmp ult i32 %0, 1 +; PROLOG-BLOCK-NEXT: br i1 %1, label %latchExit, label %preheader.new ; PROLOG-BLOCK: preheader.new: ; PROLOG-BLOCK-NEXT: br label %header ; PROLOG-BLOCK: header: diff --git a/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll b/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll --- a/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll +++ b/llvm/test/Transforms/PhaseOrdering/loop-rotation-vs-common-code-hoisting.ll @@ -54,11 +54,11 @@ ; HOIST-NEXT: [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1 ; HOIST-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] ; HOIST: for.cond.preheader: -; HOIST-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 +; HOIST-NEXT: [[SUB:%.*]] = add nsw i32 [[WIDTH]], -1 ; HOIST-NEXT: br label [[FOR_COND:%.*]] ; HOIST: for.cond: ; HOIST-NEXT: [[I_0:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[FOR_COND_PREHEADER]] ] -; HOIST-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[I_0]], [[TMP0]] +; HOIST-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[I_0]], [[SUB]] ; HOIST-NEXT: tail call void @f0() ; HOIST-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] ; HOIST: for.cond.cleanup: @@ -76,21 +76,19 @@ ; ROTATED_LATER_OLDPM-NEXT: [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1 ; ROTATED_LATER_OLDPM-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] ; ROTATED_LATER_OLDPM: for.cond.preheader: +; ROTATED_LATER_OLDPM-NEXT: [[SUB:%.*]] = add nsw i32 [[WIDTH]], -1 ; ROTATED_LATER_OLDPM-NEXT: [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1 -; ROTATED_LATER_OLDPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] -; ROTATED_LATER_OLDPM: for.body.preheader: -; ROTATED_LATER_OLDPM-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 -; ROTATED_LATER_OLDPM-NEXT: br label [[FOR_BODY:%.*]] +; ROTATED_LATER_OLDPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]] ; ROTATED_LATER_OLDPM: for.cond.cleanup: ; ROTATED_LATER_OLDPM-NEXT: tail call void @f0() ; ROTATED_LATER_OLDPM-NEXT: tail call void @f2() ; ROTATED_LATER_OLDPM-NEXT: br label [[RETURN]] ; ROTATED_LATER_OLDPM: for.body: -; ROTATED_LATER_OLDPM-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; ROTATED_LATER_OLDPM-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_COND_PREHEADER]] ] ; ROTATED_LATER_OLDPM-NEXT: tail call void @f0() ; ROTATED_LATER_OLDPM-NEXT: tail call void @f1() ; ROTATED_LATER_OLDPM-NEXT: [[INC]] = add nuw nsw i32 [[I_04]], 1 -; ROTATED_LATER_OLDPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]] +; ROTATED_LATER_OLDPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[SUB]] ; ROTATED_LATER_OLDPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; ROTATED_LATER_OLDPM: return: ; ROTATED_LATER_OLDPM-NEXT: ret void @@ -100,21 +98,19 @@ ; ROTATED_LATER_NEWPM-NEXT: [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1 ; ROTATED_LATER_NEWPM-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] ; ROTATED_LATER_NEWPM: for.cond.preheader: +; ROTATED_LATER_NEWPM-NEXT: [[SUB:%.*]] = add nsw i32 [[WIDTH]], -1 ; ROTATED_LATER_NEWPM-NEXT: [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1 -; ROTATED_LATER_NEWPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] -; ROTATED_LATER_NEWPM: for.body.preheader: -; ROTATED_LATER_NEWPM-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 -; ROTATED_LATER_NEWPM-NEXT: br label [[FOR_BODY:%.*]] +; ROTATED_LATER_NEWPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]] ; ROTATED_LATER_NEWPM: for.cond.cleanup: ; ROTATED_LATER_NEWPM-NEXT: tail call void @f0() ; ROTATED_LATER_NEWPM-NEXT: tail call void @f2() ; ROTATED_LATER_NEWPM-NEXT: br label [[RETURN]] ; ROTATED_LATER_NEWPM: for.body: -; ROTATED_LATER_NEWPM-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; ROTATED_LATER_NEWPM-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_COND_PREHEADER]] ] ; ROTATED_LATER_NEWPM-NEXT: tail call void @f0() ; ROTATED_LATER_NEWPM-NEXT: tail call void @f1() ; ROTATED_LATER_NEWPM-NEXT: [[INC]] = add nuw nsw i32 [[I_04]], 1 -; ROTATED_LATER_NEWPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]] +; ROTATED_LATER_NEWPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[SUB]] ; ROTATED_LATER_NEWPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; ROTATED_LATER_NEWPM: return: ; ROTATED_LATER_NEWPM-NEXT: ret void @@ -124,21 +120,19 @@ ; ROTATE_OLDPM-NEXT: [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1 ; ROTATE_OLDPM-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] ; ROTATE_OLDPM: for.cond.preheader: +; ROTATE_OLDPM-NEXT: [[SUB:%.*]] = add nsw i32 [[WIDTH]], -1 ; ROTATE_OLDPM-NEXT: [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1 -; ROTATE_OLDPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] -; ROTATE_OLDPM: for.body.preheader: -; ROTATE_OLDPM-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 -; ROTATE_OLDPM-NEXT: br label [[FOR_BODY:%.*]] +; ROTATE_OLDPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]] ; ROTATE_OLDPM: for.cond.cleanup: ; ROTATE_OLDPM-NEXT: tail call void @f0() ; ROTATE_OLDPM-NEXT: tail call void @f2() ; ROTATE_OLDPM-NEXT: br label [[RETURN]] ; ROTATE_OLDPM: for.body: -; ROTATE_OLDPM-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; ROTATE_OLDPM-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_COND_PREHEADER]] ] ; ROTATE_OLDPM-NEXT: tail call void @f0() ; ROTATE_OLDPM-NEXT: tail call void @f1() ; ROTATE_OLDPM-NEXT: [[INC]] = add nuw nsw i32 [[I_04]], 1 -; ROTATE_OLDPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]] +; ROTATE_OLDPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[SUB]] ; ROTATE_OLDPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; ROTATE_OLDPM: return: ; ROTATE_OLDPM-NEXT: ret void @@ -148,21 +142,19 @@ ; ROTATE_NEWPM-NEXT: [[CMP:%.*]] = icmp slt i32 [[WIDTH:%.*]], 1 ; ROTATE_NEWPM-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[FOR_COND_PREHEADER:%.*]] ; ROTATE_NEWPM: for.cond.preheader: +; ROTATE_NEWPM-NEXT: [[SUB:%.*]] = add nsw i32 [[WIDTH]], -1 ; ROTATE_NEWPM-NEXT: [[CMP13_NOT:%.*]] = icmp eq i32 [[WIDTH]], 1 -; ROTATE_NEWPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] -; ROTATE_NEWPM: for.body.preheader: -; ROTATE_NEWPM-NEXT: [[TMP0:%.*]] = add nsw i32 [[WIDTH]], -1 -; ROTATE_NEWPM-NEXT: br label [[FOR_BODY:%.*]] +; ROTATE_NEWPM-NEXT: br i1 [[CMP13_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]] ; ROTATE_NEWPM: for.cond.cleanup: ; ROTATE_NEWPM-NEXT: tail call void @f0() ; ROTATE_NEWPM-NEXT: tail call void @f2() ; ROTATE_NEWPM-NEXT: br label [[RETURN]] ; ROTATE_NEWPM: for.body: -; ROTATE_NEWPM-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; ROTATE_NEWPM-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_COND_PREHEADER]] ] ; ROTATE_NEWPM-NEXT: tail call void @f0() ; ROTATE_NEWPM-NEXT: tail call void @f1() ; ROTATE_NEWPM-NEXT: [[INC]] = add nuw nsw i32 [[I_04]], 1 -; ROTATE_NEWPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[TMP0]] +; ROTATE_NEWPM-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[SUB]] ; ROTATE_NEWPM-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]] ; ROTATE_NEWPM: return: ; ROTATE_NEWPM-NEXT: ret void