Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -4234,16 +4234,20 @@
     }
   }
 
-  // See if we can trivially sink this instruction to its user if we can
-  // prove that the successor is not executed more frequently than our block.
-  // Return the UserBlock if successful.
+  // See if we can trivially sink this instruction to its user or into the
+  // nearest common dominator of its users if we can prove that the successor
+  // is not executed more frequently than our block. Return the block to sink
+  // into if successful.
   auto getOptionalSinkBlockForInst =
       [this](Instruction *I) -> Optional<BasicBlock *> {
     if (!EnableCodeSinking)
      return None;
 
    BasicBlock *BB = I->getParent();
-    BasicBlock *UserParent = nullptr;
+    // We want to sink the instruction to its users' nearest
+    // common dominator, so collect a set of the users' parent blocks to later
+    // calculate their NCD.
+    SmallPtrSet<BasicBlock *, 4> UsersParents;
     unsigned NumUsers = 0;
 
     for (auto *U : I->users()) {
@@ -4255,54 +4259,42 @@
       Instruction *UserInst = cast<Instruction>(U);
       // Special handling for Phi nodes - get the block the use occurs in.
       if (PHINode *PN = dyn_cast<PHINode>(UserInst)) {
-        for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
-          if (PN->getIncomingValue(i) == I) {
-            // Bail out if we have uses in different blocks. We don't do any
-            // sophisticated analysis (i.e finding NearestCommonDominator of
-            // these use blocks).
-            if (UserParent && UserParent != PN->getIncomingBlock(i))
-              return None;
-            UserParent = PN->getIncomingBlock(i);
-          }
-        }
-        assert(UserParent && "expected to find user block!");
-      } else {
-        if (UserParent && UserParent != UserInst->getParent())
-          return None;
-        UserParent = UserInst->getParent();
-      }
-
-      // Make sure these checks are done only once, naturally we do the checks
-      // the first time we get the userparent, this will save compile time.
-      if (NumUsers == 0) {
-        // Try sinking to another block. If that block is unreachable, then do
-        // not bother. SimplifyCFG should handle it.
-        if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
-          return None;
-
-        auto *Term = UserParent->getTerminator();
-        // See if the user is one of our successors that has only one
-        // predecessor, so that we don't have to split the critical edge.
-        // Another option where we can sink is a block that ends with a
-        // terminator that does not pass control to other block (such as
-        // return or unreachable or resume). In this case:
-        //   - I dominates the User (by SSA form);
-        //   - the User will be executed at most once.
-        // So sinking I down to User is always profitable or neutral.
-        if (UserParent->getUniquePredecessor() != BB && !succ_empty(Term))
-          return None;
-
-        assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
-      }
-
+        for (unsigned i = 0; i < PN->getNumIncomingValues(); i++)
+          if (PN->getIncomingValue(i) == I)
+            UsersParents.insert(PN->getIncomingBlock(i));
+      } else
+        UsersParents.insert(UserInst->getParent());
       NumUsers++;
     }
 
     // No user or only has droppable users.
-    if (!UserParent)
+    if (UsersParents.empty())
       return None;
 
-    return UserParent;
+    auto *UsersParentsNCD = *UsersParents.begin();
+    for (auto *UserParent : UsersParents) {
+      if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
+        return None;
+      UsersParentsNCD =
+          DT.findNearestCommonDominator(UsersParentsNCD, UserParent);
+    }
+
+    if (UsersParentsNCD == BB)
+      return None;
+
+    auto *Term = UsersParentsNCD->getTerminator();
+    // See if the block we want to sink to is one of our successors that has
+    // only one predecessor, so that we don't have to split the critical edge.
+    // Another option where we can sink is a block that ends with a
+    // terminator that does not pass control to another block (such as
+    // return or unreachable or resume). In this case:
+    //   - I dominates the User (by SSA form);
+    //   - the User will be executed at most once.
+    // So sinking I down to User is always profitable or neutral.
+    if (UsersParentsNCD->getUniquePredecessor() != BB && !succ_empty(Term))
+      return None;
+    assert(DT.dominates(BB, UsersParentsNCD) && "Dominance relation broken?");
+    return UsersParentsNCD;
   };
 
   auto OptBB = getOptionalSinkBlockForInst(I);
Index: llvm/test/Transforms/InstCombine/sink-into-ncd.ll
===================================================================
--- llvm/test/Transforms/InstCombine/sink-into-ncd.ll
+++ llvm/test/Transforms/InstCombine/sink-into-ncd.ll
@@ -10,18 +10,20 @@
 define i32 @test1(i8** %addr, i1 %c) {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[PTR:%.*]] = load i8*, i8** [[ADDR:%.*]], align 8
-; CHECK-NEXT:    br i1 false, label [[NULL:%.*]], label [[NOT_NULL:%.*]]
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i8** [[ADDR:%.*]], null
+; CHECK-NEXT:    br i1 [[COND]], label [[NULL:%.*]], label [[NOT_NULL:%.*]]
 ; CHECK:       null:
+; CHECK-NEXT:    [[X:%.*]] = call i32 @use(i8* null)
 ; CHECK-NEXT:    br label [[EXIT:%.*]]
 ; CHECK:       not.null:
+; CHECK-NEXT:    [[PTR:%.*]] = load i8*, i8** [[ADDR]], align 8
 ; CHECK-NEXT:    [[Y:%.*]] = call i32 @use(i8* [[PTR]])
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[EXIT]], label [[NOT_NULL_2:%.*]]
 ; CHECK:       not.null.2:
 ; CHECK-NEXT:    [[Z:%.*]] = call i32 @use(i8* [[PTR]])
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[P:%.*]] = phi i32 [ poison, [[NULL]] ], [ [[Y]], [[NOT_NULL]] ], [ [[Z]], [[NOT_NULL_2]] ]
+; CHECK-NEXT:    [[P:%.*]] = phi i32 [ [[X]], [[NULL]] ], [ [[Y]], [[NOT_NULL]] ], [ [[Z]], [[NOT_NULL_2]] ]
 ; CHECK-NEXT:    ret i32 [[P]]
 ;
 entry:
@@ -53,12 +55,12 @@
 ; CHECK-NEXT:    [[COND:%.*]] = icmp eq i8** [[ADDR:%.*]], null
 ; CHECK-NEXT:    br i1 [[COND]], label [[EXIT:%.*]], label [[LOAD_BB:%.*]]
 ; CHECK:       load.bb:
-; CHECK-NEXT:    [[PTR:%.*]] = load i8*, i8** [[ADDR]], align 8
 ; CHECK-NEXT:    br i1 [[C:%.*]], label [[LEFT:%.*]], label [[RIGHT:%.*]]
 ; CHECK:       left:
 ; CHECK-NEXT:    [[X:%.*]] = call i32 @use(i8* null)
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       right:
+; CHECK-NEXT:    [[PTR:%.*]] = load i8*, i8** [[ADDR]], align 8
 ; CHECK-NEXT:    [[Y:%.*]] = call i32 @use(i8* [[PTR]])
 ; CHECK-NEXT:    br i1 [[C]], label [[EXIT]], label [[RIGHT_2:%.*]]
 ; CHECK:       right.2:
Index: llvm/test/Transforms/LoopUnroll/ARM/upperbound.ll
===================================================================
--- llvm/test/Transforms/LoopUnroll/ARM/upperbound.ll
+++ llvm/test/Transforms/LoopUnroll/ARM/upperbound.ll
@@ -17,10 +17,10 @@
 ; CHECK-NEXT:    store i32 0, i32* [[X]], align 4
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[REM]], 1
 ; CHECK-NEXT:    br i1 [[CMP]], label [[WHILE_BODY_1:%.*]], label [[WHILE_END]]
 ; CHECK:       while.body.1:
+; CHECK-NEXT:    [[INCDEC_PTR:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[INCDEC_PTR]], align 4
 ; CHECK-NEXT:    [[CMP1_1:%.*]] = icmp slt i32 [[TMP1]], 10
 ; CHECK-NEXT:    br i1 [[CMP1_1]], label [[IF_THEN_1:%.*]], label [[IF_END_1:%.*]]
@@ -28,10 +28,10 @@
 ; CHECK-NEXT:    store i32 0, i32* [[INCDEC_PTR]], align 4
 ; CHECK-NEXT:    br label [[IF_END_1]]
 ; CHECK:       if.end.1:
-; CHECK-NEXT:    [[INCDEC_PTR_1:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
 ; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[REM]], 2
 ; CHECK-NEXT:    br i1 [[CMP_1]], label [[WHILE_BODY_2:%.*]], label [[WHILE_END]]
 ; CHECK:       while.body.2:
+; CHECK-NEXT:    [[INCDEC_PTR_1:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[INCDEC_PTR_1]], align 4
 ; CHECK-NEXT:    [[CMP1_2:%.*]] = icmp slt i32 [[TMP2]], 10
 ; CHECK-NEXT:    br i1 [[CMP1_2]], label [[IF_THEN_2:%.*]], label [[WHILE_END]]
Index: llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
===================================================================
--- llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
+++ llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
@@ -32,66 +32,66 @@
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[TMP0]], 42
 ; CHECK-NEXT:    br i1 [[CMP]], label [[OTHEREXIT_LOOPEXIT:%.*]], label [[LATCH:%.*]]
 ; CHECK:       latch:
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], [[SUM_02]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT:%.*]] = or i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_1:%.*]]
 ; CHECK:       for.exiting_block.1:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], [[SUM_02]]
 ; CHECK-NEXT:    [[CMP_1:%.*]] = icmp eq i64 [[TMP0]], 42
 ; CHECK-NEXT:    br i1 [[CMP_1]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_1:%.*]]
 ; CHECK:       latch.1:
-; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
-; CHECK-NEXT:    [[ADD_1:%.*]] = add nsw i32 [[TMP4]], [[ADD]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_1:%.*]] = or i64 [[INDVARS_IV]], 2
 ; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_2:%.*]]
 ; CHECK:       for.exiting_block.2:
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4
+; CHECK-NEXT:    [[ADD_1:%.*]] = add nsw i32 [[TMP4]], [[ADD]]
 ; CHECK-NEXT:    [[CMP_2:%.*]] = icmp eq i64 [[TMP0]], 42
 ; CHECK-NEXT:    br i1 [[CMP_2]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_2:%.*]]
 ; CHECK:       latch.2:
-; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_1]]
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
-; CHECK-NEXT:    [[ADD_2:%.*]] = add nsw i32 [[TMP5]], [[ADD_1]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_2:%.*]] = or i64 [[INDVARS_IV]], 3
 ; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_3:%.*]]
 ; CHECK:       for.exiting_block.3:
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_1]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4
+; CHECK-NEXT:    [[ADD_2:%.*]] = add nsw i32 [[TMP5]], [[ADD_1]]
 ; CHECK-NEXT:    [[CMP_3:%.*]] = icmp eq i64 [[TMP0]], 42
 ; CHECK-NEXT:    br i1 [[CMP_3]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_3:%.*]]
 ; CHECK:       latch.3:
-; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_2]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
-; CHECK-NEXT:    [[ADD_3:%.*]] = add nsw i32 [[TMP6]], [[ADD_2]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_3:%.*]] = or i64 [[INDVARS_IV]], 4
 ; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_4:%.*]]
 ; CHECK:       for.exiting_block.4:
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_2]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4
+; CHECK-NEXT:    [[ADD_3:%.*]] = add nsw i32 [[TMP6]], [[ADD_2]]
 ; CHECK-NEXT:    [[CMP_4:%.*]] = icmp eq i64 [[TMP0]], 42
 ; CHECK-NEXT:    br i1 [[CMP_4]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_4:%.*]]
 ; CHECK:       latch.4:
-; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_3]]
-; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4
-; CHECK-NEXT:    [[ADD_4:%.*]] = add nsw i32 [[TMP7]], [[ADD_3]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_4:%.*]] = or i64 [[INDVARS_IV]], 5
 ; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_5:%.*]]
 ; CHECK:       for.exiting_block.5:
+; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_3]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4
+; CHECK-NEXT:    [[ADD_4:%.*]] = add nsw i32 [[TMP7]], [[ADD_3]]
 ; CHECK-NEXT:    [[CMP_5:%.*]] = icmp eq i64 [[TMP0]], 42
 ; CHECK-NEXT:    br i1 [[CMP_5]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_5:%.*]]
 ; CHECK:       latch.5:
-; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_4]]
-; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4
-; CHECK-NEXT:    [[ADD_5:%.*]] = add nsw i32 [[TMP8]], [[ADD_4]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_5:%.*]] = or i64 [[INDVARS_IV]], 6
 ; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_6:%.*]]
 ; CHECK:       for.exiting_block.6:
+; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_4]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4
+; CHECK-NEXT:    [[ADD_5:%.*]] = add nsw i32 [[TMP8]], [[ADD_4]]
 ; CHECK-NEXT:    [[CMP_6:%.*]] = icmp eq i64 [[TMP0]], 42
 ; CHECK-NEXT:    br i1 [[CMP_6]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_6:%.*]]
 ; CHECK:       latch.6:
-; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_5]]
-; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4
-; CHECK-NEXT:    [[ADD_6:%.*]] = add nsw i32 [[TMP9]], [[ADD_5]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT_6:%.*]] = or i64 [[INDVARS_IV]], 7
 ; CHECK-NEXT:    br label [[FOR_EXITING_BLOCK_7:%.*]]
 ; CHECK:       for.exiting_block.7:
+; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_5]]
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4
+; CHECK-NEXT:    [[ADD_6:%.*]] = add nsw i32 [[TMP9]], [[ADD_5]]
 ; CHECK-NEXT:    [[CMP_7:%.*]] = icmp eq i64 [[TMP0]], 42
 ; CHECK-NEXT:    br i1 [[CMP_7]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_7]]
 ; CHECK:       latch.7:
Index: llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/intrinsiccost.ll
@@ -19,11 +19,11 @@
 ; CHECK-NEXT:    br i1 [[CMP_NOT6]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]]
 ; CHECK:       while.body.preheader:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[BLOCKSIZE]], -1
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
-; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 15
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934576
 ; CHECK-NEXT:    [[CAST_VTC:%.*]] = trunc i64 [[N_VEC]] to i32
 ; CHECK-NEXT:    [[IND_END:%.*]] = sub i32 [[BLOCKSIZE]], [[CAST_VTC]]
@@ -111,11 +111,11 @@
 ; CHECK-NEXT:    br i1 [[CMP_NOT6]], label [[WHILE_END:%.*]], label [[ITER_CHECK:%.*]]
 ; CHECK:       iter.check:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[BLOCKSIZE]], -1
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
-; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
 ; CHECK:       vector.main.loop.iter.check:
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 31
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
Index: llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll
+++ llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll
@@ -1357,11 +1357,11 @@
 ; CHECK-NEXT:    br i1 [[GUARD]], label [[FOR_BODY_PREHEADER:%.*]], label [[EXIT:%.*]]
 ; CHECK:       for.body.preheader:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[TMP0]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = add nuw i32 [[TMP1]], 1
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 6
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw i32 [[TMP1]], 1
 ; CHECK-NEXT:    [[N_VEC:%.*]] = and i32 [[TMP2]], -4
 ; CHECK-NEXT:    [[IND_END:%.*]] = shl i32 [[N_VEC]], 1
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
Index: llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll
+++ llvm/test/Transforms/LoopVectorize/X86/intrinsiccost.ll
@@ -20,11 +20,11 @@
 ; CHECK-NEXT:    br i1 [[CMP_NOT6]], label [[WHILE_END:%.*]], label [[ITER_CHECK:%.*]]
 ; CHECK:       iter.check:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[BLOCKSIZE]], -1
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
-; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
 ; CHECK:       vector.main.loop.iter.check:
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 63
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
@@ -163,11 +163,11 @@
 ; CHECK-NEXT:    br i1 [[CMP_NOT6]], label [[WHILE_END:%.*]], label [[ITER_CHECK:%.*]]
 ; CHECK:       iter.check:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[BLOCKSIZE]], -1
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
-; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 15
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
 ; CHECK:       vector.main.loop.iter.check:
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP0]], 127
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
Index: llvm/test/Transforms/LoopVectorize/induction.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/induction.ll
+++ llvm/test/Transforms/LoopVectorize/induction.ll
@@ -58,11 +58,11 @@
 ; IND-LABEL: @multi_int_induction(
 ; IND-NEXT:  for.body.lr.ph:
 ; IND-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
-; IND-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
-; IND-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; IND-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0
 ; IND-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; IND:       vector.ph:
+; IND-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; IND-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; IND-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590
 ; IND-NEXT:    [[CAST_VTC:%.*]] = trunc i64 [[N_VEC]] to i32
 ; IND-NEXT:    [[IND_END:%.*]] = add i32 [[CAST_VTC]], 190
@@ -100,11 +100,11 @@
 ; UNROLL-LABEL: @multi_int_induction(
 ; UNROLL-NEXT:  for.body.lr.ph:
 ; UNROLL-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
-; UNROLL-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
-; UNROLL-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3
 ; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; UNROLL:       vector.ph:
+; UNROLL-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; UNROLL-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588
 ; UNROLL-NEXT:    [[CAST_VTC:%.*]] = trunc i64 [[N_VEC]] to i32
 ; UNROLL-NEXT:    [[IND_END:%.*]] = add i32 [[CAST_VTC]], 190
@@ -197,11 +197,11 @@
 ; INTERLEAVE-LABEL: @multi_int_induction(
 ; INTERLEAVE-NEXT:  for.body.lr.ph:
 ; INTERLEAVE-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
-; INTERLEAVE-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
-; INTERLEAVE-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; INTERLEAVE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7
 ; INTERLEAVE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; INTERLEAVE:       vector.ph:
+; INTERLEAVE-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; INTERLEAVE-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; INTERLEAVE-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584
 ; INTERLEAVE-NEXT:    [[CAST_VTC:%.*]] = trunc i64 [[N_VEC]] to i32
 ; INTERLEAVE-NEXT:    [[IND_END:%.*]] = add i32 [[CAST_VTC]], 190
@@ -974,11 +974,11 @@
 ; IND-NEXT:  entry:
 ; IND-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 8)
 ; IND-NEXT:    [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
-; IND-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
-; IND-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; IND-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8
 ; IND-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; IND:       vector.ph:
+; IND-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
+; IND-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; IND-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 4611686018427387902
 ; IND-NEXT:    [[IND_END:%.*]] = shl i64 [[N_VEC]], 3
 ; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -1034,11 +1034,11 @@
 ; UNROLL-NEXT:  entry:
 ; UNROLL-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 8)
 ; UNROLL-NEXT:    [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
-; UNROLL-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
-; UNROLL-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 24
 ; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; UNROLL:       vector.ph:
+; UNROLL-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
+; UNROLL-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 4611686018427387900
 ; UNROLL-NEXT:    [[IND_END:%.*]] = shl i64 [[N_VEC]], 3
 ; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -2577,11 +2577,11 @@
 ; IND-LABEL: @iv_vector_and_scalar_users(
 ; IND-NEXT:  entry:
 ; IND-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
-; IND-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
-; IND-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; IND-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0
 ; IND-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; IND:       vector.ph:
+; IND-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; IND-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; IND-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590
 ; IND-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i64 0
 ; IND-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
@@ -2625,11 +2625,11 @@
 ; UNROLL-LABEL: @iv_vector_and_scalar_users(
 ; UNROLL-NEXT:  entry:
 ; UNROLL-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
-; UNROLL-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
-; UNROLL-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3
 ; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; UNROLL:       vector.ph:
+; UNROLL-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; UNROLL-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588
 ; UNROLL-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i64 0
 ; UNROLL-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
@@ -2749,11 +2749,11 @@
 ; INTERLEAVE-LABEL: @iv_vector_and_scalar_users(
 ; INTERLEAVE-NEXT:  entry:
 ; INTERLEAVE-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
-; INTERLEAVE-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
-; INTERLEAVE-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; INTERLEAVE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7
 ; INTERLEAVE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; INTERLEAVE:       vector.ph:
+; INTERLEAVE-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; INTERLEAVE-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; INTERLEAVE-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584
 ; INTERLEAVE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i64 0
 ; INTERLEAVE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
@@ -3327,12 +3327,12 @@
 ; IND-NEXT:    [[DOTPR_I:%.*]] = load i8, i8* @e, align 1
 ; IND-NEXT:    [[TMP0:%.*]] = load i32, i32* @d, align 4
 ; IND-NEXT:    [[C_PROMOTED_I:%.*]] = load i32, i32* @c, align 4
-; IND-NEXT:    [[TMP1:%.*]] = xor i8 [[DOTPR_I]], -1
-; IND-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
-; IND-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1
 ; IND-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp eq i8 [[DOTPR_I]], -1
 ; IND-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; IND:       vector.ph:
+; IND-NEXT:    [[TMP1:%.*]] = xor i8 [[DOTPR_I]], -1
+; IND-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
+; IND-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1
 ; IND-NEXT:    [[N_VEC:%.*]] = and i32 [[TMP3]], 510
 ; IND-NEXT:    [[CAST_VTC:%.*]] = trunc i32 [[N_VEC]] to i8
 ; IND-NEXT:    [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[CAST_VTC]]
@@ -3369,12 +3369,12 @@
 ; UNROLL-NEXT:    [[DOTPR_I:%.*]] = load i8, i8* @e, align 1
 ; UNROLL-NEXT:    [[TMP0:%.*]] = load i32, i32* @d, align 4
 ; UNROLL-NEXT:    [[C_PROMOTED_I:%.*]] = load i32, i32* @c, align 4
-; UNROLL-NEXT:    [[TMP1:%.*]] = xor i8 [[DOTPR_I]], -1
-; UNROLL-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
-; UNROLL-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1
 ; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ugt i8 [[DOTPR_I]], -4
 ; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; UNROLL:       vector.ph:
+; UNROLL-NEXT:    [[TMP1:%.*]] = xor i8 [[DOTPR_I]], -1
+; UNROLL-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
+; UNROLL-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1
 ; UNROLL-NEXT:    [[N_VEC:%.*]] = and i32 [[TMP3]], 508
 ; UNROLL-NEXT:    [[CAST_VTC:%.*]] = trunc i32 [[N_VEC]] to i8
 ; UNROLL-NEXT:    [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[CAST_VTC]]
@@ -3463,12 +3463,12 @@
 ; INTERLEAVE-NEXT:    [[DOTPR_I:%.*]] = load i8, i8* @e, align 1
 ; INTERLEAVE-NEXT:    [[TMP0:%.*]] = load i32, i32* @d, align 4
 ; INTERLEAVE-NEXT:    [[C_PROMOTED_I:%.*]] = load i32, i32* @c, align 4
-; INTERLEAVE-NEXT:    [[TMP1:%.*]] = xor i8 [[DOTPR_I]], -1
-; INTERLEAVE-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
-; INTERLEAVE-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1
 ; INTERLEAVE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ugt i8 [[DOTPR_I]], -8
 ; INTERLEAVE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; INTERLEAVE:       vector.ph:
+; INTERLEAVE-NEXT:    [[TMP1:%.*]] = xor i8 [[DOTPR_I]], -1
+; INTERLEAVE-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
+; INTERLEAVE-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1
 ; INTERLEAVE-NEXT:    [[N_VEC:%.*]] = and i32 [[TMP3]], 504
 ; INTERLEAVE-NEXT:    [[CAST_VTC:%.*]] = trunc i32 [[N_VEC]] to i8
 ; INTERLEAVE-NEXT:    [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[CAST_VTC]]
@@ -3599,10 +3599,10 @@
 ;
 ; IND-LABEL: @wrappingindvars1(
 ; IND-NEXT:  entry:
-; IND-NEXT:    [[EXT:%.*]] = zext i8 [[T:%.*]] to i32
-; IND-NEXT:    [[ECMP:%.*]] = icmp ult i8 [[T]], 42
+; IND-NEXT:    [[ECMP:%.*]] = icmp ult i8 [[T:%.*]], 42
 ; IND-NEXT:    br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
 ; IND:       loop.preheader:
+; IND-NEXT:    [[EXT:%.*]] = zext i8 [[T]] to i32
 ; IND-NEXT:    [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
 ; IND-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2
 ; IND-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
@@ -3666,10 +3666,10 @@
 ;
 ; UNROLL-LABEL: @wrappingindvars1(
 ; UNROLL-NEXT:  entry:
-; UNROLL-NEXT:    [[EXT:%.*]] = zext i8 [[T:%.*]] to i32
-; UNROLL-NEXT:    [[ECMP:%.*]] = icmp ult i8 [[T]], 42
+; UNROLL-NEXT:    [[ECMP:%.*]] = icmp ult i8 [[T:%.*]], 42
 ; UNROLL-NEXT:    br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
 ; UNROLL:       loop.preheader:
+; UNROLL-NEXT:    [[EXT:%.*]] = zext i8 [[T]] to i32
 ; UNROLL-NEXT:    [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
 ; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4
 ; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
@@ -3814,10 +3814,10 @@
 ;
 ; INTERLEAVE-LABEL: @wrappingindvars1(
 ; INTERLEAVE-NEXT:  entry:
-; INTERLEAVE-NEXT:    [[EXT:%.*]] = zext i8 [[T:%.*]] to i32
-; INTERLEAVE-NEXT:    [[ECMP:%.*]] = icmp ult i8 [[T]], 42
+; INTERLEAVE-NEXT:    [[ECMP:%.*]] = icmp ult i8 [[T:%.*]], 42
 ; INTERLEAVE-NEXT:    br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
 ; INTERLEAVE:       loop.preheader:
+; INTERLEAVE-NEXT:    [[EXT:%.*]] = zext i8 [[T]] to i32
 ; INTERLEAVE-NEXT:    [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
 ; INTERLEAVE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 8
 ; INTERLEAVE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
@@ -3990,11 +3990,11 @@
 ;
 ; IND-LABEL: @wrappingindvars2(
 ; IND-NEXT:  entry:
-; IND-NEXT:    [[EXT:%.*]] = zext i8 [[T:%.*]] to i32
-; IND-NEXT:    [[EXT_MUL:%.*]] = shl nuw nsw i32 [[EXT]], 2
-; IND-NEXT:    [[ECMP:%.*]] = icmp ult i8 [[T]], 42
+; IND-NEXT:    [[ECMP:%.*]] = icmp ult i8 [[T:%.*]], 42
 ; IND-NEXT:    br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
 ; IND:       loop.preheader:
+; IND-NEXT:    [[EXT:%.*]] = zext i8 [[T]] to i32
+; IND-NEXT:    [[EXT_MUL:%.*]] = shl nuw nsw i32 [[EXT]], 2
 ; IND-NEXT:    [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
 ; IND-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2
 ; IND-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
@@ -4060,11 +4060,11 @@
 ;
 ; UNROLL-LABEL: @wrappingindvars2(
 ; UNROLL-NEXT:  entry:
-; UNROLL-NEXT:    [[EXT:%.*]] = zext i8 [[T:%.*]] to i32
-; UNROLL-NEXT:    [[EXT_MUL:%.*]] = shl nuw nsw i32 [[EXT]], 2
-; UNROLL-NEXT:    [[ECMP:%.*]] = icmp ult i8 [[T]], 42
+; UNROLL-NEXT:    [[ECMP:%.*]] = icmp ult i8 [[T:%.*]], 42
 ; UNROLL-NEXT:    br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
 ; UNROLL:       loop.preheader:
+; UNROLL-NEXT:    [[EXT:%.*]] = zext i8 [[T]] to i32
+; UNROLL-NEXT:    [[EXT_MUL:%.*]] = shl nuw nsw i32 [[EXT]], 2
 ; UNROLL-NEXT:    [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
 ; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4
 ; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
@@ -4214,11 +4214,11 @@
 ;
 ; INTERLEAVE-LABEL: @wrappingindvars2(
 ; INTERLEAVE-NEXT:  entry:
-; INTERLEAVE-NEXT:    [[EXT:%.*]] = zext i8 [[T:%.*]] to i32
-; INTERLEAVE-NEXT:    [[EXT_MUL:%.*]] = shl nuw nsw i32 [[EXT]], 2
-; INTERLEAVE-NEXT:    [[ECMP:%.*]] = icmp ult i8 [[T]], 42
+; INTERLEAVE-NEXT:    [[ECMP:%.*]] = icmp ult i8 [[T:%.*]], 42
 ; INTERLEAVE-NEXT:    br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
 ; INTERLEAVE:       loop.preheader:
+; INTERLEAVE-NEXT:    [[EXT:%.*]] = zext i8 [[T]] to i32
+; INTERLEAVE-NEXT:    [[EXT_MUL:%.*]] = shl nuw nsw i32 [[EXT]], 2
 ; INTERLEAVE-NEXT:    [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
 ; INTERLEAVE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 8
 ; INTERLEAVE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
Index: llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
+++ llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
@@ -1301,11 +1301,11 @@
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 2)
 ; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 6
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775804
 ; CHECK-NEXT:    [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1
 ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i64 0
@@ -1391,11 +1391,11 @@
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 5)
 ; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i64 [[SMAX]], -4
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 6
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775804
 ; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw i64 [[N_VEC]], 1
 ; CHECK-NEXT:    [[IND_END:%.*]] = or i64 [[TMP3]], 3
Index: llvm/test/Transforms/LoopVectorize/loop-scalars.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/loop-scalars.ll
+++ llvm/test/Transforms/LoopVectorize/loop-scalars.ll
@@ -65,11 +65,11 @@
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 2)
 ; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775806
 ; CHECK-NEXT:    [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -125,11 +125,11 @@
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 2)
 ; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
 ; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775806
 ; CHECK-NEXT:    [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
Index: llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll
===================================================================
--- llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll
+++ llvm/test/Transforms/LowerMatrixIntrinsics/multiply-fused-loops.ll
@@ -22,36 +22,36 @@
 ; CHECK-NEXT:    br label [[INNER_HEADER:%.*]]
 ; CHECK:       inner.header:
 ; CHECK-NEXT:    [[INNER_IV:%.*]] = phi i64 [ 0, [[ROWS_BODY]] ], [ [[INNER_STEP:%.*]], [[INNER_LATCH:%.*]] ]
-; CHECK-NEXT:    [[RESULT_VEC_0:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP7:%.*]], [[INNER_LATCH]] ]
-; CHECK-NEXT:    [[RESULT_VEC_1:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP9:%.*]], [[INNER_LATCH]] ]
+; CHECK-NEXT:    [[RESULT_VEC_0:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP9:%.*]], [[INNER_LATCH]] ]
+; CHECK-NEXT:    [[RESULT_VEC_1:%.*]] = phi <2 x double> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP7:%.*]], [[INNER_LATCH]] ]
 ; CHECK-NEXT:    br label [[INNER_BODY:%.*]]
 ; CHECK:       inner.body:
+; CHECK-NEXT:    br label [[INNER_LATCH]]
+; CHECK:       inner.latch:
 ; CHECK-NEXT:    [[TMP0:%.*]] = shl i64 [[INNER_IV]], 2
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], [[ROWS_IV]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <16 x double>, <16 x double>* [[A:%.*]], i64 0, i64 [[TMP1]]
-; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast double* [[TMP2]] to <2 x double>*
-; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST]], align 8
 ; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr double, double* [[TMP2]], i64 4
 ; CHECK-NEXT:    [[VEC_CAST1:%.*]] = bitcast double* [[VEC_GEP]] to <2 x double>*
 ; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST1]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[COLS_IV]], 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP3]], [[INNER_IV]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr <16 x double>, <16 x double>* [[B:%.*]], i64 0, i64 [[TMP4]]
-; CHECK-NEXT:    [[VEC_CAST4:%.*]] = bitcast double* [[TMP5]] to <2 x double>*
-; CHECK-NEXT:    [[COL_LOAD5:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST4]], align 8
 ; CHECK-NEXT:    [[VEC_GEP6:%.*]] = getelementptr double, double* [[TMP5]], i64 4
 ; CHECK-NEXT:    [[VEC_CAST7:%.*]] = bitcast double* [[VEC_GEP6]] to <2 x double>*
 ; CHECK-NEXT:    [[COL_LOAD8:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST7]], align 8
-; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[COL_LOAD5]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP6:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[RESULT_VEC_0]])
-; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[COL_LOAD5]], <2 x double> undef, <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP7]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[TMP6]])
-; CHECK-NEXT:    [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[COL_LOAD8]], <2 x double> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP8:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[RESULT_VEC_1]])
 ; CHECK-NEXT:    [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x double> [[COL_LOAD8]], <2 x double> undef, <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP9]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP8]])
-; CHECK-NEXT:    br label [[INNER_LATCH]]
-; CHECK:       inner.latch:
+; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast double* [[TMP2]] to <2 x double>*
+; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST]], align 8
+; CHECK-NEXT:    [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x double> [[COL_LOAD8]], <2 x double> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD]], <2 x double> [[SPLAT_SPLAT16]], <2 x double> [[RESULT_VEC_1]])
+; CHECK-NEXT:    [[TMP7]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT19]], <2 x double> [[TMP6]])
+; CHECK-NEXT:    [[VEC_CAST4:%.*]] = bitcast double* [[TMP5]] to <2 x double>*
+; CHECK-NEXT:    [[COL_LOAD5:%.*]] = load <2 x double>, <2 x double>* [[VEC_CAST4]], align 8
+; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x double> [[COL_LOAD5]], <2 x double> undef, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <2 x double> [[COL_LOAD5]], <2 x double> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP8:%.*]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD]], <2 x double> [[SPLAT_SPLAT]], <2 x double> [[RESULT_VEC_0]])
+; CHECK-NEXT:    [[TMP9]] = call contract <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[COL_LOAD2]], <2 x double> [[SPLAT_SPLAT12]], <2 x double> [[TMP8]])
 ; CHECK-NEXT:    [[INNER_STEP]] = add i64 [[INNER_IV]], 2
 ; CHECK-NEXT:    [[INNER_COND_NOT:%.*]] = icmp eq i64 [[INNER_STEP]], 4
 ; CHECK-NEXT:    br i1 [[INNER_COND_NOT]], label [[ROWS_LATCH]], label [[INNER_HEADER]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -62,10 +62,10 @@
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[TMP10]], [[ROWS_IV]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr <16 x double>, <16 x double>* [[C:%.*]], i64 0, i64 [[TMP11]]
 ; CHECK-NEXT:    [[VEC_CAST21:%.*]] = bitcast double* [[TMP12]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP7]], <2 x double>* [[VEC_CAST21]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP9]], <2 x double>* [[VEC_CAST21]], align 8
 ; CHECK-NEXT:    [[VEC_GEP22:%.*]] = getelementptr double, double* [[TMP12]], i64 4
 ; CHECK-NEXT:    [[VEC_CAST23:%.*]] = bitcast double* [[VEC_GEP22]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP9]], <2 x double>* [[VEC_CAST23]], align 8
+; CHECK-NEXT:    store <2 x double> [[TMP7]], <2 x double>* [[VEC_CAST23]], align 8
 ; CHECK-NEXT:    br i1 [[ROWS_COND_NOT]], label [[COLS_LATCH]], label [[ROWS_HEADER]]
 ; CHECK:       cols.latch:
 ; CHECK-NEXT:    [[COLS_STEP]] = add i64 [[COLS_IV]], 2
@@ -104,40 +104,40 @@
 ; CHECK-NEXT:    br label [[INNER_HEADER:%.*]]
 ; CHECK:       inner.header:
 ; CHECK-NEXT:    [[INNER_IV:%.*]] = phi i64 [ 0, [[ROWS_BODY]] ], [ [[INNER_STEP:%.*]], [[INNER_LATCH:%.*]] ]
-; CHECK-NEXT:    [[RESULT_VEC_0:%.*]] = phi <2 x i64> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP9:%.*]], [[INNER_LATCH]] ]
-; CHECK-NEXT:    [[RESULT_VEC_1:%.*]] = phi <2 x i64> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP13:%.*]], [[INNER_LATCH]] ]
+; CHECK-NEXT:    [[RESULT_VEC_0:%.*]] = phi <2 x i64> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP13:%.*]], [[INNER_LATCH]] ]
+; CHECK-NEXT:    [[RESULT_VEC_1:%.*]] = phi <2 x i64> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP9:%.*]], [[INNER_LATCH]] ]
 ; CHECK-NEXT:    br label [[INNER_BODY:%.*]]
 ; CHECK:       inner.body:
+; CHECK-NEXT:    br label [[INNER_LATCH]]
+; CHECK:       inner.latch:
 ; CHECK-NEXT:    [[TMP0:%.*]] = shl i64 [[INNER_IV]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], [[ROWS_IV]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <8 x i64>, <8 x i64>* [[A:%.*]], i64 0, i64 [[TMP1]]
 ; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast i64* [[TMP2]] to <2 x i64>*
 ; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST]], align 8
-; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr i64, i64* [[TMP2]], i64 2
-; CHECK-NEXT:    [[VEC_CAST1:%.*]] = bitcast i64* [[VEC_GEP]] to <2 x i64>*
-; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST1]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[COLS_IV]], 2
 ; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP3]], [[INNER_IV]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr <8 x i64>, <8 x i64>* [[B:%.*]], i64 0, i64 [[TMP4]]
-; CHECK-NEXT:    [[VEC_CAST4:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>*
-; CHECK-NEXT:    [[COL_LOAD5:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST4]], align 8
 ; CHECK-NEXT:    [[VEC_GEP6:%.*]] = getelementptr i64, i64* [[TMP5]], i64 4
 ; CHECK-NEXT:    [[VEC_CAST7:%.*]] = bitcast i64* [[VEC_GEP6]] to <2 x i64>*
 ; CHECK-NEXT:    [[COL_LOAD8:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST7]], align 8
-; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <2 x i64> [[COL_LOAD5]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP6:%.*]] = mul <2 x i64> [[COL_LOAD]], [[SPLAT_SPLAT]]
-; CHECK-NEXT:    [[TMP7:%.*]] = add <2 x i64> [[RESULT_VEC_0]], [[TMP6]]
-; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x i64> [[COL_LOAD5]], <2 x i64> undef, <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP8:%.*]] = mul <2 x i64> [[COL_LOAD2]], [[SPLAT_SPLAT12]]
-; CHECK-NEXT:    [[TMP9]] = add <2 x i64> [[TMP7]], [[TMP8]]
 ; CHECK-NEXT:    [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x i64> [[COL_LOAD8]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP10:%.*]] = mul <2 x i64> [[COL_LOAD]], [[SPLAT_SPLAT16]]
-; CHECK-NEXT:    [[TMP11:%.*]] = add <2 x i64> [[RESULT_VEC_1]], [[TMP10]]
+; CHECK-NEXT:    [[TMP6:%.*]] = mul <2 x i64> [[COL_LOAD]], [[SPLAT_SPLAT16]]
+; CHECK-NEXT:    [[TMP7:%.*]] = add <2 x i64> [[RESULT_VEC_1]], [[TMP6]]
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr i64, i64* [[TMP2]], i64 2
+; CHECK-NEXT:    [[VEC_CAST1:%.*]] = bitcast i64* [[VEC_GEP]] to <2 x i64>*
+; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST1]], align 8
 ; CHECK-NEXT:    [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x i64> [[COL_LOAD8]], <2 x i64> undef, <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP12:%.*]] = mul <2 x i64> [[COL_LOAD2]], [[SPLAT_SPLAT19]]
+; CHECK-NEXT:    [[TMP8:%.*]] = mul <2 x i64> [[COL_LOAD2]], [[SPLAT_SPLAT19]]
+; CHECK-NEXT:    [[TMP9]] = add <2 x i64> [[TMP7]], [[TMP8]]
+; CHECK-NEXT:    [[VEC_CAST4:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>*
+; CHECK-NEXT:    [[COL_LOAD5:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST4]], align 8
+; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <2 x i64> [[COL_LOAD5]], <2 x i64> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP10:%.*]] = mul <2 x i64> [[COL_LOAD]], [[SPLAT_SPLAT]]
+; CHECK-NEXT:    [[TMP11:%.*]] = add <2 x i64> [[RESULT_VEC_0]], [[TMP10]]
+; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x i64> [[COL_LOAD5]], <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP12:%.*]] = mul <2 x i64> [[COL_LOAD2]], [[SPLAT_SPLAT12]]
 ; CHECK-NEXT:    [[TMP13]] = add <2 x i64> [[TMP11]], [[TMP12]]
-; CHECK-NEXT:    br label [[INNER_LATCH]]
-; CHECK:       inner.latch:
 ; CHECK-NEXT:    [[INNER_STEP]] = add i64 [[INNER_IV]], 2
 ; CHECK-NEXT:    [[INNER_COND_NOT:%.*]] = icmp eq i64 [[INNER_STEP]], 4
 ; CHECK-NEXT:    br i1 [[INNER_COND_NOT]], label [[ROWS_LATCH]], label [[INNER_HEADER]], !llvm.loop [[LOOP2:![0-9]+]]
@@ -148,10 +148,10 @@
 ; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[TMP14]], [[ROWS_IV]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr <4 x i64>, <4 x i64>* [[C:%.*]], i64 0, i64 [[TMP15]]
 ; CHECK-NEXT:    [[VEC_CAST21:%.*]] = bitcast i64* [[TMP16]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* [[VEC_CAST21]], align 8
+; CHECK-NEXT:    store <2 x i64> [[TMP13]], <2 x i64>* [[VEC_CAST21]], align 8
 ; CHECK-NEXT:    [[VEC_GEP22:%.*]] = getelementptr i64, i64* [[TMP16]], i64 2
 ; CHECK-NEXT:    [[VEC_CAST23:%.*]] = bitcast i64* [[VEC_GEP22]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP13]], <2 x i64>* [[VEC_CAST23]], align 8
+; CHECK-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* [[VEC_CAST23]], align 8
 ; CHECK-NEXT:    br i1 [[ROWS_COND_NOT]], label [[COLS_LATCH]], label [[ROWS_HEADER]]
 ; CHECK:       cols.latch:
 ; CHECK-NEXT:    [[COLS_STEP]] = add i64 [[COLS_IV]], 2
@@ -196,40 +196,40 @@
 ; CHECK-NEXT:    br label [[INNER_HEADER:%.*]]
 ; CHECK:       inner.header:
 ; CHECK-NEXT:    [[INNER_IV:%.*]] = phi i64 [ 0, [[ROWS_BODY]] ], [ [[INNER_STEP:%.*]], [[INNER_LATCH:%.*]] ]
-; CHECK-NEXT:    [[RESULT_VEC_0:%.*]] = phi <2 x i64> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP9:%.*]], [[INNER_LATCH]] ]
-; CHECK-NEXT:    [[RESULT_VEC_1:%.*]] = phi <2 x i64> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP13:%.*]], [[INNER_LATCH]] ]
+; CHECK-NEXT:    [[RESULT_VEC_0:%.*]] = phi <2 x i64> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP13:%.*]], [[INNER_LATCH]] ]
+; CHECK-NEXT:    [[RESULT_VEC_1:%.*]] = phi <2 x i64> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP9:%.*]], [[INNER_LATCH]] ]
 ; CHECK-NEXT:    br label [[INNER_BODY:%.*]]
 ; CHECK:       inner.body:
+; CHECK-NEXT:    br label [[INNER_LATCH]]
+; CHECK:       inner.latch:
 ; CHECK-NEXT:    [[TMP0:%.*]] = shl i64 [[INNER_IV]], 2
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[TMP0]], [[ROWS_IV]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr <8 x i64>, <8 x i64>* [[A:%.*]], i64 0, i64 [[TMP1]]
 ; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast i64* [[TMP2]] to <2 x i64>*
 ; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST]], align 8
-; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr i64, i64* [[TMP2]], i64 4
-; CHECK-NEXT:    [[VEC_CAST1:%.*]] = bitcast i64* [[VEC_GEP]] to <2 x i64>*
-; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST1]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[COLS_IV]], 1
 ; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP3]], [[INNER_IV]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr <16 x i64>, <16 x i64>* [[B:%.*]], i64 0, i64 [[TMP4]]
-; CHECK-NEXT:    [[VEC_CAST4:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>*
-; CHECK-NEXT:    [[COL_LOAD5:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST4]], align 8
 ; CHECK-NEXT:    [[VEC_GEP6:%.*]] = getelementptr i64, i64* [[TMP5]], i64 2
 ; CHECK-NEXT:    [[VEC_CAST7:%.*]] = bitcast i64* [[VEC_GEP6]] to <2 x i64>*
 ; CHECK-NEXT:    [[COL_LOAD8:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST7]], align 8
-; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <2 x i64> [[COL_LOAD5]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP6:%.*]] = mul <2 x i64> [[COL_LOAD]], [[SPLAT_SPLAT]]
-; CHECK-NEXT:    [[TMP7:%.*]] = add <2 x i64> [[RESULT_VEC_0]], [[TMP6]]
-; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x i64> [[COL_LOAD5]], <2 x i64> undef, <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP8:%.*]] = mul <2 x i64> [[COL_LOAD2]], [[SPLAT_SPLAT12]]
-; CHECK-NEXT:    [[TMP9]] = add <2 x i64> [[TMP7]], [[TMP8]]
 ; CHECK-NEXT:    [[SPLAT_SPLAT16:%.*]] = shufflevector <2 x i64> [[COL_LOAD8]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP10:%.*]] = mul <2 x i64> [[COL_LOAD]], [[SPLAT_SPLAT16]]
-; CHECK-NEXT:    [[TMP11:%.*]] = add <2 x i64> [[RESULT_VEC_1]], [[TMP10]]
+; CHECK-NEXT:    [[TMP6:%.*]] = mul <2 x i64> [[COL_LOAD]], [[SPLAT_SPLAT16]]
+; CHECK-NEXT:    [[TMP7:%.*]] = add <2 x i64> [[RESULT_VEC_1]], [[TMP6]]
+; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr i64, i64* [[TMP2]], i64 4
+; CHECK-NEXT:    [[VEC_CAST1:%.*]] = bitcast i64* [[VEC_GEP]] to <2 x i64>*
+; CHECK-NEXT:    [[COL_LOAD2:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST1]], align 8
 ; CHECK-NEXT:    [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x i64> [[COL_LOAD8]], <2 x i64> undef, <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP12:%.*]] = mul <2 x i64> [[COL_LOAD2]], [[SPLAT_SPLAT19]]
+; CHECK-NEXT:    [[TMP8:%.*]] = mul <2 x i64> [[COL_LOAD2]], [[SPLAT_SPLAT19]]
+; CHECK-NEXT:    [[TMP9]] = add <2 x i64> [[TMP7]], [[TMP8]]
+; CHECK-NEXT:    [[VEC_CAST4:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>*
+; CHECK-NEXT:    [[COL_LOAD5:%.*]] = load <2 x i64>, <2 x i64>* [[VEC_CAST4]], align 8
+; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <2 x i64> [[COL_LOAD5]], <2 x i64> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP10:%.*]] = mul <2 x i64> [[COL_LOAD]], [[SPLAT_SPLAT]]
+; CHECK-NEXT:    [[TMP11:%.*]] = add <2 x i64> [[RESULT_VEC_0]], [[TMP10]]
+; CHECK-NEXT:    [[SPLAT_SPLAT12:%.*]] = shufflevector <2 x i64> [[COL_LOAD5]], <2 x i64> undef, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP12:%.*]] = mul <2 x i64> [[COL_LOAD2]], [[SPLAT_SPLAT12]]
 ; CHECK-NEXT:    [[TMP13]] = add <2 x i64> [[TMP11]], [[TMP12]]
-; CHECK-NEXT:    br label [[INNER_LATCH]]
-; CHECK:       inner.latch:
 ; CHECK-NEXT:    [[INNER_STEP]] = add i64 [[INNER_IV]], 2
 ; CHECK-NEXT:    [[INNER_COND_NOT:%.*]] = icmp eq i64 [[INNER_IV]], 0
 ; CHECK-NEXT:    br i1 [[INNER_COND_NOT]], label [[ROWS_LATCH]], label [[INNER_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
@@ -240,10 +240,10 @@
 ; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[TMP14]], [[ROWS_IV]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr <32 x i64>, <32 x i64>* [[C:%.*]], i64 0, i64 [[TMP15]]
 ; CHECK-NEXT:    [[VEC_CAST21:%.*]] = bitcast i64* [[TMP16]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* [[VEC_CAST21]], align 8
+; CHECK-NEXT:    store <2 x i64> [[TMP13]], <2 x i64>* [[VEC_CAST21]], align 8
 ; CHECK-NEXT:    [[VEC_GEP22:%.*]] = getelementptr i64, i64* [[TMP16]], i64 4
 ; CHECK-NEXT:    [[VEC_CAST23:%.*]] = bitcast i64* [[VEC_GEP22]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP13]], <2 x i64>* [[VEC_CAST23]], align 8
+; CHECK-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* [[VEC_CAST23]], align 8
 ; CHECK-NEXT:    br i1 [[ROWS_COND_NOT]], label [[COLS_LATCH]], label [[ROWS_HEADER]]
 ; CHECK:       cols.latch:
 ; CHECK-NEXT:    [[COLS_STEP]] = add i64 [[COLS_IV]], 2
@@ -323,36 +323,36 @@
 ; CHECK-NEXT:    br label [[INNER_HEADER:%.*]]
 ; CHECK:       inner.header:
 ; CHECK-NEXT:    [[INNER_IV:%.*]] = phi i64 [ 0, [[ROWS_BODY]] ], [ [[INNER_STEP:%.*]], [[INNER_LATCH:%.*]] ]
-; CHECK-NEXT:    [[RESULT_VEC_0:%.*]] = phi <2 x float> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP19:%.*]], [[INNER_LATCH]] ]
-; CHECK-NEXT:    [[RESULT_VEC_1:%.*]] = phi <2 x float> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP21:%.*]], [[INNER_LATCH]] ]
+; CHECK-NEXT:    [[RESULT_VEC_0:%.*]] = phi <2 x float> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP21:%.*]], [[INNER_LATCH]] ]
+; CHECK-NEXT:    [[RESULT_VEC_1:%.*]] = phi <2 x float> [ zeroinitializer, [[ROWS_BODY]] ], [ [[TMP19:%.*]], [[INNER_LATCH]] ]
 ; CHECK-NEXT:    br label [[INNER_BODY:%.*]]
 ; CHECK:       inner.body:
+; CHECK-NEXT:    br label [[INNER_LATCH]]
+; CHECK:       inner.latch:
 ; CHECK-NEXT:    [[TMP12:%.*]] = shl i64 [[INNER_IV]], 1
 ; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[TMP12]], [[ROWS_IV]]
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr <4 x float>, <4 x float>* [[TMP5]], i64 0, i64 [[TMP13]]
-; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast float* [[TMP14]] to <2 x float>*
-; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x float>, <2 x float>* [[VEC_CAST]], align 4
 ; CHECK-NEXT:    [[VEC_GEP:%.*]] = getelementptr float, float* [[TMP14]], i64 2
 ; CHECK-NEXT:    [[VEC_CAST8:%.*]] = bitcast float* [[VEC_GEP]] to <2 x float>*
 ; CHECK-NEXT:    [[COL_LOAD9:%.*]] = load <2 x float>, <2 x float>* [[VEC_CAST8]], align 4
 ; CHECK-NEXT:    [[TMP15:%.*]] = shl i64 [[COLS_IV]], 1
 ; CHECK-NEXT:    [[TMP16:%.*]] = add i64 [[TMP15]], [[INNER_IV]]
 ; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr <4 x float>, <4 x float>* [[TMP11]], i64 0, i64 [[TMP16]]
-; CHECK-NEXT:    [[VEC_CAST11:%.*]] = bitcast float* [[TMP17]] to <2 x float>*
-; CHECK-NEXT:    [[COL_LOAD12:%.*]] = load <2 x float>, <2 x float>* [[VEC_CAST11]], align 4
 ; CHECK-NEXT:    [[VEC_GEP13:%.*]] = getelementptr float, float* [[TMP17]], i64 2
 ; CHECK-NEXT:    [[VEC_CAST14:%.*]] = bitcast float* [[VEC_GEP13]] to <2 x float>*
 ; CHECK-NEXT:    [[COL_LOAD15:%.*]] = load <2 x float>, <2 x float>* [[VEC_CAST14]], align 4
-; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <2 x float> [[COL_LOAD12]], <2 x float> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP18:%.*]] = call contract <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD]], <2 x float> [[SPLAT_SPLAT]], <2 x float> [[RESULT_VEC_0]])
-; CHECK-NEXT:    [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x float> [[COL_LOAD12]], <2 x float> undef, <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP19]] = call contract <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD9]], <2 x float> [[SPLAT_SPLAT19]], <2 x float> [[TMP18]])
-; CHECK-NEXT:    [[SPLAT_SPLAT23:%.*]] = shufflevector <2 x float> [[COL_LOAD15]], <2 x float> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP20:%.*]] = call contract <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD]], <2 x float> [[SPLAT_SPLAT23]], <2 x float> [[RESULT_VEC_1]])
 ; CHECK-NEXT:    [[SPLAT_SPLAT26:%.*]] = shufflevector <2 x float> [[COL_LOAD15]], <2 x float> undef, <2 x i32> <i32 1, i32 1>
-; CHECK-NEXT:    [[TMP21]] = call contract <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD9]], <2 x float> [[SPLAT_SPLAT26]], <2 x float> [[TMP20]])
-; CHECK-NEXT:    br label [[INNER_LATCH]]
-; CHECK:       inner.latch:
+; CHECK-NEXT:    [[VEC_CAST:%.*]] = bitcast float* [[TMP14]] to <2 x float>*
+; CHECK-NEXT:    [[COL_LOAD:%.*]] = load <2 x float>, <2 x float>* [[VEC_CAST]], align 4
+; CHECK-NEXT:    [[SPLAT_SPLAT23:%.*]] = shufflevector <2 x float> [[COL_LOAD15]], <2 x float> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP18:%.*]] = call contract <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD]], <2 x float> [[SPLAT_SPLAT23]], <2 x float> [[RESULT_VEC_1]])
+; CHECK-NEXT:    [[TMP19]] = call contract <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD9]], <2 x float> [[SPLAT_SPLAT26]], <2 x float> [[TMP18]])
+; CHECK-NEXT:    [[VEC_CAST11:%.*]] = bitcast float* [[TMP17]] to <2 x float>*
+; CHECK-NEXT:    [[COL_LOAD12:%.*]] = load <2 x float>, <2 x float>* [[VEC_CAST11]], align 4
+; CHECK-NEXT:    [[SPLAT_SPLAT19:%.*]] = shufflevector <2 x float> [[COL_LOAD12]], <2 x float> undef, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <2 x float> [[COL_LOAD12]], <2 x float> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP20:%.*]] = call contract <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD]], <2 x float> [[SPLAT_SPLAT]], <2 x float> [[RESULT_VEC_0]])
+; CHECK-NEXT:    [[TMP21]] = call contract <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[COL_LOAD9]], <2 x float> [[SPLAT_SPLAT19]], <2 x float> [[TMP20]])
 ; CHECK-NEXT:    [[INNER_STEP]] = add i64 [[INNER_IV]], 2
 ; CHECK-NEXT:    [[INNER_COND_NOT:%.*]] = icmp eq i64 [[INNER_IV]], 0
[[INNER_COND_NOT:%.*]] = icmp eq i64 [[INNER_IV]], 0 ; CHECK-NEXT: br i1 [[INNER_COND_NOT]], label [[ROWS_LATCH]], label [[INNER_HEADER]], !llvm.loop [[LOOP5:![0-9]+]] @@ -363,10 +363,10 @@ ; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[TMP22]], [[ROWS_IV]] ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr <4 x float>, <4 x float>* [[C]], i64 0, i64 [[TMP23]] ; CHECK-NEXT: [[VEC_CAST28:%.*]] = bitcast float* [[TMP24]] to <2 x float>* -; CHECK-NEXT: store <2 x float> [[TMP19]], <2 x float>* [[VEC_CAST28]], align 8 +; CHECK-NEXT: store <2 x float> [[TMP21]], <2 x float>* [[VEC_CAST28]], align 8 ; CHECK-NEXT: [[VEC_GEP29:%.*]] = getelementptr float, float* [[TMP24]], i64 2 ; CHECK-NEXT: [[VEC_CAST30:%.*]] = bitcast float* [[VEC_GEP29]] to <2 x float>* -; CHECK-NEXT: store <2 x float> [[TMP21]], <2 x float>* [[VEC_CAST30]], align 8 +; CHECK-NEXT: store <2 x float> [[TMP19]], <2 x float>* [[VEC_CAST30]], align 8 ; CHECK-NEXT: br i1 [[ROWS_COND_NOT]], label [[COLS_LATCH]], label [[ROWS_HEADER]] ; CHECK: cols.latch: ; CHECK-NEXT: [[COLS_STEP]] = add i64 [[COLS_IV]], 2 Index: llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll =================================================================== --- llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll +++ llvm/test/Transforms/PhaseOrdering/AArch64/peel-multiple-unreachable-exits-for-vectorization.ll @@ -12,24 +12,24 @@ ; CHECK-NEXT: at_with_int_conversion.exit12.peel: ; CHECK-NEXT: [[GEP_START_I:%.*]] = getelementptr [[VEC:%.*]], %vec* [[A:%.*]], i64 0, i32 0 ; CHECK-NEXT: [[START_I:%.*]] = load i64*, i64** [[GEP_START_I]], align 8 -; CHECK-NEXT: [[GEP_END_I:%.*]] = getelementptr [[VEC]], %vec* [[A]], i64 0, i32 1 -; CHECK-NEXT: [[END_I:%.*]] = load i64*, i64** [[GEP_END_I]], align 8 -; CHECK-NEXT: [[START_INT_I:%.*]] = ptrtoint i64* [[START_I]] to i64 -; CHECK-NEXT: [[END_INT_I:%.*]] = ptrtoint i64* [[END_I]] to i64 -; CHECK-NEXT: [[SUB_I:%.*]] = sub i64 [[END_INT_I]], [[START_INT_I]] -; CHECK-NEXT: [[GEP_END_I3:%.*]] = getelementptr [[VEC]], %vec* [[B:%.*]], i64 0, i32 1 -; CHECK-NEXT: [[GEP_START_I1:%.*]] = getelementptr [[VEC]], %vec* [[B]], i64 0, i32 0 +; CHECK-NEXT: [[GEP_START_I1:%.*]] = getelementptr [[VEC]], %vec* [[B:%.*]], i64 0, i32 0 ; CHECK-NEXT: [[START_I2_PEEL:%.*]] = load i64*, i64** [[GEP_START_I1]], align 8 -; CHECK-NEXT: [[END_I4_PEEL:%.*]] = load i64*, i64** [[GEP_END_I3]], align 8 -; CHECK-NEXT: [[START_INT_I5_PEEL:%.*]] = ptrtoint i64* [[START_I2_PEEL]] to i64 -; CHECK-NEXT: [[END_INT_I6_PEEL:%.*]] = ptrtoint i64* [[END_I4_PEEL]] to i64 -; CHECK-NEXT: [[SUB_I7_PEEL:%.*]] = sub i64 [[END_INT_I6_PEEL]], [[START_INT_I5_PEEL]] ; CHECK-NEXT: [[LV_I_PEEL:%.*]] = load i64, i64* [[START_I]], align 4 ; CHECK-NEXT: [[LV_I10_PEEL:%.*]] = load i64, i64* [[START_I2_PEEL]], align 4 ; CHECK-NEXT: [[SUM_NEXT_PEEL:%.*]] = add i64 [[LV_I_PEEL]], [[LV_I10_PEEL]] ; CHECK-NEXT: [[C_PEEL:%.*]] = icmp sgt i64 [[N:%.*]], 0 ; CHECK-NEXT: br i1 [[C_PEEL]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] ; CHECK: loop.preheader: +; CHECK-NEXT: [[GEP_END_I3:%.*]] = getelementptr [[VEC]], %vec* [[B]], i64 0, i32 1 +; CHECK-NEXT: [[END_I4_PEEL:%.*]] = load i64*, i64** [[GEP_END_I3]], align 8 +; CHECK-NEXT: [[END_INT_I6_PEEL:%.*]] = ptrtoint i64* [[END_I4_PEEL]] to i64 +; CHECK-NEXT: [[START_INT_I5_PEEL:%.*]] = ptrtoint i64* [[START_I2_PEEL]] to i64 +; CHECK-NEXT: [[SUB_I7_PEEL:%.*]] = sub i64 [[END_INT_I6_PEEL]], [[START_INT_I5_PEEL]] +; CHECK-NEXT: [[GEP_END_I:%.*]] = 
+; CHECK-NEXT:    [[END_I:%.*]] = load i64*, i64** [[GEP_END_I]], align 8
+; CHECK-NEXT:    [[END_INT_I:%.*]] = ptrtoint i64* [[END_I]] to i64
+; CHECK-NEXT:    [[START_INT_I:%.*]] = ptrtoint i64* [[START_I]] to i64
+; CHECK-NEXT:    [[SUB_I:%.*]] = sub i64 [[END_INT_I]], [[START_INT_I]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
 ; CHECK-NEXT:    [[UMIN:%.*]] = tail call i64 @llvm.umin.i64(i64 [[SUB_I7_PEEL]], i64 [[TMP0]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = freeze i64 [[UMIN]]
@@ -128,27 +128,11 @@
 ; CHECK-NEXT:  at_with_int_conversion.exit24.peel:
 ; CHECK-NEXT:    [[GEP_START_I:%.*]] = getelementptr [[VEC:%.*]], %vec* [[A:%.*]], i64 0, i32 0
 ; CHECK-NEXT:    [[START_I:%.*]] = load i64*, i64** [[GEP_START_I]], align 8
-; CHECK-NEXT:    [[GEP_END_I:%.*]] = getelementptr [[VEC]], %vec* [[A]], i64 0, i32 1
-; CHECK-NEXT:    [[END_I:%.*]] = load i64*, i64** [[GEP_END_I]], align 8
-; CHECK-NEXT:    [[START_INT_I:%.*]] = ptrtoint i64* [[START_I]] to i64
-; CHECK-NEXT:    [[END_INT_I:%.*]] = ptrtoint i64* [[END_I]] to i64
-; CHECK-NEXT:    [[SUB_I:%.*]] = sub i64 [[END_INT_I]], [[START_INT_I]]
 ; CHECK-NEXT:    [[GEP_START_I13:%.*]] = getelementptr [[VEC]], %vec* [[C:%.*]], i64 0, i32 0
-; CHECK-NEXT:    [[GEP_END_I15:%.*]] = getelementptr [[VEC]], %vec* [[C]], i64 0, i32 1
-; CHECK-NEXT:    [[GEP_END_I3:%.*]] = getelementptr [[VEC]], %vec* [[B:%.*]], i64 0, i32 1
-; CHECK-NEXT:    [[GEP_START_I1:%.*]] = getelementptr [[VEC]], %vec* [[B]], i64 0, i32 0
+; CHECK-NEXT:    [[GEP_START_I1:%.*]] = getelementptr [[VEC]], %vec* [[B:%.*]], i64 0, i32 0
 ; CHECK-NEXT:    [[LV_I_PEEL:%.*]] = load i64, i64* [[START_I]], align 4
 ; CHECK-NEXT:    [[START_I2_PEEL:%.*]] = load i64*, i64** [[GEP_START_I1]], align 8
-; CHECK-NEXT:    [[END_I4_PEEL:%.*]] = load i64*, i64** [[GEP_END_I3]], align 8
-; CHECK-NEXT:    [[START_INT_I5_PEEL:%.*]] = ptrtoint i64* [[START_I2_PEEL]] to i64
-; CHECK-NEXT:    [[END_I4_PEEL_FR:%.*]] = freeze i64* [[END_I4_PEEL]]
-; CHECK-NEXT:    [[END_INT_I6_PEEL:%.*]] = ptrtoint i64* [[END_I4_PEEL_FR]] to i64
-; CHECK-NEXT:    [[SUB_I7_PEEL:%.*]] = sub i64 [[END_INT_I6_PEEL]], [[START_INT_I5_PEEL]]
 ; CHECK-NEXT:    [[START_I14_PEEL:%.*]] = load i64*, i64** [[GEP_START_I13]], align 8
-; CHECK-NEXT:    [[END_I16_PEEL:%.*]] = load i64*, i64** [[GEP_END_I15]], align 8
-; CHECK-NEXT:    [[START_INT_I17_PEEL:%.*]] = ptrtoint i64* [[START_I14_PEEL]] to i64
-; CHECK-NEXT:    [[END_INT_I18_PEEL:%.*]] = ptrtoint i64* [[END_I16_PEEL]] to i64
-; CHECK-NEXT:    [[SUB_I19_PEEL:%.*]] = sub i64 [[END_INT_I18_PEEL]], [[START_INT_I17_PEEL]]
 ; CHECK-NEXT:    [[LV_I10_PEEL:%.*]] = load i64, i64* [[START_I2_PEEL]], align 4
 ; CHECK-NEXT:    [[LV_I22_PEEL:%.*]] = load i64, i64* [[START_I14_PEEL]], align 4
 ; CHECK-NEXT:    [[ADD_2_PEEL:%.*]] = add i64 [[LV_I_PEEL]], [[LV_I10_PEEL]]
@@ -156,6 +140,22 @@
 ; CHECK-NEXT:    [[COND_PEEL:%.*]] = icmp sgt i64 [[N:%.*]], 0
 ; CHECK-NEXT:    br i1 [[COND_PEEL]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
 ; CHECK:       loop.preheader:
+; CHECK-NEXT:    [[GEP_END_I15:%.*]] = getelementptr [[VEC]], %vec* [[C]], i64 0, i32 1
+; CHECK-NEXT:    [[END_I16_PEEL:%.*]] = load i64*, i64** [[GEP_END_I15]], align 8
+; CHECK-NEXT:    [[END_INT_I18_PEEL:%.*]] = ptrtoint i64* [[END_I16_PEEL]] to i64
+; CHECK-NEXT:    [[START_INT_I17_PEEL:%.*]] = ptrtoint i64* [[START_I14_PEEL]] to i64
+; CHECK-NEXT:    [[SUB_I19_PEEL:%.*]] = sub i64 [[END_INT_I18_PEEL]], [[START_INT_I17_PEEL]]
+; CHECK-NEXT:    [[GEP_END_I3:%.*]] = getelementptr [[VEC]], %vec* [[B]], i64 0, i32 1
+; CHECK-NEXT:    [[END_I4_PEEL:%.*]] = load i64*, i64** [[GEP_END_I3]], align 8
+; CHECK-NEXT:    [[END_I4_PEEL_FR:%.*]] = freeze i64* [[END_I4_PEEL]]
+; CHECK-NEXT:    [[END_INT_I6_PEEL:%.*]] = ptrtoint i64* [[END_I4_PEEL_FR]] to i64
+; CHECK-NEXT:    [[START_INT_I5_PEEL:%.*]] = ptrtoint i64* [[START_I2_PEEL]] to i64
+; CHECK-NEXT:    [[SUB_I7_PEEL:%.*]] = sub i64 [[END_INT_I6_PEEL]], [[START_INT_I5_PEEL]]
+; CHECK-NEXT:    [[GEP_END_I:%.*]] = getelementptr [[VEC]], %vec* [[A]], i64 0, i32 1
+; CHECK-NEXT:    [[END_I:%.*]] = load i64*, i64** [[GEP_END_I]], align 8
+; CHECK-NEXT:    [[END_INT_I:%.*]] = ptrtoint i64* [[END_I]] to i64
+; CHECK-NEXT:    [[START_INT_I:%.*]] = ptrtoint i64* [[START_I]] to i64
+; CHECK-NEXT:    [[SUB_I:%.*]] = sub i64 [[END_INT_I]], [[START_INT_I]]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
 ; CHECK-NEXT:    [[UMIN:%.*]] = tail call i64 @llvm.umin.i64(i64 [[SUB_I19_PEEL]], i64 [[TMP0]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = freeze i64 [[UMIN]]