LoopVectorize: when collecting the values to ignore for VF > 1, skip
induction PHIs whose users are all GEPs or instructions that are uniform
after vectorization, and skip every instruction that is uniform after
vectorization. Adds the @goo case to X86/reg-usage.ll to check the reported
"LV(REG)" max usage for VF = 4, 8 and 16.

Index: lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- lib/Transforms/Vectorize/LoopVectorize.cpp
+++ lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6172,30 +6172,19 @@
     VecValuesToIgnore.insert(Casts.begin(), Casts.end());
   }
 
-  // Ignore induction phis that are only used in either GetElementPtr or ICmp
-  // instruction to exit loop. Induction variables usually have large types and
-  // can have big impact when estimating register usage.
+  // Ignore induction phis that are only used in either GetElementPtr or
+  // Uniform instruction since we don't need vector versions for such induction
+  // vars. Induction variables usually have large types so it is important not
+  // to exaggerate their register usages.
   // This is for when VF > 1.
   for (auto &Induction : *Legal->getInductionVars()) {
     auto *PN = Induction.first;
-    auto *UpdateV = PN->getIncomingValueForBlock(TheLoop->getLoopLatch());
 
-    // Check that the PHI is only used by the induction increment (UpdateV) or
-    // by GEPs. Then check that UpdateV is only used by a compare instruction or
-    // the loop header PHI.
-    // FIXME: Need precise def-use analysis to determine if this instruction
-    // variable will be vectorized.
-    if (std::all_of(PN->user_begin(), PN->user_end(),
-                    [&](const User *U) -> bool {
-                      return U == UpdateV || isa<GetElementPtrInst>(U);
-                    }) &&
-        std::all_of(UpdateV->user_begin(), UpdateV->user_end(),
-                    [&](const User *U) -> bool {
-                      return U == PN || isa<ICmpInst>(U);
-                    })) {
+    if (std::all_of(PN->user_begin(), PN->user_end(), [&](User *U) -> bool {
+          return (isa<GetElementPtrInst>(U) ||
+                  Legal->isUniformAfterVectorization(cast<Instruction>(U)));
+        }))
       VecValuesToIgnore.insert(PN);
-      VecValuesToIgnore.insert(UpdateV);
-    }
   }
 
   // Ignore instructions that will not be vectorized.
@@ -6203,6 +6192,11 @@
   for (auto bb = TheLoop->block_begin(), be = TheLoop->block_end(); bb != be;
        ++bb) {
     for (auto &Inst : **bb) {
+      if (Legal->isUniformAfterVectorization(&Inst)) {
+        VecValuesToIgnore.insert(&Inst);
+        continue;
+      }
+
       switch (Inst.getOpcode())
       case Instruction::GetElementPtr: {
         // Ignore GEP if its last operand is an induction variable so that it is
Index: test/Transforms/LoopVectorize/X86/reg-usage.ll
===================================================================
--- test/Transforms/LoopVectorize/X86/reg-usage.ll
+++ test/Transforms/LoopVectorize/X86/reg-usage.ll
@@ -45,6 +45,42 @@
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 }
 
+define i32 @goo() {
+; CHECK-LABEL: goo
+; CHECK: LV(REG): VF = 4
+; CHECK-NEXT: LV(REG): Found max usage: 4
+; CHECK: LV(REG): VF = 8
+; CHECK-NEXT: LV(REG): Found max usage: 7
+; CHECK: LV(REG): VF = 16
+; CHECK-NEXT: LV(REG): Found max usage: 13
+entry:
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body
+  %add.lcssa = phi i32 [ %add, %for.body ]
+  ret i32 %add.lcssa
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %tmp1 = add nsw i64 %indvars.iv, 3
+  %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %tmp1
+  %tmp = load i8, i8* %arrayidx, align 1
+  %conv = zext i8 %tmp to i32
+  %tmp2 = add nsw i64 %indvars.iv, 2
+  %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %tmp2
+  %tmp3 = load i8, i8* %arrayidx2, align 1
+  %conv3 = zext i8 %tmp3 to i32
+  %sub = sub nsw i32 %conv, %conv3
+  %ispos = icmp sgt i32 %sub, -1
+  %neg = sub nsw i32 0, %sub
+  %tmp4 = select i1 %ispos, i32 %sub, i32 %neg
+  %add = add nsw i32 %tmp4, %s.015
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1024
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
 define i64 @bar(i64* nocapture %a) {
 ; CHECK-LABEL: bar
 ; CHECK: LV(REG): VF = 2