diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -496,7 +496,7 @@
   /// Instr's operands.
   void scalarizeInstruction(const Instruction *Instr,
                             VPReplicateRecipe *RepRecipe,
-                            const VPIteration &Instance, bool IfPredicateInstr,
+                            const VPIteration &Instance,
                             VPTransformState &State);
 
   /// Construct the vector value of a scalarized value \p V one lane at a time.
@@ -575,10 +575,6 @@
   void clearReductionWrapFlags(VPReductionPHIRecipe *PhiR,
                                VPTransformState &State);
 
-  /// Iteratively sink the scalarized operands of a predicated instruction into
-  /// the block that was created for it.
-  void sinkScalarOperands(Instruction *PredInst);
-
   /// Shrinks vector element sizes to the smallest bitwidth they can be legally
   /// represented as.
   void truncateToMinimalBitwidths(VPTransformState &State);
@@ -705,9 +701,6 @@
   /// A list of all bypass blocks. The first block is the entry of the loop.
   SmallVector<BasicBlock *, 4> LoopBypassBlocks;
 
-  /// Store instructions that were predicated.
-  SmallVector<Instruction *, 4> PredicatedInstructions;
-
   /// Trip count of the original loop.
   Value *TripCount = nullptr;
 
@@ -2779,7 +2772,6 @@
 void InnerLoopVectorizer::scalarizeInstruction(const Instruction *Instr,
                                                VPReplicateRecipe *RepRecipe,
                                                const VPIteration &Instance,
-                                               bool IfPredicateInstr,
                                                VPTransformState &State) {
   assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
 
@@ -2827,10 +2819,6 @@
   // If we just cloned a new assumption, add it to the assumption cache.
   if (auto *II = dyn_cast<AssumeInst>(Cloned))
     AC->registerAssumption(II);
-
-  // End if-block.
-  if (IfPredicateInstr)
-    PredicatedInstructions.push_back(Cloned);
 }
 
 Value *InnerLoopVectorizer::getOrCreateTripCount(BasicBlock *InsertBlock) {
@@ -3712,9 +3700,6 @@
   for (const auto &KV : Plan.getLiveOuts())
     KV.second->fixPhi(Plan, State);
 
-  for (Instruction *PI : PredicatedInstructions)
-    sinkScalarOperands(&*PI);
-
   // Remove redundant induction instructions.
   cse(VectorLoop->getHeader());
 
@@ -4097,80 +4082,6 @@
   }
 }
 
-void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
-  // The basic block and loop containing the predicated instruction.
-  auto *PredBB = PredInst->getParent();
-  auto *VectorLoop = LI->getLoopFor(PredBB);
-
-  // Initialize a worklist with the operands of the predicated instruction.
-  SetVector<Value *> Worklist(PredInst->op_begin(), PredInst->op_end());
-
-  // Holds instructions that we need to analyze again. An instruction may be
-  // reanalyzed if we don't yet know if we can sink it or not.
-  SmallVector<Instruction *, 8> InstsToReanalyze;
-
-  // Returns true if a given use occurs in the predicated block. Phi nodes use
-  // their operands in their corresponding predecessor blocks.
-  auto isBlockOfUsePredicated = [&](Use &U) -> bool {
-    auto *I = cast<Instruction>(U.getUser());
-    BasicBlock *BB = I->getParent();
-    if (auto *Phi = dyn_cast<PHINode>(I))
-      BB = Phi->getIncomingBlock(
-          PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
-    return BB == PredBB;
-  };
-
-  // Iteratively sink the scalarized operands of the predicated instruction
-  // into the block we created for it. When an instruction is sunk, its
-  // operands are then added to the worklist. The algorithm ends after one pass
-  // through the worklist doesn't sink a single instruction.
-  bool Changed;
-  do {
-    // Add the instructions that need to be reanalyzed to the worklist, and
-    // reset the changed indicator.
-    Worklist.insert(InstsToReanalyze.begin(), InstsToReanalyze.end());
-    InstsToReanalyze.clear();
-    Changed = false;
-
-    while (!Worklist.empty()) {
-      auto *I = dyn_cast<Instruction>(Worklist.pop_back_val());
-
-      // We can't sink an instruction if it is a phi node, is not in the loop,
-      // or may have side effects.
-      if (!I || isa<PHINode>(I) || !VectorLoop->contains(I) ||
-          I->mayHaveSideEffects())
-        continue;
-
-      // If the instruction is already in PredBB, check if we can sink its
-      // operands. In that case, VPlan's sinkScalarOperands() succeeded in
-      // sinking the scalar instruction I, hence it appears in PredBB; but it
-      // may have failed to sink I's operands (recursively), which we try
-      // (again) here.
-      if (I->getParent() == PredBB) {
-        Worklist.insert(I->op_begin(), I->op_end());
-        continue;
-      }
-
-      // It's legal to sink the instruction if all its uses occur in the
-      // predicated block. Otherwise, there's nothing to do yet, and we may
-      // need to reanalyze the instruction.
-      if (!llvm::all_of(I->uses(), isBlockOfUsePredicated)) {
-        InstsToReanalyze.push_back(I);
-        continue;
-      }
-
-      // Move the instruction to the beginning of the predicated block, and add
-      // its operands to the worklist.
-      I->moveBefore(&*PredBB->getFirstInsertionPt());
-      Worklist.insert(I->op_begin(), I->op_end());
-
-      // The sinking may have enabled other instructions to be sunk, so we will
-      // need to iterate.
-      Changed = true;
-    }
-  } while (Changed);
-}
-
 void InnerLoopVectorizer::fixNonInductionPHIs(VPlan &Plan,
                                               VPTransformState &State) {
   auto Iter = depth_first(
@@ -9615,8 +9526,7 @@
 void VPReplicateRecipe::execute(VPTransformState &State) {
   if (State.Instance) { // Generate a single instance.
     assert(!State.VF.isScalable() && "Can't scalarize a scalable vector");
-    State.ILV->scalarizeInstruction(Instr, this, *State.Instance, IsPredicated,
-                                    State);
+    State.ILV->scalarizeInstruction(Instr, this, *State.Instance, State);
     // Insert scalar instance packing it into a vector.
     if (AlsoPack && State.VF.isVector()) {
       // If we're constructing lane 0, initialize to start from poison.
@@ -9638,8 +9548,7 @@
       all_of(operands(), [](VPValue *Op) {
         return Op->isDefinedOutsideVectorRegions();
       })) {
-    State.ILV->scalarizeInstruction(Instr, this, VPIteration(0, 0), IsPredicated,
-                                    State);
+    State.ILV->scalarizeInstruction(Instr, this, VPIteration(0, 0), State);
     if (user_begin() != user_end()) {
       for (unsigned Part = 1; Part < State.UF; ++Part)
         State.set(this, State.get(this, VPIteration(0, 0)),
@@ -9651,8 +9560,7 @@
     // Uniform within VL means we need to generate lane 0 only for each
     // unrolled copy.
     for (unsigned Part = 0; Part < State.UF; ++Part)
-      State.ILV->scalarizeInstruction(Instr, this, VPIteration(Part, 0),
-                                      IsPredicated, State);
+      State.ILV->scalarizeInstruction(Instr, this, VPIteration(Part, 0), State);
     return;
   }
 
@@ -9660,8 +9568,8 @@
   // needs a single copy of the store.
   if (isa<StoreInst>(Instr) && !getOperand(1)->hasDefiningRecipe()) {
     auto Lane = VPLane::getLastLaneForVF(State.VF);
-    State.ILV->scalarizeInstruction(
-        Instr, this, VPIteration(State.UF - 1, Lane), IsPredicated, State);
+    State.ILV->scalarizeInstruction(Instr, this,
+                                    VPIteration(State.UF - 1, Lane), State);
     return;
   }
 
@@ -9671,7 +9579,7 @@
   for (unsigned Part = 0; Part < State.UF; ++Part)
     for (unsigned Lane = 0; Lane < EndLane; ++Lane)
       State.ILV->scalarizeInstruction(Instr, this, VPIteration(Part, Lane),
-                                      IsPredicated, State);
+                                      State);
 }
 
 void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) {
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll
--- a/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll
@@ -1,34 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -passes=loop-vectorize -mtriple=x86_64-unknown-linux -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck %s
 
 define i32* @test(i32* noalias %src, i32* noalias %dst) {
 ; CHECK-LABEL: @test(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE2:%.*]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %vector.ph ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE2]] ]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE2:%.*]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE2]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[SRC:%.*]], i64 [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <2 x i64> [[VEC_IND]], zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], <i1 true, i1 true>
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
-; CHECK-NEXT:    br i1 [[TMP5]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[SRC:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <2 x i64> [[VEC_IND]], zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], <i1 true, i1 true>
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
+; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
 ; CHECK:       pred.load.if:
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 [[TMP0]]
-; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
 ; CHECK-NEXT:    br label [[PRED_LOAD_CONTINUE]]
 ; CHECK:       pred.load.continue:
-; CHECK-NEXT:    [[TMP9:%.*]] = phi <2 x i32> [ poison, %vector.body ], [ [[TMP8]], [[PRED_LOAD_IF]] ]
-; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
+; CHECK-NEXT:    [[TMP9:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_LOAD_IF]] ]
+;
CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1 ; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2]] ; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP3]], align 4 ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP11]], i32 1 ; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] ; CHECK: pred.load.continue2: ; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i32> [ [[TMP9]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> zeroinitializer, <2 x i32> [[TMP13]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> zeroinitializer, <2 x i32> [[TMP13]] ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i32 0 ; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <2 x i32>* @@ -36,12 +40,30 @@ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label %vector.body +; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, 1000 -; CHECK-NEXT: br i1 [[CMP_N]], label %exit, label %scalar.ph +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, i32* [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[IV]], 0 +; CHECK-NEXT: br i1 [[CMP_1]], label [[LOOP_LATCH]], label [[THEN:%.*]] +; CHECK: then: +; CHECK-NEXT: [[L:%.*]] = load i32, i32* [[GEP_SRC]], align 4 +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: [[M:%.*]] = phi i32 [ [[L]], [[THEN]] ], [ 0, [[LOOP_HEADER]] ] +; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 [[IV]] +; CHECK-NEXT: store i32 [[M]], i32* [[GEP_DST]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1 +; CHECK-NEXT: [[CMP_2:%.*]] = icmp slt i64 [[IV_NEXT]], 1000 +; CHECK-NEXT: br i1 [[CMP_2]], label [[LOOP_HEADER]], label [[EXIT]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: exit: -; CHECK-NEXT: [[GEP_LCSSA:%.*]] = phi i32* [ %gep.src, %loop.latch ], [ [[TMP2]], %middle.block ] +; CHECK-NEXT: [[GEP_LCSSA:%.*]] = phi i32* [ [[GEP_SRC]], [[LOOP_LATCH]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32* [[GEP_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll --- a/llvm/test/Transforms/LoopVectorize/float-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll @@ -1366,6 +1366,9 @@ ; VEC4_INTERL1: vector.body: ; VEC4_INTERL1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ] ; VEC4_INTERL1-NEXT: [[TMP0:%.*]] = sitofp i64 [[INDEX]] to float +; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 1 +; VEC4_INTERL1-NEXT: 
[[TMP12:%.*]] = or i64 [[INDEX]], 2 +; VEC4_INTERL1-NEXT: [[TMP16:%.*]] = or i64 [[INDEX]], 3 ; VEC4_INTERL1-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; VEC4_INTERL1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4 ; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], zeroinitializer @@ -1379,7 +1382,6 @@ ; VEC4_INTERL1-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1 ; VEC4_INTERL1-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]] ; VEC4_INTERL1: pred.store.if3: -; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 1 ; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = fadd fast float [[TMP0]], 1.000000e+00 ; VEC4_INTERL1-NEXT: [[TMP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP8]] ; VEC4_INTERL1-NEXT: store float [[TMP7]], ptr [[TMP9]], align 4 @@ -1388,7 +1390,6 @@ ; VEC4_INTERL1-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP3]], i64 2 ; VEC4_INTERL1-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] ; VEC4_INTERL1: pred.store.if5: -; VEC4_INTERL1-NEXT: [[TMP12:%.*]] = or i64 [[INDEX]], 2 ; VEC4_INTERL1-NEXT: [[TMP11:%.*]] = fadd fast float [[TMP0]], 2.000000e+00 ; VEC4_INTERL1-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP12]] ; VEC4_INTERL1-NEXT: store float [[TMP11]], ptr [[TMP13]], align 4 @@ -1397,7 +1398,6 @@ ; VEC4_INTERL1-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i64 3 ; VEC4_INTERL1-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]] ; VEC4_INTERL1: pred.store.if7: -; VEC4_INTERL1-NEXT: [[TMP16:%.*]] = or i64 [[INDEX]], 3 ; VEC4_INTERL1-NEXT: [[TMP15:%.*]] = fadd fast float [[TMP0]], 3.000000e+00 ; VEC4_INTERL1-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP16]] ; VEC4_INTERL1-NEXT: store float [[TMP15]], ptr [[TMP17]], align 4 @@ -1443,7 +1443,13 @@ ; VEC4_INTERL2: vector.body: ; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE16:%.*]] ] ; VEC4_INTERL2-NEXT: [[TMP0:%.*]] = sitofp i64 [[INDEX]] to float +; VEC4_INTERL2-NEXT: [[TMP12:%.*]] = or i64 [[INDEX]], 1 +; VEC4_INTERL2-NEXT: [[TMP16:%.*]] = or i64 [[INDEX]], 2 +; VEC4_INTERL2-NEXT: [[TMP20:%.*]] = or i64 [[INDEX]], 3 ; VEC4_INTERL2-NEXT: [[TMP1:%.*]] = or i64 [[INDEX]], 4 +; VEC4_INTERL2-NEXT: [[TMP27:%.*]] = or i64 [[INDEX]], 5 +; VEC4_INTERL2-NEXT: [[TMP31:%.*]] = or i64 [[INDEX]], 6 +; VEC4_INTERL2-NEXT: [[TMP35:%.*]] = or i64 [[INDEX]], 7 ; VEC4_INTERL2-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; VEC4_INTERL2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4 ; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4 @@ -1460,7 +1466,6 @@ ; VEC4_INTERL2-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP6]], i64 1 ; VEC4_INTERL2-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] ; VEC4_INTERL2: pred.store.if4: -; VEC4_INTERL2-NEXT: [[TMP12:%.*]] = or i64 [[INDEX]], 1 ; VEC4_INTERL2-NEXT: [[TMP11:%.*]] = fadd fast float [[TMP0]], 1.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP13:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP12]] ; VEC4_INTERL2-NEXT: store float [[TMP11]], ptr [[TMP13]], align 4 @@ -1469,7 +1474,6 @@ ; VEC4_INTERL2-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP6]], i64 2 ; VEC4_INTERL2-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF5:%.*]], label 
[[PRED_STORE_CONTINUE6:%.*]] ; VEC4_INTERL2: pred.store.if6: -; VEC4_INTERL2-NEXT: [[TMP16:%.*]] = or i64 [[INDEX]], 2 ; VEC4_INTERL2-NEXT: [[TMP15:%.*]] = fadd fast float [[TMP0]], 2.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP16]] ; VEC4_INTERL2-NEXT: store float [[TMP15]], ptr [[TMP17]], align 4 @@ -1478,7 +1482,6 @@ ; VEC4_INTERL2-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP6]], i64 3 ; VEC4_INTERL2-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] ; VEC4_INTERL2: pred.store.if8: -; VEC4_INTERL2-NEXT: [[TMP20:%.*]] = or i64 [[INDEX]], 3 ; VEC4_INTERL2-NEXT: [[TMP19:%.*]] = fadd fast float [[TMP0]], 3.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP20]] ; VEC4_INTERL2-NEXT: store float [[TMP19]], ptr [[TMP21]], align 4 @@ -1495,7 +1498,6 @@ ; VEC4_INTERL2-NEXT: [[TMP25:%.*]] = extractelement <4 x i1> [[TMP7]], i64 1 ; VEC4_INTERL2-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]] ; VEC4_INTERL2: pred.store.if12: -; VEC4_INTERL2-NEXT: [[TMP27:%.*]] = or i64 [[INDEX]], 5 ; VEC4_INTERL2-NEXT: [[TMP26:%.*]] = fadd fast float [[TMP0]], 5.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP28:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP27]] ; VEC4_INTERL2-NEXT: store float [[TMP26]], ptr [[TMP28]], align 4 @@ -1504,7 +1506,6 @@ ; VEC4_INTERL2-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP7]], i64 2 ; VEC4_INTERL2-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]] ; VEC4_INTERL2: pred.store.if14: -; VEC4_INTERL2-NEXT: [[TMP31:%.*]] = or i64 [[INDEX]], 6 ; VEC4_INTERL2-NEXT: [[TMP30:%.*]] = fadd fast float [[TMP0]], 6.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP31]] ; VEC4_INTERL2-NEXT: store float [[TMP30]], ptr [[TMP32]], align 4 @@ -1513,7 +1514,6 @@ ; VEC4_INTERL2-NEXT: [[TMP33:%.*]] = extractelement <4 x i1> [[TMP7]], i64 3 ; VEC4_INTERL2-NEXT: br i1 [[TMP33]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16]] ; VEC4_INTERL2: pred.store.if16: -; VEC4_INTERL2-NEXT: [[TMP35:%.*]] = or i64 [[INDEX]], 7 ; VEC4_INTERL2-NEXT: [[TMP34:%.*]] = fadd fast float [[TMP0]], 7.000000e+00 ; VEC4_INTERL2-NEXT: [[TMP36:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP35]] ; VEC4_INTERL2-NEXT: store float [[TMP34]], ptr [[TMP36]], align 4 @@ -1617,6 +1617,7 @@ ; VEC2_INTERL1_PRED_STORE: vector.body: ; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ] ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP0:%.*]] = sitofp i64 [[INDEX]] to float +; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 1 ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]] ; VEC2_INTERL1_PRED_STORE-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP1]], align 4 ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fcmp fast oeq <2 x float> [[WIDE_LOAD]], zeroinitializer @@ -1630,7 +1631,6 @@ ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP3]], i64 1 ; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]] ; VEC2_INTERL1_PRED_STORE: pred.store.if3: -; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 1 ; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = fadd fast float [[TMP0]], 1.000000e+00 ; VEC2_INTERL1_PRED_STORE-NEXT: 
[[TMP9:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP8]] ; VEC2_INTERL1_PRED_STORE-NEXT: store float [[TMP7]], ptr [[TMP9]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll --- a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll @@ -465,14 +465,16 @@ ; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[ASD]], i64 [[TMP1]] ; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4, !alias.scope !19, !noalias !22 ; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope !19, !noalias !22 +; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP0]] +; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP1]] +; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope !22 +; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4, !alias.scope !22 ; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = add nsw i32 [[TMP4]], 23 ; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = add nsw i32 [[TMP5]], 23 ; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = icmp slt i32 [[TMP4]], 100 ; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = icmp slt i32 [[TMP5]], 100 ; UNROLL-NO-VF-NEXT: br i1 [[TMP8]], label [[PRED_SDIV_IF:%.*]], label [[PRED_SDIV_CONTINUE:%.*]] ; UNROLL-NO-VF: pred.sdiv.if: -; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP0]] -; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4, !alias.scope !22 ; UNROLL-NO-VF-NEXT: [[TMP12:%.*]] = sdiv i32 [[TMP6]], [[TMP4]] ; UNROLL-NO-VF-NEXT: [[TMP13:%.*]] = sdiv i32 [[TMP11]], [[TMP12]] ; UNROLL-NO-VF-NEXT: br label [[PRED_SDIV_CONTINUE]] @@ -481,8 +483,6 @@ ; UNROLL-NO-VF-NEXT: [[TMP15:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP13]], [[PRED_SDIV_IF]] ] ; UNROLL-NO-VF-NEXT: br i1 [[TMP9]], label [[PRED_SDIV_IF2:%.*]], label [[PRED_SDIV_CONTINUE3]] ; UNROLL-NO-VF: pred.sdiv.if2: -; UNROLL-NO-VF-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP1]] -; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4, !alias.scope !22 ; UNROLL-NO-VF-NEXT: [[TMP18:%.*]] = sdiv i32 [[TMP7]], [[TMP5]] ; UNROLL-NO-VF-NEXT: [[TMP19:%.*]] = sdiv i32 [[TMP17]], [[TMP18]] ; UNROLL-NO-VF-NEXT: br label [[PRED_SDIV_CONTINUE3]] @@ -664,6 +664,10 @@ ; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[ASD]], i64 [[TMP1]] ; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4, !alias.scope !28, !noalias !31 ; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP3]], align 4, !alias.scope !28, !noalias !31 +; UNROLL-NO-VF-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP0]] +; UNROLL-NO-VF-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP1]] +; UNROLL-NO-VF-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4, !alias.scope !31 +; UNROLL-NO-VF-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4, !alias.scope !31 ; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = add nsw i32 [[TMP4]], 23 ; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = add nsw i32 [[TMP5]], 23 ; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = icmp slt i32 [[TMP4]], 100 @@ -678,8 +682,6 @@ ; UNROLL-NO-VF-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP9]] ; UNROLL-NO-VF-NEXT: br i1 [[TMP16]], label [[PRED_SDIV_IF:%.*]], label [[PRED_SDIV_CONTINUE:%.*]] ; UNROLL-NO-VF: pred.sdiv.if: -; UNROLL-NO-VF-NEXT: [[TMP18:%.*]] = getelementptr 
inbounds i32, ptr [[BSD]], i64 [[TMP0]] -; UNROLL-NO-VF-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4, !alias.scope !31 ; UNROLL-NO-VF-NEXT: [[TMP20:%.*]] = sdiv i32 [[TMP6]], [[TMP4]] ; UNROLL-NO-VF-NEXT: [[TMP21:%.*]] = sdiv i32 [[TMP19]], [[TMP20]] ; UNROLL-NO-VF-NEXT: br label [[PRED_SDIV_CONTINUE]] @@ -688,8 +690,6 @@ ; UNROLL-NO-VF-NEXT: [[TMP23:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP21]], [[PRED_SDIV_IF]] ] ; UNROLL-NO-VF-NEXT: br i1 [[TMP17]], label [[PRED_SDIV_IF2:%.*]], label [[PRED_SDIV_CONTINUE3]] ; UNROLL-NO-VF: pred.sdiv.if2: -; UNROLL-NO-VF-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[BSD]], i64 [[TMP1]] -; UNROLL-NO-VF-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4, !alias.scope !31 ; UNROLL-NO-VF-NEXT: [[TMP26:%.*]] = sdiv i32 [[TMP7]], [[TMP5]] ; UNROLL-NO-VF-NEXT: [[TMP27:%.*]] = sdiv i32 [[TMP25]], [[TMP26]] ; UNROLL-NO-VF-NEXT: br label [[PRED_SDIV_CONTINUE3]] diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll --- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -114,6 +114,7 @@ ; VEC: vector.body: ; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] ; VEC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; VEC-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 ; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[F:%.*]], i64 [[TMP0]] ; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 @@ -130,7 +131,6 @@ ; VEC-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1 ; VEC-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] ; VEC: pred.store.if1: -; VEC-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 ; VEC-NEXT: [[TMP10:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1 ; VEC-NEXT: [[TMP11:%.*]] = add nsw i32 [[TMP10]], 20 ; VEC-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[F]], i64 [[TMP9]] @@ -213,15 +213,15 @@ ; UNROLL-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[V_2:%.*]], [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[PRED_STORE_CONTINUE3]] ] ; UNROLL-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[PREDPHI4:%.*]], [[PRED_STORE_CONTINUE3]] ] ; UNROLL-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[V_1]], [[INDEX]] -; UNROLL-NEXT: br i1 [[COND_2:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE3]] -; UNROLL: pred.store.if: ; UNROLL-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0 -; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR:%.*]], i64 0, i64 [[TMP5]] -; UNROLL-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -; UNROLL-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 ; UNROLL-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 1 +; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR:%.*]], i64 0, i64 [[TMP5]] ; UNROLL-NEXT: [[TMP9:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[TMP8]] +; UNROLL-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 ; UNROLL-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +; UNROLL-NEXT: br i1 [[COND_2:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE3]] +; UNROLL: pred.store.if: +; UNROLL-NEXT: store i32 [[TMP7]], ptr [[TMP6]], align 4 ; UNROLL-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4 ; UNROLL-NEXT: br label [[PRED_STORE_CONTINUE3]] ; UNROLL: pred.store.continue3: @@ -285,19 +285,19 @@ 
; UNROLL-NOSIMPLIFY-NEXT: [[VEC_PHI:%.*]] = phi i32 [ [[V_2:%.*]], [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[PRED_STORE_CONTINUE3]] ] ; UNROLL-NOSIMPLIFY-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[PREDPHI4:%.*]], [[PRED_STORE_CONTINUE3]] ] ; UNROLL-NOSIMPLIFY-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[V_1]], [[INDEX]] -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[COND_2:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] -; UNROLL-NOSIMPLIFY: pred.store.if: ; UNROLL-NOSIMPLIFY-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0 +; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 1 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR:%.*]], i64 0, i64 [[TMP4]] +; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[TMP7]] ; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +; UNROLL-NOSIMPLIFY-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +; UNROLL-NOSIMPLIFY-NEXT: br i1 [[COND_2:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; UNROLL-NOSIMPLIFY: pred.store.if: ; UNROLL-NOSIMPLIFY-NEXT: store i32 [[TMP6]], ptr [[TMP5]], align 4 ; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE]] ; UNROLL-NOSIMPLIFY: pred.store.continue: ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[COND_2]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]] ; UNROLL-NOSIMPLIFY: pred.store.if2: -; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 1 -; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[TMP7]] -; UNROLL-NOSIMPLIFY-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 ; UNROLL-NOSIMPLIFY-NEXT: store i32 [[TMP9]], ptr [[TMP8]], align 4 ; UNROLL-NOSIMPLIFY-NEXT: br label [[PRED_STORE_CONTINUE3]] ; UNROLL-NOSIMPLIFY: pred.store.continue3: @@ -365,6 +365,7 @@ ; VEC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP5]], [[VECTOR_PH]] ], [ [[PREDPHI:%.*]], [[PRED_STORE_CONTINUE2]] ] ; VEC-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[V_1]], [[INDEX]] ; VEC-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0 +; VEC-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 1 ; VEC-NEXT: [[TMP7:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR:%.*]], i64 0, i64 [[TMP6]] ; VEC-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 ; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP8]], align 4 @@ -379,7 +380,6 @@ ; VEC-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 1 ; VEC-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] ; VEC: pred.store.if1: -; VEC-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 1 ; VEC-NEXT: [[TMP14:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[TMP13]] ; VEC-NEXT: [[TMP15:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1 ; VEC-NEXT: store i32 [[TMP15]], ptr [[TMP14]], align 4 @@ -459,17 +459,17 @@ ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ] -; UNROLL-NEXT: br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE3]] -; UNROLL: pred.store.if: ; UNROLL-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; UNROLL-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1 ; UNROLL-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr undef, i64 [[TMP0]] +; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr undef, i64 [[TMP5]] ; UNROLL-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +; 
UNROLL-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1 +; UNROLL-NEXT: br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE3]] +; UNROLL: pred.store.if: ; UNROLL-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i32 ; UNROLL-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i8 ; UNROLL-NEXT: store i8 [[TMP4]], ptr [[TMP1]], align 1 -; UNROLL-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1 -; UNROLL-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr undef, i64 [[TMP5]] -; UNROLL-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1 ; UNROLL-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i32 ; UNROLL-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i8 ; UNROLL-NEXT: store i8 [[TMP9]], ptr [[TMP6]], align 1 @@ -507,11 +507,14 @@ ; UNROLL-NOSIMPLIFY-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NOSIMPLIFY: vector.body: ; UNROLL-NOSIMPLIFY-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ] -; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] -; UNROLL-NOSIMPLIFY: pred.store.if: ; UNROLL-NOSIMPLIFY-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr undef, i64 [[TMP0]] +; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr undef, i64 [[TMP5]] ; UNROLL-NOSIMPLIFY-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 1 +; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1 +; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; UNROLL-NOSIMPLIFY: pred.store.if: ; UNROLL-NOSIMPLIFY-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i32 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i8 ; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP4]], ptr [[TMP1]], align 1 @@ -519,9 +522,6 @@ ; UNROLL-NOSIMPLIFY: pred.store.continue: ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]] ; UNROLL-NOSIMPLIFY: pred.store.if2: -; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1 -; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr undef, i64 [[TMP5]] -; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i32 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i8 ; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP9]], ptr [[TMP6]], align 1 @@ -564,6 +564,7 @@ ; VEC: vector.body: ; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ] ; VEC-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; VEC-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 ; VEC-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr undef, i64 [[TMP0]] ; VEC-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 0 ; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i8>, ptr [[TMP2]], align 1 @@ -580,7 +581,6 @@ ; VEC-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 1 ; VEC-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]] ; VEC: pred.store.if2: -; VEC-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 ; VEC-NEXT: [[TMP10:%.*]] = extractelement <2 x i8> [[WIDE_LOAD]], i32 1 ; VEC-NEXT: [[TMP11:%.*]] = zext i8 [[TMP10]] to i32 ; VEC-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i8 @@ -674,11 +674,12 @@ ; UNROLL-NOSIMPLIFY-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[PTR:%.*]], i64 [[TMP0]] ; 
UNROLL-NOSIMPLIFY-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP1]] +; UNROLL-NOSIMPLIFY-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP2]], align 1 +; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP3]], align 1 ; UNROLL-NOSIMPLIFY-NEXT: store i8 0, ptr [[TMP2]], align 1 ; UNROLL-NOSIMPLIFY-NEXT: store i8 0, ptr [[TMP3]], align 1 ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; UNROLL-NOSIMPLIFY: pred.store.if: -; UNROLL-NOSIMPLIFY-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP2]], align 1 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = zext i8 [[TMP4]] to i32 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = trunc i32 [[TMP5]] to i8 ; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP6]], ptr [[TMP2]], align 1 @@ -686,7 +687,6 @@ ; UNROLL-NOSIMPLIFY: pred.store.continue: ; UNROLL-NOSIMPLIFY-NEXT: br i1 [[C]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]] ; UNROLL-NOSIMPLIFY: pred.store.if2: -; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP3]], align 1 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP8:%.*]] = zext i8 [[TMP7]] to i32 ; UNROLL-NOSIMPLIFY-NEXT: [[TMP9:%.*]] = trunc i32 [[TMP8]] to i8 ; UNROLL-NOSIMPLIFY-NEXT: store i8 [[TMP9]], ptr [[TMP3]], align 1 diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -1990,6 +1990,7 @@ ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE2:%.*]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[PRED_UDIV_CONTINUE2]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 @@ -2005,7 +2006,6 @@ ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 1 ; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_UDIV_IF1:%.*]], label [[PRED_UDIV_CONTINUE2]] ; CHECK: pred.udiv.if1: -; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[INDEX]], 1 ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1 ; CHECK-NEXT: [[TMP11:%.*]] = udiv i32 [[TMP10]], [[TMP9]] ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP11]], i32 1 @@ -2057,6 +2057,7 @@ ; IND: vector.body: ; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE2:%.*]] ] ; IND-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[PRED_UDIV_CONTINUE2]] ] +; IND-NEXT: [[TMP6:%.*]] = or i32 [[INDEX]], 1 ; IND-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64 ; IND-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] ; IND-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4 @@ -2070,7 +2071,6 @@ ; IND-NEXT: [[TMP5:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_UDIV_IF]] ] ; IND-NEXT: br i1 [[C]], label [[PRED_UDIV_IF1:%.*]], label [[PRED_UDIV_CONTINUE2]] ; IND: pred.udiv.if1: -; IND-NEXT: [[TMP6:%.*]] = or i32 [[INDEX]], 1 ; IND-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i64 1 ; IND-NEXT: [[TMP8:%.*]] = udiv i32 [[TMP7]], [[TMP6]] ; IND-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP8]], i64 1 @@ 
-2126,7 +2126,9 @@ ; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE10:%.*]] ] ; UNROLL-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[PRED_UDIV_CONTINUE10]] ] ; UNROLL-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[PRED_UDIV_CONTINUE10]] ] +; UNROLL-NEXT: [[TMP8:%.*]] = or i32 [[INDEX]], 1 ; UNROLL-NEXT: [[TMP0:%.*]] = or i32 [[INDEX]], 2 +; UNROLL-NEXT: [[TMP17:%.*]] = or i32 [[INDEX]], 3 ; UNROLL-NEXT: [[TMP1:%.*]] = sext i32 [[INDEX]] to i64 ; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP1]] ; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 @@ -2142,7 +2144,6 @@ ; UNROLL-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_UDIV_IF]] ] ; UNROLL-NEXT: br i1 [[C]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4:%.*]] ; UNROLL: pred.udiv.if3: -; UNROLL-NEXT: [[TMP8:%.*]] = or i32 [[INDEX]], 1 ; UNROLL-NEXT: [[TMP9:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i64 1 ; UNROLL-NEXT: [[TMP10:%.*]] = udiv i32 [[TMP9]], [[TMP8]] ; UNROLL-NEXT: [[TMP11:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP10]], i64 1 @@ -2159,7 +2160,6 @@ ; UNROLL-NEXT: [[TMP16:%.*]] = phi <2 x i32> [ poison, [[PRED_UDIV_CONTINUE4]] ], [ [[TMP15]], [[PRED_UDIV_IF7]] ] ; UNROLL-NEXT: br i1 [[C]], label [[PRED_UDIV_IF9:%.*]], label [[PRED_UDIV_CONTINUE10]] ; UNROLL: pred.udiv.if9: -; UNROLL-NEXT: [[TMP17:%.*]] = or i32 [[INDEX]], 3 ; UNROLL-NEXT: [[TMP18:%.*]] = extractelement <2 x i32> [[WIDE_LOAD2]], i64 1 ; UNROLL-NEXT: [[TMP19:%.*]] = udiv i32 [[TMP18]], [[TMP17]] ; UNROLL-NEXT: [[TMP20:%.*]] = insertelement <2 x i32> [[TMP16]], i32 [[TMP19]], i64 1 @@ -2224,7 +2224,9 @@ ; UNROLL-NO-IC-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[PRED_UDIV_CONTINUE10]] ] ; UNROLL-NO-IC-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[PRED_UDIV_CONTINUE10]] ] ; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = add i32 [[INDEX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 2 +; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = add i32 [[INDEX]], 3 ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP0]] ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP1]] ; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 @@ -2243,7 +2245,6 @@ ; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 1 ; UNROLL-NO-IC-NEXT: br i1 [[TMP11]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4:%.*]] ; UNROLL-NO-IC: pred.udiv.if3: -; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = add i32 [[INDEX]], 1 ; UNROLL-NO-IC-NEXT: [[TMP13:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1 ; UNROLL-NO-IC-NEXT: [[TMP14:%.*]] = udiv i32 [[TMP13]], [[TMP12]] ; UNROLL-NO-IC-NEXT: [[TMP15:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP14]], i32 1 @@ -2262,7 +2263,6 @@ ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT6]], i32 1 ; UNROLL-NO-IC-NEXT: br i1 [[TMP22]], label [[PRED_UDIV_IF9:%.*]], label [[PRED_UDIV_CONTINUE10]] ; UNROLL-NO-IC: pred.udiv.if9: -; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = add i32 [[INDEX]], 3 ; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = extractelement <2 x i32> [[WIDE_LOAD2]], i32 1 ; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = udiv 
i32 [[TMP24]], [[TMP23]] ; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = insertelement <2 x i32> [[TMP21]], i32 [[TMP25]], i32 1 @@ -2320,7 +2320,13 @@ ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE18:%.*]] ] ; INTERLEAVE-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_UDIV_CONTINUE18]] ] ; INTERLEAVE-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP47:%.*]], [[PRED_UDIV_CONTINUE18]] ] +; INTERLEAVE-NEXT: [[TMP8:%.*]] = or i32 [[INDEX]], 1 +; INTERLEAVE-NEXT: [[TMP13:%.*]] = or i32 [[INDEX]], 2 +; INTERLEAVE-NEXT: [[TMP18:%.*]] = or i32 [[INDEX]], 3 ; INTERLEAVE-NEXT: [[TMP0:%.*]] = or i32 [[INDEX]], 4 +; INTERLEAVE-NEXT: [[TMP27:%.*]] = or i32 [[INDEX]], 5 +; INTERLEAVE-NEXT: [[TMP32:%.*]] = or i32 [[INDEX]], 6 +; INTERLEAVE-NEXT: [[TMP37:%.*]] = or i32 [[INDEX]], 7 ; INTERLEAVE-NEXT: [[TMP1:%.*]] = sext i32 [[INDEX]] to i64 ; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP1]] ; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 @@ -2336,7 +2342,6 @@ ; INTERLEAVE-NEXT: [[TMP7:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_UDIV_IF]] ] ; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4:%.*]] ; INTERLEAVE: pred.udiv.if3: -; INTERLEAVE-NEXT: [[TMP8:%.*]] = or i32 [[INDEX]], 1 ; INTERLEAVE-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 1 ; INTERLEAVE-NEXT: [[TMP10:%.*]] = udiv i32 [[TMP9]], [[TMP8]] ; INTERLEAVE-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP10]], i64 1 @@ -2345,7 +2350,6 @@ ; INTERLEAVE-NEXT: [[TMP12:%.*]] = phi <4 x i32> [ [[TMP7]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP11]], [[PRED_UDIV_IF3]] ] ; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]] ; INTERLEAVE: pred.udiv.if5: -; INTERLEAVE-NEXT: [[TMP13:%.*]] = or i32 [[INDEX]], 2 ; INTERLEAVE-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 2 ; INTERLEAVE-NEXT: [[TMP15:%.*]] = udiv i32 [[TMP14]], [[TMP13]] ; INTERLEAVE-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP15]], i64 2 @@ -2354,7 +2358,6 @@ ; INTERLEAVE-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP12]], [[PRED_UDIV_CONTINUE4]] ], [ [[TMP16]], [[PRED_UDIV_IF5]] ] ; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8:%.*]] ; INTERLEAVE: pred.udiv.if7: -; INTERLEAVE-NEXT: [[TMP18:%.*]] = or i32 [[INDEX]], 3 ; INTERLEAVE-NEXT: [[TMP19:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 3 ; INTERLEAVE-NEXT: [[TMP20:%.*]] = udiv i32 [[TMP19]], [[TMP18]] ; INTERLEAVE-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP20]], i64 3 @@ -2371,7 +2374,6 @@ ; INTERLEAVE-NEXT: [[TMP26:%.*]] = phi <4 x i32> [ poison, [[PRED_UDIV_CONTINUE8]] ], [ [[TMP25]], [[PRED_UDIV_IF11]] ] ; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF13:%.*]], label [[PRED_UDIV_CONTINUE14:%.*]] ; INTERLEAVE: pred.udiv.if13: -; INTERLEAVE-NEXT: [[TMP27:%.*]] = or i32 [[INDEX]], 5 ; INTERLEAVE-NEXT: [[TMP28:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i64 1 ; INTERLEAVE-NEXT: [[TMP29:%.*]] = udiv i32 [[TMP28]], [[TMP27]] ; INTERLEAVE-NEXT: [[TMP30:%.*]] = insertelement <4 x i32> [[TMP26]], i32 [[TMP29]], i64 1 @@ -2380,7 +2382,6 @@ ; INTERLEAVE-NEXT: [[TMP31:%.*]] = phi <4 x i32> [ [[TMP26]], [[PRED_UDIV_CONTINUE12]] ], [ [[TMP30]], [[PRED_UDIV_IF13]] ] ; INTERLEAVE-NEXT: br i1 [[C]], label 
[[PRED_UDIV_IF15:%.*]], label [[PRED_UDIV_CONTINUE16:%.*]] ; INTERLEAVE: pred.udiv.if15: -; INTERLEAVE-NEXT: [[TMP32:%.*]] = or i32 [[INDEX]], 6 ; INTERLEAVE-NEXT: [[TMP33:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i64 2 ; INTERLEAVE-NEXT: [[TMP34:%.*]] = udiv i32 [[TMP33]], [[TMP32]] ; INTERLEAVE-NEXT: [[TMP35:%.*]] = insertelement <4 x i32> [[TMP31]], i32 [[TMP34]], i64 2 @@ -2389,7 +2390,6 @@ ; INTERLEAVE-NEXT: [[TMP36:%.*]] = phi <4 x i32> [ [[TMP31]], [[PRED_UDIV_CONTINUE14]] ], [ [[TMP35]], [[PRED_UDIV_IF15]] ] ; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF17:%.*]], label [[PRED_UDIV_CONTINUE18]] ; INTERLEAVE: pred.udiv.if17: -; INTERLEAVE-NEXT: [[TMP37:%.*]] = or i32 [[INDEX]], 7 ; INTERLEAVE-NEXT: [[TMP38:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i64 3 ; INTERLEAVE-NEXT: [[TMP39:%.*]] = udiv i32 [[TMP38]], [[TMP37]] ; INTERLEAVE-NEXT: [[TMP40:%.*]] = insertelement <4 x i32> [[TMP36]], i32 [[TMP39]], i64 3 diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll @@ -30,6 +30,7 @@ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] +; CHECK-NEXT: [[TMP8:%.+]] = or i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[PAIR:%.*]], ptr [[P:%.*]], i64 [[INDEX]], i32 1 ; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <4 x i64> [[WIDE_VEC]], <4 x i64> poison, <2 x i32> @@ -45,7 +46,6 @@ ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP4]], i64 1 ; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] ; CHECK: pred.store.if1: -; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[PAIR]], ptr [[P]], i64 [[TMP8]], i32 1 ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[WIDE_VEC]], i64 2 ; CHECK-NEXT: store i64 [[TMP10]], ptr [[TMP9]], align 8 diff --git a/llvm/test/Transforms/LoopVectorize/loop-form.ll b/llvm/test/Transforms/LoopVectorize/loop-form.ll --- a/llvm/test/Transforms/LoopVectorize/loop-form.ll +++ b/llvm/test/Transforms/LoopVectorize/loop-form.ll @@ -1087,6 +1087,7 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[ADDR:%.*]], i64 [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, ptr [[TMP1]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP2]], align 4 @@ -1102,7 +1103,6 @@ ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1 ; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]] ; CHECK: pred.store.if1: -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr float, ptr [[ADDR]], i64 [[TMP8]] ; CHECK-NEXT: store float 1.000000e+01, ptr [[TMP9]], align 4 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll --- 
a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -5,7 +5,7 @@ ; Function Attrs: nofree norecurse nounwind define void @a(i8* readnone %b) { -; CHECK-LABEL: define {{[^@]+}}@a( +; CHECK-LABEL: @a( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[B1:%.*]] = ptrtoint i8* [[B:%.*]] to i64 ; CHECK-NEXT: [[CMP_NOT4:%.*]] = icmp eq i8* [[B]], null @@ -24,46 +24,46 @@ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ] ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 0, [[INDEX]] ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* null, i64 [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TMP3]], i64 -1 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 0 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i32 -3 -; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to <4 x i8>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP7]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], -2 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], -3 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* null, i64 [[TMP2]] +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 -1 +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, i8* [[TMP8]], i32 -3 +; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to <4 x i8>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP10]], align 1 ; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i8> [[WIDE_LOAD]], <4 x i8> poison, <4 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i8> [[REVERSE]], zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = xor <4 x i1> [[TMP8]], -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP9]], i32 0 -; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i8> [[REVERSE]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = xor <4 x i1> [[TMP11]], +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP12]], i32 0 +; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; CHECK: pred.store.if: -; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, i8* null, i64 [[TMP2]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 -1 -; CHECK-NEXT: store i8 95, i8* [[TMP12]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, i8* null, i64 [[TMP2]] +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8* [[TMP14]], i64 -1 +; CHECK-NEXT: store i8 95, i8* [[TMP15]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] ; CHECK: pred.store.continue: -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP9]], i32 1 -; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]] +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP12]], i32 1 +; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]] ; CHECK: pred.store.if2: -; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], -1 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i8, i8* null, i64 [[TMP14]] -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, i8* [[TMP15]], i64 -1 -; CHECK-NEXT: store i8 
95, i8* [[TMP16]], align 1 +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, i8* null, i64 [[TMP3]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, i8* [[TMP17]], i64 -1 +; CHECK-NEXT: store i8 95, i8* [[TMP18]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE3]] ; CHECK: pred.store.continue3: -; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP9]], i32 2 -; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <4 x i1> [[TMP12]], i32 2 +; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] ; CHECK: pred.store.if4: -; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[OFFSET_IDX]], -2 -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, i8* null, i64 [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, i8* [[TMP19]], i64 -1 -; CHECK-NEXT: store i8 95, i8* [[TMP20]], align 1 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, i8* null, i64 [[TMP4]] +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i8, i8* [[TMP20]], i64 -1 +; CHECK-NEXT: store i8 95, i8* [[TMP21]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE5]] ; CHECK: pred.store.continue5: -; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP9]], i32 3 -; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP12]], i32 3 +; CHECK-NEXT: br i1 [[TMP22]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]] ; CHECK: pred.store.if6: -; CHECK-NEXT: [[TMP22:%.*]] = add i64 [[OFFSET_IDX]], -3 -; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, i8* null, i64 [[TMP22]] +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i8, i8* null, i64 [[TMP5]] ; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i8, i8* [[TMP23]], i64 -1 ; CHECK-NEXT: store i8 95, i8* [[TMP24]], align 1 ; CHECK-NEXT: br label [[PRED_STORE_CONTINUE7]] @@ -129,7 +129,7 @@ ; 2. Non-uniform use by the getelementptr which is stored. This requires the ; vector value. 
define void @pointer_induction_used_as_vector(i8** noalias %start.1, i8* noalias %start.2, i64 %N) { -; CHECK-LABEL: define {{[^@]+}}@pointer_induction_used_as_vector( +; CHECK-LABEL: @pointer_induction_used_as_vector( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll @@ -11,6 +11,9 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[COND:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = fcmp une <4 x float> [[WIDE_LOAD]], @@ -26,7 +29,6 @@ ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1 ; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] ; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP10]], align 4 ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP11]], i64 1 @@ -36,7 +38,6 @@ ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP2]], i64 2 ; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] ; CHECK: pred.load.if3: -; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP15]] ; CHECK-NEXT: [[TMP17:%.*]] = load float, ptr [[TMP16]], align 4 ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP17]], i64 2 @@ -46,7 +47,6 @@ ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP2]], i64 3 ; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.if5: -; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP21]] ; CHECK-NEXT: [[TMP23:%.*]] = load float, ptr [[TMP22]], align 4 ; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP23]], i64 3 @@ -124,6 +124,9 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, ptr [[COND:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = fcmp une <4 x float> [[WIDE_LOAD]], @@ -139,7 +142,6 @@ ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1 ; CHECK-NEXT: 
br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] ; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = load float, ptr [[TMP10]], align 4 ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP11]], i64 1 @@ -149,7 +151,6 @@ ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP2]], i64 2 ; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] ; CHECK: pred.load.if3: -; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP15]] ; CHECK-NEXT: [[TMP17:%.*]] = load float, ptr [[TMP16]], align 4 ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP17]], i64 2 @@ -159,7 +160,6 @@ ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP2]], i64 3 ; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.if5: -; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[TMP21]] ; CHECK-NEXT: [[TMP23:%.*]] = load float, ptr [[TMP22]], align 4 ; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP23]], i64 3 @@ -241,6 +241,9 @@ ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 7, [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] @@ -256,7 +259,6 @@ ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1 ; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] ; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP9]] ; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 ; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP11]], i64 1 @@ -266,7 +268,6 @@ ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP2]], i64 2 ; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] ; CHECK: pred.load.if3: -; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP15]] ; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 ; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP17]], i64 2 @@ -276,7 +277,6 @@ ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP2]], i64 3 ; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] ; CHECK: pred.load.if5: -; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 3 ; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP21]] ; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 ; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP23]], i64 3