Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2140,7 +2140,8 @@
   // induction update chain itself.
   Instruction *CastInst = *Casts.begin();
   if (Lane < UINT_MAX)
-    VectorLoopValueMap.setScalarValue(CastInst, {Part, Lane}, VectorLoopVal);
+    VectorLoopValueMap.setScalarValue(CastInst, VPIteration(Part, Lane),
+                                      VectorLoopVal);
   else
     VectorLoopValueMap.setVectorValue(CastInst, Part, VectorLoopVal);
 }
@@ -2360,7 +2361,7 @@
              "scalable");
       auto *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, StartIdx, Step));
       auto *Add = addFastMathFlag(Builder.CreateBinOp(AddOp, ScalarIV, Mul));
-      VectorLoopValueMap.setScalarValue(EntryVal, {Part, Lane}, Add);
+      VectorLoopValueMap.setScalarValue(EntryVal, VPIteration(Part, Lane), Add);
       recordVectorLoopValueForInductionCast(ID, EntryVal, Add, Part, Lane);
     }
   }
@@ -2384,7 +2385,8 @@
   // instead. If it has been scalarized, and we actually need the value in
   // vector form, we will construct the vector values on demand.
   if (VectorLoopValueMap.hasAnyScalarValue(V)) {
-    Value *ScalarValue = VectorLoopValueMap.getScalarValue(V, {Part, 0});
+    Value *ScalarValue =
+        VectorLoopValueMap.getScalarValue(V, VPIteration(Part, 0));

     // If we've scalarized a value, that value should be an instruction.
     auto *I = cast<Instruction>(V);
@@ -2406,7 +2408,7 @@
     assert((!VF.isScalable() || LastLane == 0) &&
            "Scalable vectorization can't lead to any scalarized values.");
     auto *LastInst = cast<Instruction>(
-        VectorLoopValueMap.getScalarValue(V, {Part, LastLane}));
+        VectorLoopValueMap.getScalarValue(V, VPIteration(Part, LastLane)));

     // Set the insert point after the last scalarized instruction. This ensures
     // the insertelement sequence will directly follow the scalar definitions.
@@ -2430,7 +2432,7 @@
       Value *Poison = PoisonValue::get(VectorType::get(V->getType(), VF));
       VectorLoopValueMap.setVectorValue(V, Part, Poison);
       for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
-        packScalarIntoVectorValue(V, {Part, Lane});
+        packScalarIntoVectorValue(V, VPIteration(Part, Lane));
       VectorValue = VectorLoopValueMap.getVectorValue(V, Part);
     }
     Builder.restoreIP(OldIP);
@@ -2573,7 +2575,7 @@
     Index += (VF.getKnownMinValue() - 1) * Group->getFactor();

   for (unsigned Part = 0; Part < UF; Part++) {
-    Value *AddrPart = State.get(Addr, {Part, 0});
+    Value *AddrPart = State.get(Addr, VPIteration(Part, 0));
     setDebugLocFromInst(Builder, AddrPart);

     // Notice current instruction could be any index. Need to adjust the address
@@ -2825,7 +2827,7 @@
           // We don't want to update the value in the map as it might be used in
           // another expression. So don't call resetVectorValue(StoredVal).
         }
-        auto *VecPtr = CreateVecPtr(Part, State.get(Addr, {0, 0}));
+        auto *VecPtr = CreateVecPtr(Part, State.get(Addr, VPIteration(0, 0)));
         if (isMaskRequired)
           NewSI = Builder.CreateMaskedStore(StoredVal, VecPtr, Alignment,
                                             BlockInMaskParts[Part]);
@@ -2849,7 +2851,7 @@
                                          nullptr, "wide.masked.gather");
       addMetadata(NewLI, LI);
     } else {
-      auto *VecPtr = CreateVecPtr(Part, State.get(Addr, {0, 0}));
+      auto *VecPtr = CreateVecPtr(Part, State.get(Addr, VPIteration(0, 0)));
       if (isMaskRequired)
         NewLI = Builder.CreateMaskedLoad(
             VecPtr, Alignment, BlockInMaskParts[Part], PoisonValue::get(DataTy),
@@ -2877,7 +2879,7 @@
   // llvm.experimental.noalias.scope.decl intrinsics must only be duplicated for
   // the first lane and part.
   if (isa<NoAliasScopeDeclInst>(Instr))
-    if (Instance.Lane != 0 || Instance.Part != 0)
+    if (!Instance.isFirstIteration())
       return;

   setDebugLocFromInst(Builder, Instr);
@@ -4405,7 +4407,7 @@
     // extracted from the vectorized loop.
     Builder.SetInsertPoint(LoopMiddleBlock->getTerminator());
     Value *lastIncomingValue =
-        getOrCreateScalarValue(IncomingValue, { UF - 1, LastLane });
+        getOrCreateScalarValue(IncomingValue, VPIteration(UF - 1, LastLane));
     LCSSAPhi.addIncoming(lastIncomingValue, LoopMiddleBlock);
   }
 }
@@ -4551,8 +4553,9 @@
     for (unsigned Part = 0; Part < UF; ++Part) {
       // The pointer operand of the new GEP. If it's loop-invariant, we
       // won't broadcast it.
-      auto *Ptr = IsPtrLoopInvariant ? State.get(Operands.getOperand(0), {0, 0})
-                                     : State.get(Operands.getOperand(0), Part);
+      auto *Ptr = IsPtrLoopInvariant
+                      ? State.get(Operands.getOperand(0), VPIteration(0, 0))
+                      : State.get(Operands.getOperand(0), Part);

       // Collect all the indices for the new GEP. If any index is
       // loop-invariant, we won't broadcast it.
@@ -4560,7 +4563,7 @@
       for (unsigned I = 1, E = Operands.getNumOperands(); I < E; I++) {
         VPValue *Operand = Operands.getOperand(I);
         if (IsIndexLoopInvariant[I - 1])
-          Indices.push_back(State.get(Operand, {0, 0}));
+          Indices.push_back(State.get(Operand, VPIteration(0, 0)));
         else
           Indices.push_back(State.get(Operand, Part));
       }
@@ -4699,7 +4702,8 @@
           Value *SclrGep =
               emitTransformedIndex(Builder, GlobalIdx, PSE.getSE(), DL, II);
           SclrGep->setName("next.gep");
-          VectorLoopValueMap.setScalarValue(P, {Part, Lane}, SclrGep);
+          VectorLoopValueMap.setScalarValue(P, VPIteration(Part, Lane),
+                                            SclrGep);
         }
       }
       return;
@@ -4916,7 +4920,7 @@
       if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(ID, I.index()))
         Arg = State.get(I.value(), Part);
       else
-        Arg = State.get(I.value(), {0, 0});
+        Arg = State.get(I.value(), VPIteration(0, 0));
       Args.push_back(Arg);
     }

@@ -4961,8 +4965,9 @@
   // loop. This means that we can't just use the original 'cond' value.
   // We have to take the 'vectorized' value and pick the first lane.
   // Instcombine will make this a no-op.
-  auto *InvarCond =
-      InvariantCond ? State.get(Operands.getOperand(0), {0, 0}) : nullptr;
+  auto *InvarCond = InvariantCond
+                        ? State.get(Operands.getOperand(0), VPIteration(0, 0))
+                        : nullptr;

   for (unsigned Part = 0; Part < UF; ++Part) {
     Value *Cond =
@@ -9102,8 +9107,9 @@
          "Can't scalarize a scalable vector");
   for (unsigned Part = 0; Part < State.UF; ++Part)
     for (unsigned Lane = 0; Lane < EndLane; ++Lane)
-      State.ILV->scalarizeInstruction(getUnderlyingInstr(), *this, {Part, Lane},
-                                      IsPredicated, State);
+      State.ILV->scalarizeInstruction(getUnderlyingInstr(), *this,
+                                      VPIteration(Part, Lane), IsPredicated,
+                                      State);
 }

 void VPBranchOnMaskRecipe::execute(VPTransformState &State) {
Index: llvm/lib/Transforms/Vectorize/VPlan.h
===================================================================
--- llvm/lib/Transforms/Vectorize/VPlan.h
+++ llvm/lib/Transforms/Vectorize/VPlan.h
@@ -97,6 +97,10 @@

   /// in [0..VF)
   unsigned Lane;
+
+  VPIteration(unsigned Part, unsigned Lane) : Part(Part), Lane(Lane) {}
+
+  bool isFirstIteration() const { return Part == 0 && Lane == 0; }
 };

 /// This is a helper struct for maintaining vectorization state. It's used for
Index: llvm/lib/Transforms/Vectorize/VPlan.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -286,8 +286,7 @@
 }

 void VPBasicBlock::execute(VPTransformState *State) {
-  bool Replica = State->Instance &&
-                 !(State->Instance->Part == 0 && State->Instance->Lane == 0);
+  bool Replica = State->Instance && !State->Instance->isFirstIteration();
   VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB;
   VPBlockBase *SingleHPred = nullptr;
   BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
@@ -401,7 +400,7 @@
   assert(!State->Instance && "Replicating a Region with non-null instance.");

   // Enter replicating mode.
-  State->Instance = {0, 0};
+  State->Instance = VPIteration(0, 0);

   for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) {
     State->Instance->Part = Part;
@@ -497,7 +496,7 @@
   }
   case VPInstruction::ActiveLaneMask: {
     // Get first lane of vector induction variable.
-    Value *VIVElem0 = State.get(getOperand(0), {Part, 0});
+    Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
     // Get the original loop tripcount.
     Value *ScalarTC = State.TripCount;
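
Illustrative sketch (not part of the patch): a minimal, self-contained C++ example of the pattern this change introduces. The VPIteration struct below mirrors the VPlan.h hunk above, while scalarizeAt() and the UF/VF loop bounds are hypothetical stand-ins for real call sites such as scalarizeInstruction(), which take additional LLVM types (Instruction *, VPUser &, VPTransformState &).

#include <cstdio>

// Mirrors VPIteration from VPlan.h after this patch: the explicit constructor
// replaces brace-init lists like {Part, Lane} at call sites, and
// isFirstIteration() replaces open-coded "Part == 0 && Lane == 0" checks.
struct VPIteration {
  /// in [0..UF)
  unsigned Part;

  /// in [0..VF)
  unsigned Lane;

  VPIteration(unsigned Part, unsigned Lane) : Part(Part), Lane(Lane) {}

  bool isFirstIteration() const { return Part == 0 && Lane == 0; }
};

// Simplified stand-in for a call site: only acts on the first unroll part and
// lane, mirroring the noalias.scope.decl handling in scalarizeInstruction().
static void scalarizeAt(const VPIteration &Instance) {
  if (!Instance.isFirstIteration())
    return;
  std::printf("first iteration: Part=%u Lane=%u\n", Instance.Part, Instance.Lane);
}

int main() {
  unsigned UF = 2, VF = 4; // example unroll and vectorization factors
  for (unsigned Part = 0; Part < UF; ++Part)
    for (unsigned Lane = 0; Lane < VF; ++Lane)
      scalarizeAt(VPIteration(Part, Lane)); // was scalarizeAt({Part, Lane})
  return 0;
}

The explicit VPIteration(Part, Lane) constructor makes call sites name the type instead of relying on brace-init lists, and isFirstIteration() centralizes the Part == 0 && Lane == 0 check that the patch removes from VPBasicBlock::execute() and scalarizeInstruction().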