Index: llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ llvm/trunk/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -345,27 +345,23 @@ /// Chooses the correct key for scheduling data. If \p Op has the same (or /// alternate) opcode as \p OpValue, the key is \p Op. Otherwise the key is \p /// OpValue. -static Value *isOneOf(Value *OpValue, Value *Op) { +static Value *isOneOf(const InstructionsState &S, Value *Op) { auto *I = dyn_cast(Op); - if (!I) - return OpValue; - auto *OpInst = cast(OpValue); - unsigned OpInstOpcode = OpInst->getOpcode(); - unsigned IOpcode = I->getOpcode(); - if (sameOpcodeOrAlt(OpInstOpcode, getAltOpcode(OpInstOpcode), IOpcode)) + if (I && sameOpcodeOrAlt(S.Opcode, S.AltOpcode, I->getOpcode())) return Op; - return OpValue; + return S.OpValue; } /// \returns analysis of the Instructions in \p VL described in /// InstructionsState, the Opcode that we suppose the whole list /// could be vectorized even if its structure is diverse. -static InstructionsState getSameOpcode(ArrayRef VL) { +static InstructionsState getSameOpcode(ArrayRef VL, + unsigned BaseIndex = 0) { // Make sure these are all Instructions. if (llvm::any_of(VL, [](Value *V) { return !isa(V); })) - return InstructionsState(VL[0], 0, 0); + return InstructionsState(VL[BaseIndex], 0, 0); - unsigned Opcode = cast(VL[0])->getOpcode(); + unsigned Opcode = cast(VL[BaseIndex])->getOpcode(); unsigned AltOpcode = Opcode; bool HasAltOpcodes = llvm::any_of(VL, [Opcode](Value *V) { return Opcode != cast(V)->getOpcode(); @@ -377,11 +373,11 @@ for (int Cnt = 0, E = VL.size(); Cnt < E; Cnt++) { unsigned InstOpcode = cast(VL[Cnt])->getOpcode(); if (!sameOpcodeOrAlt(Opcode, AltOpcode, InstOpcode)) - return InstructionsState(VL[0], 0, 0); + return InstructionsState(VL[BaseIndex], 0, 0); } } - return InstructionsState(VL[0], Opcode, AltOpcode); + return InstructionsState(VL[BaseIndex], Opcode, AltOpcode); } /// \returns true if all of the values in \p VL have the same type or false @@ -632,7 +628,8 @@ /// Set the Builder insert point to one after the last instruction in /// the bundle - void setInsertPointAfterBundle(ArrayRef VL, Value *OpValue); + void setInsertPointAfterBundle(ArrayRef VL, + const InstructionsState &S); /// \returns a vector from a collection of scalars in \p VL. Value *Gather(ArrayRef VL, VectorType *Ty); @@ -1077,7 +1074,8 @@ /// Checks if a bundle of instructions can be scheduled, i.e. has no /// cyclic dependencies. This is only a dry-run, no instructions are /// actually moved at this stage. - bool tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, Value *OpValue); + bool tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, + const InstructionsState &S); /// Un-bundles a group of instructions. void cancelScheduling(ArrayRef VL, Value *OpValue); @@ -1087,7 +1085,7 @@ /// Extends the scheduling region so that V is inside the region. /// \returns true if the region size is within the limit. - bool extendSchedulingRegion(Value *V, Value *OpValue); + bool extendSchedulingRegion(Value *V, const InstructionsState &S); /// Initialize the ScheduleData structures for new instructions in the /// scheduling region. @@ -1507,7 +1505,7 @@ BlockScheduling &BS = *BSRef.get(); - if (!BS.tryScheduleBundle(VL, this, VL0)) { + if (!BS.tryScheduleBundle(VL, this, S)) { LLVM_DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n"); assert((!BS.getScheduleData(VL0) || !BS.getScheduleData(VL0)->isPartOfBundle()) && @@ -2851,13 +2849,14 @@ } } -void BoUpSLP::setInsertPointAfterBundle(ArrayRef VL, Value *OpValue) { +void BoUpSLP::setInsertPointAfterBundle(ArrayRef VL, + const InstructionsState &S) { // Get the basic block this bundle is in. All instructions in the bundle // should be in this block. - auto *Front = cast(OpValue); + auto *Front = cast(S.OpValue); auto *BB = Front->getParent(); - const unsigned Opcode = cast(OpValue)->getOpcode(); - const unsigned AltOpcode = getAltOpcode(Opcode); + const unsigned Opcode = S.Opcode; + const unsigned AltOpcode = S.AltOpcode; assert(llvm::all_of(make_range(VL.begin(), VL.end()), [=](Value *V) -> bool { return !sameOpcodeOrAlt(Opcode, AltOpcode, cast(V)->getOpcode()) || @@ -2873,7 +2872,7 @@ // bundle. The end of the bundle is marked by null ScheduleData. if (BlocksSchedules.count(BB)) { auto *Bundle = - BlocksSchedules[BB]->getScheduleData(isOneOf(OpValue, VL.back())); + BlocksSchedules[BB]->getScheduleData(isOneOf(S, VL.back())); if (Bundle && Bundle->isPartOfBundle()) for (; Bundle; Bundle = Bundle->NextInBundle) if (Bundle->OpValue == Bundle->Inst) @@ -3029,7 +3028,7 @@ } InstructionsState S = getSameOpcode(E->Scalars); - Instruction *VL0 = cast(E->Scalars[0]); + Instruction *VL0 = cast(S.OpValue); Type *ScalarTy = VL0->getType(); if (StoreInst *SI = dyn_cast(VL0)) ScalarTy = SI->getValueOperand()->getType(); @@ -3038,7 +3037,7 @@ bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty(); if (E->NeedToGather) { - setInsertPointAfterBundle(E->Scalars, VL0); + setInsertPointAfterBundle(E->Scalars, S); auto *V = Gather(E->Scalars, VecTy); if (NeedToShuffleReuses) { V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy), @@ -3115,7 +3114,7 @@ E->VectorizedValue = V; return V; } - setInsertPointAfterBundle(E->Scalars, VL0); + setInsertPointAfterBundle(E->Scalars, S); auto *V = Gather(E->Scalars, VecTy); if (NeedToShuffleReuses) { V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy), @@ -3150,7 +3149,7 @@ E->VectorizedValue = NewV; return NewV; } - setInsertPointAfterBundle(E->Scalars, VL0); + setInsertPointAfterBundle(E->Scalars, S); auto *V = Gather(E->Scalars, VecTy); if (NeedToShuffleReuses) { V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy), @@ -3179,7 +3178,7 @@ for (Value *V : E->Scalars) INVL.push_back(cast(V)->getOperand(0)); - setInsertPointAfterBundle(E->Scalars, VL0); + setInsertPointAfterBundle(E->Scalars, S); Value *InVec = vectorizeTree(INVL); @@ -3206,7 +3205,7 @@ RHSV.push_back(cast(V)->getOperand(1)); } - setInsertPointAfterBundle(E->Scalars, VL0); + setInsertPointAfterBundle(E->Scalars, S); Value *L = vectorizeTree(LHSV); Value *R = vectorizeTree(RHSV); @@ -3240,7 +3239,7 @@ FalseVec.push_back(cast(V)->getOperand(2)); } - setInsertPointAfterBundle(E->Scalars, VL0); + setInsertPointAfterBundle(E->Scalars, S); Value *Cond = vectorizeTree(CondVec); Value *True = vectorizeTree(TrueVec); @@ -3289,7 +3288,7 @@ RHSVL.push_back(I->getOperand(1)); } - setInsertPointAfterBundle(E->Scalars, VL0); + setInsertPointAfterBundle(E->Scalars, S); Value *LHS = vectorizeTree(LHSVL); Value *RHS = vectorizeTree(RHSVL); @@ -3318,9 +3317,11 @@ // Loads are inserted at the head of the tree because we don't want to // sink them all the way down past store instructions. bool IsReorder = !E->ReorderIndices.empty(); - if (IsReorder) - VL0 = cast(E->Scalars[E->ReorderIndices.front()]); - setInsertPointAfterBundle(E->Scalars, VL0); + if (IsReorder) { + S = getSameOpcode(E->Scalars, E->ReorderIndices.front()); + VL0 = cast(S.OpValue); + } + setInsertPointAfterBundle(E->Scalars, S); LoadInst *LI = cast(VL0); Type *ScalarLoadTy = LI->getType(); @@ -3367,12 +3368,12 @@ for (Value *V : E->Scalars) ScalarStoreValues.push_back(cast(V)->getValueOperand()); - setInsertPointAfterBundle(E->Scalars, VL0); + setInsertPointAfterBundle(E->Scalars, S); Value *VecValue = vectorizeTree(ScalarStoreValues); Value *ScalarPtr = SI->getPointerOperand(); Value *VecPtr = Builder.CreateBitCast(ScalarPtr, VecTy->getPointerTo(AS)); - StoreInst *S = Builder.CreateStore(VecValue, VecPtr); + StoreInst *ST = Builder.CreateStore(VecValue, VecPtr); // The pointer operand uses an in-tree scalar, so add the new BitCast to // ExternalUses to make sure that an extract will be generated in the @@ -3383,8 +3384,8 @@ if (!Alignment) Alignment = DL->getABITypeAlignment(SI->getValueOperand()->getType()); - S->setAlignment(Alignment); - Value *V = propagateMetadata(S, E->Scalars); + ST->setAlignment(Alignment); + Value *V = propagateMetadata(ST, E->Scalars); if (NeedToShuffleReuses) { V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy), E->ReuseShuffleIndices, "shuffle"); @@ -3394,7 +3395,7 @@ return V; } case Instruction::GetElementPtr: { - setInsertPointAfterBundle(E->Scalars, VL0); + setInsertPointAfterBundle(E->Scalars, S); ValueList Op0VL; for (Value *V : E->Scalars) @@ -3429,7 +3430,7 @@ } case Instruction::Call: { CallInst *CI = cast(VL0); - setInsertPointAfterBundle(E->Scalars, VL0); + setInsertPointAfterBundle(E->Scalars, S); Function *FI; Intrinsic::ID IID = Intrinsic::not_intrinsic; Value *ScalarArg = nullptr; @@ -3486,7 +3487,7 @@ "Invalid Shuffle Vector Operand"); reorderAltShuffleOperands(S.Opcode, S.AltOpcode, E->Scalars, LHSVL, RHSVL); - setInsertPointAfterBundle(E->Scalars, VL0); + setInsertPointAfterBundle(E->Scalars, S); Value *LHS = vectorizeTree(LHSVL); Value *RHS = vectorizeTree(RHSVL); @@ -3789,8 +3790,9 @@ // Groups the instructions to a bundle (which is then a single scheduling entity) // and schedules instructions until the bundle gets ready. bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, - BoUpSLP *SLP, Value *OpValue) { - if (isa(OpValue)) + BoUpSLP *SLP, + const InstructionsState &S) { + if (isa(S.OpValue)) return true; // Initialize the instruction bundle. @@ -3798,12 +3800,12 @@ ScheduleData *PrevInBundle = nullptr; ScheduleData *Bundle = nullptr; bool ReSchedule = false; - LLVM_DEBUG(dbgs() << "SLP: bundle: " << *OpValue << "\n"); + LLVM_DEBUG(dbgs() << "SLP: bundle: " << *S.OpValue << "\n"); // Make sure that the scheduling region contains all // instructions of the bundle. for (Value *V : VL) { - if (!extendSchedulingRegion(V, OpValue)) + if (!extendSchedulingRegion(V, S)) return false; } @@ -3870,7 +3872,7 @@ } } if (!Bundle->isReady()) { - cancelScheduling(VL, OpValue); + cancelScheduling(VL, S.OpValue); return false; } return true; @@ -3913,13 +3915,13 @@ } bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V, - Value *OpValue) { - if (getScheduleData(V, isOneOf(OpValue, V))) + const InstructionsState &S) { + if (getScheduleData(V, isOneOf(S, V))) return true; Instruction *I = dyn_cast(V); assert(I && "bundle member must be an instruction"); assert(!isa(I) && "phi nodes don't need to be scheduled"); - auto &&CheckSheduleForI = [this, OpValue](Instruction *I) -> bool { + auto &&CheckSheduleForI = [this, &S](Instruction *I) -> bool { ScheduleData *ISD = getScheduleData(I); if (!ISD) return false; @@ -3927,8 +3929,8 @@ "ScheduleData not in scheduling region"); ScheduleData *SD = allocateScheduleDataChunks(); SD->Inst = I; - SD->init(SchedulingRegionID, OpValue); - ExtraScheduleDataMap[I][OpValue] = SD; + SD->init(SchedulingRegionID, S.OpValue); + ExtraScheduleDataMap[I][S.OpValue] = SD; return true; }; if (CheckSheduleForI(I)) @@ -3938,7 +3940,7 @@ initScheduleData(I, I->getNextNode(), nullptr, nullptr); ScheduleStart = I; ScheduleEnd = I->getNextNode(); - if (isOneOf(OpValue, I) != I) + if (isOneOf(S, I) != I) CheckSheduleForI(I); assert(ScheduleEnd && "tried to vectorize a TerminatorInst?"); LLVM_DEBUG(dbgs() << "SLP: initialize schedule region to " << *I << "\n"); @@ -3961,7 +3963,7 @@ if (&*UpIter == I) { initScheduleData(I, ScheduleStart, nullptr, FirstLoadStoreInRegion); ScheduleStart = I; - if (isOneOf(OpValue, I) != I) + if (isOneOf(S, I) != I) CheckSheduleForI(I); LLVM_DEBUG(dbgs() << "SLP: extend schedule region start to " << *I << "\n"); @@ -3974,7 +3976,7 @@ initScheduleData(ScheduleEnd, I->getNextNode(), LastLoadStoreInRegion, nullptr); ScheduleEnd = I->getNextNode(); - if (isOneOf(OpValue, I) != I) + if (isOneOf(S, I) != I) CheckSheduleForI(I); assert(ScheduleEnd && "tried to vectorize a TerminatorInst?"); LLVM_DEBUG(dbgs() << "SLP: extend schedule region end to " << *I