diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -201,12 +201,39 @@ !Ty->isPPC_FP128Ty(); } +/// \returns True if the value is a constant (but not globals/constant +/// expressions). +static bool isConstant(Value *V) { + return isa(V) && !isa(V) && !isa(V); +} + +/// Checks if \p V is one of vector-like instructions, i.e. undef, +/// insertelement/extractelement with constant indices for fixed vector type or +/// extractvalue instruction. +static bool isVectorLikeInstWithConstOps(Value *V) { + if (!isa(V) && + !isa(V)) + return false; + auto *I = dyn_cast(V); + if (!I || isa(I)) + return true; + if (!isa(I->getOperand(0)->getType())) + return false; + if (isa(I)) + return isConstant(I->getOperand(1)); + assert(isa(V) && "Expected only insertelement."); + return isConstant(I->getOperand(2)); +} + /// \returns true if all of the instructions in \p VL are in the same block or /// false otherwise. static bool allSameBlock(ArrayRef VL) { Instruction *I0 = dyn_cast(VL[0]); if (!I0) return false; + if (all_of(VL, isVectorLikeInstWithConstOps)) + return true; + BasicBlock *BB = I0->getParent(); for (int I = 1, E = VL.size(); I < E; I++) { auto *II = dyn_cast(VL[I]); @@ -219,12 +246,6 @@ return true; } -/// \returns True if the value is a constant (but not globals/constant -/// expressions). -static bool isConstant(Value *V) { - return isa(V) && !isa(V) && !isa(V); -} - /// \returns True if all of the values in \p VL are constants (but not /// globals/constant expressions). static bool allConstant(ArrayRef VL) { @@ -3207,7 +3228,11 @@ } // If all of the operands are identical or constant we have a simple solution. - if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !S.getOpcode()) { + // If we deal with insert/extract instructions, they all must have constant + // indices, otherwise we should gather them, not try to vectorize. + if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !S.getOpcode() || + (isa(S.MainOp) && + !all_of(VL, isVectorLikeInstWithConstOps))) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n"); newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx); return; @@ -6533,7 +6558,9 @@ Optional BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, const InstructionsState &S) { - if (isa(S.OpValue) || isa(S.OpValue)) + // No need to schedule PHIs, insertelement, extractelement and extractvalue + // instructions. + if (isa(S.OpValue) || isVectorLikeInstWithConstOps(S.OpValue)) return nullptr; // Initialize the instruction bundle. @@ -6629,7 +6656,7 @@ void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef VL, Value *OpValue) { - if (isa(OpValue) || isa(OpValue)) + if (isa(OpValue) || isVectorLikeInstWithConstOps(OpValue)) return; ScheduleData *Bundle = getScheduleData(OpValue); @@ -6669,8 +6696,9 @@ return true; Instruction *I = dyn_cast(V); assert(I && "bundle member must be an instruction"); - assert(!isa(I) && !isa(I) && - "phi nodes/insertelements don't need to be scheduled"); + assert(!isa(I) && !isVectorLikeInstWithConstOps(I) && + "phi nodes/insertelements/extractelements/extractvalues don't need to " + "be scheduled"); auto &&CheckSheduleForI = [this, &S](Instruction *I) -> bool { ScheduleData *ISD = getScheduleData(I); if (!ISD) @@ -6940,7 +6968,7 @@ for (auto *I = BS->ScheduleStart; I != BS->ScheduleEnd; I = I->getNextNode()) { BS->doForAllOpcodes(I, [this, &Idx, &NumToSchedule, BS](ScheduleData *SD) { - assert((isa(SD->Inst) || + assert((isVectorLikeInstWithConstOps(SD->Inst) || SD->isPartOfBundle() == (getTreeEntry(SD->Inst) != nullptr)) && "scheduler and vectorizer bundle mismatch"); SD->FirstInBundle->SchedulingPriority = Idx++; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll --- a/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_extract_broadcast.ll @@ -6,16 +6,8 @@ ; CHECK-LABEL: @fextr( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LD:%.*]] = load <8 x i16>, <8 x i16>* undef, align 16 -; CHECK-NEXT: [[V0:%.*]] = extractelement <8 x i16> [[LD]], i32 0 ; CHECK-NEXT: br label [[T:%.*]] ; CHECK: t: -; CHECK-NEXT: [[V1:%.*]] = extractelement <8 x i16> [[LD]], i32 1 -; CHECK-NEXT: [[V2:%.*]] = extractelement <8 x i16> [[LD]], i32 2 -; CHECK-NEXT: [[V3:%.*]] = extractelement <8 x i16> [[LD]], i32 3 -; CHECK-NEXT: [[V4:%.*]] = extractelement <8 x i16> [[LD]], i32 4 -; CHECK-NEXT: [[V5:%.*]] = extractelement <8 x i16> [[LD]], i32 5 -; CHECK-NEXT: [[V6:%.*]] = extractelement <8 x i16> [[LD]], i32 6 -; CHECK-NEXT: [[V7:%.*]] = extractelement <8 x i16> [[LD]], i32 7 ; CHECK-NEXT: [[P0:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 0 ; CHECK-NEXT: [[P1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 1 ; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 2 @@ -24,18 +16,12 @@ ; CHECK-NEXT: [[P5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 5 ; CHECK-NEXT: [[P6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 6 ; CHECK-NEXT: [[P7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i64 7 -; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x i16> poison, i16 [[V0]], i32 0 -; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> [[TMP0]], i16 [[V1]], i32 1 -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[V2]], i32 2 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[V3]], i32 3 -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i16> [[TMP3]], i16 [[V4]], i32 4 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x i16> [[TMP4]], i16 [[V5]], i32 5 -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i16> [[TMP5]], i16 [[V6]], i32 6 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[V7]], i32 7 -; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> poison, <8 x i32> -; CHECK-NEXT: [[TMP8:%.*]] = add <8 x i16> [[TMP7]], [[SHUFFLE]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16* [[P0]] to <8 x i16>* -; CHECK-NEXT: store <8 x i16> [[TMP8]], <8 x i16>* [[TMP9]], align 2 +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <8 x i16> [[LD]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i16> poison, i16 [[TMP0]], i32 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = add <8 x i16> [[LD]], [[SHUFFLE]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[P0]] to <8 x i16>* +; CHECK-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* [[TMP3]], align 2 ; CHECK-NEXT: ret void ; ; YAML: Pass: slp-vectorizer @@ -43,7 +29,7 @@ ; YAML-NEXT: Function: fextr ; YAML-NEXT: Args: ; YAML-NEXT: - String: 'Stores SLP vectorized with cost ' -; YAML-NEXT: - Cost: '-4' +; YAML-NEXT: - Cost: '-20' ; YAML-NEXT: - String: ' and with tree size ' ; YAML-NEXT: - TreeSize: '4'