diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1190,7 +1190,12 @@ APInt &UndefElts, unsigned Depth, bool AllowMultipleUsers) { - unsigned VWidth = cast(V->getType())->getNumElements(); + // Cannot analyze scalable vector types: the number of vector elements is + // not a compile-time constant. + if (isa(V->getType())) + return nullptr; + + unsigned VWidth = cast(V->getType())->getNumElements(); APInt EltMask(APInt::getAllOnesValue(VWidth)); assert((DemandedElts & ~EltMask) == 0 && "Invalid DemandedElts!"); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -611,7 +611,7 @@ Value *PermittedRHS, InstCombiner &IC) { assert(V->getType()->isVectorTy() && "Invalid shuffle!"); - unsigned NumElts = cast(V->getType())->getNumElements(); + unsigned NumElts = cast(V->getType())->getNumElements(); if (isa(V)) { Mask.assign(NumElts, -1); @@ -723,9 +723,14 @@ } static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) { + // Cannot analyze scalable vector types: the number of elements is not a + // compile-time constant. + if (isa(Shuf.getOperand(0)->getType())) + return false; + int MaskSize = Shuf.getShuffleMask().size(); int VecSize = - cast(Shuf.getOperand(0)->getType())->getNumElements(); + cast(Shuf.getOperand(0)->getType())->getNumElements(); // A vector select does not change the size of the operands. 
if (MaskSize != VecSize) @@ -751,8 +756,12 @@ if (InsElt.hasOneUse() && isa(InsElt.user_back())) return nullptr; - auto *VecTy = cast(InsElt.getType()); - unsigned NumElements = VecTy->getNumElements(); + VectorType *VecTy = InsElt.getType(); + // Cannot handle scalable vector types: the number of elements is not a + // compile-time constant. + if (isa(VecTy)) + return nullptr; + unsigned NumElements = cast(VecTy)->getNumElements(); // Do not try to do this for a one-element vector, since that's a nop, // and will cause an inf-loop. @@ -820,6 +829,11 @@ if (!Shuf || !Shuf->isZeroEltSplat()) return nullptr; + // Bail out early if the shuffle has a scalable type: the number of elements + // in its shuffle mask is unknown at compile time. + if (isa(Shuf->getType())) + return nullptr; + // Check for a constant insertion index. uint64_t IdxC; if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC))) @@ -852,6 +866,11 @@ !(Shuf->isIdentityWithExtract() || Shuf->isIdentityWithPadding())) return nullptr; + // Bail out early if the shuffle has a scalable type: the number of elements + // in its shuffle mask is unknown at compile time. + if (isa(Shuf->getType())) + return nullptr; + // Check for a constant insertion index. uint64_t IdxC; if (!match(InsElt.getOperand(2), m_ConstantInt(IdxC))) @@ -975,7 +994,12 @@ } else if (auto *IEI = dyn_cast(Inst)) { // Transform sequences of insertelements ops with constant data/indexes into // a single shuffle op. - unsigned NumElts = InsElt.getType()->getNumElements(); + // Cannot handle scalable vector types: the number of elements needed to + // create the shuffle mask is not a compile-time constant. + if (isa(InsElt.getType())) + return nullptr; + unsigned NumElts = + cast(InsElt.getType())->getNumElements(); uint64_t InsertIdx[2]; Constant *Val[2]; @@ -1036,14 +1060,19 @@ return new BitCastInst(NewInsElt, IE.getType()); } - // If the inserted element was extracted from some other vector and both - // indexes are valid constants, try to turn this into a shuffle. 
+ // If the inserted element was extracted from some other fixed-length vector + // and both indexes are valid constants, try to turn this into a shuffle. + // Cannot handle scalable vector types: the number of elements needed to + // create the shuffle mask is not a compile-time constant. uint64_t InsertedIdx, ExtractedIdx; Value *ExtVecOp; - if (match(IdxOp, m_ConstantInt(InsertedIdx)) && + if (isa(IE.getType()) && + match(IdxOp, m_ConstantInt(InsertedIdx)) && match(ScalarOp, m_ExtractElement(m_Value(ExtVecOp), m_ConstantInt(ExtractedIdx))) && - ExtractedIdx < cast(ExtVecOp->getType())->getNumElements()) { + isa(ExtVecOp->getType()) && + ExtractedIdx < + cast(ExtVecOp->getType())->getNumElements()) { // TODO: Looking at the user(s) to determine if this insert is a // fold-to-shuffle opportunity does not match the usual instcombine // constraints. We should decide if the transform is worthy based only @@ -1083,13 +1112,15 @@ } } - unsigned VWidth = cast(VecOp->getType())->getNumElements(); - APInt UndefElts(VWidth, 0); - APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); - if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) { - if (V != &IE) - return replaceInstUsesWith(IE, V); - return &IE; + if (auto VecTy = dyn_cast(VecOp->getType())) { + unsigned VWidth = VecTy->getNumElements(); + APInt UndefElts(VWidth, 0); + APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); + if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) { + if (V != &IE) + return replaceInstUsesWith(IE, V); + return &IE; + } } if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE)) diff --git a/llvm/test/Transforms/InstCombine/vscale_insertelement.ll b/llvm/test/Transforms/InstCombine/vscale_insertelement.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/vscale_insertelement.ll @@ -0,0 +1,85 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -instcombine < %s | FileCheck %s + +; 
This test checks that bitcast is moved after insertelement when both vector and scalar are +; bitcast from the same element type. +; inselt (bitcast VecSrc), (bitcast ScalarSrc), IdxOp +; --> bitcast (inselt VecSrc, ScalarSrc, IdxOp) +define @insertelement_bitcast( %vec, i32 %x) { +; CHECK-LABEL: @insertelement_bitcast( +; CHECK-NEXT: [[TMP1:%.*]] = insertelement [[VEC:%.*]], i32 [[X:%.*]], i32 0 +; CHECK-NEXT: [[R:%.*]] = bitcast [[TMP1]] to +; CHECK-NEXT: ret [[R]] +; + %x_cast = bitcast i32 %x to float + %vec_cast = bitcast %vec to + %r = insertelement %vec_cast, float %x_cast, i32 0 + ret %r +} + +; This test checks that the code path "Try to form a shuffle from a chain of extract-insert ops" is +; not taken when both extract and insert are of scalable type. +; For scalable types, the vector length needed to create the shuffle mask is not a compile-time constant. +; Meanwhile, for scalable types the shuffle mask only supports splat and undef in the current code base. +; Otherwise we crash at: +; "Assertion `isValidOperands(V1, V2, Mask) && "Invalid shuffle vector instruction operands!"' failed." +define @insertelement_extractelement( %a, %b) { +; CHECK-LABEL: @insertelement_extractelement( +; CHECK-NEXT: [[T0:%.*]] = extractelement [[A:%.*]], i32 1 +; CHECK-NEXT: [[T1:%.*]] = insertelement [[B:%.*]], i32 [[T0]], i32 0 +; CHECK-NEXT: ret [[T1]] +; + %t0 = extractelement %a, i32 1 + %t1 = insertelement %b, i32 %t0, i32 0 + ret %t1 +} + +; This test checks that we are not attempting to create a shuffle from an extract/insert chain, +; when extract is from a scalable type, and the insert vector is fixed-length. 
+define <4 x i32> @insertelement_extractelement_fixed_vec_extract_from_scalable( %a, <4 x i32> %b) { +; CHECK-LABEL: @insertelement_extractelement_fixed_vec_extract_from_scalable( +; CHECK-NEXT: [[T0:%.*]] = extractelement [[A:%.*]], i32 1 +; CHECK-NEXT: [[T1:%.*]] = insertelement <4 x i32> [[B:%.*]], i32 [[T0]], i32 0 +; CHECK-NEXT: ret <4 x i32> [[T1]] +; + %t0 = extractelement %a, i32 1 + %t1 = insertelement <4 x i32> %b, i32 %t0, i32 0 + ret <4 x i32> %t1 +} + +; This test checks that the optimization "foldConstantInsEltIntoShuffle" is not taken for scalable types. +; Particularly the fold: +; insertelt (insertelt X, C1, CIndex1), C, CIndex +; --> shufflevector X, CVec', Mask' +; For scalable types, the vector length needed to create the shuffle mask is not a compile-time constant. +; Meanwhile, for scalable types the shuffle mask only supports splat and undef in the current code base. +; Otherwise we crash at: +; "Assertion `isValidOperands(V1, V2, Mask) && "Invalid shuffle vector instruction operands!"' failed." +define @insertelement_insertelement( %vec) { +; CHECK-LABEL: @insertelement_insertelement( +; CHECK-NEXT: [[T0:%.*]] = insertelement [[VEC:%.*]], i32 1, i32 1 +; CHECK-NEXT: [[T1:%.*]] = insertelement [[T0]], i32 2, i32 2 +; CHECK-NEXT: ret [[T1]] +; + %t0 = insertelement %vec, i32 1, i32 1 + %t1 = insertelement %t0, i32 2, i32 2 + ret %t1 +} + +; This test checks that the following insertelement sequence is not folded into a shuffle splat. +; The length of a scalable vector is unknown at compile time. Therefore the following insertelements +; may not form a valid splat. 
+define @insertelement_sequene_may_not_be_splat(float %x) { +; CHECK-LABEL: @insertelement_sequene_may_not_be_splat( +; CHECK-NEXT: [[T0:%.*]] = insertelement undef, float [[X:%.*]], i32 0 +; CHECK-NEXT: [[T1:%.*]] = insertelement [[T0]], float [[X]], i32 1 +; CHECK-NEXT: [[T2:%.*]] = insertelement [[T1]], float [[X]], i32 2 +; CHECK-NEXT: [[T3:%.*]] = insertelement [[T2]], float [[X]], i32 3 +; CHECK-NEXT: ret [[T3]] +; + %t0 = insertelement undef, float %x, i32 0 + %t1 = insertelement %t0, float %x, i32 1 + %t2 = insertelement %t1, float %x, i32 2 + %t3 = insertelement %t2, float %x, i32 3 + ret %t3 +}