Index: lib/Transforms/InstCombine/InstCombineInternal.h
===================================================================
--- lib/Transforms/InstCombine/InstCombineInternal.h
+++ lib/Transforms/InstCombine/InstCombineInternal.h
@@ -799,6 +799,10 @@
   ///
   /// If the multiplication is known not to overflow then NoSignedWrap is set.
   Value *Descale(Value *Val, APInt Scale, bool &NoSignedWrap);
+
+  /// Rearrange a shuffle-bitcast-shuffle sequence into
+  /// shuffle-shuffle-bitcast or bitcast-shuffle-shuffle.
+  Instruction *RearangeShuffleBitcastShuffle(ShuffleVectorInst &Shuf);
 };
 
 } // end namespace llvm
Index: lib/Transforms/InstCombine/InstCombineVectorOps.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1206,6 +1206,11 @@
     Value *V = EvaluateInDifferentElementOrder(LHS, Mask);
     return replaceInstUsesWith(SVI, V);
   }
+  if (isa<UndefValue>(RHS) && isa<BitCastInst>(LHS)) {
+    Instruction *Inst = RearangeShuffleBitcastShuffle(SVI);
+    if (Inst)
+      return replaceInstUsesWith(SVI, Inst);
+  }
 
   // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to
   // a non-vector type. We can instead bitcast the original vector followed by
Index: lib/Transforms/InstCombine/InstructionCombining.cpp
===================================================================
--- lib/Transforms/InstCombine/InstructionCombining.cpp
+++ lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1408,6 +1408,261 @@
   } while (true);
 }
 
+// Recomputes a shuffle mask for a vector type with smaller elements.
+// InNumElts, InEltBytes     - the target vector shape, e.g. <16 x 1-byte>
+// ShufNumElts, ShufEltBytes - the current shuffle shape, e.g. <4 x 4-byte>
+// ShufMask                  - the current shuffle mask
+// NewShufMask               - the recomputed shuffle mask
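+//
+// Worked example (illustrative, not lifted from the test suite): recomputing
+// a <4 x i32> mask <1, 0, 3, 2> for <8 x i16> uses ChunkSize = 2 and yields
+// <2, 3, 0, 1, 6, 7, 4, 5>, i.e. i32 lane k expands to the i16 lane pair
+// (2*k, 2*k+1).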
+static bool ShuffleMaskForSmallerVectorElement(
+    int InNumElts, int ShufNumElts, int InEltBytes, int ShufEltBytes,
+    const SmallVector<int, 16> &ShufMask, SmallVector<int, 16> &NewShufMask) {
+
+  if ((int)ShufMask.size() != ShufNumElts)
+    return false;
+  if ((InNumElts * InEltBytes) != (ShufNumElts * ShufEltBytes))
+    return false;
+  int ChunkSize = ShufEltBytes / InEltBytes;
+  if (!ChunkSize)
+    return false;
+  NewShufMask.resize(InNumElts);
+  for (int i = 0; i < ShufNumElts; ++i) {
+    int ShufMaskElt = ShufMask[i];
+    for (int j = 0, m = i * ChunkSize; j < ChunkSize; ++j) {
+      if ((ShufMaskElt < 0) || (ShufMaskElt >= ShufNumElts))
+        NewShufMask[m + j] = -1;
+      else
+        NewShufMask[m + j] = (ShufMaskElt * ChunkSize) + j;
+    }
+  }
+  return true;
+}
+
+// Recomputes a shuffle mask for a vector type with bigger elements.
+// InNumElts, InEltBytes     - the target vector shape, e.g. <4 x 4-byte>
+// ShufNumElts, ShufEltBytes - the current shuffle shape, e.g. <16 x 1-byte>
+// ShufMask                  - the current shuffle mask
+// NewShufMask               - the recomputed shuffle mask
+// NOTE: this does not always succeed.
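+//
+// Worked example (illustrative, not lifted from the test suite): an <8 x i16>
+// mask <2, 3, 0, 1, 6, 7, 4, 5> widens to the <4 x i32> mask <1, 0, 3, 2>,
+// because every pair of i16 lanes is sequential and starts on an i32
+// boundary; a mask beginning <0, 2, ...> cannot be widened because the pair
+// (0, 2) is not sequential.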
+static bool ShuffleMaskForBiggerVectorElement(
+    int InNumElts, int ShufNumElts, int InEltBytes, int ShufEltBytes,
+    const SmallVector<int, 16> &ShufMask, SmallVector<int, 16> &NewShufMask) {
+
+  if ((int)ShufMask.size() != ShufNumElts)
+    return false;
+  if ((InNumElts * InEltBytes) != (ShufNumElts * ShufEltBytes))
+    return false;
+  int ChunkSize = InEltBytes / ShufEltBytes;
+  if (!ChunkSize)
+    return false;
+
+  NewShufMask.resize(InNumElts);
+  for (int i = 0, m = 0; i < ShufNumElts; i += ChunkSize, m++) {
+    int ShufMaskElt = ShufMask[i];
+    bool IsUndef = (ShufMaskElt < 0) || (ShufMaskElt >= ShufNumElts);
+    // A defined chunk must start on an element boundary of the bigger type.
+    if (!IsUndef && (ShufMaskElt % ChunkSize) != 0)
+      return false;
+    for (int j = 0; j < ChunkSize; ++j) {
+      if (IsUndef) {
+        // Either all elements in a chunk are undefined ...
+        if (ShufMask[i + j] != ShufMaskElt)
+          return false;
+      } else {
+        // ... or they are all sequential.
+        if (ShufMask[i + j] != (ShufMaskElt + j))
+          return false;
+      }
+    }
+    NewShufMask[m] = IsUndef ? -1 : ShufMaskElt / ChunkSize;
+  }
+  return true;
+}
+
+static Constant *ShuffleMaskToConstantVector(ArrayRef<int> ShufMask,
+                                             InstCombiner::BuilderTy &Builder) {
+  SmallVector<Constant *, 16> MaskValues;
+  for (int i = 0, e = ShufMask.size(); i != e; ++i) {
+    if (ShufMask[i] == -1)
+      MaskValues.push_back(UndefValue::get(Builder.getInt32Ty()));
+    else
+      MaskValues.push_back(Builder.getInt32(ShufMask[i]));
+  }
+  return ConstantVector::get(MaskValues);
+}
+
+template <typename VectorInstType>
+static bool getVectorInstructionTypeInfo(const VectorInstType &VectInst,
+                                         int &NumElts, int &EltBytes) {
+  NumElts = EltBytes = 0;
+  VectorType *VectInstTy = dyn_cast<VectorType>(VectInst.getType());
+  if (!VectInstTy)
+    return false;
+  NumElts = VectInstTy->getVectorNumElements();
+  EltBytes = VectInstTy->getVectorElementType()->getScalarSizeInBits() / 8;
+  return true;
+}
+
+template <typename VectorInstType>
+static bool getVectorInstructionOperandTypeInfo(const VectorInstType &VectInst,
+                                                int OpNum, int &NumElts,
+                                                int &EltBytes) {
+  NumElts = EltBytes = 0;
+  Value *VectInstOp = VectInst.getOperand(OpNum);
+  if (isa<UndefValue>(VectInstOp))
+    return true;
+  VectorType *VectInstOpTy = dyn_cast<VectorType>(VectInstOp->getType());
+  if (!VectInstOpTy)
+    return false;
+  NumElts = VectInstOpTy->getVectorNumElements();
+  EltBytes = VectInstOpTy->getVectorElementType()->getScalarSizeInBits() / 8;
+  return true;
+}
+
+static bool ComputeShuffleMaskForSmallerVectorElement(
+    const BinaryOperator &Inst, const ShuffleVectorInst &Shuf,
+    Constant *&NewMask, InstCombiner::BuilderTy &Builder) {
+
+  SmallVector<int, 16> ShufMask = Shuf.getShuffleMask();
+  int InstNumElts, ShufNumElts, InstEltBytes, ShufEltBytes;
+  if (!getVectorInstructionTypeInfo(Inst, InstNumElts, InstEltBytes))
+    return false;
+  if (!getVectorInstructionTypeInfo(Shuf, ShufNumElts, ShufEltBytes))
+    return false;
+  SmallVector<int, 16> NewShufMask;
+  bool ok = ShuffleMaskForSmallerVectorElement(InstNumElts, ShufNumElts,
+                                               InstEltBytes, ShufEltBytes,
+                                               ShufMask, NewShufMask);
+  if (!ok)
+    return false;
+  NewMask = ShuffleMaskToConstantVector(NewShufMask, Builder);
+  return true;
+}
+
+static bool ComputeShuffleMaskForBiggerVectorElement(
+    const BinaryOperator &Inst, const ShuffleVectorInst &Shuf,
+    Constant *&NewMask, InstCombiner::BuilderTy &Builder) {
+
+  SmallVector<int, 16> ShufMask = Shuf.getShuffleMask();
+  int InstNumElts, ShufNumElts, InstEltBytes, ShufEltBytes;
+  if (!getVectorInstructionTypeInfo(Inst, InstNumElts, InstEltBytes))
+    return false;
+  if (!getVectorInstructionTypeInfo(Shuf, ShufNumElts, ShufEltBytes))
+    return false;
+  SmallVector<int, 16> NewShufMask;
+  bool ok = ShuffleMaskForBiggerVectorElement(InstNumElts, ShufNumElts,
+                                              InstEltBytes, ShufEltBytes,
+                                              ShufMask, NewShufMask);
+  if (!ok)
+    return false;
+  NewMask = ShuffleMaskToConstantVector(NewShufMask, Builder);
+  return true;
+}
+
+static bool ComputeShuffleMaskForDifferentVectorType(
+    const BinaryOperator &Inst, const ShuffleVectorInst &Shuf,
+    Constant *&NewMask, InstCombiner::BuilderTy &Builder) {
+
+  int InstNumElts, ShufNumElts, InstEltBytes, ShufEltBytes;
+  if (!getVectorInstructionTypeInfo(Inst, InstNumElts, InstEltBytes))
+    return false;
+  if (!getVectorInstructionTypeInfo(Shuf, ShufNumElts, ShufEltBytes))
+    return false;
+  if (InstNumElts > ShufNumElts)
+    return ComputeShuffleMaskForSmallerVectorElement(Inst, Shuf, NewMask,
+                                                     Builder);
+  if (InstNumElts < ShufNumElts)
+    return ComputeShuffleMaskForBiggerVectorElement(Inst, Shuf, NewMask,
+                                                    Builder);
+  return false;
+}
+
+static Instruction *BitcastShuffleShuffleSequence(
+    ShuffleVectorInst &Shuf1, ShuffleVectorInst &Shuf2,
+    const SmallVector<int, 16> &NewShufMask, InstCombiner::BuilderTy &Builder) {
+  Constant *NewShuf2MaskC = ShuffleMaskToConstantVector(NewShufMask, Builder);
+  Constant *Shuf2MaskC = Shuf2.getMask();
+  Value *NewBitCast =
+      Builder.CreateBitCast(Shuf1.getOperand(0), Shuf2.getType());
+  Value *NewShuf1 = Builder.CreateShuffleVector(
+      NewBitCast, UndefValue::get(Shuf2.getType()), NewShuf2MaskC);
+  Value *NewShuf2 = Builder.CreateShuffleVector(
+      NewShuf1, UndefValue::get(Shuf2.getType()), Shuf2MaskC);
+  return cast<Instruction>(NewShuf2);
+}
+
+static Instruction *ShuffleShuffleBitcastSequence(
+    ShuffleVectorInst &Shuf1, ShuffleVectorInst &Shuf2,
+    const SmallVector<int, 16> &NewShufMask, InstCombiner::BuilderTy &Builder) {
+  Constant *NewShuf2MaskC = ShuffleMaskToConstantVector(NewShufMask, Builder);
+  Value *NewShuf2 = Builder.CreateShuffleVector(
+      &Shuf1, UndefValue::get(Shuf1.getType()), NewShuf2MaskC);
+  Value *NewBitCast = Builder.CreateBitCast(NewShuf2, Shuf2.getType());
+  return cast<Instruction>(NewBitCast);
+}
+
+// Rearrange a shuffle-bitcast-shuffle sequence
+//   x1 = shuffle(x0, mask1); x2 = bitcast(x1); x3 = shuffle(x2, mask2);
+// into one of two possible forms:
+//   1. x1 = shuffle(x0, mask1); x2 = shuffle(x1, new_mask2); x3 = bitcast(x2);
+//   2. x1 = bitcast(x0); x2 = shuffle(x1, new_mask1); x3 = shuffle(x2, mask2);
+// The backend will usually replace two adjacent shuffles with a single
+// shuffle instruction that uses the combined mask.
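+//
+// Illustrative example (not taken from the test suite): given
+//   %s1 = shufflevector <4 x i32> %v, <4 x i32> undef,
+//                       <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+//   %b  = bitcast <4 x i32> %s1 to <8 x i16>
+//   %s2 = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> mask2
+// form 2 bitcasts %v to <8 x i16> first and replays %s1 as the <8 x i16> mask
+// <2, 3, 0, 1, 6, 7, 4, 5>, leaving the two shuffles adjacent; form 1 is used
+// instead when mask2 can be widened to a <4 x i32> mask.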
+Instruction *
+InstCombiner::RearangeShuffleBitcastShuffle(ShuffleVectorInst &Shuf) {
+  Value *ShufLHS = Shuf.getOperand(0);
+  Value *ShufRHS = Shuf.getOperand(1);
+
+  if (!isa<BitCastInst>(ShufLHS))
+    return nullptr;
+  if (!isa<UndefValue>(ShufRHS))
+    return nullptr;
+  BitCastInst *ShufBc = cast<BitCastInst>(ShufLHS);
+
+  Value *ShufBcOp = ShufBc->getOperand(0);
+  if (!isa<ShuffleVectorInst>(ShufBcOp))
+    return nullptr;
+  ShuffleVectorInst *BcShuf = cast<ShuffleVectorInst>(ShufBcOp);
+
+  int ShufNumElts, BcShufNumElts, ShufEltBytes, BcShufEltBytes;
+  if (!getVectorInstructionTypeInfo(Shuf, ShufNumElts, ShufEltBytes))
+    return nullptr;
+  if (!getVectorInstructionTypeInfo(*BcShuf, BcShufNumElts, BcShufEltBytes))
+    return nullptr;
+  if ((ShufNumElts * ShufEltBytes) != (BcShufNumElts * BcShufEltBytes))
+    return nullptr;
+
+  SmallVector<int, 16> ShufMask = Shuf.getShuffleMask();
+  SmallVector<int, 16> BcShufMask = BcShuf->getShuffleMask();
+  SmallVector<int, 16> NewShufMask;
+  if (BcShufEltBytes > ShufEltBytes) {
+    // Try to widen the outer shuffle's mask to the inner shuffle's type.
+    if (ShuffleMaskForBiggerVectorElement(BcShufNumElts, ShufNumElts,
+                                          BcShufEltBytes, ShufEltBytes,
+                                          ShufMask, NewShufMask))
+      return ShuffleShuffleBitcastSequence(*BcShuf, Shuf, NewShufMask, Builder);
+    // Otherwise narrow the inner shuffle's mask to the outer shuffle's type.
+    if (!ShuffleMaskForSmallerVectorElement(ShufNumElts, BcShufNumElts,
+                                            ShufEltBytes, BcShufEltBytes,
+                                            BcShufMask, NewShufMask))
+      return nullptr;
+    return BitcastShuffleShuffleSequence(*BcShuf, Shuf, NewShufMask, Builder);
+  }
+  // ShufEltBytes >= BcShufEltBytes: try to widen the inner shuffle's mask.
+  if (ShuffleMaskForBiggerVectorElement(ShufNumElts, BcShufNumElts,
+                                        ShufEltBytes, BcShufEltBytes,
+                                        BcShufMask, NewShufMask))
+    return BitcastShuffleShuffleSequence(*BcShuf, Shuf, NewShufMask, Builder);
+  // Otherwise narrow the outer shuffle's mask to the inner shuffle's type.
+  if (!ShuffleMaskForSmallerVectorElement(BcShufNumElts, ShufNumElts,
+                                          BcShufEltBytes, ShufEltBytes,
+                                          ShufMask, NewShufMask))
+    return nullptr;
+  return ShuffleShuffleBitcastSequence(*BcShuf, Shuf, NewShufMask, Builder);
+}
+
 /// \brief Creates node of binary operation with the same attributes as the
 /// specified one but with other operands.
 static Value *CreateBinOpAsGiven(BinaryOperator &Inst, Value *LHS, Value *RHS,
@@ -1451,7 +1706,46 @@
     return Builder.CreateShuffleVector(
         NewBO, UndefValue::get(NewBO->getType()), LShuf->getMask());
   }
-
+  // Both operands of the binary operation are bitcasts of shuffles, but the
+  // binary operation's vector element type differs from the shuffles' vector
+  // element type, e.g. the shuffles operate on <4 x i32> while the binary
+  // operation operates on <16 x i8>. To move the shuffle past the binary
+  // operation we have to change the shuffle's vector type and recompute its
+  // mask. Narrowing the shuffle's element type is always possible, but
+  // widening it is not always possible.
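+  // Illustrative example (not taken from the test file below): if both
+  // operands are bitcasts to <8 x i16> of <4 x i32> shuffles that use the
+  // same mask <1, 0, 3, 2> and an undef second operand, the add can be done
+  // directly on the bitcast inputs and followed by a single <8 x i16> shuffle
+  // with the recomputed mask <2, 3, 0, 1, 6, 7, 4, 5>.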
+  BitCastInst *LBitCast = dyn_cast<BitCastInst>(LHS);
+  BitCastInst *RBitCast = dyn_cast<BitCastInst>(RHS);
+  if (LBitCast && RBitCast) {
+    Value *LBitCastOp = LBitCast->getOperand(0);
+    Value *RBitCastOp = RBitCast->getOperand(0);
+    ShuffleVectorInst *LBcShuf = dyn_cast<ShuffleVectorInst>(LBitCastOp);
+    ShuffleVectorInst *RBcShuf = dyn_cast<ShuffleVectorInst>(RBitCastOp);
+
+    if (LBcShuf && RBcShuf && LBcShuf->getMask() == RBcShuf->getMask() &&
+        isa<UndefValue>(LBcShuf->getOperand(1)) &&
+        isa<UndefValue>(RBcShuf->getOperand(1)) &&
+        LBcShuf->getOperand(0)->getType() ==
+            RBcShuf->getOperand(0)->getType()) {
+      Constant *NewMask;
+      bool ok = ComputeShuffleMaskForDifferentVectorType(Inst, *LBcShuf,
+                                                         NewMask, Builder);
+      if (ok) {
+        Value *NewLBitCast =
+            Builder.CreateBitCast(LBcShuf->getOperand(0), Inst.getType());
+        Value *NewRBitCast =
+            Builder.CreateBitCast(RBcShuf->getOperand(0), Inst.getType());
+
+        Value *NewBinOp =
+            CreateBinOpAsGiven(Inst, NewLBitCast, NewRBitCast, Builder);
+
+        Value *NewShuf = Builder.CreateShuffleVector(
+            NewBinOp, UndefValue::get(Inst.getType()), NewMask);
+        return NewShuf;
+      }
+    }
+  }
 
   // If one argument is a shuffle within one vector, the other is a constant,
   // try moving the shuffle after the binary operation.
   ShuffleVectorInst *Shuffle = nullptr;
Index: test/Transforms/InstCombine/vec_shuffle.ll
===================================================================
--- test/Transforms/InstCombine/vec_shuffle.ll
+++ test/Transforms/InstCombine/vec_shuffle.ll
@@ -463,3 +463,297 @@
   %1 = shufflevector <4 x i32*> %A, <4 x i32*> undef, <2 x i32>
   ret <2 x i32*> %1
 }
+
+; Function Attrs: noinline nounwind uwtable
+define <2 x i64> @shuffle_32_add_16_shuffle_32_masks_are_eq(<2 x i64> %v) {
+; CHECK-LABEL: @shuffle_32_add_16_shuffle_32_masks_are_eq(
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> %v to <8 x i16>
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> %v to <8 x i16>
+; CHECK-NEXT:    [[TMP2:%.*]] = add <8 x i16> [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
+;
+  %bc0 = bitcast <2 x i64> %v to <4 x i32>
+  %shuffle = shufflevector <4 x i32> %bc0, <4 x i32> zeroinitializer, <4 x i32>
+  %bc1 = bitcast <4 x i32> %shuffle to <2 x i64>
+  %bc2 = bitcast <2 x i64> %bc1 to <8 x i16>
+  %add.i = add <8 x i16> %bc2, %bc2
+  %bc3 = bitcast <8 x i16> %add.i to <2 x i64>
+  %bc4 = bitcast <2 x i64> %bc3 to <4 x i32>
+  %shuffle4 = shufflevector <4 x i32> %bc4, <4 x i32> zeroinitializer, <4 x i32>
+  %bc5 = bitcast <4 x i32> %shuffle4 to <2 x i64>
+  ret <2 x i64> %bc5
+}
+
+; Function Attrs: noinline nounwind uwtable
+define <2 x i64> @shuffle_32_add_8_shuffle_32_masks_are_eq(<2 x i64> %v) {
+; CHECK-LABEL: @shuffle_32_add_8_shuffle_32_masks_are_eq(
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> %v to <16 x i8>
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> %v to <16 x i8>
+; CHECK-NEXT:    [[TMP2:%.*]] = add <16 x i8> [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
+;
+  %bc0 = bitcast <2 x i64> %v to <4 x i32>
+  %shuffle = shufflevector <4 x i32> %bc0, <4 x i32> zeroinitializer, <4 x i32>
+  %bc1 = bitcast <4 x i32> %shuffle to <2 x i64>
+  %bc2 = bitcast <2 x i64> %bc1 to <16 x i8>
+  %add.i = add <16 x i8> %bc2, %bc2
+  %bc3 = bitcast <16 x i8> %add.i to <2 x i64>
+  %bc4 = bitcast <2 x i64> %bc3 to <4 x i32>
+  %shuffle4 = shufflevector <4 x i32> %bc4, <4 x i32> zeroinitializer, <4 x i32>
+  %bc5 = bitcast <4 x i32> %shuffle4 to <2 x i64>
+  ret <2 x i64> %bc5
+}
+
+; Function Attrs: noinline nounwind uwtable
+define <2 x i64> @shuffle_8_add_32_shuffle_8_masks_are_eq(<2 x i64> %v) {
+; CHECK-LABEL: @shuffle_8_add_32_shuffle_8_masks_are_eq(
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> %v to <4 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> %v to <4 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i32> [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64>
+; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
+;
+  %bc0 = bitcast <2 x i64> %v to <16 x i8>
+  %shuffle = shufflevector <16 x i8> %bc0, <16 x i8> zeroinitializer, <16 x i32>
+  %bc1 = bitcast <16 x i8> %shuffle to <2 x i64>
+  %bc2 = bitcast <2 x i64> %bc1 to <4 x i32>
+  %add.i = add <4 x i32> %bc2, %bc2
+  %bc3 = bitcast <4 x i32> %add.i to <2 x i64>
+  %bc4 = bitcast <2 x i64> %bc3 to <16 x i8>
+  %shuffle4 = shufflevector <16 x i8> %bc4, <16 x i8> zeroinitializer, <16 x i32>
+  %bc5 = bitcast <16 x i8> %shuffle4 to <2 x i64>
+  ret <2 x i64> %bc5
+}
+
+define <8 x i16> @shuffle_32_add_16_masks_are_eq(<4 x i32> %v1, <4 x i32> %v2) {
+; CHECK-LABEL: @shuffle_32_add_16_masks_are_eq
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> %v1 to <8 x i16>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> %v2 to <8 x i16>
+; CHECK-NEXT:    [[TMP3:%.*]] = add <8 x i16> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> undef, <8 x i32>
+; CHECK-NEXT:    ret <8 x i16> [[TMP4]]
+
+  %shuffle1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32>
+  %shuffle2 = shufflevector <4 x i32> %v2, <4 x i32> undef, <4 x i32>
+  %bc1 = bitcast <4 x i32> %shuffle1 to <8 x i16>
+  %bc2 = bitcast <4 x i32> %shuffle2 to <8 x i16>
+  %add = add <8 x i16> %bc1, %bc2
+  ret <8 x i16> %add
+}
+
+define <16 x i8> @shuffle_32_add_8_masks_are_eq(<4 x i32> %v1, <4 x i32> %v2) {
+; CHECK-LABEL: @shuffle_32_add_8_masks_are_eq
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> %v2 to <16 x i8>
+; CHECK-NEXT:    [[TMP3:%.*]] = add <16 x i8> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> undef, <16 x i32>
+; CHECK-NEXT:    ret <16 x i8> [[TMP4]]
+
+  %shuffle1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32>
+  %shuffle2 = shufflevector <4 x i32> %v2, <4 x i32> undef, <4 x i32>
+  %bc1 = bitcast <4 x i32> %shuffle1 to <16 x i8>
+  %bc2 = bitcast <4 x i32> %shuffle2 to <16 x i8>
+  %add = add <16 x i8> %bc1, %bc2
+  ret <16 x i8> %add
+}
+
+define <16 x i8> @shuffle_16_add_8_masks_are_eq(<8 x i16> %v1, <8 x i16> %v2) {
+; CHECK-LABEL: @shuffle_16_add_8_masks_are_eq
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> %v1 to <16 x i8>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> %v2 to <16 x i8>
+; CHECK-NEXT:    [[TMP3:%.*]] = add <16 x i8> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> undef, <16 x i32>
+; CHECK-NEXT:    ret <16 x i8> [[TMP4]]
+
+  %shuffle1 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32>
+  %shuffle2 = shufflevector <8 x i16> %v2, <8 x i16> undef, <8 x i32>
+  %bc1 = bitcast <8 x i16> %shuffle1 to <16 x i8>
+  %bc2 = bitcast <8 x i16> %shuffle2 to <16 x i8>
+  %add = add <16 x i8> %bc1, %bc2
+  ret <16 x i8> %add
+}
+
+define <4 x i32> @shuffle_16_add_32_masks_are_eq_and_can_be_converted_up(<8 x i16> %v1, <8 x i16> %v2) {
+; CHECK-LABEL: @shuffle_16_add_32_masks_are_eq_and_can_be_converted_up
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> %v1 to <4 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> %v2 to <4 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> undef, <4 x i32>
+; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
+
+  %shuffle1 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32>
+  %shuffle2 = shufflevector <8 x i16> %v2, <8 x i16> undef, <8 x i32>
+  %bc1 = bitcast <8 x i16> %shuffle1 to <4 x i32>
+  %bc2 = bitcast <8 x i16> %shuffle2 to <4 x i32>
+  %add = add <4 x i32> %bc1, %bc2
+  ret <4 x i32> %add
+}
+
+define <4 x i32> @shuffle_8_add_32_masks_are_eq_and_can_be_converted_up(<16 x i8> %v1, <16 x i8> %v2) {
+; CHECK-LABEL: @shuffle_8_add_32_masks_are_eq_and_can_be_converted_up
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> %v1 to <4 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> %v2 to <4 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> undef, <4 x i32>
+; CHECK-NEXT:    ret <4 x i32> [[TMP4]]
+
+  %shuffle1 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32>
+  %shuffle2 = shufflevector <16 x i8> %v2, <16 x i8> undef, <16 x i32>
+  %bc1 = bitcast <16 x i8> %shuffle1 to <4 x i32>
+  %bc2 = bitcast <16 x i8> %shuffle2 to <4 x i32>
+  %add = add <4 x i32> %bc1, %bc2
+  ret <4 x i32> %add
+}
+
+; shuffle<8 x i16>( bitcast<8 x i16>( shuffle<4 x i32>(v)))
+
+define <8 x i16> @shuffle_32_bitcast_16_shuffle_16_can_be_converted_up(<4 x i32> %v1) {
+; CHECK-LABEL: @shuffle_32_bitcast_16_shuffle_16_can_be_converted_up
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> undef, <4 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to <8 x i16>
+; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+
+  %shuffle1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32>
+  %bc1 = bitcast <4 x i32> %shuffle1 to <8 x i16>
+  %shuffle2 = shufflevector <8 x i16> %bc1, <8 x i16> undef, <8 x i32>
+  ret <8 x i16> %shuffle2
+}
+
+; shuffle<8 x i16>( bitcast<8 x i16>( shuffle<4 x i32>(v)))
+
+define <8 x i16> @shuffle_32_bitcast_16_shuffle_16_can_not_be_converted_up(<4 x i32> %v1) {
+; CHECK-LABEL: @shuffle_32_bitcast_16_shuffle_16_can_not_be_converted_up
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> %v1 to <8 x i16>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> undef, <8 x i32>
+; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
+
+  %shuffle1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32>
+  %bc1 = bitcast <4 x i32> %shuffle1 to <8 x i16>
+  %shuffle2 = shufflevector <8 x i16> %bc1, <8 x i16> undef, <8 x i32>
+  ret <8 x i16> %shuffle2
+}
+
+; shuffle<16 x i8>( bitcast<16 x i8>( shuffle<4 x i32>(v)))
+
+define <16 x i8> @shuffle_32_bitcast_8_shuffle_8_can_be_converted_up(<4 x i32> %v1) {
+; CHECK-LABEL: @shuffle_32_bitcast_8_shuffle_8_can_be_converted_up
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> undef, <4 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
+; CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+
+  %shuffle1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32>
+  %bc1 = bitcast <4 x i32> %shuffle1 to <16 x i8>
+  %shuffle2 = shufflevector <16 x i8> %bc1, <16 x i8> undef, <16 x i32>
+  ret <16 x i8> %shuffle2
+}
+
+; shuffle<16 x i8>( bitcast<16 x i8>( shuffle<4 x i32>(v)))
+
+define <16 x i8> @shuffle_32_bitcast_8_shuffle_8_can_not_be_converted_up(<4 x i32> %v1) {
+; CHECK-LABEL: @shuffle_32_bitcast_8_shuffle_8_can_not_be_converted_up
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> %v1 to <16 x i8>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> undef, <16 x i32>
+; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
+
+  %shuffle1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32>
+  %bc1 = bitcast <4 x i32> %shuffle1 to <16 x i8>
+  %shuffle2 = shufflevector <16 x i8> %bc1, <16 x i8> undef, <16 x i32>
+  ret <16 x i8> %shuffle2
+}
+
+; shuffle<4 x i32>( bitcast<4 x i32>( shuffle<16 x i8>(v)))
+
+define <4 x i32> @shuffle_8_bitcast_32_shuffle_32_can_be_converted_up(<16 x i8> %v1) {
+; CHECK-LABEL: @shuffle_8_bitcast_32_shuffle_32_can_be_converted_up
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> %v1 to <4 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32>
+; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+
+  %shuffle1 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32>
+  %bc1 = bitcast <16 x i8> %shuffle1 to <4 x i32>
+  %shuffle2 = shufflevector <4 x i32> %bc1, <4 x i32> undef, <4 x i32>
+  ret <4 x i32> %shuffle2
+}
+
+; shuffle<4 x i32>( bitcast<4 x i32>( shuffle<8 x i16>(v)))
+
+define <4 x i32> @shuffle_16_bitcast_32_shuffle_32_can_be_converted_up(<8 x i16> %v1) {
+; CHECK-LABEL: @shuffle_16_bitcast_32_shuffle_32_can_be_converted_up
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> %v1 to <4 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> undef, <4 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> undef, <4 x i32>
+; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+
+  %shuffle1 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32>
+  %bc1 = bitcast <8 x i16> %shuffle1 to <4 x i32>
+  %shuffle2 = shufflevector <4 x i32> %bc1, <4 x i32> undef, <4 x i32>
+  ret <4 x i32> %shuffle2
+}
+
+; shuffle<4 x i32>( bitcast<4 x i32>( shuffle<16 x i8>(v)))
+
+define <4 x i32> @shuffle_8_bitcast_32_shuffle_32_can_not_be_converted_up(<16 x i8> %v1) {
+; CHECK-LABEL: @shuffle_8_bitcast_32_shuffle_32_can_not_be_converted_up
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <16 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+
+  %shuffle1 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32>
+  %bc1 = bitcast <16 x i8> %shuffle1 to <4 x i32>
+  %shuffle2 = shufflevector <4 x i32> %bc1, <4 x i32> undef, <4 x i32>
+  ret <4 x i32> %shuffle2
+}
+
+; shuffle<4 x i32>( bitcast<4 x i32>( shuffle<8 x i16>(v)))
+
+define <4 x i32> @shuffle_16_bitcast_32_shuffle_32_can_not_be_converted_up(<8 x i16> %v1) {
+; CHECK-LABEL: @shuffle_16_bitcast_32_shuffle_32_can_not_be_converted_up
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[TMP0]], <8 x i16> undef, <8 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32>
+; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+
+  %shuffle1 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32>
+  %bc1 = bitcast <8 x i16> %shuffle1 to <4 x i32>
+  %shuffle2 = shufflevector <4 x i32> %bc1, <4 x i32> undef, <4 x i32>
+  ret <4 x i32> %shuffle2
+}
+
+; shuffle<8 x i16>( bitcast<8 x i16>( shuffle<16 x i8>(v)))
+
+define <8 x i16> @shuffle_8_bitcast_16_shuffle_16_can_be_converted_up(<16 x i8> %v1) {
+; CHECK-LABEL: @shuffle_8_bitcast_16_shuffle_16_can_be_converted_up
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> %v1 to <8 x i16>
+; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32>
+; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> undef, <8 x i32>
+; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
+
+  %shuffle1 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32>
+  %bc1 = bitcast <16 x i8> %shuffle1 to <8 x i16>
+  %shuffle2 = shufflevector <8 x i16> %bc1, <8 x i16> undef, <8 x i32>
+  ret <8 x i16> %shuffle2
+}
+
+; shuffle<8 x i16>( bitcast<8 x i16>( shuffle<16 x i8>(v)))
+
+define <8 x i16> @shuffle_8_bitcast_16_shuffle_16_can_not_be_converted_up(<16 x i8> %v1) {
+; CHECK-LABEL: @shuffle_8_bitcast_16_shuffle_16_can_not_be_converted_up
+; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32>
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[TMP0]], <16 x i8> undef, <16 x i32>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
+; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+
+  %shuffle1 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32>
+  %bc1 = bitcast <16 x i8> %shuffle1 to <8 x i16>
+  %shuffle2 = shufflevector <8 x i16> %bc1, <8 x i16> undef, <8 x i32>
+  ret <8 x i16> %shuffle2
+}