Index: llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1608,6 +1608,64 @@
   return nullptr;
 }
 
+/// Fold a shuffle of two "identity with padding" shuffles of same-typed narrow
+/// vectors into a single shuffle of the narrow vectors:
+///   shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
+/// Returns the new instruction, or nullptr if the pattern does not match.
+static Instruction *foldIdentityPaddedShuffles(ShuffleVectorInst &Shuf) {
+  // Match the operands as identity with padding (also known as
+  // insert_subvector into undef) shuffles of the same source type.
+  auto *Shuffle0 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(0));
+  auto *Shuffle1 = dyn_cast<ShuffleVectorInst>(Shuf.getOperand(1));
+  if (!Shuffle0 || !Shuffle0->isIdentityWithPadding() ||
+      !Shuffle1 || !Shuffle1->isIdentityWithPadding())
+    return nullptr;
+
+  Value *X = Shuffle0->getOperand(0);
+  Value *Y = Shuffle1->getOperand(0);
+  if (X->getType() != Y->getType() || isa<UndefValue>(X) || isa<UndefValue>(Y))
+    return nullptr;
+  assert(isa<UndefValue>(Shuffle0->getOperand(1)) &&
+         isa<UndefValue>(Shuffle1->getOperand(1)) &&
+         "Unexpected operand for identity shuffle");
+
+  // This is a shuffle of 2 widening shuffles. We can shuffle the narrow source
+  // operands directly by adjusting the shuffle mask to account for the narrower
+  // types:
+  // shuf (widen X), (widen Y), Mask --> shuf X, Y, Mask'
+  int NarrowElts = X->getType()->getVectorNumElements();
+  int WideElts = Shuffle0->getType()->getVectorNumElements();
+  assert(WideElts > NarrowElts && "Unexpected types for identity with padding");
+
+  Type *I32Ty = IntegerType::getInt32Ty(Shuf.getContext());
+  SmallVector<int, 16> Mask = Shuf.getShuffleMask();
+  SmallVector<Constant *, 16> NewMask(Mask.size(), UndefValue::get(I32Ty));
+  for (int i = 0, e = Mask.size(); i != e; ++i) {
+    if (Mask[i] == -1)
+      continue;
+
+    // Translate the wide mask index into the widening shuffle that produced
+    // that lane and the lane's position within that shuffle's result.
+    bool FromOp0 = Mask[i] < WideElts;
+    ShuffleVectorInst *Widen = FromOp0 ? Shuffle0 : Shuffle1;
+    int WideIdx = FromOp0 ? Mask[i] : Mask[i] - WideElts;
+
+    // If this shuffle is choosing an undef element from 1 of the sources
+    // (padding, or an undef lane in the identity part), the result element is
+    // undef. Reusing the wide index here would select the wrong narrow
+    // operand or produce an out-of-range mask element.
+    if (Widen->getMaskValue(WideIdx) == -1)
+      continue;
+    assert(WideIdx < NarrowElts && "Unexpected shuffle mask");
+
+    // Elements of the 1st narrow op keep their index. Elements of the 2nd
+    // narrow op start at NarrowElts rather than WideElts.
+    int NarrowIdx = FromOp0 ? WideIdx : WideIdx + NarrowElts;
+    NewMask[i] = ConstantInt::get(I32Ty, NarrowIdx);
+  }
+  return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask));
+}
+
 Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
   Value *LHS = SVI.getOperand(0);
   Value *RHS = SVI.getOperand(1);
@@ -1662,10 +1720,12 @@
   if (Instruction *I = foldIdentityExtractShuffle(SVI))
     return I;
 
-  // This transform has the potential to lose undef knowledge, so it is
+  // These transforms have the potential to lose undef knowledge, so they are
   // intentionally placed after SimplifyDemandedVectorElts().
   if (Instruction *I = foldShuffleWithInsert(SVI))
     return I;
+  if (Instruction *I = foldIdentityPaddedShuffles(SVI))
+    return I;
 
   if (VWidth == LHSWidth) {
     // Analyze the shuffle, are the LHS or RHS and identity shuffles?
Index: llvm/test/Transforms/InstCombine/vec_shuffle.ll =================================================================== --- llvm/test/Transforms/InstCombine/vec_shuffle.ll +++ llvm/test/Transforms/InstCombine/vec_shuffle.ll @@ -1142,9 +1142,7 @@ define <7 x i8> @insert_subvector_shuffles(<3 x i8> %x, <3 x i8> %y) { ; CHECK-LABEL: @insert_subvector_shuffles( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> undef, <7 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <3 x i8> [[Y:%.*]], <3 x i8> undef, <7 x i32> <i32 undef, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef> -; CHECK-NEXT: [[S3:%.*]] = shufflevector <7 x i8> [[S1]], <7 x i8> [[S2]], <7 x i32> <i32 0, i32 8, i32 1, i32 undef, i32 8, i32 1, i32 9> +; CHECK-NEXT: [[S3:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> [[Y:%.*]], <7 x i32> <i32 0, i32 4, i32 1, i32 undef, i32 4, i32 1, i32 5> ; CHECK-NEXT: ret <7 x i8> [[S3]] ; %s1 = shufflevector <3 x i8> %x, <3 x i8> undef, <7 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> @@ -1157,9 +1155,7 @@ define <2 x i8> @insert_subvector_shuffles_narrowing(<3 x i8> %x, <3 x i8> %y) { ; CHECK-LABEL: @insert_subvector_shuffles_narrowing( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> undef, <7 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <3 x i8> [[Y:%.*]], <3 x i8> undef, <7 x i32> <i32 undef, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> -; CHECK-NEXT: [[S3:%.*]] = shufflevector <7 x i8> [[S1]], <7 x i8> [[S2]], <2 x i32> <i32 0, i32 8> +; CHECK-NEXT: [[S3:%.*]] = shufflevector <3 x i8> [[X:%.*]], <3 x i8> [[Y:%.*]], <2 x i32> <i32 0, i32 4> ; CHECK-NEXT: ret <2 x i8> [[S3]] ; %s1 = shufflevector <3 x i8> %x, <3 x i8> undef, <7 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> @@ -1172,9 
+1168,7 @@ define <4 x double> @insert_subvector_shuffles_identity(<2 x double> %x) { ; CHECK-LABEL: @insert_subvector_shuffles_identity( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> undef, <4 x i32> <i32 undef, i32 1, i32 undef, i32 undef> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <2 x double> [[X]], <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> -; CHECK-NEXT: [[S3:%.*]] = shufflevector <4 x double> [[S2]], <4 x double> [[S1]], <4 x i32> <i32 0, i32 5, i32 undef, i32 undef> +; CHECK-NEXT: [[S3:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> ; CHECK-NEXT: ret <4 x double> [[S3]] ; %s1 = shufflevector <2 x double> %x, <2 x double> undef, <4 x i32> <i32 undef, i32 1, i32 undef, i32 undef>