Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -1477,6 +1477,41 @@ return SelectInst::Create(NarrowCond, NarrowX, NarrowY); } +/// Try to combine 2 shuffles into 1 shuffle by concatenating a shuffle mask. +static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) { + Value *Op0 = Shuf.getOperand(0), *Op1 = Shuf.getOperand(1); + if (!Shuf.isIdentityWithExtract() || !isa(Op1)) + return nullptr; + + Value *X, *Y; + Constant *Mask; + if (!match(Op0, m_ShuffleVector(m_Value(X), m_Value(Y), m_Constant(Mask)))) + return nullptr; + + // We are extracting a subvector from a shuffle. Remove excess elements from + // the 1st shuffle mask to eliminate the extract. + // + // This transform is conservatively limited to identity extracts because we do + // not allow arbitrary shuffle mask creation as a target-independent transform + // (because we can't guarantee that will lower efficiently). + // + // If the extracting shuffle has an undef mask element, it transfers to the + // new shuffle mask. Otherwise, copy the original mask element. Example: + // shuf (shuf X, Y, ), undef, <0, undef, 2, 3> --> + // shuf X, Y, + unsigned NumElts = Shuf.getType()->getVectorNumElements(); + SmallVector NewMask(NumElts); + assert(NumElts < Mask->getType()->getVectorNumElements() && + "Identity with extract must have less elements than its inputs"); + + for (unsigned i = 0; i != NumElts; ++i) { + Constant *ExtractMaskElt = Shuf.getMask()->getAggregateElement(i); + Constant *MaskElt = Mask->getAggregateElement(i); + NewMask[i] = isa(ExtractMaskElt) ? ExtractMaskElt : MaskElt; + } + return new ShuffleVectorInst(X, Y, ConstantVector::get(NewMask)); +} + Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { Value *LHS = SVI.getOperand(0); Value *RHS = SVI.getOperand(1); @@ -1499,6 +1534,9 @@ return &SVI; } + if (Instruction *I = foldIdentityExtractShuffle(SVI)) + return I; + SmallVector Mask = SVI.getShuffleMask(); Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); unsigned LHSWidth = LHS->getType()->getVectorNumElements(); Index: llvm/trunk/test/Transforms/InstCombine/vec_shuffle.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/vec_shuffle.ll +++ llvm/trunk/test/Transforms/InstCombine/vec_shuffle.ll @@ -170,12 +170,11 @@ ret <8 x i8> %t3 } -; TODO: The mask length of the 1st shuffle can be reduced to eliminate the 2nd shuffle. +; The mask length of the 1st shuffle can be reduced to eliminate the 2nd shuffle. define <2 x i8> @extract_subvector_of_shuffle(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @extract_subvector_of_shuffle( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <3 x i32> -; CHECK-NEXT: [[EXTRACT_SUBV:%.*]] = shufflevector <3 x i8> [[SHUF]], <3 x i8> undef, <2 x i32> +; CHECK-NEXT: [[EXTRACT_SUBV:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <2 x i32> ; CHECK-NEXT: ret <2 x i8> [[EXTRACT_SUBV]] ; %shuf = shufflevector <2 x i8> %x, <2 x i8> %y, <3 x i32> @@ -183,7 +182,6 @@ ret <2 x i8> %extract_subv } -; TODO: ; Extra uses are ok. ; Undef elements in either mask are ok. Undefs from the 2nd shuffle mask should propagate to the new shuffle. ; The type of the inputs does not have to match the output type. @@ -194,7 +192,7 @@ ; CHECK-LABEL: @extract_subvector_of_shuffle_extra_use( ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <5 x i32> ; CHECK-NEXT: call void @use_v5i8(<5 x i8> [[SHUF]]) -; CHECK-NEXT: [[EXTRACT_SUBV:%.*]] = shufflevector <5 x i8> [[SHUF]], <5 x i8> undef, <4 x i32> +; CHECK-NEXT: [[EXTRACT_SUBV:%.*]] = shufflevector <2 x i8> [[X]], <2 x i8> [[Y]], <4 x i32> ; CHECK-NEXT: ret <4 x i8> [[EXTRACT_SUBV]] ; %shuf = shufflevector <2 x i8> %x, <2 x i8> %y, <5 x i32>