Index: llvm/trunk/lib/Analysis/InstructionSimplify.cpp =================================================================== --- llvm/trunk/lib/Analysis/InstructionSimplify.cpp +++ llvm/trunk/lib/Analysis/InstructionSimplify.cpp @@ -4080,6 +4080,60 @@ RecursionLimit); } +/// For the given destination element of a shuffle, peek through shuffles to +/// match a root vector source operand that contains that element in the same +/// vector lane (ie, the same mask index), so we can eliminate the shuffle(s). +static Value *foldIdentityShuffles(int DestElt, Value *Op0, Value *Op1, + Constant *Mask, Value *RootVec, int RootElt, + unsigned MaxRecurse) { + if (!MaxRecurse--) + return nullptr; + + // Bail out if any mask value is undefined. That kind of shuffle may be + // simplified further based on demanded bits or other folds. + int MaskVal = ShuffleVectorInst::getMaskValue(Mask, RootElt); + if (MaskVal == -1) + return nullptr; + + // The mask value chooses which source operand we need to look at next. + Value *SourceOp; + int InVecNumElts = Op0->getType()->getVectorNumElements(); + if (MaskVal < InVecNumElts) { + RootElt = MaskVal; + SourceOp = Op0; + } else { + RootElt = MaskVal - InVecNumElts; + SourceOp = Op1; + } + + // If the source operand is a shuffle itself, look through it to find the + // matching root vector. + if (auto *SourceShuf = dyn_cast(SourceOp)) { + return foldIdentityShuffles( + DestElt, SourceShuf->getOperand(0), SourceShuf->getOperand(1), + SourceShuf->getMask(), RootVec, RootElt, MaxRecurse); + } + + // TODO: Look through bitcasts? What if the bitcast changes the vector element + // size? + + // The source operand is not a shuffle. Initialize the root vector value for + // this shuffle if that has not been done yet. + if (!RootVec) + RootVec = SourceOp; + + // Give up as soon as a source operand does not match the existing root value. + if (RootVec != SourceOp) + return nullptr; + + // The element must be coming from the same lane in the source vector + // (although it may have crossed lanes in intermediate shuffles). + if (RootElt != DestElt) + return nullptr; + + return RootVec; +} + static Value *SimplifyShuffleVectorInst(Value *Op0, Value *Op1, Constant *Mask, Type *RetTy, const Query &Q, unsigned MaxRecurse) { @@ -4124,7 +4178,28 @@ OpShuf->getMask()->getSplatValue()) return Op1; - return nullptr; + // Don't fold a shuffle with undef mask elements. This may get folded in a + // better way using demanded bits or other analysis. + // TODO: Should we allow this? + for (unsigned i = 0; i != MaskNumElts; ++i) + if (ShuffleVectorInst::getMaskValue(Mask, i) == -1) + return nullptr; + + // Check if every element of this shuffle can be mapped back to the + // corresponding element of a single root vector. If so, we don't need this + // shuffle. This handles simple identity shuffles as well as chains of + // shuffles that may widen/narrow and/or move elements across lanes and back. + Value *RootVec = nullptr; + for (unsigned i = 0; i != MaskNumElts; ++i) { + // Note that recursion is limited for each vector element, so if any element + // exceeds the limit, this will fail to simplify. + RootVec = foldIdentityShuffles(i, Op0, Op1, Mask, RootVec, i, MaxRecurse); + + // We can't replace a widening/narrowing shuffle with one of its operands. + if (!RootVec || RootVec->getType() != RetTy) + return nullptr; + } + return RootVec; } /// Given operands for a ShuffleVectorInst, fold the result or return null. Index: llvm/trunk/test/Transforms/InstSimplify/shufflevector.ll =================================================================== --- llvm/trunk/test/Transforms/InstSimplify/shufflevector.ll +++ llvm/trunk/test/Transforms/InstSimplify/shufflevector.ll @@ -120,8 +120,7 @@ define <4 x i32> @identity_mask_0(<4 x i32> %x) { ; CHECK-LABEL: @identity_mask_0( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> -; CHECK-NEXT: ret <4 x i32> [[SHUF]] +; CHECK-NEXT: ret <4 x i32> [[X:%.*]] ; %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> ret <4 x i32> %shuf @@ -129,8 +128,7 @@ define <4 x i32> @identity_mask_1(<4 x i32> %x) { ; CHECK-LABEL: @identity_mask_1( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> undef, <4 x i32> [[X:%.*]], <4 x i32> -; CHECK-NEXT: ret <4 x i32> [[SHUF]] +; CHECK-NEXT: ret <4 x i32> [[X:%.*]] ; %shuf = shufflevector <4 x i32> undef, <4 x i32> %x, <4 x i32> ret <4 x i32> %shuf @@ -138,13 +136,32 @@ define <4 x i32> @pseudo_identity_mask(<4 x i32> %x) { ; CHECK-LABEL: @pseudo_identity_mask( -; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[X]], <4 x i32> -; CHECK-NEXT: ret <4 x i32> [[SHUF]] +; CHECK-NEXT: ret <4 x i32> [[X:%.*]] ; %shuf = shufflevector <4 x i32> %x, <4 x i32> %x, <4 x i32> ret <4 x i32> %shuf } +define <4 x i32> @not_identity_mask(<4 x i32> %x) { +; CHECK-LABEL: @not_identity_mask( +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[X]], <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[SHUF]] +; + %shuf = shufflevector <4 x i32> %x, <4 x i32> %x, <4 x i32> + ret <4 x i32> %shuf +} + +; TODO: Should we simplify if the mask has an undef element? + +define <4 x i32> @possible_identity_mask(<4 x i32> %x) { +; CHECK-LABEL: @possible_identity_mask( +; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[SHUF]] +; + %shuf = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> + ret <4 x i32> %shuf +} + define <4 x i32> @const_operand(<4 x i32> %x) { ; CHECK-LABEL: @const_operand( ; CHECK-NEXT: ret <4 x i32> @@ -155,10 +172,7 @@ define <4 x i32> @merge(<4 x i32> %x) { ; CHECK-LABEL: @merge( -; CHECK-NEXT: [[LOWER:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <2 x i32> -; CHECK-NEXT: [[UPPER:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> undef, <2 x i32> -; CHECK-NEXT: [[MERGED:%.*]] = shufflevector <2 x i32> [[UPPER]], <2 x i32> [[LOWER]], <4 x i32> -; CHECK-NEXT: ret <4 x i32> [[MERGED]] +; CHECK-NEXT: ret <4 x i32> [[X:%.*]] ; %lower = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> %upper = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> @@ -166,16 +180,24 @@ ret <4 x i32> %merged } +; This crosses lanes from the source op. + +define <4 x i32> @not_merge(<4 x i32> %x) { +; CHECK-LABEL: @not_merge( +; CHECK-NEXT: [[L:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <2 x i32> +; CHECK-NEXT: [[U:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> undef, <2 x i32> +; CHECK-NEXT: [[MERGED:%.*]] = shufflevector <2 x i32> [[U]], <2 x i32> [[L]], <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[MERGED]] +; + %l = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> + %u = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> + %merged = shufflevector <2 x i32> %u, <2 x i32> %l, <4 x i32> + ret <4 x i32> %merged +} + define <8 x double> @extract_and_concat(<8 x double> %x) { ; CHECK-LABEL: @extract_and_concat( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x double> [[X:%.*]], <8 x double> undef, <2 x i32> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <8 x double> [[X]], <8 x double> undef, <2 x i32> -; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x double> [[X]], <8 x double> undef, <2 x i32> -; CHECK-NEXT: [[S4:%.*]] = shufflevector <8 x double> [[X]], <8 x double> undef, <2 x i32> -; CHECK-NEXT: [[S5:%.*]] = shufflevector <2 x double> [[S1]], <2 x double> [[S2]], <4 x i32> -; CHECK-NEXT: [[S6:%.*]] = shufflevector <2 x double> [[S3]], <2 x double> [[S4]], <4 x i32> -; CHECK-NEXT: [[S7:%.*]] = shufflevector <4 x double> [[S5]], <4 x double> [[S6]], <8 x i32> -; CHECK-NEXT: ret <8 x double> [[S7]] +; CHECK-NEXT: ret <8 x double> [[X:%.*]] ; %s1 = shufflevector <8 x double> %x, <8 x double> undef, <2 x i32> %s2 = shufflevector <8 x double> %x, <8 x double> undef, <2 x i32> @@ -191,14 +213,7 @@ define <8 x i64> @PR30630(<8 x i64> %x) { ; CHECK-LABEL: @PR30630( -; CHECK-NEXT: [[S1:%.*]] = shufflevector <8 x i64> [[X:%.*]], <8 x i64> undef, <2 x i32> -; CHECK-NEXT: [[S2:%.*]] = shufflevector <8 x i64> [[X]], <8 x i64> undef, <2 x i32> -; CHECK-NEXT: [[S3:%.*]] = shufflevector <8 x i64> [[X]], <8 x i64> undef, <2 x i32> -; CHECK-NEXT: [[S4:%.*]] = shufflevector <8 x i64> [[X]], <8 x i64> undef, <2 x i32> -; CHECK-NEXT: [[S5:%.*]] = shufflevector <2 x i64> [[S1]], <2 x i64> [[S2]], <4 x i32> -; CHECK-NEXT: [[S6:%.*]] = shufflevector <2 x i64> [[S3]], <2 x i64> [[S4]], <4 x i32> -; CHECK-NEXT: [[S7:%.*]] = shufflevector <4 x i64> [[S5]], <4 x i64> [[S6]], <8 x i32> -; CHECK-NEXT: ret <8 x i64> [[S7]] +; CHECK-NEXT: ret <8 x i64> [[X:%.*]] ; %s1 = shufflevector <8 x i64> %x, <8 x i64> undef, <2 x i32> %s2 = shufflevector <8 x i64> %x, <8 x i64> undef, <2 x i32>