Index: lib/Transforms/InstCombine/InstCombineVectorOps.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1309,6 +1309,9 @@
   // one instruction, which we know is safe. This is good for things like
   // turning: (splat(splat)) -> splat, or
   // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
+  // As an exception to the above, we always allow the case v1==v2, which lets
+  // us combine to `x=shuffle(v1,undef,newMask)` since targets should generally
+  // be good enough at single-vector shuffles.
   ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
   ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
   if (LHSShuffle)
@@ -1434,7 +1437,8 @@
 
   // If the result mask is equal to one of the original shuffle masks,
   // or is a splat, do the replacement.
-  if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
+  if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask ||
+      !newRHS) {
     SmallVector<Constant*, 16> Elts;
     for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
       if (newMask[i] < 0) {
Index: test/Transforms/InstCombine/vec_shuffle.ll
===================================================================
--- test/Transforms/InstCombine/vec_shuffle.ll
+++ test/Transforms/InstCombine/vec_shuffle.ll
@@ -463,3 +463,18 @@
   %1 = shufflevector <4 x i32*> %A, <4 x i32*> undef, <2 x i32> <i32 0, i32 1>
   ret <2 x i32*> %1
 }
+
+define <8 x double> @shuffle_after_wide_load(double* %ptr) #0 {
+  %a = bitcast double* %ptr to <8 x double>*
+  %1 = load <8 x double>, <8 x double>* %a, align 16
+; CHECK-LABEL: @shuffle_after_wide_load
+; CHECK-NOT: shufflevector
+  %2 = shufflevector <8 x double> %1, <8 x double> undef, <2 x i32> <i32 0, i32 1>
+  %3 = shufflevector <8 x double> %1, <8 x double> undef, <2 x i32> <i32 2, i32 3>
+  %4 = shufflevector <8 x double> %1, <8 x double> undef, <2 x i32> <i32 4, i32 5>
+  %5 = shufflevector <8 x double> %1, <8 x double> undef, <2 x i32> <i32 6, i32 7>
+  %s1 = shufflevector <2 x double> %2, <2 x double> %3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %s2 = shufflevector <2 x double> %4, <2 x double> %5, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %s3 = shufflevector <4 x double> %s1, <4 x double> %s2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <8 x double> %s3
+}