Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -13470,6 +13470,12 @@
     }
   }
   SDValue NewV1 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
+  // Ensure we didn't get back the shuffle we started with.
+  // FIXME: This is a hack to make up for some splat handling code in
+  // getVectorShuffle.
+  if (isa<ShuffleVectorSDNode>(NewV1) &&
+      cast<ShuffleVectorSDNode>(NewV1)->getMask() == Mask)
+    return SDValue();
 
   for (int Lane = 0; Lane != NumLanes; ++Lane) {
     int Src = LaneSrcs[Lane][1];
@@ -13481,6 +13487,12 @@
     }
   }
   SDValue NewV2 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
+  // Ensure we didn't get back the shuffle we started with.
+  // FIXME: This is a hack to make up for some splat handling code in
+  // getVectorShuffle.
+  if (isa<ShuffleVectorSDNode>(NewV2) &&
+      cast<ShuffleVectorSDNode>(NewV2)->getMask() == Mask)
+    return SDValue();
 
   for (int i = 0; i != Size; ++i) {
     NewMask[i] = RepeatMask[i % LaneSize];
Index: test/CodeGen/X86/pr38639.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/pr38639.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=btver2 | FileCheck %s
+
+define <8 x double> @test(<4 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovaps {{.*#+}} ymm2 =
+; CHECK-NEXT:    vextractf128 $1, %ymm2, %xmm4
+; CHECK-NEXT:    vblendps {{.*#+}} ymm1 = ymm0[0,1,2,3],ymm2[4,5,6,7]
+; CHECK-NEXT:    vblendps {{.*#+}} ymm3 = ymm0[0,1],ymm2[2,3],ymm0[4,5,6,7]
+; CHECK-NEXT:    vblendps {{.*#+}} xmm2 = xmm4[0,1],xmm2[2,3]
+; CHECK-NEXT:    vunpckhpd {{.*#+}} ymm1 = ymm3[1],ymm1[1],ymm3[3],ymm1[3]
+; CHECK-NEXT:    vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5,6,7]
+; CHECK-NEXT:    retq
+  %1 = shufflevector <4 x double> %a, <4 x double> , <8 x i32>
+  ret <8 x double> %1
+}
+
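
For context: the added checks compare the mask of whatever node DAG.getVectorShuffle hands back against the mask this lowering started from, and bail out (`return SDValue()`) on a match, since getVectorShuffle's splat handling can return a shuffle equivalent to the one being lowered and the code would presumably keep re-processing the same node. Below is a minimal standalone sketch of that guard as a toy model; the types and functions (ToyShuffle, buildShuffle, lowerByMergingLanes) are hypothetical stand-ins, not LLVM's SelectionDAG API.

// Toy model of the "did we get back the shuffle we started with?" guard.
// All names here are invented for illustration; none of this is LLVM API.
#include <cassert>
#include <optional>
#include <vector>

using Mask = std::vector<int>;

struct ToyShuffle {
  Mask ShuffleMask;
};

// Stand-in for DAG.getVectorShuffle: a node builder that may canonicalize the
// requested mask (e.g. splat handling) and return a shuffle equivalent to the
// one the caller was trying to replace.
static ToyShuffle buildShuffle(const Mask &Requested, const Mask &Canonical) {
  (void)Requested;
  // Pretend the builder's splat handling rewrote Requested into Canonical.
  return ToyShuffle{Canonical};
}

// Stand-in for the lowering step; std::nullopt plays the role of the patch's
// `return SDValue()` ("give up on this strategy").
static std::optional<ToyShuffle> lowerByMergingLanes(const Mask &OriginalMask,
                                                     const Mask &PerLaneMask) {
  ToyShuffle NewV = buildShuffle(PerLaneMask, OriginalMask);
  // The guard from the patch: if the builder gave us back the shuffle we
  // started with, lowering it again would just revisit the same node, so bail.
  if (NewV.ShuffleMask == OriginalMask)
    return std::nullopt;
  return NewV;
}

int main() {
  Mask Original = {0, 0, 2, 2};
  Mask PerLane = {0, 0, 0, 0};
  // The builder "canonicalizes" back to the original mask, so the lowering
  // must refuse rather than recurse on an identical shuffle.
  assert(!lowerByMergingLanes(Original, PerLane).has_value());
  return 0;
}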