diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1274,6 +1274,30 @@ APInt RHSUndefElts(OpWidth, 0); simplifyAndSetOp(I, 1, RightDemanded, RHSUndefElts); + // If this shuffle does not change the vector length and the elements + // demanded by this shuffle are an identity mask, then this shuffle is + // unnecessary. + // + // We are assuming canonical form for the mask, so the source vector is + // operand 0 and operand 1 is not used. + // + // Note that if an element is demanded and this shuffle mask is undefined + // for that element, then the shuffle is not considered an identity + // operation. The shuffle prevents poison from the operand vector from + // leaking to the result by replacing poison with an undefined value. + if (VWidth == OpWidth) { + bool IsIdentityShuffle = true; + for (unsigned i = 0; i < VWidth; i++) { + unsigned MaskVal = Shuffle->getMaskValue(i); + if (DemandedElts[i] && i != MaskVal) { + IsIdentityShuffle = false; + break; + } + } + if (IsIdentityShuffle) + return Shuffle->getOperand(0); + } + bool NewUndefElts = false; unsigned LHSIdx = -1u, LHSValIdx = -1u; unsigned RHSIdx = -1u, RHSValIdx = -1u; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -1397,20 +1397,6 @@ llvm_unreachable("failed to reorder elements of vector instruction!"); } -static void recognizeIdentityMask(const SmallVectorImpl &Mask, - bool &isLHSID, bool &isRHSID) { - isLHSID = isRHSID = true; - - for (unsigned i = 0, e = Mask.size(); i != e; ++i) { - if (Mask[i] < 0) continue; // Ignore undef values. - // Is this an identity shuffle of the LHS value? - isLHSID &= (Mask[i] == (int)i); - - // Is this an identity shuffle of the RHS value? - isRHSID &= (Mask[i]-e == i); - } -} - // Returns true if the shuffle is extracting a contiguous range of values from // LHS, for example: // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ @@ -1955,16 +1941,6 @@ if (Instruction *I = foldIdentityPaddedShuffles(SVI)) return I; - if (VWidth == LHSWidth) { - // Analyze the shuffle, are the LHS or RHS and identity shuffles? - bool isLHSID, isRHSID; - recognizeIdentityMask(Mask, isLHSID, isRHSID); - - // Eliminate identity shuffles. - if (isLHSID) return replaceInstUsesWith(SVI, LHS); - if (isRHSID) return replaceInstUsesWith(SVI, RHS); - } - if (isa(RHS) && canEvaluateShuffled(LHS, Mask)) { Value *V = evaluateInDifferentElementOrder(LHS, Mask); return replaceInstUsesWith(SVI, V); diff --git a/llvm/test/Transforms/InstCombine/X86/x86-avx2.ll b/llvm/test/Transforms/InstCombine/X86/x86-avx2.ll --- a/llvm/test/Transforms/InstCombine/X86/x86-avx2.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-avx2.ll @@ -85,7 +85,8 @@ define <8 x i32> @elts_test_vpermd(<8 x i32> %a0, i32 %a1) { ; CHECK-LABEL: @elts_test_vpermd( -; CHECK-NEXT: ret <8 x i32> [[A0:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP1]] ; %1 = insertelement <8 x i32> , i32 %a1, i32 0 %2 = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %1) diff --git a/llvm/test/Transforms/InstCombine/X86/x86-avx512.ll b/llvm/test/Transforms/InstCombine/X86/x86-avx512.ll --- a/llvm/test/Transforms/InstCombine/X86/x86-avx512.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-avx512.ll @@ -921,8 +921,8 @@ define <4 x float> @test_mask_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { ; CHECK-LABEL: @test_mask_vfmadd_ss( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]]) ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 @@ -1063,8 +1063,8 @@ define <4 x float> @test_maskz_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { ; CHECK-LABEL: @test_maskz_vfmadd_ss( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]]) ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0 @@ -1202,8 +1202,8 @@ define <4 x float> @test_mask3_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { ; CHECK-LABEL: @test_mask3_vfmadd_ss( -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]]) ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1> @@ -1342,8 +1342,8 @@ define <4 x float> @test_mask3_vfmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) { ; CHECK-LABEL: @test_mask3_vfmsub_ss( -; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = fsub float -0.000000e+00, [[TMP3]] ; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP4]]) @@ -1544,7 +1544,7 @@ ; CHECK-LABEL: @test_mask3_vfnmsub_ss( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = fsub float -0.000000e+00, [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 ; CHECK-NEXT: [[TMP5:%.*]] = fsub float -0.000000e+00, [[TMP4]] ; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.fma.f32(float [[TMP2]], float [[TMP3]], float [[TMP5]]) diff --git a/llvm/test/Transforms/InstCombine/X86/x86-fma.ll b/llvm/test/Transforms/InstCombine/X86/x86-fma.ll --- a/llvm/test/Transforms/InstCombine/X86/x86-fma.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-fma.ll @@ -5,8 +5,8 @@ define <4 x float> @test_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) { ; CHECK-LABEL: @test_vfmadd_ss( ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0 -; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0 +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0 ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]]) ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x float> [[A]], float [[TMP4]], i64 0 ; CHECK-NEXT: ret <4 x float> [[TMP5]] diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pack.ll b/llvm/test/Transforms/InstCombine/X86/x86-pack.ll --- a/llvm/test/Transforms/InstCombine/X86/x86-pack.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-pack.ll @@ -221,7 +221,8 @@ define <8 x i16> @elts_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: @elts_packusdw_128( ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]]) -; CHECK-NEXT: ret <8 x i16> [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> +; CHECK-NEXT: ret <8 x i16> [[TMP2]] ; %1 = insertelement <4 x i32> %a0, i32 0, i32 0 %2 = insertelement <4 x i32> %a1, i32 0, i32 3 @@ -255,7 +256,8 @@ define <16 x i16> @elts_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-LABEL: @elts_packssdw_256( ; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[A0:%.*]], <8 x i32> undef) -; CHECK-NEXT: ret <16 x i16> [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i16> [[TMP1]], <16 x i16> undef, <16 x i32> +; CHECK-NEXT: ret <16 x i16> [[TMP2]] ; %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> %2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> @@ -303,7 +305,8 @@ define <32 x i16> @elts_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) { ; CHECK-LABEL: @elts_packssdw_512( ; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A0:%.*]], <16 x i32> undef) -; CHECK-NEXT: ret <32 x i16> [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i16> [[TMP1]], <32 x i16> undef, <32 x i32> +; CHECK-NEXT: ret <32 x i16> [[TMP2]] ; %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> %2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> diff --git a/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll b/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll --- a/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-pshufb.ll @@ -486,7 +486,8 @@ define <32 x i8> @demanded_elts_insertion_avx2(<32 x i8> %InVec, <32 x i8> %BaseMask, i8 %M0, i8 %M22) { ; CHECK-LABEL: @demanded_elts_insertion_avx2( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> [[INVEC:%.*]], <32 x i8> [[BASEMASK:%.*]]) -; CHECK-NEXT: ret <32 x i8> [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> undef, <32 x i32> +; CHECK-NEXT: ret <32 x i8> [[TMP2]] ; %1 = insertelement <32 x i8> %BaseMask, i8 %M0, i32 0 %2 = insertelement <32 x i8> %1, i8 %M22, i32 22 diff --git a/llvm/test/Transforms/InstCombine/X86/x86-sse4a.ll b/llvm/test/Transforms/InstCombine/X86/x86-sse4a.ll --- a/llvm/test/Transforms/InstCombine/X86/x86-sse4a.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-sse4a.ll @@ -24,7 +24,10 @@ define <2 x i64> @test_extrq_zero_arg1(<2 x i64> %x, <16 x i8> %y) { ; CHECK-LABEL: @test_extrq_zero_arg1( -; CHECK-NEXT: ret <2 x i64> [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[X:%.*]] to <16 x i8> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> zeroinitializer) nounwind ret <2 x i64> %1 @@ -57,7 +60,10 @@ define <2 x i64> @test_extrq_call_constexpr(<2 x i64> %x) { ; CHECK-LABEL: @test_extrq_call_constexpr( -; CHECK-NEXT: ret <2 x i64> [[X:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[X:%.*]] to <16 x i8> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; %1 = call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> bitcast (<2 x i64> to <16 x i8>)) ret <2 x i64> %1 @@ -235,7 +241,10 @@ ; second arg define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) { ; CHECK-LABEL: @testInsert64Bits( -; CHECK-NEXT: ret <2 x i64> [[I:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[I:%.*]] to <16 x i8> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 0) ret <2 x i64> %1 @@ -243,7 +252,10 @@ define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) { ; CHECK-LABEL: @testZeroLength( -; CHECK-NEXT: ret <2 x i64> [[I:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[I:%.*]] to <16 x i8> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[TMP3]] ; %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0) ret <2 x i64> %1 diff --git a/llvm/test/Transforms/InstCombine/X86/x86-vpermil.ll b/llvm/test/Transforms/InstCombine/X86/x86-vpermil.ll --- a/llvm/test/Transforms/InstCombine/X86/x86-vpermil.ll +++ b/llvm/test/Transforms/InstCombine/X86/x86-vpermil.ll @@ -225,7 +225,8 @@ define <4 x float> @elts_test_vpermilvar_ps(<4 x float> %a0, i32 %a1) { ; CHECK-LABEL: @elts_test_vpermilvar_ps( -; CHECK-NEXT: ret <4 x float> [[A0:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: ret <4 x float> [[TMP1]] ; %1 = insertelement <4 x i32> , i32 %a1, i32 3 %2 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %1) @@ -247,7 +248,8 @@ define <16 x float> @elts_test_vpermilvar_ps_512(<16 x float> %a0, <16 x i32> %a1, i32 %a2) { ; CHECK-LABEL: @elts_test_vpermilvar_ps_512( ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[A0:%.*]], <16 x i32> [[A1:%.*]]) -; CHECK-NEXT: ret <16 x float> [[TMP1]] +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> undef, <16 x i32> +; CHECK-NEXT: ret <16 x float> [[TMP2]] ; %1 = insertelement <16 x i32> %a1, i32 %a2, i32 0 %2 = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %a0, <16 x i32> %1) @@ -257,7 +259,8 @@ define <2 x double> @elts_test_vpermilvar_pd(<2 x double> %a0, i64 %a1) { ; CHECK-LABEL: @elts_test_vpermilvar_pd( -; CHECK-NEXT: ret <2 x double> [[A0:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A0:%.*]], <2 x double> undef, <2 x i32> +; CHECK-NEXT: ret <2 x double> [[TMP1]] ; %1 = insertelement <2 x i64> , i64 %a1, i32 1 %2 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %1) diff --git a/llvm/test/Transforms/InstCombine/shuffle_select.ll b/llvm/test/Transforms/InstCombine/shuffle_select.ll --- a/llvm/test/Transforms/InstCombine/shuffle_select.ll +++ b/llvm/test/Transforms/InstCombine/shuffle_select.ll @@ -1458,7 +1458,8 @@ define <4 x i32> @PR41419(<4 x i32> %v) { ; CHECK-LABEL: @PR41419( -; CHECK-NEXT: ret <4 x i32> [[V:%.*]] +; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[S]] ; %s = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> ret <4 x i32> %s diff --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll --- a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll +++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll @@ -175,7 +175,8 @@ ; CHECK-NEXT: [[OUT0:%.*]] = insertelement <4 x i32> undef, i32 [[A0:%.*]], i32 0 ; CHECK-NEXT: [[OUT01:%.*]] = insertelement <4 x i32> [[OUT0]], i32 [[A1:%.*]], i32 1 ; CHECK-NEXT: [[FOO:%.*]] = add <4 x i32> [[OUT01]], [[B:%.*]] -; CHECK-NEXT: ret <4 x i32> [[FOO]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[FOO]], <4 x i32> undef, <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[SHUFFLE]] ; %out0 = insertelement <4 x i32> undef, i32 %a0, i32 0 %out01 = insertelement <4 x i32> %out0, i32 %a1, i32 1 @@ -189,7 +190,8 @@ define <4 x float> @inselt_shuf_no_demand_bogus_insert_index_in_chain(float %a1, float %a2, float %a3, i32 %variable_index) { ; CHECK-LABEL: @inselt_shuf_no_demand_bogus_insert_index_in_chain( ; CHECK-NEXT: [[OUT12:%.*]] = insertelement <4 x float> undef, float [[A2:%.*]], i32 [[VARIABLE_INDEX:%.*]] -; CHECK-NEXT: ret <4 x float> [[OUT12]] +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x float> [[OUT12]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: ret <4 x float> [[SHUFFLE]] ; %out1 = insertelement <4 x float> undef, float %a1, i32 1 %out12 = insertelement <4 x float> %out1, float %a2, i32 %variable_index ; something unexpected @@ -214,7 +216,8 @@ define <3 x i8> @shuf_sub(<3 x i8> %x) { ; CHECK-LABEL: @shuf_sub( ; CHECK-NEXT: [[BO:%.*]] = sub <3 x i8> , [[X:%.*]] -; CHECK-NEXT: ret <3 x i8> [[BO]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> undef, <3 x i32> +; CHECK-NEXT: ret <3 x i8> [[R]] ; %bo = sub nuw <3 x i8> , %x %r = shufflevector <3 x i8> %bo, <3 x i8> undef, <3 x i32> diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll --- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll +++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll @@ -61,7 +61,8 @@ define <4 x float> @test7(<4 x float> %x) { ; CHECK-LABEL: @test7( -; CHECK-NEXT: ret <4 x float> [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> undef, <4 x i32> +; CHECK-NEXT: ret <4 x float> [[R]] ; %r = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> < i32 0, i32 1, i32 6, i32 7 > ret <4 x float> %r